1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2018 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
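/* Keeping the last-emitted values lets the output code skip re-printing an
   identical .arch or .fpu directive, e.g. when successive functions' target
   attributes resolve to the same architecture and FPU.  */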
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
191 tree, bool);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 const_tree type,
286 int misalignment,
287 bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 const vec_perm_indices &);
299
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 \f
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
343 NULL },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
346 NULL },
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
356
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
360 */
361 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
362 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute, NULL },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
367 NULL },
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute, NULL },
372 #endif
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry, NULL },
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call, NULL },
378 { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
380 \f
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
481 #endif
482
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
525
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
605
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631
632 #endif /* ARM_UNWIND_INFO */
633
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659
 660 /* The minimum is set such that the total size of the block
 661    for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
 662    divisible by eight, ensuring natural spacing of anchors.  */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
665
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
672
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
676
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
697
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
728
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
735
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
739
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
743
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
756
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
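/* With -mrestrict-it (the default for ARMv8-A, where IT blocks covering more
   than one instruction are deprecated) only a single conditional instruction
   is allowed per IT block; otherwise the architectural maximum is four.  */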
762
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
794
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 \f
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack;
814 static char * minipool_startobj;
815
816 /* The maximum number of insns skipped which
817 will be conditionalised if possible. */
818 static int max_insns_skipped = 5;
819
820 extern FILE * asm_out_file;
821
822 /* True if we are currently building a constant table. */
823 int making_const_table;
824
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827
828 /* The current tuning set. */
829 const struct tune_params *current_tune;
830
831 /* Which floating point hardware to schedule for. */
832 int arm_fpu_attr;
833
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837
838 /* The bits in this mask specify which instruction scheduling options should
839 be used. */
840 unsigned int tune_flags = 0;
841
842 /* The highest ARM architecture version supported by the
843 target. */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845
846 /* Active target architecture and tuning. */
847
848 struct arm_build_target arm_active_target;
849
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
852
853 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
854 int arm_arch4 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
857 int arm_arch4t = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
860 int arm_arch5t = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
863 int arm_arch5te = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
866 int arm_arch6 = 0;
867
868 /* Nonzero if this chip supports the ARM 6K extensions. */
869 int arm_arch6k = 0;
870
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
872 int arm_arch6kz = 0;
873
874 /* Nonzero if instructions present in ARMv6-M can be used. */
875 int arm_arch6m = 0;
876
877 /* Nonzero if this chip supports the ARM 7 extensions. */
878 int arm_arch7 = 0;
879
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae = 0;
882
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm = 0;
885
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
887 int arm_arch7em = 0;
888
889 /* Nonzero if instructions present in ARMv8 can be used. */
890 int arm_arch8 = 0;
891
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
893 int arm_arch8_1 = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
896 int arm_arch8_2 = 0;
897
898 /* Nonzero if this chip supports the FP16 instructions extension of ARM
899 Architecture 8.2. */
900 int arm_fp16_inst = 0;
901
902 /* Nonzero if this chip can benefit from load scheduling. */
903 int arm_ld_sched = 0;
904
905 /* Nonzero if this chip is a StrongARM. */
906 int arm_tune_strongarm = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX technology. */
909 int arm_arch_iwmmxt = 0;
910
911 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
912 int arm_arch_iwmmxt2 = 0;
913
914 /* Nonzero if this chip is an XScale. */
915 int arm_arch_xscale = 0;
916
 917 /* Nonzero if tuning for XScale.  */
918 int arm_tune_xscale = 0;
919
920 /* Nonzero if we want to tune for stores that access the write-buffer.
921 This typically means an ARM6 or ARM7 with MMU or MPU. */
922 int arm_tune_wbuf = 0;
923
924 /* Nonzero if tuning for Cortex-A9. */
925 int arm_tune_cortex_a9 = 0;
926
927 /* Nonzero if we should define __THUMB_INTERWORK__ in the
928 preprocessor.
929 XXX This is a bit of a hack, it's intended to help work around
930 problems in GLD which doesn't understand that armv5t code is
931 interworking clean. */
932 int arm_cpp_interwork = 0;
933
934 /* Nonzero if chip supports Thumb 1. */
935 int arm_arch_thumb1;
936
937 /* Nonzero if chip supports Thumb 2. */
938 int arm_arch_thumb2;
939
940 /* Nonzero if chip supports integer division instruction. */
941 int arm_arch_arm_hwdiv;
942 int arm_arch_thumb_hwdiv;
943
944 /* Nonzero if chip disallows volatile memory access in IT block. */
945 int arm_arch_no_volatile_ce;
946
 947 /* Nonzero if we should use Neon to handle 64-bit operations rather
 948    than core registers.  */
949 int prefer_neon_for_64bits = 0;
950
951 /* Nonzero if we shouldn't use literal pools. */
952 bool arm_disable_literal_pool = false;
953
954 /* The register number to be used for the PIC offset register. */
955 unsigned arm_pic_register = INVALID_REGNUM;
956
957 enum arm_pcs arm_pcs_default;
958
959 /* For an explanation of these variables, see final_prescan_insn below. */
960 int arm_ccfsm_state;
961 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
962 enum arm_cond_code arm_current_cc;
963
964 rtx arm_target_insn;
965 int arm_target_label;
966 /* The number of conditionally executed insns, including the current insn. */
967 int arm_condexec_count = 0;
968 /* A bitmask specifying the patterns for the IT block.
969 Zero means do not output an IT block before this insn. */
970 int arm_condexec_mask = 0;
971 /* The number of bits used in arm_condexec_mask. */
972 int arm_condexec_masklen = 0;
973
974 /* Nonzero if chip supports the ARMv8 CRC instructions. */
975 int arm_arch_crc = 0;
976
977 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
978 int arm_arch_dotprod = 0;
979
980 /* Nonzero if chip supports the ARMv8-M security extensions. */
981 int arm_arch_cmse = 0;
982
983 /* Nonzero if the core has a very small, high-latency, multiply unit. */
984 int arm_m_profile_small_mul = 0;
985
986 /* The condition codes of the ARM, and the inverse function. */
987 static const char * const arm_condition_codes[] =
988 {
989 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
990 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
991 };
992
993 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
994 int arm_regs_in_sequence[] =
995 {
996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
997 };
998
999 #define ARM_LSL_NAME "lsl"
1000 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1001
1002 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1003 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1004 | (1 << PIC_OFFSET_TABLE_REGNUM)))
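/* In other words, the Thumb-2 work registers are drawn from the low
   registers r0-r7, excluding whichever of the hard frame pointer, stack
   pointer, program counter and PIC register fall inside that range.  */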
1005 \f
1006 /* Initialization code. */
1007
1008 struct cpu_tune
1009 {
1010 enum processor_type scheduler;
1011 unsigned int tune_flags;
1012 const struct tune_params *tune;
1013 };
1014
1015 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1016 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1017 { \
1018 num_slots, \
1019 l1_size, \
1020 l1_line_size \
1021 }
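/* These macros fill in the prefetch tuning fields ({num_slots, l1_cache_size,
   l1_cache_line_size}) of the tune_params structures defined later in this
   file; -1 means the value is not known, in which case the generic parameter
   defaults are left in place.  */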
1022
1023 /* arm generic vectorizer costs. */
1024 static const
1025 struct cpu_vec_costs arm_default_vec_cost = {
1026 1, /* scalar_stmt_cost. */
1027   1,					/* scalar_load_cost.  */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 1, /* vec_unalign_load_cost. */
1034 1, /* vec_unalign_store_cost. */
1035 1, /* vec_store_cost. */
1036 3, /* cond_taken_branch_cost. */
1037 1, /* cond_not_taken_branch_cost. */
1038 };
1039
1040 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1041 #include "aarch-cost-tables.h"
1042
1043
1044
1045 const struct cpu_cost_table cortexa9_extra_costs =
1046 {
1047 /* ALU */
1048 {
1049 0, /* arith. */
1050 0, /* logical. */
1051 0, /* shift. */
1052 COSTS_N_INSNS (1), /* shift_reg. */
1053 COSTS_N_INSNS (1), /* arith_shift. */
1054 COSTS_N_INSNS (2), /* arith_shift_reg. */
1055 0, /* log_shift. */
1056 COSTS_N_INSNS (1), /* log_shift_reg. */
1057 COSTS_N_INSNS (1), /* extend. */
1058 COSTS_N_INSNS (2), /* extend_arith. */
1059 COSTS_N_INSNS (1), /* bfi. */
1060 COSTS_N_INSNS (1), /* bfx. */
1061 0, /* clz. */
1062 0, /* rev. */
1063 0, /* non_exec. */
1064 true /* non_exec_costs_exec. */
1065 },
1066 {
1067 /* MULT SImode */
1068 {
1069 COSTS_N_INSNS (3), /* simple. */
1070 COSTS_N_INSNS (3), /* flag_setting. */
1071 COSTS_N_INSNS (2), /* extend. */
1072 COSTS_N_INSNS (3), /* add. */
1073 COSTS_N_INSNS (2), /* extend_add. */
1074 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1075 },
1076 /* MULT DImode */
1077 {
1078 0, /* simple (N/A). */
1079 0, /* flag_setting (N/A). */
1080 COSTS_N_INSNS (4), /* extend. */
1081 0, /* add (N/A). */
1082 COSTS_N_INSNS (4), /* extend_add. */
1083 0 /* idiv (N/A). */
1084 }
1085 },
1086 /* LD/ST */
1087 {
1088 COSTS_N_INSNS (2), /* load. */
1089 COSTS_N_INSNS (2), /* load_sign_extend. */
1090 COSTS_N_INSNS (2), /* ldrd. */
1091 COSTS_N_INSNS (2), /* ldm_1st. */
1092 1, /* ldm_regs_per_insn_1st. */
1093 2, /* ldm_regs_per_insn_subsequent. */
1094 COSTS_N_INSNS (5), /* loadf. */
1095 COSTS_N_INSNS (5), /* loadd. */
1096 COSTS_N_INSNS (1), /* load_unaligned. */
1097 COSTS_N_INSNS (2), /* store. */
1098 COSTS_N_INSNS (2), /* strd. */
1099 COSTS_N_INSNS (2), /* stm_1st. */
1100 1, /* stm_regs_per_insn_1st. */
1101 2, /* stm_regs_per_insn_subsequent. */
1102 COSTS_N_INSNS (1), /* storef. */
1103 COSTS_N_INSNS (1), /* stored. */
1104 COSTS_N_INSNS (1), /* store_unaligned. */
1105 COSTS_N_INSNS (1), /* loadv. */
1106 COSTS_N_INSNS (1) /* storev. */
1107 },
1108 {
1109 /* FP SFmode */
1110 {
1111 COSTS_N_INSNS (14), /* div. */
1112 COSTS_N_INSNS (4), /* mult. */
1113 COSTS_N_INSNS (7), /* mult_addsub. */
1114 COSTS_N_INSNS (30), /* fma. */
1115 COSTS_N_INSNS (3), /* addsub. */
1116 COSTS_N_INSNS (1), /* fpconst. */
1117 COSTS_N_INSNS (1), /* neg. */
1118 COSTS_N_INSNS (3), /* compare. */
1119 COSTS_N_INSNS (3), /* widen. */
1120 COSTS_N_INSNS (3), /* narrow. */
1121 COSTS_N_INSNS (3), /* toint. */
1122 COSTS_N_INSNS (3), /* fromint. */
1123 COSTS_N_INSNS (3) /* roundint. */
1124 },
1125 /* FP DFmode */
1126 {
1127 COSTS_N_INSNS (24), /* div. */
1128 COSTS_N_INSNS (5), /* mult. */
1129 COSTS_N_INSNS (8), /* mult_addsub. */
1130 COSTS_N_INSNS (30), /* fma. */
1131 COSTS_N_INSNS (3), /* addsub. */
1132 COSTS_N_INSNS (1), /* fpconst. */
1133 COSTS_N_INSNS (1), /* neg. */
1134 COSTS_N_INSNS (3), /* compare. */
1135 COSTS_N_INSNS (3), /* widen. */
1136 COSTS_N_INSNS (3), /* narrow. */
1137 COSTS_N_INSNS (3), /* toint. */
1138 COSTS_N_INSNS (3), /* fromint. */
1139 COSTS_N_INSNS (3) /* roundint. */
1140 }
1141 },
1142 /* Vector */
1143 {
1144 COSTS_N_INSNS (1) /* alu. */
1145 }
1146 };
1147
1148 const struct cpu_cost_table cortexa8_extra_costs =
1149 {
1150 /* ALU */
1151 {
1152 0, /* arith. */
1153 0, /* logical. */
1154 COSTS_N_INSNS (1), /* shift. */
1155 0, /* shift_reg. */
1156 COSTS_N_INSNS (1), /* arith_shift. */
1157 0, /* arith_shift_reg. */
1158 COSTS_N_INSNS (1), /* log_shift. */
1159 0, /* log_shift_reg. */
1160 0, /* extend. */
1161 0, /* extend_arith. */
1162 0, /* bfi. */
1163 0, /* bfx. */
1164 0, /* clz. */
1165 0, /* rev. */
1166 0, /* non_exec. */
1167 true /* non_exec_costs_exec. */
1168 },
1169 {
1170 /* MULT SImode */
1171 {
1172 COSTS_N_INSNS (1), /* simple. */
1173 COSTS_N_INSNS (1), /* flag_setting. */
1174 COSTS_N_INSNS (1), /* extend. */
1175 COSTS_N_INSNS (1), /* add. */
1176 COSTS_N_INSNS (1), /* extend_add. */
1177 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1178 },
1179 /* MULT DImode */
1180 {
1181 0, /* simple (N/A). */
1182 0, /* flag_setting (N/A). */
1183 COSTS_N_INSNS (2), /* extend. */
1184 0, /* add (N/A). */
1185 COSTS_N_INSNS (2), /* extend_add. */
1186 0 /* idiv (N/A). */
1187 }
1188 },
1189 /* LD/ST */
1190 {
1191 COSTS_N_INSNS (1), /* load. */
1192 COSTS_N_INSNS (1), /* load_sign_extend. */
1193 COSTS_N_INSNS (1), /* ldrd. */
1194 COSTS_N_INSNS (1), /* ldm_1st. */
1195 1, /* ldm_regs_per_insn_1st. */
1196 2, /* ldm_regs_per_insn_subsequent. */
1197 COSTS_N_INSNS (1), /* loadf. */
1198 COSTS_N_INSNS (1), /* loadd. */
1199 COSTS_N_INSNS (1), /* load_unaligned. */
1200 COSTS_N_INSNS (1), /* store. */
1201 COSTS_N_INSNS (1), /* strd. */
1202 COSTS_N_INSNS (1), /* stm_1st. */
1203 1, /* stm_regs_per_insn_1st. */
1204 2, /* stm_regs_per_insn_subsequent. */
1205 COSTS_N_INSNS (1), /* storef. */
1206 COSTS_N_INSNS (1), /* stored. */
1207 COSTS_N_INSNS (1), /* store_unaligned. */
1208 COSTS_N_INSNS (1), /* loadv. */
1209 COSTS_N_INSNS (1) /* storev. */
1210 },
1211 {
1212 /* FP SFmode */
1213 {
1214 COSTS_N_INSNS (36), /* div. */
1215 COSTS_N_INSNS (11), /* mult. */
1216 COSTS_N_INSNS (20), /* mult_addsub. */
1217 COSTS_N_INSNS (30), /* fma. */
1218 COSTS_N_INSNS (9), /* addsub. */
1219 COSTS_N_INSNS (3), /* fpconst. */
1220 COSTS_N_INSNS (3), /* neg. */
1221 COSTS_N_INSNS (6), /* compare. */
1222 COSTS_N_INSNS (4), /* widen. */
1223 COSTS_N_INSNS (4), /* narrow. */
1224 COSTS_N_INSNS (8), /* toint. */
1225 COSTS_N_INSNS (8), /* fromint. */
1226 COSTS_N_INSNS (8) /* roundint. */
1227 },
1228 /* FP DFmode */
1229 {
1230 COSTS_N_INSNS (64), /* div. */
1231 COSTS_N_INSNS (16), /* mult. */
1232 COSTS_N_INSNS (25), /* mult_addsub. */
1233 COSTS_N_INSNS (30), /* fma. */
1234 COSTS_N_INSNS (9), /* addsub. */
1235 COSTS_N_INSNS (3), /* fpconst. */
1236 COSTS_N_INSNS (3), /* neg. */
1237 COSTS_N_INSNS (6), /* compare. */
1238 COSTS_N_INSNS (6), /* widen. */
1239 COSTS_N_INSNS (6), /* narrow. */
1240 COSTS_N_INSNS (8), /* toint. */
1241 COSTS_N_INSNS (8), /* fromint. */
1242 COSTS_N_INSNS (8) /* roundint. */
1243 }
1244 },
1245 /* Vector */
1246 {
1247 COSTS_N_INSNS (1) /* alu. */
1248 }
1249 };
1250
1251 const struct cpu_cost_table cortexa5_extra_costs =
1252 {
1253 /* ALU */
1254 {
1255 0, /* arith. */
1256 0, /* logical. */
1257 COSTS_N_INSNS (1), /* shift. */
1258 COSTS_N_INSNS (1), /* shift_reg. */
1259 COSTS_N_INSNS (1), /* arith_shift. */
1260 COSTS_N_INSNS (1), /* arith_shift_reg. */
1261 COSTS_N_INSNS (1), /* log_shift. */
1262 COSTS_N_INSNS (1), /* log_shift_reg. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* extend_arith. */
1265 COSTS_N_INSNS (1), /* bfi. */
1266 COSTS_N_INSNS (1), /* bfx. */
1267 COSTS_N_INSNS (1), /* clz. */
1268 COSTS_N_INSNS (1), /* rev. */
1269 0, /* non_exec. */
1270 true /* non_exec_costs_exec. */
1271 },
1272
1273 {
1274 /* MULT SImode */
1275 {
1276 0, /* simple. */
1277 COSTS_N_INSNS (1), /* flag_setting. */
1278 COSTS_N_INSNS (1), /* extend. */
1279 COSTS_N_INSNS (1), /* add. */
1280 COSTS_N_INSNS (1), /* extend_add. */
1281 COSTS_N_INSNS (7) /* idiv. */
1282 },
1283 /* MULT DImode */
1284 {
1285 0, /* simple (N/A). */
1286 0, /* flag_setting (N/A). */
1287 COSTS_N_INSNS (1), /* extend. */
1288 0, /* add. */
1289 COSTS_N_INSNS (2), /* extend_add. */
1290 0 /* idiv (N/A). */
1291 }
1292 },
1293 /* LD/ST */
1294 {
1295 COSTS_N_INSNS (1), /* load. */
1296 COSTS_N_INSNS (1), /* load_sign_extend. */
1297 COSTS_N_INSNS (6), /* ldrd. */
1298 COSTS_N_INSNS (1), /* ldm_1st. */
1299 1, /* ldm_regs_per_insn_1st. */
1300 2, /* ldm_regs_per_insn_subsequent. */
1301 COSTS_N_INSNS (2), /* loadf. */
1302 COSTS_N_INSNS (4), /* loadd. */
1303 COSTS_N_INSNS (1), /* load_unaligned. */
1304 COSTS_N_INSNS (1), /* store. */
1305 COSTS_N_INSNS (3), /* strd. */
1306 COSTS_N_INSNS (1), /* stm_1st. */
1307 1, /* stm_regs_per_insn_1st. */
1308 2, /* stm_regs_per_insn_subsequent. */
1309 COSTS_N_INSNS (2), /* storef. */
1310 COSTS_N_INSNS (2), /* stored. */
1311 COSTS_N_INSNS (1), /* store_unaligned. */
1312 COSTS_N_INSNS (1), /* loadv. */
1313 COSTS_N_INSNS (1) /* storev. */
1314 },
1315 {
1316 /* FP SFmode */
1317 {
1318 COSTS_N_INSNS (15), /* div. */
1319 COSTS_N_INSNS (3), /* mult. */
1320 COSTS_N_INSNS (7), /* mult_addsub. */
1321 COSTS_N_INSNS (7), /* fma. */
1322 COSTS_N_INSNS (3), /* addsub. */
1323 COSTS_N_INSNS (3), /* fpconst. */
1324 COSTS_N_INSNS (3), /* neg. */
1325 COSTS_N_INSNS (3), /* compare. */
1326 COSTS_N_INSNS (3), /* widen. */
1327 COSTS_N_INSNS (3), /* narrow. */
1328 COSTS_N_INSNS (3), /* toint. */
1329 COSTS_N_INSNS (3), /* fromint. */
1330 COSTS_N_INSNS (3) /* roundint. */
1331 },
1332 /* FP DFmode */
1333 {
1334 COSTS_N_INSNS (30), /* div. */
1335 COSTS_N_INSNS (6), /* mult. */
1336 COSTS_N_INSNS (10), /* mult_addsub. */
1337 COSTS_N_INSNS (7), /* fma. */
1338 COSTS_N_INSNS (3), /* addsub. */
1339 COSTS_N_INSNS (3), /* fpconst. */
1340 COSTS_N_INSNS (3), /* neg. */
1341 COSTS_N_INSNS (3), /* compare. */
1342 COSTS_N_INSNS (3), /* widen. */
1343 COSTS_N_INSNS (3), /* narrow. */
1344 COSTS_N_INSNS (3), /* toint. */
1345 COSTS_N_INSNS (3), /* fromint. */
1346 COSTS_N_INSNS (3) /* roundint. */
1347 }
1348 },
1349 /* Vector */
1350 {
1351 COSTS_N_INSNS (1) /* alu. */
1352 }
1353 };
1354
1355
1356 const struct cpu_cost_table cortexa7_extra_costs =
1357 {
1358 /* ALU */
1359 {
1360 0, /* arith. */
1361 0, /* logical. */
1362 COSTS_N_INSNS (1), /* shift. */
1363 COSTS_N_INSNS (1), /* shift_reg. */
1364 COSTS_N_INSNS (1), /* arith_shift. */
1365 COSTS_N_INSNS (1), /* arith_shift_reg. */
1366 COSTS_N_INSNS (1), /* log_shift. */
1367 COSTS_N_INSNS (1), /* log_shift_reg. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* extend_arith. */
1370 COSTS_N_INSNS (1), /* bfi. */
1371 COSTS_N_INSNS (1), /* bfx. */
1372 COSTS_N_INSNS (1), /* clz. */
1373 COSTS_N_INSNS (1), /* rev. */
1374 0, /* non_exec. */
1375 true /* non_exec_costs_exec. */
1376 },
1377
1378 {
1379 /* MULT SImode */
1380 {
1381 0, /* simple. */
1382 COSTS_N_INSNS (1), /* flag_setting. */
1383 COSTS_N_INSNS (1), /* extend. */
1384 COSTS_N_INSNS (1), /* add. */
1385 COSTS_N_INSNS (1), /* extend_add. */
1386 COSTS_N_INSNS (7) /* idiv. */
1387 },
1388 /* MULT DImode */
1389 {
1390 0, /* simple (N/A). */
1391 0, /* flag_setting (N/A). */
1392 COSTS_N_INSNS (1), /* extend. */
1393 0, /* add. */
1394 COSTS_N_INSNS (2), /* extend_add. */
1395 0 /* idiv (N/A). */
1396 }
1397 },
1398 /* LD/ST */
1399 {
1400 COSTS_N_INSNS (1), /* load. */
1401 COSTS_N_INSNS (1), /* load_sign_extend. */
1402 COSTS_N_INSNS (3), /* ldrd. */
1403 COSTS_N_INSNS (1), /* ldm_1st. */
1404 1, /* ldm_regs_per_insn_1st. */
1405 2, /* ldm_regs_per_insn_subsequent. */
1406 COSTS_N_INSNS (2), /* loadf. */
1407 COSTS_N_INSNS (2), /* loadd. */
1408 COSTS_N_INSNS (1), /* load_unaligned. */
1409 COSTS_N_INSNS (1), /* store. */
1410 COSTS_N_INSNS (3), /* strd. */
1411 COSTS_N_INSNS (1), /* stm_1st. */
1412 1, /* stm_regs_per_insn_1st. */
1413 2, /* stm_regs_per_insn_subsequent. */
1414 COSTS_N_INSNS (2), /* storef. */
1415 COSTS_N_INSNS (2), /* stored. */
1416 COSTS_N_INSNS (1), /* store_unaligned. */
1417 COSTS_N_INSNS (1), /* loadv. */
1418 COSTS_N_INSNS (1) /* storev. */
1419 },
1420 {
1421 /* FP SFmode */
1422 {
1423 COSTS_N_INSNS (15), /* div. */
1424 COSTS_N_INSNS (3), /* mult. */
1425 COSTS_N_INSNS (7), /* mult_addsub. */
1426 COSTS_N_INSNS (7), /* fma. */
1427 COSTS_N_INSNS (3), /* addsub. */
1428 COSTS_N_INSNS (3), /* fpconst. */
1429 COSTS_N_INSNS (3), /* neg. */
1430 COSTS_N_INSNS (3), /* compare. */
1431 COSTS_N_INSNS (3), /* widen. */
1432 COSTS_N_INSNS (3), /* narrow. */
1433 COSTS_N_INSNS (3), /* toint. */
1434 COSTS_N_INSNS (3), /* fromint. */
1435 COSTS_N_INSNS (3) /* roundint. */
1436 },
1437 /* FP DFmode */
1438 {
1439 COSTS_N_INSNS (30), /* div. */
1440 COSTS_N_INSNS (6), /* mult. */
1441 COSTS_N_INSNS (10), /* mult_addsub. */
1442 COSTS_N_INSNS (7), /* fma. */
1443 COSTS_N_INSNS (3), /* addsub. */
1444 COSTS_N_INSNS (3), /* fpconst. */
1445 COSTS_N_INSNS (3), /* neg. */
1446 COSTS_N_INSNS (3), /* compare. */
1447 COSTS_N_INSNS (3), /* widen. */
1448 COSTS_N_INSNS (3), /* narrow. */
1449 COSTS_N_INSNS (3), /* toint. */
1450 COSTS_N_INSNS (3), /* fromint. */
1451 COSTS_N_INSNS (3) /* roundint. */
1452 }
1453 },
1454 /* Vector */
1455 {
1456 COSTS_N_INSNS (1) /* alu. */
1457 }
1458 };
1459
1460 const struct cpu_cost_table cortexa12_extra_costs =
1461 {
1462 /* ALU */
1463 {
1464 0, /* arith. */
1465 0, /* logical. */
1466 0, /* shift. */
1467 COSTS_N_INSNS (1), /* shift_reg. */
1468 COSTS_N_INSNS (1), /* arith_shift. */
1469 COSTS_N_INSNS (1), /* arith_shift_reg. */
1470 COSTS_N_INSNS (1), /* log_shift. */
1471 COSTS_N_INSNS (1), /* log_shift_reg. */
1472 0, /* extend. */
1473 COSTS_N_INSNS (1), /* extend_arith. */
1474 0, /* bfi. */
1475 COSTS_N_INSNS (1), /* bfx. */
1476 COSTS_N_INSNS (1), /* clz. */
1477 COSTS_N_INSNS (1), /* rev. */
1478 0, /* non_exec. */
1479 true /* non_exec_costs_exec. */
1480 },
1481 /* MULT SImode */
1482 {
1483 {
1484 COSTS_N_INSNS (2), /* simple. */
1485 COSTS_N_INSNS (3), /* flag_setting. */
1486 COSTS_N_INSNS (2), /* extend. */
1487 COSTS_N_INSNS (3), /* add. */
1488 COSTS_N_INSNS (2), /* extend_add. */
1489 COSTS_N_INSNS (18) /* idiv. */
1490 },
1491 /* MULT DImode */
1492 {
1493 0, /* simple (N/A). */
1494 0, /* flag_setting (N/A). */
1495 COSTS_N_INSNS (3), /* extend. */
1496 0, /* add (N/A). */
1497 COSTS_N_INSNS (3), /* extend_add. */
1498 0 /* idiv (N/A). */
1499 }
1500 },
1501 /* LD/ST */
1502 {
1503 COSTS_N_INSNS (3), /* load. */
1504 COSTS_N_INSNS (3), /* load_sign_extend. */
1505 COSTS_N_INSNS (3), /* ldrd. */
1506 COSTS_N_INSNS (3), /* ldm_1st. */
1507 1, /* ldm_regs_per_insn_1st. */
1508 2, /* ldm_regs_per_insn_subsequent. */
1509 COSTS_N_INSNS (3), /* loadf. */
1510 COSTS_N_INSNS (3), /* loadd. */
1511 0, /* load_unaligned. */
1512 0, /* store. */
1513 0, /* strd. */
1514 0, /* stm_1st. */
1515 1, /* stm_regs_per_insn_1st. */
1516 2, /* stm_regs_per_insn_subsequent. */
1517 COSTS_N_INSNS (2), /* storef. */
1518 COSTS_N_INSNS (2), /* stored. */
1519 0, /* store_unaligned. */
1520 COSTS_N_INSNS (1), /* loadv. */
1521 COSTS_N_INSNS (1) /* storev. */
1522 },
1523 {
1524 /* FP SFmode */
1525 {
1526 COSTS_N_INSNS (17), /* div. */
1527 COSTS_N_INSNS (4), /* mult. */
1528 COSTS_N_INSNS (8), /* mult_addsub. */
1529 COSTS_N_INSNS (8), /* fma. */
1530 COSTS_N_INSNS (4), /* addsub. */
1531 COSTS_N_INSNS (2), /* fpconst. */
1532 COSTS_N_INSNS (2), /* neg. */
1533 COSTS_N_INSNS (2), /* compare. */
1534 COSTS_N_INSNS (4), /* widen. */
1535 COSTS_N_INSNS (4), /* narrow. */
1536 COSTS_N_INSNS (4), /* toint. */
1537 COSTS_N_INSNS (4), /* fromint. */
1538 COSTS_N_INSNS (4) /* roundint. */
1539 },
1540 /* FP DFmode */
1541 {
1542 COSTS_N_INSNS (31), /* div. */
1543 COSTS_N_INSNS (4), /* mult. */
1544 COSTS_N_INSNS (8), /* mult_addsub. */
1545 COSTS_N_INSNS (8), /* fma. */
1546 COSTS_N_INSNS (4), /* addsub. */
1547 COSTS_N_INSNS (2), /* fpconst. */
1548 COSTS_N_INSNS (2), /* neg. */
1549 COSTS_N_INSNS (2), /* compare. */
1550 COSTS_N_INSNS (4), /* widen. */
1551 COSTS_N_INSNS (4), /* narrow. */
1552 COSTS_N_INSNS (4), /* toint. */
1553 COSTS_N_INSNS (4), /* fromint. */
1554 COSTS_N_INSNS (4) /* roundint. */
1555 }
1556 },
1557 /* Vector */
1558 {
1559 COSTS_N_INSNS (1) /* alu. */
1560 }
1561 };
1562
1563 const struct cpu_cost_table cortexa15_extra_costs =
1564 {
1565 /* ALU */
1566 {
1567 0, /* arith. */
1568 0, /* logical. */
1569 0, /* shift. */
1570 0, /* shift_reg. */
1571 COSTS_N_INSNS (1), /* arith_shift. */
1572 COSTS_N_INSNS (1), /* arith_shift_reg. */
1573 COSTS_N_INSNS (1), /* log_shift. */
1574 COSTS_N_INSNS (1), /* log_shift_reg. */
1575 0, /* extend. */
1576 COSTS_N_INSNS (1), /* extend_arith. */
1577 COSTS_N_INSNS (1), /* bfi. */
1578 0, /* bfx. */
1579 0, /* clz. */
1580 0, /* rev. */
1581 0, /* non_exec. */
1582 true /* non_exec_costs_exec. */
1583 },
1584 /* MULT SImode */
1585 {
1586 {
1587 COSTS_N_INSNS (2), /* simple. */
1588 COSTS_N_INSNS (3), /* flag_setting. */
1589 COSTS_N_INSNS (2), /* extend. */
1590 COSTS_N_INSNS (2), /* add. */
1591 COSTS_N_INSNS (2), /* extend_add. */
1592 COSTS_N_INSNS (18) /* idiv. */
1593 },
1594 /* MULT DImode */
1595 {
1596 0, /* simple (N/A). */
1597 0, /* flag_setting (N/A). */
1598 COSTS_N_INSNS (3), /* extend. */
1599 0, /* add (N/A). */
1600 COSTS_N_INSNS (3), /* extend_add. */
1601 0 /* idiv (N/A). */
1602 }
1603 },
1604 /* LD/ST */
1605 {
1606 COSTS_N_INSNS (3), /* load. */
1607 COSTS_N_INSNS (3), /* load_sign_extend. */
1608 COSTS_N_INSNS (3), /* ldrd. */
1609 COSTS_N_INSNS (4), /* ldm_1st. */
1610 1, /* ldm_regs_per_insn_1st. */
1611 2, /* ldm_regs_per_insn_subsequent. */
1612 COSTS_N_INSNS (4), /* loadf. */
1613 COSTS_N_INSNS (4), /* loadd. */
1614 0, /* load_unaligned. */
1615 0, /* store. */
1616 0, /* strd. */
1617 COSTS_N_INSNS (1), /* stm_1st. */
1618 1, /* stm_regs_per_insn_1st. */
1619 2, /* stm_regs_per_insn_subsequent. */
1620 0, /* storef. */
1621 0, /* stored. */
1622 0, /* store_unaligned. */
1623 COSTS_N_INSNS (1), /* loadv. */
1624 COSTS_N_INSNS (1) /* storev. */
1625 },
1626 {
1627 /* FP SFmode */
1628 {
1629 COSTS_N_INSNS (17), /* div. */
1630 COSTS_N_INSNS (4), /* mult. */
1631 COSTS_N_INSNS (8), /* mult_addsub. */
1632 COSTS_N_INSNS (8), /* fma. */
1633 COSTS_N_INSNS (4), /* addsub. */
1634 COSTS_N_INSNS (2), /* fpconst. */
1635 COSTS_N_INSNS (2), /* neg. */
1636 COSTS_N_INSNS (5), /* compare. */
1637 COSTS_N_INSNS (4), /* widen. */
1638 COSTS_N_INSNS (4), /* narrow. */
1639 COSTS_N_INSNS (4), /* toint. */
1640 COSTS_N_INSNS (4), /* fromint. */
1641 COSTS_N_INSNS (4) /* roundint. */
1642 },
1643 /* FP DFmode */
1644 {
1645 COSTS_N_INSNS (31), /* div. */
1646 COSTS_N_INSNS (4), /* mult. */
1647 COSTS_N_INSNS (8), /* mult_addsub. */
1648 COSTS_N_INSNS (8), /* fma. */
1649 COSTS_N_INSNS (4), /* addsub. */
1650 COSTS_N_INSNS (2), /* fpconst. */
1651 COSTS_N_INSNS (2), /* neg. */
1652 COSTS_N_INSNS (2), /* compare. */
1653 COSTS_N_INSNS (4), /* widen. */
1654 COSTS_N_INSNS (4), /* narrow. */
1655 COSTS_N_INSNS (4), /* toint. */
1656 COSTS_N_INSNS (4), /* fromint. */
1657 COSTS_N_INSNS (4) /* roundint. */
1658 }
1659 },
1660 /* Vector */
1661 {
1662 COSTS_N_INSNS (1) /* alu. */
1663 }
1664 };
1665
1666 const struct cpu_cost_table v7m_extra_costs =
1667 {
1668 /* ALU */
1669 {
1670 0, /* arith. */
1671 0, /* logical. */
1672 0, /* shift. */
1673 0, /* shift_reg. */
1674 0, /* arith_shift. */
1675 COSTS_N_INSNS (1), /* arith_shift_reg. */
1676 0, /* log_shift. */
1677 COSTS_N_INSNS (1), /* log_shift_reg. */
1678 0, /* extend. */
1679 COSTS_N_INSNS (1), /* extend_arith. */
1680 0, /* bfi. */
1681 0, /* bfx. */
1682 0, /* clz. */
1683 0, /* rev. */
1684 COSTS_N_INSNS (1), /* non_exec. */
1685 false /* non_exec_costs_exec. */
1686 },
1687 {
1688 /* MULT SImode */
1689 {
1690 COSTS_N_INSNS (1), /* simple. */
1691 COSTS_N_INSNS (1), /* flag_setting. */
1692 COSTS_N_INSNS (2), /* extend. */
1693 COSTS_N_INSNS (1), /* add. */
1694 COSTS_N_INSNS (3), /* extend_add. */
1695 COSTS_N_INSNS (8) /* idiv. */
1696 },
1697 /* MULT DImode */
1698 {
1699 0, /* simple (N/A). */
1700 0, /* flag_setting (N/A). */
1701 COSTS_N_INSNS (2), /* extend. */
1702 0, /* add (N/A). */
1703 COSTS_N_INSNS (3), /* extend_add. */
1704 0 /* idiv (N/A). */
1705 }
1706 },
1707 /* LD/ST */
1708 {
1709 COSTS_N_INSNS (2), /* load. */
1710 0, /* load_sign_extend. */
1711 COSTS_N_INSNS (3), /* ldrd. */
1712 COSTS_N_INSNS (2), /* ldm_1st. */
1713 1, /* ldm_regs_per_insn_1st. */
1714 1, /* ldm_regs_per_insn_subsequent. */
1715 COSTS_N_INSNS (2), /* loadf. */
1716 COSTS_N_INSNS (3), /* loadd. */
1717 COSTS_N_INSNS (1), /* load_unaligned. */
1718 COSTS_N_INSNS (2), /* store. */
1719 COSTS_N_INSNS (3), /* strd. */
1720 COSTS_N_INSNS (2), /* stm_1st. */
1721 1, /* stm_regs_per_insn_1st. */
1722 1, /* stm_regs_per_insn_subsequent. */
1723 COSTS_N_INSNS (2), /* storef. */
1724 COSTS_N_INSNS (3), /* stored. */
1725 COSTS_N_INSNS (1), /* store_unaligned. */
1726 COSTS_N_INSNS (1), /* loadv. */
1727 COSTS_N_INSNS (1) /* storev. */
1728 },
1729 {
1730 /* FP SFmode */
1731 {
1732 COSTS_N_INSNS (7), /* div. */
1733 COSTS_N_INSNS (2), /* mult. */
1734 COSTS_N_INSNS (5), /* mult_addsub. */
1735 COSTS_N_INSNS (3), /* fma. */
1736 COSTS_N_INSNS (1), /* addsub. */
1737 0, /* fpconst. */
1738 0, /* neg. */
1739 0, /* compare. */
1740 0, /* widen. */
1741 0, /* narrow. */
1742 0, /* toint. */
1743 0, /* fromint. */
1744 0 /* roundint. */
1745 },
1746 /* FP DFmode */
1747 {
1748 COSTS_N_INSNS (15), /* div. */
1749 COSTS_N_INSNS (5), /* mult. */
1750 COSTS_N_INSNS (7), /* mult_addsub. */
1751 COSTS_N_INSNS (7), /* fma. */
1752 COSTS_N_INSNS (3), /* addsub. */
1753 0, /* fpconst. */
1754 0, /* neg. */
1755 0, /* compare. */
1756 0, /* widen. */
1757 0, /* narrow. */
1758 0, /* toint. */
1759 0, /* fromint. */
1760 0 /* roundint. */
1761 }
1762 },
1763 /* Vector */
1764 {
1765 COSTS_N_INSNS (1) /* alu. */
1766 }
1767 };
1768
1769 const struct addr_mode_cost_table generic_addr_mode_costs =
1770 {
1771 /* int. */
1772 {
1773 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1774 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1775 COSTS_N_INSNS (0) /* AMO_WB. */
1776 },
1777 /* float. */
1778 {
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1782 },
1783 /* vector. */
1784 {
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1788 }
1789 };
1790
1791 const struct tune_params arm_slowmul_tune =
1792 {
1793 &generic_extra_costs, /* Insn extra costs. */
1794 &generic_addr_mode_costs, /* Addressing mode costs. */
1795 NULL, /* Sched adj cost. */
1796 arm_default_branch_cost,
1797 &arm_default_vec_cost,
1798 3, /* Constant limit. */
1799 5, /* Max cond insns. */
1800 8, /* Memset max inline. */
1801 1, /* Issue rate. */
1802 ARM_PREFETCH_NOT_BENEFICIAL,
1803 tune_params::PREF_CONST_POOL_TRUE,
1804 tune_params::PREF_LDRD_FALSE,
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1806 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1807 tune_params::DISPARAGE_FLAGS_NEITHER,
1808 tune_params::PREF_NEON_64_FALSE,
1809 tune_params::PREF_NEON_STRINGOPS_FALSE,
1810 tune_params::FUSE_NOTHING,
1811 tune_params::SCHED_AUTOPREF_OFF
1812 };
1813
1814 const struct tune_params arm_fastmul_tune =
1815 {
1816 &generic_extra_costs, /* Insn extra costs. */
1817 &generic_addr_mode_costs, /* Addressing mode costs. */
1818 NULL, /* Sched adj cost. */
1819 arm_default_branch_cost,
1820 &arm_default_vec_cost,
1821 1, /* Constant limit. */
1822 5, /* Max cond insns. */
1823 8, /* Memset max inline. */
1824 1, /* Issue rate. */
1825 ARM_PREFETCH_NOT_BENEFICIAL,
1826 tune_params::PREF_CONST_POOL_TRUE,
1827 tune_params::PREF_LDRD_FALSE,
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1830 tune_params::DISPARAGE_FLAGS_NEITHER,
1831 tune_params::PREF_NEON_64_FALSE,
1832 tune_params::PREF_NEON_STRINGOPS_FALSE,
1833 tune_params::FUSE_NOTHING,
1834 tune_params::SCHED_AUTOPREF_OFF
1835 };
1836
1837 /* StrongARM has early execution of branches, so a sequence that is worth
1838 skipping is shorter. Set max_insns_skipped to a lower value. */
1839
1840 const struct tune_params arm_strongarm_tune =
1841 {
1842 &generic_extra_costs, /* Insn extra costs. */
1843 &generic_addr_mode_costs, /* Addressing mode costs. */
1844 NULL, /* Sched adj cost. */
1845 arm_default_branch_cost,
1846 &arm_default_vec_cost,
1847 1, /* Constant limit. */
1848 3, /* Max cond insns. */
1849 8, /* Memset max inline. */
1850 1, /* Issue rate. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 tune_params::PREF_CONST_POOL_TRUE,
1853 tune_params::PREF_LDRD_FALSE,
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1855 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1856 tune_params::DISPARAGE_FLAGS_NEITHER,
1857 tune_params::PREF_NEON_64_FALSE,
1858 tune_params::PREF_NEON_STRINGOPS_FALSE,
1859 tune_params::FUSE_NOTHING,
1860 tune_params::SCHED_AUTOPREF_OFF
1861 };
1862
1863 const struct tune_params arm_xscale_tune =
1864 {
1865 &generic_extra_costs, /* Insn extra costs. */
1866 &generic_addr_mode_costs, /* Addressing mode costs. */
1867 xscale_sched_adjust_cost,
1868 arm_default_branch_cost,
1869 &arm_default_vec_cost,
1870 2, /* Constant limit. */
1871 3, /* Max cond insns. */
1872 8, /* Memset max inline. */
1873 1, /* Issue rate. */
1874 ARM_PREFETCH_NOT_BENEFICIAL,
1875 tune_params::PREF_CONST_POOL_TRUE,
1876 tune_params::PREF_LDRD_FALSE,
1877 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1878 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1879 tune_params::DISPARAGE_FLAGS_NEITHER,
1880 tune_params::PREF_NEON_64_FALSE,
1881 tune_params::PREF_NEON_STRINGOPS_FALSE,
1882 tune_params::FUSE_NOTHING,
1883 tune_params::SCHED_AUTOPREF_OFF
1884 };
1885
1886 const struct tune_params arm_9e_tune =
1887 {
1888 &generic_extra_costs, /* Insn extra costs. */
1889 &generic_addr_mode_costs, /* Addressing mode costs. */
1890 NULL, /* Sched adj cost. */
1891 arm_default_branch_cost,
1892 &arm_default_vec_cost,
1893 1, /* Constant limit. */
1894 5, /* Max cond insns. */
1895 8, /* Memset max inline. */
1896 1, /* Issue rate. */
1897 ARM_PREFETCH_NOT_BENEFICIAL,
1898 tune_params::PREF_CONST_POOL_TRUE,
1899 tune_params::PREF_LDRD_FALSE,
1900 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1901 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1902 tune_params::DISPARAGE_FLAGS_NEITHER,
1903 tune_params::PREF_NEON_64_FALSE,
1904 tune_params::PREF_NEON_STRINGOPS_FALSE,
1905 tune_params::FUSE_NOTHING,
1906 tune_params::SCHED_AUTOPREF_OFF
1907 };
1908
1909 const struct tune_params arm_marvell_pj4_tune =
1910 {
1911 &generic_extra_costs, /* Insn extra costs. */
1912 &generic_addr_mode_costs, /* Addressing mode costs. */
1913 NULL, /* Sched adj cost. */
1914 arm_default_branch_cost,
1915 &arm_default_vec_cost,
1916 1, /* Constant limit. */
1917 5, /* Max cond insns. */
1918 8, /* Memset max inline. */
1919 2, /* Issue rate. */
1920 ARM_PREFETCH_NOT_BENEFICIAL,
1921 tune_params::PREF_CONST_POOL_TRUE,
1922 tune_params::PREF_LDRD_FALSE,
1923 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1925 tune_params::DISPARAGE_FLAGS_NEITHER,
1926 tune_params::PREF_NEON_64_FALSE,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 const struct tune_params arm_v6t2_tune =
1933 {
1934 &generic_extra_costs, /* Insn extra costs. */
1935 &generic_addr_mode_costs, /* Addressing mode costs. */
1936 NULL, /* Sched adj cost. */
1937 arm_default_branch_cost,
1938 &arm_default_vec_cost,
1939 1, /* Constant limit. */
1940 5, /* Max cond insns. */
1941 8, /* Memset max inline. */
1942 1, /* Issue rate. */
1943 ARM_PREFETCH_NOT_BENEFICIAL,
1944 tune_params::PREF_CONST_POOL_FALSE,
1945 tune_params::PREF_LDRD_FALSE,
1946 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1947 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1948 tune_params::DISPARAGE_FLAGS_NEITHER,
1949 tune_params::PREF_NEON_64_FALSE,
1950 tune_params::PREF_NEON_STRINGOPS_FALSE,
1951 tune_params::FUSE_NOTHING,
1952 tune_params::SCHED_AUTOPREF_OFF
1953 };
1954
1955
1956 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1957 const struct tune_params arm_cortex_tune =
1958 {
1959 &generic_extra_costs,
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 NULL, /* Sched adj cost. */
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 1, /* Constant limit. */
1965 5, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 2, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_FALSE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_64_FALSE,
1975 tune_params::PREF_NEON_STRINGOPS_FALSE,
1976 tune_params::FUSE_NOTHING,
1977 tune_params::SCHED_AUTOPREF_OFF
1978 };
1979
1980 const struct tune_params arm_cortex_a8_tune =
1981 {
1982 &cortexa8_extra_costs,
1983 &generic_addr_mode_costs, /* Addressing mode costs. */
1984 NULL, /* Sched adj cost. */
1985 arm_default_branch_cost,
1986 &arm_default_vec_cost,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL,
1992 tune_params::PREF_CONST_POOL_FALSE,
1993 tune_params::PREF_LDRD_FALSE,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER,
1997 tune_params::PREF_NEON_64_FALSE,
1998 tune_params::PREF_NEON_STRINGOPS_TRUE,
1999 tune_params::FUSE_NOTHING,
2000 tune_params::SCHED_AUTOPREF_OFF
2001 };
2002
2003 const struct tune_params arm_cortex_a7_tune =
2004 {
2005 &cortexa7_extra_costs,
2006 &generic_addr_mode_costs, /* Addressing mode costs. */
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 tune_params::FUSE_NOTHING,
2023 tune_params::SCHED_AUTOPREF_OFF
2024 };
2025
2026 const struct tune_params arm_cortex_a15_tune =
2027 {
2028 &cortexa15_extra_costs,
2029 &generic_addr_mode_costs, /* Addressing mode costs. */
2030 NULL, /* Sched adj cost. */
2031 arm_default_branch_cost,
2032 &arm_default_vec_cost,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL,
2038 tune_params::PREF_CONST_POOL_FALSE,
2039 tune_params::PREF_LDRD_TRUE,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL,
2043 tune_params::PREF_NEON_64_FALSE,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_FULL
2047 };
2048
2049 const struct tune_params arm_cortex_a35_tune =
2050 {
2051 &cortexa53_extra_costs,
2052 &generic_addr_mode_costs, /* Addressing mode costs. */
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 1, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_FALSE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER,
2066 tune_params::PREF_NEON_64_FALSE,
2067 tune_params::PREF_NEON_STRINGOPS_TRUE,
2068 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2069 tune_params::SCHED_AUTOPREF_OFF
2070 };
2071
2072 const struct tune_params arm_cortex_a53_tune =
2073 {
2074 &cortexa53_extra_costs,
2075 &generic_addr_mode_costs, /* Addressing mode costs. */
2076 NULL, /* Sched adj cost. */
2077 arm_default_branch_cost,
2078 &arm_default_vec_cost,
2079 1, /* Constant limit. */
2080 5, /* Max cond insns. */
2081 8, /* Memset max inline. */
2082 2, /* Issue rate. */
2083 ARM_PREFETCH_NOT_BENEFICIAL,
2084 tune_params::PREF_CONST_POOL_FALSE,
2085 tune_params::PREF_LDRD_FALSE,
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2087 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2088 tune_params::DISPARAGE_FLAGS_NEITHER,
2089 tune_params::PREF_NEON_64_FALSE,
2090 tune_params::PREF_NEON_STRINGOPS_TRUE,
2091 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2092 tune_params::SCHED_AUTOPREF_OFF
2093 };
2094
2095 const struct tune_params arm_cortex_a57_tune =
2096 {
2097 &cortexa57_extra_costs,
2098 &generic_addr_mode_costs, /* Addressing mode costs. */
2099 NULL, /* Sched adj cost. */
2100 arm_default_branch_cost,
2101 &arm_default_vec_cost,
2102 1, /* Constant limit. */
2103 2, /* Max cond insns. */
2104 8, /* Memset max inline. */
2105 3, /* Issue rate. */
2106 ARM_PREFETCH_NOT_BENEFICIAL,
2107 tune_params::PREF_CONST_POOL_FALSE,
2108 tune_params::PREF_LDRD_TRUE,
2109 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2111 tune_params::DISPARAGE_FLAGS_ALL,
2112 tune_params::PREF_NEON_64_FALSE,
2113 tune_params::PREF_NEON_STRINGOPS_TRUE,
2114 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2115 tune_params::SCHED_AUTOPREF_FULL
2116 };
2117
2118 const struct tune_params arm_exynosm1_tune =
2119 {
2120 &exynosm1_extra_costs,
2121 &generic_addr_mode_costs, /* Addressing mode costs. */
2122 NULL, /* Sched adj cost. */
2123 arm_default_branch_cost,
2124 &arm_default_vec_cost,
2125 1, /* Constant limit. */
2126 2, /* Max cond insns. */
2127 8, /* Memset max inline. */
2128 3, /* Issue rate. */
2129 ARM_PREFETCH_NOT_BENEFICIAL,
2130 tune_params::PREF_CONST_POOL_FALSE,
2131 tune_params::PREF_LDRD_TRUE,
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2134 tune_params::DISPARAGE_FLAGS_ALL,
2135 tune_params::PREF_NEON_64_FALSE,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE,
2137 tune_params::FUSE_NOTHING,
2138 tune_params::SCHED_AUTOPREF_OFF
2139 };
2140
2141 const struct tune_params arm_xgene1_tune =
2142 {
2143 &xgene1_extra_costs,
2144 &generic_addr_mode_costs, /* Addressing mode costs. */
2145 NULL, /* Sched adj cost. */
2146 arm_default_branch_cost,
2147 &arm_default_vec_cost,
2148 1, /* Constant limit. */
2149 2, /* Max cond insns. */
2150 32, /* Memset max inline. */
2151 4, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL,
2153 tune_params::PREF_CONST_POOL_FALSE,
2154 tune_params::PREF_LDRD_TRUE,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_ALL,
2158 tune_params::PREF_NEON_64_FALSE,
2159 tune_params::PREF_NEON_STRINGOPS_FALSE,
2160 tune_params::FUSE_NOTHING,
2161 tune_params::SCHED_AUTOPREF_OFF
2162 };
2163
2164 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2165 less appealing. Set max_insns_skipped to a low value. */
2166
2167 const struct tune_params arm_cortex_a5_tune =
2168 {
2169 &cortexa5_extra_costs,
2170 &generic_addr_mode_costs, /* Addressing mode costs. */
2171 NULL, /* Sched adj cost. */
2172 arm_cortex_a5_branch_cost,
2173 &arm_default_vec_cost,
2174 1, /* Constant limit. */
2175 1, /* Max cond insns. */
2176 8, /* Memset max inline. */
2177 2, /* Issue rate. */
2178 ARM_PREFETCH_NOT_BENEFICIAL,
2179 tune_params::PREF_CONST_POOL_FALSE,
2180 tune_params::PREF_LDRD_FALSE,
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2182 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2183 tune_params::DISPARAGE_FLAGS_NEITHER,
2184 tune_params::PREF_NEON_64_FALSE,
2185 tune_params::PREF_NEON_STRINGOPS_TRUE,
2186 tune_params::FUSE_NOTHING,
2187 tune_params::SCHED_AUTOPREF_OFF
2188 };
2189
2190 const struct tune_params arm_cortex_a9_tune =
2191 {
2192 &cortexa9_extra_costs,
2193 &generic_addr_mode_costs, /* Addressing mode costs. */
2194 cortex_a9_sched_adjust_cost,
2195 arm_default_branch_cost,
2196 &arm_default_vec_cost,
2197 1, /* Constant limit. */
2198 5, /* Max cond insns. */
2199 8, /* Memset max inline. */
2200 2, /* Issue rate. */
2201 ARM_PREFETCH_BENEFICIAL(4,32,32),
2202 tune_params::PREF_CONST_POOL_FALSE,
2203 tune_params::PREF_LDRD_FALSE,
2204 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2206 tune_params::DISPARAGE_FLAGS_NEITHER,
2207 tune_params::PREF_NEON_64_FALSE,
2208 tune_params::PREF_NEON_STRINGOPS_FALSE,
2209 tune_params::FUSE_NOTHING,
2210 tune_params::SCHED_AUTOPREF_OFF
2211 };
2212
2213 const struct tune_params arm_cortex_a12_tune =
2214 {
2215 &cortexa12_extra_costs,
2216 &generic_addr_mode_costs, /* Addressing mode costs. */
2217 NULL, /* Sched adj cost. */
2218 arm_default_branch_cost,
2219 &arm_default_vec_cost, /* Vectorizer costs. */
2220 1, /* Constant limit. */
2221 2, /* Max cond insns. */
2222 8, /* Memset max inline. */
2223 2, /* Issue rate. */
2224 ARM_PREFETCH_NOT_BENEFICIAL,
2225 tune_params::PREF_CONST_POOL_FALSE,
2226 tune_params::PREF_LDRD_TRUE,
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2228 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2229 tune_params::DISPARAGE_FLAGS_ALL,
2230 tune_params::PREF_NEON_64_FALSE,
2231 tune_params::PREF_NEON_STRINGOPS_TRUE,
2232 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2233 tune_params::SCHED_AUTOPREF_OFF
2234 };
2235
2236 const struct tune_params arm_cortex_a73_tune =
2237 {
2238 &cortexa57_extra_costs,
2239 &generic_addr_mode_costs, /* Addressing mode costs. */
2240 NULL, /* Sched adj cost. */
2241 arm_default_branch_cost,
2242 &arm_default_vec_cost, /* Vectorizer costs. */
2243 1, /* Constant limit. */
2244 2, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL,
2248 tune_params::PREF_CONST_POOL_FALSE,
2249 tune_params::PREF_LDRD_TRUE,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_ALL,
2253 tune_params::PREF_NEON_64_FALSE,
2254 tune_params::PREF_NEON_STRINGOPS_TRUE,
2255 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2256 tune_params::SCHED_AUTOPREF_FULL
2257 };
2258
2259 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2260 single cycle to execute, so materialising a full constant that way costs two
2261 cycles. An LDR from the constant pool likewise takes two cycles, but mildly
2262 increases pipelining opportunity (consecutive loads/stores can be pipelined
2263 together, saving one cycle), and may also improve icache utilisation. Hence
2264 we prefer the constant pool for such processors. */
2265
2266 const struct tune_params arm_v7m_tune =
2267 {
2268 &v7m_extra_costs,
2269 &generic_addr_mode_costs, /* Addressing mode costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_cortex_m_branch_cost,
2272 &arm_default_vec_cost,
2273 1, /* Constant limit. */
2274 2, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_TRUE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 /* Cortex-M7 tuning. */
2290
2291 const struct tune_params arm_cortex_m7_tune =
2292 {
2293 &v7m_extra_costs,
2294 &generic_addr_mode_costs, /* Addressing mode costs. */
2295 NULL, /* Sched adj cost. */
2296 arm_cortex_m7_branch_cost,
2297 &arm_default_vec_cost,
2298 0, /* Constant limit. */
2299 1, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2315 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2316 cortex-m23. */
2317 const struct tune_params arm_v6m_tune =
2318 {
2319 &generic_extra_costs, /* Insn extra costs. */
2320 &generic_addr_mode_costs, /* Addressing mode costs. */
2321 NULL, /* Sched adj cost. */
2322 arm_default_branch_cost,
2323 &arm_default_vec_cost, /* Vectorizer costs. */
2324 1, /* Constant limit. */
2325 5, /* Max cond insns. */
2326 8, /* Memset max inline. */
2327 1, /* Issue rate. */
2328 ARM_PREFETCH_NOT_BENEFICIAL,
2329 tune_params::PREF_CONST_POOL_FALSE,
2330 tune_params::PREF_LDRD_FALSE,
2331 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2332 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2333 tune_params::DISPARAGE_FLAGS_NEITHER,
2334 tune_params::PREF_NEON_64_FALSE,
2335 tune_params::PREF_NEON_STRINGOPS_FALSE,
2336 tune_params::FUSE_NOTHING,
2337 tune_params::SCHED_AUTOPREF_OFF
2338 };
2339
2340 const struct tune_params arm_fa726te_tune =
2341 {
2342 &generic_extra_costs, /* Insn extra costs. */
2343 &generic_addr_mode_costs, /* Addressing mode costs. */
2344 fa726te_sched_adjust_cost,
2345 arm_default_branch_cost,
2346 &arm_default_vec_cost,
2347 1, /* Constant limit. */
2348 5, /* Max cond insns. */
2349 8, /* Memset max inline. */
2350 2, /* Issue rate. */
2351 ARM_PREFETCH_NOT_BENEFICIAL,
2352 tune_params::PREF_CONST_POOL_TRUE,
2353 tune_params::PREF_LDRD_FALSE,
2354 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2355 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2356 tune_params::DISPARAGE_FLAGS_NEITHER,
2357 tune_params::PREF_NEON_64_FALSE,
2358 tune_params::PREF_NEON_STRINGOPS_FALSE,
2359 tune_params::FUSE_NOTHING,
2360 tune_params::SCHED_AUTOPREF_OFF
2361 };
2362
2363 /* Auto-generated CPU, FPU and architecture tables. */
2364 #include "arm-cpu-data.h"
2365
2366 /* The name of the preprocessor macro to define for this architecture. PROFILE
2367 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2368 is thus chosen to be big enough to hold the longest architecture name. */
2369
2370 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
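/* For illustration (an assumed example, not taken from this file): with
   -march=armv8-a the PROFILE placeholder is rewritten so that the buffer
   ends up holding "__ARM_ARCH_8A__". */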
2371
2372 /* Supported TLS relocations. */
2373
2374 enum tls_reloc {
2375 TLS_GD32,
2376 TLS_LDM32,
2377 TLS_LDO32,
2378 TLS_IE32,
2379 TLS_LE32,
2380 TLS_DESCSEQ /* GNU scheme */
2381 };
2382
2383 /* The maximum number of insns to be used when loading a constant. */
2384 inline static int
2385 arm_constant_limit (bool size_p)
2386 {
2387 return size_p ? 1 : current_tune->constant_limit;
2388 }
2389
2390 /* Emit an insn that's a simple single-set. Both the operands must be known
2391 to be valid. */
2392 inline static rtx_insn *
2393 emit_set_insn (rtx x, rtx y)
2394 {
2395 return emit_insn (gen_rtx_SET (x, y));
2396 }
2397
2398 /* Return the number of bits set in VALUE. */
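/* The loop below relies on the classic VALUE &= VALUE - 1 step, which clears
   one set bit per iteration; e.g. 0b101100 -> 0b101000 -> 0b100000 -> 0,
   giving a count of 3 (worked example added for illustration). */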
2399 static unsigned
2400 bit_count (unsigned long value)
2401 {
2402 unsigned long count = 0;
2403
2404 while (value)
2405 {
2406 count++;
2407 value &= value - 1; /* Clear the least-significant set bit. */
2408 }
2409
2410 return count;
2411 }
2412
2413 /* Return the number of bits set in BMAP. */
2414 static unsigned
2415 bitmap_popcount (const sbitmap bmap)
2416 {
2417 unsigned int count = 0;
2418 unsigned int n = 0;
2419 sbitmap_iterator sbi;
2420
2421 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2422 count++;
2423 return count;
2424 }
2425
2426 typedef struct
2427 {
2428 machine_mode mode;
2429 const char *name;
2430 } arm_fixed_mode_set;
2431
2432 /* A small helper for setting fixed-point library libfuncs. */
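/* For example (an illustrative call, mirroring the loops in arm_init_libfuncs):
   arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3)
   registers the libcall name "__gnu_addqq3" built by the sprintf below. */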
2433
2434 static void
2435 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2436 const char *funcname, const char *modename,
2437 int num_suffix)
2438 {
2439 char buffer[50];
2440
2441 if (num_suffix == 0)
2442 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2443 else
2444 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2445
2446 set_optab_libfunc (optable, mode, buffer);
2447 }
2448
2449 static void
2450 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2451 machine_mode from, const char *funcname,
2452 const char *toname, const char *fromname)
2453 {
2454 char buffer[50];
2455 const char *maybe_suffix_2 = "";
2456
2457 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
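/* E.g. a QQmode -> HQmode conversion (both signed fract modes) takes the
   suffix and is named "__gnu_fractqqhq2", whereas QQmode -> SImode does not
   and is named "__gnu_fractqqsi" (example names shown for illustration). */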
2458 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2459 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2460 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2461 maybe_suffix_2 = "2";
2462
2463 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2464 maybe_suffix_2);
2465
2466 set_conv_libfunc (optable, to, from, buffer);
2467 }
2468
2469 /* Set up library functions unique to ARM. */
2470
2471 static void
2472 arm_init_libfuncs (void)
2473 {
2474 /* For Linux, we have access to kernel support for atomic operations. */
2475 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2476 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2477
2478 /* There are no special library functions unless we are using the
2479 ARM BPABI. */
2480 if (!TARGET_BPABI)
2481 return;
2482
2483 /* The functions below are described in Section 4 of the "Run-Time
2484 ABI for the ARM architecture", Version 1.0. */
2485
2486 /* Double-precision floating-point arithmetic. Table 2. */
2487 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2488 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2489 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2490 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2491 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2492
2493 /* Double-precision comparisons. Table 3. */
2494 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2495 set_optab_libfunc (ne_optab, DFmode, NULL);
2496 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2497 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2498 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2499 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2500 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2501
2502 /* Single-precision floating-point arithmetic. Table 4. */
2503 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2504 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2505 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2506 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2507 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2508
2509 /* Single-precision comparisons. Table 5. */
2510 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2511 set_optab_libfunc (ne_optab, SFmode, NULL);
2512 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2513 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2514 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2515 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2516 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2517
2518 /* Floating-point to integer conversions. Table 6. */
2519 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2520 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2521 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2522 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2523 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2524 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2525 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2526 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2527
2528 /* Conversions between floating types. Table 7. */
2529 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2530 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2531
2532 /* Integer to floating-point conversions. Table 8. */
2533 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2534 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2535 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2536 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2537 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2538 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2539 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2540 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2541
2542 /* Long long. Table 9. */
2543 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2544 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2545 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2546 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2547 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2548 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2549 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2550 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2551
2552 /* Integer (32/32->32) division. \S 4.3.1. */
2553 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2554 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2555
2556 /* The divmod functions are designed so that they can be used for
2557 plain division, even though they return both the quotient and the
2558 remainder. The quotient is returned in the usual location (i.e.,
2559 r0 for SImode, {r0, r1} for DImode), just as would be expected
2560 for an ordinary division routine. Because the AAPCS calling
2561 conventions specify that all of { r0, r1, r2, r3 } are
2562 call-clobbered registers, there is no need to tell the compiler
2563 explicitly that those registers are clobbered by these
2564 routines. */
2565 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2566 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2567
2568 /* For SImode division the ABI provides div-without-mod routines,
2569 which are faster. */
2570 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2571 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2572
2573 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2574 divmod libcalls instead. */
2575 set_optab_libfunc (smod_optab, DImode, NULL);
2576 set_optab_libfunc (umod_optab, DImode, NULL);
2577 set_optab_libfunc (smod_optab, SImode, NULL);
2578 set_optab_libfunc (umod_optab, SImode, NULL);
2579
2580 /* Half-precision float operations. The compiler handles all operations
2581 with NULL libfuncs by converting to SFmode. */
2582 switch (arm_fp16_format)
2583 {
2584 case ARM_FP16_FORMAT_IEEE:
2585 case ARM_FP16_FORMAT_ALTERNATIVE:
2586
2587 /* Conversions. */
2588 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2589 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2590 ? "__gnu_f2h_ieee"
2591 : "__gnu_f2h_alternative"));
2592 set_conv_libfunc (sext_optab, SFmode, HFmode,
2593 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2594 ? "__gnu_h2f_ieee"
2595 : "__gnu_h2f_alternative"));
2596
2597 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2598 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2599 ? "__gnu_d2h_ieee"
2600 : "__gnu_d2h_alternative"));
2601
2602 /* Arithmetic. */
2603 set_optab_libfunc (add_optab, HFmode, NULL);
2604 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2605 set_optab_libfunc (smul_optab, HFmode, NULL);
2606 set_optab_libfunc (neg_optab, HFmode, NULL);
2607 set_optab_libfunc (sub_optab, HFmode, NULL);
2608
2609 /* Comparisons. */
2610 set_optab_libfunc (eq_optab, HFmode, NULL);
2611 set_optab_libfunc (ne_optab, HFmode, NULL);
2612 set_optab_libfunc (lt_optab, HFmode, NULL);
2613 set_optab_libfunc (le_optab, HFmode, NULL);
2614 set_optab_libfunc (ge_optab, HFmode, NULL);
2615 set_optab_libfunc (gt_optab, HFmode, NULL);
2616 set_optab_libfunc (unord_optab, HFmode, NULL);
2617 break;
2618
2619 default:
2620 break;
2621 }
2622
2623 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2624 {
2625 const arm_fixed_mode_set fixed_arith_modes[] =
2626 {
2627 { E_QQmode, "qq" },
2628 { E_UQQmode, "uqq" },
2629 { E_HQmode, "hq" },
2630 { E_UHQmode, "uhq" },
2631 { E_SQmode, "sq" },
2632 { E_USQmode, "usq" },
2633 { E_DQmode, "dq" },
2634 { E_UDQmode, "udq" },
2635 { E_TQmode, "tq" },
2636 { E_UTQmode, "utq" },
2637 { E_HAmode, "ha" },
2638 { E_UHAmode, "uha" },
2639 { E_SAmode, "sa" },
2640 { E_USAmode, "usa" },
2641 { E_DAmode, "da" },
2642 { E_UDAmode, "uda" },
2643 { E_TAmode, "ta" },
2644 { E_UTAmode, "uta" }
2645 };
2646 const arm_fixed_mode_set fixed_conv_modes[] =
2647 {
2648 { E_QQmode, "qq" },
2649 { E_UQQmode, "uqq" },
2650 { E_HQmode, "hq" },
2651 { E_UHQmode, "uhq" },
2652 { E_SQmode, "sq" },
2653 { E_USQmode, "usq" },
2654 { E_DQmode, "dq" },
2655 { E_UDQmode, "udq" },
2656 { E_TQmode, "tq" },
2657 { E_UTQmode, "utq" },
2658 { E_HAmode, "ha" },
2659 { E_UHAmode, "uha" },
2660 { E_SAmode, "sa" },
2661 { E_USAmode, "usa" },
2662 { E_DAmode, "da" },
2663 { E_UDAmode, "uda" },
2664 { E_TAmode, "ta" },
2665 { E_UTAmode, "uta" },
2666 { E_QImode, "qi" },
2667 { E_HImode, "hi" },
2668 { E_SImode, "si" },
2669 { E_DImode, "di" },
2670 { E_TImode, "ti" },
2671 { E_SFmode, "sf" },
2672 { E_DFmode, "df" }
2673 };
2674 unsigned int i, j;
2675
2676 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2677 {
2678 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2679 "add", fixed_arith_modes[i].name, 3);
2680 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2681 "ssadd", fixed_arith_modes[i].name, 3);
2682 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2683 "usadd", fixed_arith_modes[i].name, 3);
2684 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2685 "sub", fixed_arith_modes[i].name, 3);
2686 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2687 "sssub", fixed_arith_modes[i].name, 3);
2688 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2689 "ussub", fixed_arith_modes[i].name, 3);
2690 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2691 "mul", fixed_arith_modes[i].name, 3);
2692 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2693 "ssmul", fixed_arith_modes[i].name, 3);
2694 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2695 "usmul", fixed_arith_modes[i].name, 3);
2696 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2697 "div", fixed_arith_modes[i].name, 3);
2698 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2699 "udiv", fixed_arith_modes[i].name, 3);
2700 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2701 "ssdiv", fixed_arith_modes[i].name, 3);
2702 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2703 "usdiv", fixed_arith_modes[i].name, 3);
2704 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2705 "neg", fixed_arith_modes[i].name, 2);
2706 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2707 "ssneg", fixed_arith_modes[i].name, 2);
2708 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2709 "usneg", fixed_arith_modes[i].name, 2);
2710 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2711 "ashl", fixed_arith_modes[i].name, 3);
2712 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2713 "ashr", fixed_arith_modes[i].name, 3);
2714 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2715 "lshr", fixed_arith_modes[i].name, 3);
2716 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2717 "ssashl", fixed_arith_modes[i].name, 3);
2718 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2719 "usashl", fixed_arith_modes[i].name, 3);
2720 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2721 "cmp", fixed_arith_modes[i].name, 2);
2722 }
2723
2724 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2725 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2726 {
2727 if (i == j
2728 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2729 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2730 continue;
2731
2732 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2733 fixed_conv_modes[j].mode, "fract",
2734 fixed_conv_modes[i].name,
2735 fixed_conv_modes[j].name);
2736 arm_set_fixed_conv_libfunc (satfract_optab,
2737 fixed_conv_modes[i].mode,
2738 fixed_conv_modes[j].mode, "satfract",
2739 fixed_conv_modes[i].name,
2740 fixed_conv_modes[j].name);
2741 arm_set_fixed_conv_libfunc (fractuns_optab,
2742 fixed_conv_modes[i].mode,
2743 fixed_conv_modes[j].mode, "fractuns",
2744 fixed_conv_modes[i].name,
2745 fixed_conv_modes[j].name);
2746 arm_set_fixed_conv_libfunc (satfractuns_optab,
2747 fixed_conv_modes[i].mode,
2748 fixed_conv_modes[j].mode, "satfractuns",
2749 fixed_conv_modes[i].name,
2750 fixed_conv_modes[j].name);
2751 }
2752 }
2753
2754 if (TARGET_AAPCS_BASED)
2755 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2756 }
2757
2758 /* On AAPCS systems, this is the "struct __va_list". */
2759 static GTY(()) tree va_list_type;
2760
2761 /* Return the type to use as __builtin_va_list. */
2762 static tree
2763 arm_build_builtin_va_list (void)
2764 {
2765 tree va_list_name;
2766 tree ap_field;
2767
2768 if (!TARGET_AAPCS_BASED)
2769 return std_build_builtin_va_list ();
2770
2771 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2772 defined as:
2773
2774 struct __va_list
2775 {
2776 void *__ap;
2777 };
2778
2779 The C Library ABI further reinforces this definition in \S
2780 4.1.
2781
2782 We must follow this definition exactly. The structure tag
2783 name is visible in C++ mangled names, and thus forms a part
2784 of the ABI. The field name may be used by people who
2785 #include <stdarg.h>. */
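/* (For instance, a C++ function taking a va_list argument is mangled with
   "St9__va_list" on AAPCS targets, so changing the tag would change the
   mangled name.  Example added for illustration.) */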
2786 /* Create the type. */
2787 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2788 /* Give it the required name. */
2789 va_list_name = build_decl (BUILTINS_LOCATION,
2790 TYPE_DECL,
2791 get_identifier ("__va_list"),
2792 va_list_type);
2793 DECL_ARTIFICIAL (va_list_name) = 1;
2794 TYPE_NAME (va_list_type) = va_list_name;
2795 TYPE_STUB_DECL (va_list_type) = va_list_name;
2796 /* Create the __ap field. */
2797 ap_field = build_decl (BUILTINS_LOCATION,
2798 FIELD_DECL,
2799 get_identifier ("__ap"),
2800 ptr_type_node);
2801 DECL_ARTIFICIAL (ap_field) = 1;
2802 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2803 TYPE_FIELDS (va_list_type) = ap_field;
2804 /* Compute its layout. */
2805 layout_type (va_list_type);
2806
2807 return va_list_type;
2808 }
2809
2810 /* Return an expression of type "void *" pointing to the next
2811 available argument in a variable-argument list. VALIST is the
2812 user-level va_list object, of type __builtin_va_list. */
2813 static tree
2814 arm_extract_valist_ptr (tree valist)
2815 {
2816 if (TREE_TYPE (valist) == error_mark_node)
2817 return error_mark_node;
2818
2819 /* On an AAPCS target, the pointer is stored within "struct
2820 va_list". */
2821 if (TARGET_AAPCS_BASED)
2822 {
2823 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2824 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2825 valist, ap_field, NULL_TREE);
2826 }
2827
2828 return valist;
2829 }
2830
2831 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2832 static void
2833 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2834 {
2835 valist = arm_extract_valist_ptr (valist);
2836 std_expand_builtin_va_start (valist, nextarg);
2837 }
2838
2839 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2840 static tree
2841 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2842 gimple_seq *post_p)
2843 {
2844 valist = arm_extract_valist_ptr (valist);
2845 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2846 }
2847
2848 /* Check any incompatible options that the user has specified. */
2849 static void
2850 arm_option_check_internal (struct gcc_options *opts)
2851 {
2852 int flags = opts->x_target_flags;
2853
2854 /* iWMMXt and NEON are incompatible. */
2855 if (TARGET_IWMMXT
2856 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2857 error ("iWMMXt and NEON are incompatible");
2858
2859 /* Make sure that the processor choice does not conflict with any of the
2860 other command line choices. */
2861 if (TARGET_ARM_P (flags)
2862 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2863 error ("target CPU does not support ARM mode");
2864
2865 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2866 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2867 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2868
2869 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2870 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2871
2872 /* If this target is normally configured to use APCS frames, warn if they
2873 are turned off and debugging is turned on. */
2874 if (TARGET_ARM_P (flags)
2875 && write_symbols != NO_DEBUG
2876 && !TARGET_APCS_FRAME
2877 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2878 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2879
2880 /* iWMMXt unsupported under Thumb mode. */
2881 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2882 error ("iWMMXt unsupported under Thumb mode");
2883
2884 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2885 error ("can not use -mtp=cp15 with 16-bit Thumb");
2886
2887 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2888 {
2889 error ("RTP PIC is incompatible with Thumb");
2890 flag_pic = 0;
2891 }
2892
2893 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2894 with MOVT. */
2895 if ((target_pure_code || target_slow_flash_data)
2896 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2897 {
2898 const char *flag = (target_pure_code ? "-mpure-code" :
2899 "-mslow-flash-data");
2900 error ("%s only supports non-pic code on M-profile targets with the "
2901 "MOVT instruction", flag);
2902 }
2903
2904 }
2905
2906 /* Recompute the global settings depending on target attribute options. */
2907
2908 static void
2909 arm_option_params_internal (void)
2910 {
2911 /* If we are not using the default (ARM mode) section anchor offset
2912 ranges, then set the correct ranges now. */
2913 if (TARGET_THUMB1)
2914 {
2915 /* Thumb-1 LDR instructions cannot have negative offsets.
2916 Permissible positive offset ranges are 5-bit (for byte loads),
2917 6-bit (for halfword loads), or 7-bit (for word loads).
2918 Empirical results suggest a 7-bit anchor range gives the best
2919 overall code size. */
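/* (Those widths follow from the Thumb-1 LDR encodings, which scale a 5-bit
   immediate by the access size: byte offsets reach 31, halfword offsets 62
   and word offsets 124.  Note added for clarification.) */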
2920 targetm.min_anchor_offset = 0;
2921 targetm.max_anchor_offset = 127;
2922 }
2923 else if (TARGET_THUMB2)
2924 {
2925 /* The minimum is set such that the total size of the block
2926 for a particular anchor is 248 + 1 + 4095 bytes, which is
2927 divisible by eight, ensuring natural spacing of anchors. */
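/* (That is, offsets -248 through 4095 inclusive: 248 + 1 + 4095 = 4344
   = 8 * 543 bytes.  Arithmetic spelled out for clarity.) */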
2928 targetm.min_anchor_offset = -248;
2929 targetm.max_anchor_offset = 4095;
2930 }
2931 else
2932 {
2933 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2934 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2935 }
2936
2937 /* With -Os, allow up to 4 instructions in a conditional sequence regardless of the tuning default. */
2938 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2939
2940 /* For THUMB2, we limit the conditional sequence to one IT block. */
2941 if (TARGET_THUMB2)
2942 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2943 }
2944
2945 /* True if -mflip-thumb should next add an attribute for the default
2946 mode, false if it should next add an attribute for the opposite mode. */
2947 static GTY(()) bool thumb_flipper;
2948
2949 /* Options after initial target override. */
2950 static GTY(()) tree init_optimize;
2951
2952 static void
2953 arm_override_options_after_change_1 (struct gcc_options *opts)
2954 {
2955 if (opts->x_align_functions <= 0)
2956 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2957 && opts->x_optimize_size ? 2 : 4;
2958 }
2959
2960 /* Implement targetm.override_options_after_change. */
2961
2962 static void
2963 arm_override_options_after_change (void)
2964 {
2965 arm_configure_build_target (&arm_active_target,
2966 TREE_TARGET_OPTION (target_option_default_node),
2967 &global_options_set, false);
2968
2969 arm_override_options_after_change_1 (&global_options);
2970 }
2971
2972 /* Implement TARGET_OPTION_SAVE. */
2973 static void
2974 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2975 {
2976 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2977 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2978 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2979 }
2980
2981 /* Implement TARGET_OPTION_RESTORE. */
2982 static void
2983 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2984 {
2985 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2986 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2987 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2988 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2989 false);
2990 }
2991
2992 /* Reset options between modes that the user has specified. */
2993 static void
2994 arm_option_override_internal (struct gcc_options *opts,
2995 struct gcc_options *opts_set)
2996 {
2997 arm_override_options_after_change_1 (opts);
2998
2999 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3000 {
3001 /* The default is to enable interworking, so this warning message would
3002 be confusing to users who have just compiled with
3003 eg, -march=armv4. */
3004 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3005 opts->x_target_flags &= ~MASK_INTERWORK;
3006 }
3007
3008 if (TARGET_THUMB_P (opts->x_target_flags)
3009 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3010 {
3011 warning (0, "target CPU does not support THUMB instructions");
3012 opts->x_target_flags &= ~MASK_THUMB;
3013 }
3014
3015 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3016 {
3017 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3018 opts->x_target_flags &= ~MASK_APCS_FRAME;
3019 }
3020
3021 /* Callee super interworking implies thumb interworking. Adding
3022 this to the flags here simplifies the logic elsewhere. */
3023 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3024 opts->x_target_flags |= MASK_INTERWORK;
3025
3026 /* Need to remember initial values so combinations of options like
3027 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3028 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3029
3030 if (! opts_set->x_arm_restrict_it)
3031 opts->x_arm_restrict_it = arm_arch8;
3032
3033 /* ARM execution state and M profile don't have [restrict] IT. */
3034 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3035 opts->x_arm_restrict_it = 0;
3036
3037 /* Enable -munaligned-access by default for
3038 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3039 i.e. Thumb2 and ARM state only.
3040 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3041 - ARMv8 architecture-based processors. */
3042
3043 Disable -munaligned-access by default for
3044 - all pre-ARMv6 architecture-based processors
3045 - ARMv6-M architecture-based processors
3046 - ARMv8-M Baseline processors. */
3047
3048 if (! opts_set->x_unaligned_access)
3049 {
3050 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3051 && arm_arch6 && (arm_arch_notm || arm_arch7));
3052 }
3053 else if (opts->x_unaligned_access == 1
3054 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3055 {
3056 warning (0, "target CPU does not support unaligned accesses");
3057 opts->x_unaligned_access = 0;
3058 }
3059
3060 /* Don't warn about disabling insn scheduling here, since -fschedule-insns is on by default at -O2. */
3061 if (TARGET_THUMB1_P (opts->x_target_flags))
3062 opts->x_flag_schedule_insns = 0;
3063 else
3064 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3065
3066 /* Disable shrink-wrap when optimizing function for size, since it tends to
3067 generate additional returns. */
3068 if (optimize_function_for_size_p (cfun)
3069 && TARGET_THUMB2_P (opts->x_target_flags))
3070 opts->x_flag_shrink_wrap = false;
3071 else
3072 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3073
3074 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3075 - epilogue_insns - does not accurately model the corresponding insns
3076 emitted in the asm file. In particular, see the comment in thumb_exit
3077 'Find out how many of the (return) argument registers we can corrupt'.
3078 As a consequence, the epilogue may clobber registers without fipa-ra
3079 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3080 TODO: Accurately model clobbers for epilogue_insns and reenable
3081 fipa-ra. */
3082 if (TARGET_THUMB1_P (opts->x_target_flags))
3083 opts->x_flag_ipa_ra = 0;
3084 else
3085 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3086
3087 /* Thumb2 inline assembly code should always use unified syntax.
3088 This will apply to ARM and Thumb1 eventually. */
3089 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3090
3091 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3092 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3093 #endif
3094 }
3095
3096 static sbitmap isa_all_fpubits;
3097 static sbitmap isa_quirkbits;
3098
3099 /* Configure a build target TARGET from the user-specified options OPTS and
3100 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3101 architecture have been specified, but the two are not identical. */
3102 void
3103 arm_configure_build_target (struct arm_build_target *target,
3104 struct cl_target_option *opts,
3105 struct gcc_options *opts_set,
3106 bool warn_compatible)
3107 {
3108 const cpu_option *arm_selected_tune = NULL;
3109 const arch_option *arm_selected_arch = NULL;
3110 const cpu_option *arm_selected_cpu = NULL;
3111 const arm_fpu_desc *arm_selected_fpu = NULL;
3112 const char *tune_opts = NULL;
3113 const char *arch_opts = NULL;
3114 const char *cpu_opts = NULL;
3115
3116 bitmap_clear (target->isa);
3117 target->core_name = NULL;
3118 target->arch_name = NULL;
3119
3120 if (opts_set->x_arm_arch_string)
3121 {
3122 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3123 "-march",
3124 opts->x_arm_arch_string);
3125 arch_opts = strchr (opts->x_arm_arch_string, '+');
3126 }
3127
3128 if (opts_set->x_arm_cpu_string)
3129 {
3130 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3131 opts->x_arm_cpu_string);
3132 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3133 arm_selected_tune = arm_selected_cpu;
3134 /* If taking the tuning from -mcpu, we don't need to rescan the
3135 options for tuning. */
3136 }
3137
3138 if (opts_set->x_arm_tune_string)
3139 {
3140 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3141 opts->x_arm_tune_string);
3142 tune_opts = strchr (opts->x_arm_tune_string, '+');
3143 }
3144
3145 if (arm_selected_arch)
3146 {
3147 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3148 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3149 arch_opts);
3150
3151 if (arm_selected_cpu)
3152 {
3153 auto_sbitmap cpu_isa (isa_num_bits);
3154 auto_sbitmap isa_delta (isa_num_bits);
3155
3156 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3157 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3158 cpu_opts);
3159 bitmap_xor (isa_delta, cpu_isa, target->isa);
3160 /* Ignore any bits that are quirk bits. */
3161 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3162 /* Ignore (for now) any bits that might be set by -mfpu. */
3163 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3164
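/* For illustration (a hypothetical command line, not from this file):
   combining -mcpu=cortex-a8 with -march=armv7-m leaves bits set in
   isa_delta here, so the warning below is emitted. */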
3165 if (!bitmap_empty_p (isa_delta))
3166 {
3167 if (warn_compatible)
3168 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3169 arm_selected_cpu->common.name,
3170 arm_selected_arch->common.name);
3171 /* -march wins for code generation.
3172 -mcpu wins for default tuning. */
3173 if (!arm_selected_tune)
3174 arm_selected_tune = arm_selected_cpu;
3175
3176 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3177 target->arch_name = arm_selected_arch->common.name;
3178 }
3179 else
3180 {
3181 /* Architecture and CPU are essentially the same.
3182 Prefer the CPU setting. */
3183 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3184 target->core_name = arm_selected_cpu->common.name;
3185 /* Copy the CPU's capabilities, so that we inherit the
3186 appropriate extensions and quirks. */
3187 bitmap_copy (target->isa, cpu_isa);
3188 }
3189 }
3190 else
3191 {
3192 /* Pick a CPU based on the architecture. */
3193 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3194 target->arch_name = arm_selected_arch->common.name;
3195 /* Note: target->core_name is left unset in this path. */
3196 }
3197 }
3198 else if (arm_selected_cpu)
3199 {
3200 target->core_name = arm_selected_cpu->common.name;
3201 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3202 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3203 cpu_opts);
3204 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3205 }
3206 /* If the user did not specify a processor or architecture, choose
3207 one for them. */
3208 else
3209 {
3210 const cpu_option *sel;
3211 auto_sbitmap sought_isa (isa_num_bits);
3212 bitmap_clear (sought_isa);
3213 auto_sbitmap default_isa (isa_num_bits);
3214
3215 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3216 TARGET_CPU_DEFAULT);
3217 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3218 gcc_assert (arm_selected_cpu->common.name);
3219
3220 /* RWE: All of the selection logic below (to the end of this
3221 'if' clause) looks somewhat suspect. It appears to be mostly
3222 there to support forcing thumb support when the default CPU
3223 does not have thumb (somewhat dubious in terms of what the
3224 user might be expecting). I think it should be removed once
3225 support for the pre-thumb era cores is removed. */
3226 sel = arm_selected_cpu;
3227 arm_initialize_isa (default_isa, sel->common.isa_bits);
3228 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3229 cpu_opts);
3230
3231 /* Now check to see if the user has specified any command line
3232 switches that require certain abilities from the cpu. */
3233
3234 if (TARGET_INTERWORK || TARGET_THUMB)
3235 {
3236 bitmap_set_bit (sought_isa, isa_bit_thumb);
3237
3238 /* There are no ARM processors that support both APCS-26 and
3239 interworking. Therefore we forcibly remove MODE26 from
3240 the isa features here (if it was set), so that the
3241 search below will always be able to find a compatible
3242 processor. */
3243 bitmap_clear_bit (default_isa, isa_bit_mode26);
3244 }
3245
3246 /* If there are such requirements and the default CPU does not
3247 satisfy them, we need to run over the complete list of
3248 cores looking for one that is satisfactory. */
3249 if (!bitmap_empty_p (sought_isa)
3250 && !bitmap_subset_p (sought_isa, default_isa))
3251 {
3252 auto_sbitmap candidate_isa (isa_num_bits);
3253 /* We're only interested in a CPU with at least the
3254 capabilities of the default CPU and the required
3255 additional features. */
3256 bitmap_ior (default_isa, default_isa, sought_isa);
3257
3258 /* Try to locate a CPU type that supports all of the abilities
3259 of the default CPU, plus the extra abilities requested by
3260 the user. */
3261 for (sel = all_cores; sel->common.name != NULL; sel++)
3262 {
3263 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3264 /* An exact match? */
3265 if (bitmap_equal_p (default_isa, candidate_isa))
3266 break;
3267 }
3268
3269 if (sel->common.name == NULL)
3270 {
3271 unsigned current_bit_count = isa_num_bits;
3272 const cpu_option *best_fit = NULL;
3273
3274 /* Ideally we would like to issue an error message here
3275 saying that it was not possible to find a CPU compatible
3276 with the default CPU, but which also supports the command
3277 line options specified by the programmer, and so they
3278 ought to use the -mcpu=<name> command line option to
3279 override the default CPU type.
3280
3281 If we cannot find a CPU that has exactly the
3282 characteristics of the default CPU and the given
3283 command line options we scan the array again looking
3284 for a best match. The best match must have at least
3285 the capabilities of the perfect match. */
3286 for (sel = all_cores; sel->common.name != NULL; sel++)
3287 {
3288 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3289
3290 if (bitmap_subset_p (default_isa, candidate_isa))
3291 {
3292 unsigned count;
3293
3294 bitmap_and_compl (candidate_isa, candidate_isa,
3295 default_isa);
3296 count = bitmap_popcount (candidate_isa);
3297
3298 if (count < current_bit_count)
3299 {
3300 best_fit = sel;
3301 current_bit_count = count;
3302 }
3303 }
3304 }
3305
3306 gcc_assert (best_fit);
3307 sel = best_fit;
3308 }
3309 arm_selected_cpu = sel;
3310 }
3311
3312 /* Now we know the CPU, we can finally initialize the target
3313 structure. */
3314 target->core_name = arm_selected_cpu->common.name;
3315 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3316 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3317 cpu_opts);
3318 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3319 }
3320
3321 gcc_assert (arm_selected_cpu);
3322 gcc_assert (arm_selected_arch);
3323
3324 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3325 {
3326 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3327 auto_sbitmap fpu_bits (isa_num_bits);
3328
3329 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3330 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3331 bitmap_ior (target->isa, target->isa, fpu_bits);
3332 }
3333
3334 if (!arm_selected_tune)
3335 arm_selected_tune = arm_selected_cpu;
3336 else /* Validate the features passed to -mtune. */
3337 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3338
3339 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3340
3341 /* Finish initializing the target structure. */
3342 target->arch_pp_name = arm_selected_arch->arch;
3343 target->base_arch = arm_selected_arch->base_arch;
3344 target->profile = arm_selected_arch->profile;
3345
3346 target->tune_flags = tune_data->tune_flags;
3347 target->tune = tune_data->tune;
3348 target->tune_core = tune_data->scheduler;
3349 arm_option_reconfigure_globals ();
3350 }
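/* For illustration only (a hypothetical invocation, not taken from the
   sources): given

     arm-none-eabi-gcc -march=armv7-a -mcpu=cortex-m4 foo.c

   the ISA delta between the two selections is non-empty, so the function
   above warns (when WARN_COMPATIBLE), generates code for armv7-a and keeps
   cortex-m4 only as the default tuning target.  With a compatible pair such
   as -march=armv7-a -mcpu=cortex-a9, the CPU setting is preferred and its
   extensions and quirks are inherited.  */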
3351
3352 /* Fix up any incompatible options that the user has specified. */
3353 static void
3354 arm_option_override (void)
3355 {
3356 static const enum isa_feature fpu_bitlist[]
3357 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3358 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3359 cl_target_option opts;
3360
3361 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3362 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3363
3364 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3365 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3366
3367 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3368
3369 if (!global_options_set.x_arm_fpu_index)
3370 {
3371 bool ok;
3372 int fpu_index;
3373
3374 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3375 CL_TARGET);
3376 gcc_assert (ok);
3377 arm_fpu_index = (enum fpu_type) fpu_index;
3378 }
3379
3380 cl_target_option_save (&opts, &global_options);
3381 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3382 true);
3383
3384 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3385 SUBTARGET_OVERRIDE_OPTIONS;
3386 #endif
3387
3388 /* Initialize boolean versions of the architectural flags, for use
3389 in the arm.md file and for enabling feature flags. */
3390 arm_option_reconfigure_globals ();
3391
3392 arm_tune = arm_active_target.tune_core;
3393 tune_flags = arm_active_target.tune_flags;
3394 current_tune = arm_active_target.tune;
3395
3396 /* TBD: Dwarf info for apcs frame is not handled yet. */
3397 if (TARGET_APCS_FRAME)
3398 flag_shrink_wrap = false;
3399
3400 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3401 {
3402 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3403 target_flags |= MASK_APCS_FRAME;
3404 }
3405
3406 if (TARGET_POKE_FUNCTION_NAME)
3407 target_flags |= MASK_APCS_FRAME;
3408
3409 if (TARGET_APCS_REENT && flag_pic)
3410 error ("-fpic and -mapcs-reent are incompatible");
3411
3412 if (TARGET_APCS_REENT)
3413 warning (0, "APCS reentrant code not supported. Ignored");
3414
3415 /* Set up some tuning parameters. */
3416 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3417 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3418 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3419 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3420 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3421 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3422
3423 /* For arm2/3 there is no need to do any scheduling if we are doing
3424 software floating-point. */
3425 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3426 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3427
3428 /* Override the default structure alignment for AAPCS ABI. */
3429 if (!global_options_set.x_arm_structure_size_boundary)
3430 {
3431 if (TARGET_AAPCS_BASED)
3432 arm_structure_size_boundary = 8;
3433 }
3434 else
3435 {
3436 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3437
3438 if (arm_structure_size_boundary != 8
3439 && arm_structure_size_boundary != 32
3440 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3441 {
3442 if (ARM_DOUBLEWORD_ALIGN)
3443 warning (0,
3444 "structure size boundary can only be set to 8, 32 or 64");
3445 else
3446 warning (0, "structure size boundary can only be set to 8 or 32");
3447 arm_structure_size_boundary
3448 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3449 }
3450 }
3451
3452 if (TARGET_VXWORKS_RTP)
3453 {
3454 if (!global_options_set.x_arm_pic_data_is_text_relative)
3455 arm_pic_data_is_text_relative = 0;
3456 }
3457 else if (flag_pic
3458 && !arm_pic_data_is_text_relative
3459 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3460 /* When text & data segments don't have a fixed displacement, the
3461 intended use is with a single, read only, pic base register.
3462 Unless the user explicitly requested not to do that, set
3463 it. */
3464 target_flags |= MASK_SINGLE_PIC_BASE;
3465
3466 /* If stack checking is disabled, we can use r10 as the PIC register,
3467 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3468 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3469 {
3470 if (TARGET_VXWORKS_RTP)
3471 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3472 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3473 }
3474
3475 if (flag_pic && TARGET_VXWORKS_RTP)
3476 arm_pic_register = 9;
3477
3478 if (arm_pic_register_string != NULL)
3479 {
3480 int pic_register = decode_reg_name (arm_pic_register_string);
3481
3482 if (!flag_pic)
3483 warning (0, "-mpic-register= is useless without -fpic");
3484
3485 /* Prevent the user from choosing an obviously stupid PIC register. */
3486 else if (pic_register < 0 || call_used_regs[pic_register]
3487 || pic_register == HARD_FRAME_POINTER_REGNUM
3488 || pic_register == STACK_POINTER_REGNUM
3489 || pic_register >= PC_REGNUM
3490 || (TARGET_VXWORKS_RTP
3491 && (unsigned int) pic_register != arm_pic_register))
3492 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3493 else
3494 arm_pic_register = pic_register;
3495 }
3496
3497 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3498 if (fix_cm3_ldrd == 2)
3499 {
3500 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3501 fix_cm3_ldrd = 1;
3502 else
3503 fix_cm3_ldrd = 0;
3504 }
3505
3506 /* Hot/Cold partitioning is not currently supported, since we can't
3507 handle literal pool placement in that case. */
3508 if (flag_reorder_blocks_and_partition)
3509 {
3510 inform (input_location,
3511 "-freorder-blocks-and-partition not supported on this architecture");
3512 flag_reorder_blocks_and_partition = 0;
3513 flag_reorder_blocks = 1;
3514 }
3515
3516 if (flag_pic)
3517 /* Hoisting PIC address calculations more aggressively provides a small,
3518 but measurable, size reduction for PIC code. Therefore, we decrease
3519 the bar for unrestricted expression hoisting to the cost of PIC address
3520 calculation, which is 2 instructions. */
3521 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3522 global_options.x_param_values,
3523 global_options_set.x_param_values);
3524
3525 /* ARM EABI defaults to strict volatile bitfields. */
3526 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3527 && abi_version_at_least(2))
3528 flag_strict_volatile_bitfields = 1;
3529
3530 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3531 have deemed it beneficial (signified by setting
3532 prefetch.num_slots to 1 or more). */
3533 if (flag_prefetch_loop_arrays < 0
3534 && HAVE_prefetch
3535 && optimize >= 3
3536 && current_tune->prefetch.num_slots > 0)
3537 flag_prefetch_loop_arrays = 1;
3538
3539 /* Set up parameters to be used in prefetching algorithm. Do not
3540 override the defaults unless we are tuning for a core we have
3541 researched values for. */
3542 if (current_tune->prefetch.num_slots > 0)
3543 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3544 current_tune->prefetch.num_slots,
3545 global_options.x_param_values,
3546 global_options_set.x_param_values);
3547 if (current_tune->prefetch.l1_cache_line_size >= 0)
3548 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3549 current_tune->prefetch.l1_cache_line_size,
3550 global_options.x_param_values,
3551 global_options_set.x_param_values);
3552 if (current_tune->prefetch.l1_cache_size >= 0)
3553 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3554 current_tune->prefetch.l1_cache_size,
3555 global_options.x_param_values,
3556 global_options_set.x_param_values);
3557
3558 /* Use Neon rather than core registers to perform 64-bit
3559 operations. */
3560 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3561 if (use_neon_for_64bits == 1)
3562 prefer_neon_for_64bits = true;
3563
3564 /* Use the alternative scheduling-pressure algorithm by default. */
3565 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3566 global_options.x_param_values,
3567 global_options_set.x_param_values);
3568
3569 /* Look through ready list and all of queue for instructions
3570 relevant for L2 auto-prefetcher. */
3571 int param_sched_autopref_queue_depth;
3572
3573 switch (current_tune->sched_autopref)
3574 {
3575 case tune_params::SCHED_AUTOPREF_OFF:
3576 param_sched_autopref_queue_depth = -1;
3577 break;
3578
3579 case tune_params::SCHED_AUTOPREF_RANK:
3580 param_sched_autopref_queue_depth = 0;
3581 break;
3582
3583 case tune_params::SCHED_AUTOPREF_FULL:
3584 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3585 break;
3586
3587 default:
3588 gcc_unreachable ();
3589 }
3590
3591 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3592 param_sched_autopref_queue_depth,
3593 global_options.x_param_values,
3594 global_options_set.x_param_values);
3595
3596 /* Currently, for slow flash data, we just disable literal pools. We also
3597 disable them for pure-code. */
3598 if (target_slow_flash_data || target_pure_code)
3599 arm_disable_literal_pool = true;
3600
3601 /* Disable scheduling fusion by default if the target is not an ARMv7
3602 processor or does not prefer ldrd/strd. */
3603 if (flag_schedule_fusion == 2
3604 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3605 flag_schedule_fusion = 0;
3606
3607 /* Need to remember initial options before they are overridden. */
3608 init_optimize = build_optimization_node (&global_options);
3609
3610 arm_options_perform_arch_sanity_checks ();
3611 arm_option_override_internal (&global_options, &global_options_set);
3612 arm_option_check_internal (&global_options);
3613 arm_option_params_internal ();
3614
3615 /* Create the default target_options structure. */
3616 target_option_default_node = target_option_current_node
3617 = build_target_option_node (&global_options);
3618
3619 /* Register global variables with the garbage collector. */
3620 arm_add_gc_roots ();
3621
3622 /* Init initial mode for testing. */
3623 thumb_flipper = TARGET_THUMB;
3624 }
3625
3626
3627 /* Reconfigure global status flags from the active_target.isa. */
3628 void
3629 arm_option_reconfigure_globals (void)
3630 {
3631 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3632 arm_base_arch = arm_active_target.base_arch;
3633
3634 /* Initialize boolean versions of the architectural flags, for use
3635 in the arm.md file. */
3636 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3637 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3638 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3639 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3640 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3641 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3642 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3643 arm_arch6m = arm_arch6 && !arm_arch_notm;
3644 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3645 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3646 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3647 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3648 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3649 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3650 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3651 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3652 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3653 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3654 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3655 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3656 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3657 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3658 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3659 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3660 if (arm_fp16_inst)
3661 {
3662 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3663 error ("selected fp16 options are incompatible");
3664 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3665 }
3666
3667 /* And finally, set up some quirks. */
3668 arm_arch_no_volatile_ce
3669 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3670 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3671 isa_bit_quirk_armv6kz);
3672
3673 /* Use the cp15 method if it is available. */
3674 if (target_thread_pointer == TP_AUTO)
3675 {
3676 if (arm_arch6k && !TARGET_THUMB1)
3677 target_thread_pointer = TP_CP15;
3678 else
3679 target_thread_pointer = TP_SOFT;
3680 }
3681 }
3682
3683 /* Perform some validation between the desired architecture and the rest of the
3684 options. */
3685 void
3686 arm_options_perform_arch_sanity_checks (void)
3687 {
3688 /* V5T code we generate is completely interworking capable, so we turn off
3689 TARGET_INTERWORK here to avoid many tests later on. */
3690
3691 /* XXX However, we must pass the right pre-processor defines to CPP
3692 or GLD can get confused. This is a hack. */
3693 if (TARGET_INTERWORK)
3694 arm_cpp_interwork = 1;
3695
3696 if (arm_arch5t)
3697 target_flags &= ~MASK_INTERWORK;
3698
3699 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3700 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3701
3702 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3703 error ("iwmmxt abi requires an iwmmxt capable cpu");
3704
3705 /* BPABI targets use linker tricks to allow interworking on cores
3706 without thumb support. */
3707 if (TARGET_INTERWORK
3708 && !TARGET_BPABI
3709 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3710 {
3711 warning (0, "target CPU does not support interworking");
3712 target_flags &= ~MASK_INTERWORK;
3713 }
3714
3715 /* If soft-float is specified then don't use FPU. */
3716 if (TARGET_SOFT_FLOAT)
3717 arm_fpu_attr = FPU_NONE;
3718 else
3719 arm_fpu_attr = FPU_VFP;
3720
3721 if (TARGET_AAPCS_BASED)
3722 {
3723 if (TARGET_CALLER_INTERWORKING)
3724 error ("AAPCS does not support -mcaller-super-interworking");
3725 else
3726 if (TARGET_CALLEE_INTERWORKING)
3727 error ("AAPCS does not support -mcallee-super-interworking");
3728 }
3729
3730 /* __fp16 support currently assumes the core has ldrh. */
3731 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3732 sorry ("__fp16 and no ldrh");
3733
3734 if (use_cmse && !arm_arch_cmse)
3735 error ("target CPU does not support ARMv8-M Security Extensions");
3736
3737 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3738 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3739 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3740 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3741
3742
3743 if (TARGET_AAPCS_BASED)
3744 {
3745 if (arm_abi == ARM_ABI_IWMMXT)
3746 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3747 else if (TARGET_HARD_FLOAT_ABI)
3748 {
3749 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3750 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3751 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3752 }
3753 else
3754 arm_pcs_default = ARM_PCS_AAPCS;
3755 }
3756 else
3757 {
3758 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3759 sorry ("-mfloat-abi=hard and VFP");
3760
3761 if (arm_abi == ARM_ABI_APCS)
3762 arm_pcs_default = ARM_PCS_APCS;
3763 else
3764 arm_pcs_default = ARM_PCS_ATPCS;
3765 }
3766 }
3767
3768 static void
3769 arm_add_gc_roots (void)
3770 {
3771 gcc_obstack_init(&minipool_obstack);
3772 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3773 }
3774 \f
3775 /* A table of known ARM exception types.
3776 For use with the interrupt function attribute. */
3777
3778 typedef struct
3779 {
3780 const char *const arg;
3781 const unsigned long return_value;
3782 }
3783 isr_attribute_arg;
3784
3785 static const isr_attribute_arg isr_attribute_args [] =
3786 {
3787 { "IRQ", ARM_FT_ISR },
3788 { "irq", ARM_FT_ISR },
3789 { "FIQ", ARM_FT_FIQ },
3790 { "fiq", ARM_FT_FIQ },
3791 { "ABORT", ARM_FT_ISR },
3792 { "abort", ARM_FT_ISR },
3793 { "ABORT", ARM_FT_ISR },
3794 { "abort", ARM_FT_ISR },
3795 { "UNDEF", ARM_FT_EXCEPTION },
3796 { "undef", ARM_FT_EXCEPTION },
3797 { "SWI", ARM_FT_EXCEPTION },
3798 { "swi", ARM_FT_EXCEPTION },
3799 { NULL, ARM_FT_NORMAL }
3800 };
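/* For illustration, a hypothetical declaration using one of the strings
   accepted above (the function name is made up):

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   When the attribute is given with no argument, arm_isr_value below
   defaults to ARM_FT_ISR.  */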
3801
3802 /* Returns the (interrupt) function type of the current
3803 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3804
3805 static unsigned long
3806 arm_isr_value (tree argument)
3807 {
3808 const isr_attribute_arg * ptr;
3809 const char * arg;
3810
3811 if (!arm_arch_notm)
3812 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3813
3814 /* No argument - default to IRQ. */
3815 if (argument == NULL_TREE)
3816 return ARM_FT_ISR;
3817
3818 /* Get the value of the argument. */
3819 if (TREE_VALUE (argument) == NULL_TREE
3820 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3821 return ARM_FT_UNKNOWN;
3822
3823 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3824
3825 /* Check it against the list of known arguments. */
3826 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3827 if (streq (arg, ptr->arg))
3828 return ptr->return_value;
3829
3830 /* An unrecognized interrupt type. */
3831 return ARM_FT_UNKNOWN;
3832 }
3833
3834 /* Computes the type of the current function. */
3835
3836 static unsigned long
3837 arm_compute_func_type (void)
3838 {
3839 unsigned long type = ARM_FT_UNKNOWN;
3840 tree a;
3841 tree attr;
3842
3843 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3844
3845 /* Decide if the current function is volatile. Such functions
3846 never return, and many memory cycles can be saved by not storing
3847 register values that will never be needed again. This optimization
3848 was added to speed up context switching in a kernel application. */
3849 if (optimize > 0
3850 && (TREE_NOTHROW (current_function_decl)
3851 || !(flag_unwind_tables
3852 || (flag_exceptions
3853 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3854 && TREE_THIS_VOLATILE (current_function_decl))
3855 type |= ARM_FT_VOLATILE;
3856
3857 if (cfun->static_chain_decl != NULL)
3858 type |= ARM_FT_NESTED;
3859
3860 attr = DECL_ATTRIBUTES (current_function_decl);
3861
3862 a = lookup_attribute ("naked", attr);
3863 if (a != NULL_TREE)
3864 type |= ARM_FT_NAKED;
3865
3866 a = lookup_attribute ("isr", attr);
3867 if (a == NULL_TREE)
3868 a = lookup_attribute ("interrupt", attr);
3869
3870 if (a == NULL_TREE)
3871 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3872 else
3873 type |= arm_isr_value (TREE_VALUE (a));
3874
3875 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3876 type |= ARM_FT_CMSE_ENTRY;
3877
3878 return type;
3879 }
3880
3881 /* Returns the type of the current function. */
3882
3883 unsigned long
3884 arm_current_func_type (void)
3885 {
3886 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3887 cfun->machine->func_type = arm_compute_func_type ();
3888
3889 return cfun->machine->func_type;
3890 }
3891
3892 bool
3893 arm_allocate_stack_slots_for_args (void)
3894 {
3895 /* Naked functions should not allocate stack slots for arguments. */
3896 return !IS_NAKED (arm_current_func_type ());
3897 }
3898
3899 static bool
3900 arm_warn_func_return (tree decl)
3901 {
3902 /* Naked functions are implemented entirely in assembly, including the
3903 return sequence, so suppress warnings about this. */
3904 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3905 }
3906
3907 \f
3908 /* Output assembler code for a block containing the constant parts
3909 of a trampoline, leaving space for the variable parts.
3910
3911 On the ARM, (if r8 is the static chain regnum, and remembering that
3912 referencing pc adds an offset of 8) the trampoline looks like:
3913 ldr r8, [pc, #0]
3914 ldr pc, [pc]
3915 .word static chain value
3916 .word function's address
3917 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3918
3919 static void
3920 arm_asm_trampoline_template (FILE *f)
3921 {
3922 fprintf (f, "\t.syntax unified\n");
3923
3924 if (TARGET_ARM)
3925 {
3926 fprintf (f, "\t.arm\n");
3927 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3928 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3929 }
3930 else if (TARGET_THUMB2)
3931 {
3932 fprintf (f, "\t.thumb\n");
3933 /* The Thumb-2 trampoline is similar to the arm implementation.
3934 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3935 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3936 STATIC_CHAIN_REGNUM, PC_REGNUM);
3937 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3938 }
3939 else
3940 {
3941 ASM_OUTPUT_ALIGN (f, 2);
3942 fprintf (f, "\t.code\t16\n");
3943 fprintf (f, ".Ltrampoline_start:\n");
3944 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3945 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3946 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3947 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3948 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3949 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3950 }
3951 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3952 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3953 }
3954
3955 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3956
3957 static void
3958 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3959 {
3960 rtx fnaddr, mem, a_tramp;
3961
3962 emit_block_move (m_tramp, assemble_trampoline_template (),
3963 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3964
3965 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3966 emit_move_insn (mem, chain_value);
3967
3968 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3969 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3970 emit_move_insn (mem, fnaddr);
3971
3972 a_tramp = XEXP (m_tramp, 0);
3973 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3974 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3975 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3976 }
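/* For illustration, a sketch of the 32-bit (ARM/Thumb-2) trampoline the two
   functions above produce; offsets follow the adjust_address calls:

     offset  0:  first template insn  (loads the static chain register)
     offset  4:  second template insn (loads the target address into pc)
     offset  8:  static chain value   (filled in by arm_trampoline_init)
     offset 12:  function address     (filled in by arm_trampoline_init)

   arm_trampoline_init then calls __clear_cache over the whole block so the
   freshly written words are visible to instruction fetch.  */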
3977
3978 /* Thumb trampolines should be entered in thumb mode, so set
3979 the bottom bit of the address. */
3980
3981 static rtx
3982 arm_trampoline_adjust_address (rtx addr)
3983 {
3984 if (TARGET_THUMB)
3985 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3986 NULL, 0, OPTAB_LIB_WIDEN);
3987 return addr;
3988 }
3989 \f
3990 /* Return 1 if it is possible to return using a single instruction.
3991 If SIBLING is non-null, this is a test for a return before a sibling
3992 call. SIBLING is the call insn, so we can examine its register usage. */
3993
3994 int
3995 use_return_insn (int iscond, rtx sibling)
3996 {
3997 int regno;
3998 unsigned int func_type;
3999 unsigned long saved_int_regs;
4000 unsigned HOST_WIDE_INT stack_adjust;
4001 arm_stack_offsets *offsets;
4002
4003 /* Never use a return instruction before reload has run. */
4004 if (!reload_completed)
4005 return 0;
4006
4007 func_type = arm_current_func_type ();
4008
4009 /* Naked, volatile and stack alignment functions need special
4010 consideration. */
4011 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4012 return 0;
4013
4014 /* So do interrupt functions that use the frame pointer and Thumb
4015 interrupt functions. */
4016 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4017 return 0;
4018
4019 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4020 && !optimize_function_for_size_p (cfun))
4021 return 0;
4022
4023 offsets = arm_get_frame_offsets ();
4024 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4025
4026 /* As do variadic functions. */
4027 if (crtl->args.pretend_args_size
4028 || cfun->machine->uses_anonymous_args
4029 /* Or if the function calls __builtin_eh_return () */
4030 || crtl->calls_eh_return
4031 /* Or if the function calls alloca */
4032 || cfun->calls_alloca
4033 /* Or if there is a stack adjustment. However, if the stack pointer
4034 is saved on the stack, we can use a pre-incrementing stack load. */
4035 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4036 && stack_adjust == 4))
4037 /* Or if the static chain register was saved above the frame, under the
4038 assumption that the stack pointer isn't saved on the stack. */
4039 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4040 && arm_compute_static_chain_stack_bytes() != 0))
4041 return 0;
4042
4043 saved_int_regs = offsets->saved_regs_mask;
4044
4045 /* Unfortunately, the insn
4046
4047 ldmib sp, {..., sp, ...}
4048
4049 triggers a bug on most SA-110 based devices, such that the stack
4050 pointer won't be correctly restored if the instruction takes a
4051 page fault. We work around this problem by popping r3 along with
4052 the other registers, since that is never slower than executing
4053 another instruction.
4054
4055 We test for !arm_arch5t here, because code for any architecture
4056 less than this could potentially be run on one of the buggy
4057 chips. */
4058 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4059 {
4060 /* Validate that r3 is a call-clobbered register (always true in
4061 the default abi) ... */
4062 if (!call_used_regs[3])
4063 return 0;
4064
4065 /* ... that it isn't being used for a return value ... */
4066 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4067 return 0;
4068
4069 /* ... or for a tail-call argument ... */
4070 if (sibling)
4071 {
4072 gcc_assert (CALL_P (sibling));
4073
4074 if (find_regno_fusage (sibling, USE, 3))
4075 return 0;
4076 }
4077
4078 /* ... and that there are no call-saved registers in r0-r2
4079 (always true in the default ABI). */
4080 if (saved_int_regs & 0x7)
4081 return 0;
4082 }
4083
4084 /* Can't be done if interworking with Thumb, and any registers have been
4085 stacked. */
4086 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4087 return 0;
4088
4089 /* On StrongARM, conditional returns are expensive if they aren't
4090 taken and multiple registers have been stacked. */
4091 if (iscond && arm_tune_strongarm)
4092 {
4093 /* Conditional return when just the LR is stored is a simple
4094 conditional-load instruction, that's not expensive. */
4095 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4096 return 0;
4097
4098 if (flag_pic
4099 && arm_pic_register != INVALID_REGNUM
4100 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4101 return 0;
4102 }
4103
4104 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4105 several instructions if anything needs to be popped. */
4106 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4107 return 0;
4108
4109 /* If there are saved registers but the LR isn't saved, then we need
4110 two instructions for the return. */
4111 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4112 return 0;
4113
4114 /* Can't be done if any of the VFP regs are pushed,
4115 since this also requires an insn. */
4116 if (TARGET_HARD_FLOAT)
4117 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4118 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4119 return 0;
4120
4121 if (TARGET_REALLY_IWMMXT)
4122 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4123 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4124 return 0;
4125
4126 return 1;
4127 }
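/* For illustration (a hypothetical case, not from the sources): a small
   leaf function that saves no registers and needs no stack adjustment
   makes use_return_insn return 1, so its epilogue can be a single return
   instruction; a function that calls alloca, or saves registers without
   saving LR, returns 0 and gets a multi-instruction epilogue instead.  */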
4128
4129 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4130 shrink-wrapping if possible. This is the case if we need to emit a
4131 prologue, which we can test by looking at the offsets. */
4132 bool
4133 use_simple_return_p (void)
4134 {
4135 arm_stack_offsets *offsets;
4136
4137 /* Note this function can be called before or after reload. */
4138 if (!reload_completed)
4139 arm_compute_frame_layout ();
4140
4141 offsets = arm_get_frame_offsets ();
4142 return offsets->outgoing_args != 0;
4143 }
4144
4145 /* Return TRUE if int I is a valid immediate ARM constant. */
4146
4147 int
4148 const_ok_for_arm (HOST_WIDE_INT i)
4149 {
4150 int lowbit;
4151
4152 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4153 be all zero, or all one. */
4154 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4155 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4156 != ((~(unsigned HOST_WIDE_INT) 0)
4157 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4158 return FALSE;
4159
4160 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4161
4162 /* Fast return for 0 and small values. We must do this for zero, since
4163 the code below can't handle that one case. */
4164 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4165 return TRUE;
4166
4167 /* Get the number of trailing zeros. */
4168 lowbit = ffs((int) i) - 1;
4169
4170 /* Only even shifts are allowed in ARM mode so round down to the
4171 nearest even number. */
4172 if (TARGET_ARM)
4173 lowbit &= ~1;
4174
4175 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4176 return TRUE;
4177
4178 if (TARGET_ARM)
4179 {
4180 /* Allow rotated constants in ARM mode. */
4181 if (lowbit <= 4
4182 && ((i & ~0xc000003f) == 0
4183 || (i & ~0xf000000f) == 0
4184 || (i & ~0xfc000003) == 0))
4185 return TRUE;
4186 }
4187 else if (TARGET_THUMB2)
4188 {
4189 HOST_WIDE_INT v;
4190
4191 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4192 v = i & 0xff;
4193 v |= v << 16;
4194 if (i == v || i == (v | (v << 8)))
4195 return TRUE;
4196
4197 /* Allow repeated pattern 0xXY00XY00. */
4198 v = i & 0xff00;
4199 v |= v << 16;
4200 if (i == v)
4201 return TRUE;
4202 }
4203 else if (TARGET_HAVE_MOVT)
4204 {
4205 /* Thumb-1 Targets with MOVT. */
4206 if (i > 0xffff)
4207 return FALSE;
4208 else
4209 return TRUE;
4210 }
4211
4212 return FALSE;
4213 }
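/* For illustration, a minimal standalone sketch (not part of the compiler)
   of the ARM-mode rule const_ok_for_arm implements for 32-bit values: an
   8-bit constant rotated right by an even amount.

     static int is_arm_mode_immediate (unsigned int x)
     {
       for (int rot = 0; rot < 32; rot += 2)
         {
           // Rotate X left by ROT; if the result fits in 8 bits, then X
           // itself is an 8-bit value rotated right by ROT.
           unsigned int v = (x << rot) | (x >> ((32 - rot) & 31));
           if ((v & ~0xffu) == 0)
             return 1;
         }
       return 0;
     }

   e.g. 0x000000ff, 0x0000ff00 and 0xf000000f are representable, while
   0x00000101 is not (its two set bits never fit in one rotated 8-bit
   window).  Thumb-2 additionally accepts the replicated byte patterns
   handled in the function above.  */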
4214
4215 /* Return true if I is a valid constant for the operation CODE. */
4216 int
4217 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4218 {
4219 if (const_ok_for_arm (i))
4220 return 1;
4221
4222 switch (code)
4223 {
4224 case SET:
4225 /* See if we can use movw. */
4226 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4227 return 1;
4228 else
4229 /* Otherwise, try mvn. */
4230 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4231
4232 case PLUS:
4233 /* See if we can use addw or subw. */
4234 if (TARGET_THUMB2
4235 && ((i & 0xfffff000) == 0
4236 || ((-i) & 0xfffff000) == 0))
4237 return 1;
4238 /* Fall through. */
4239 case COMPARE:
4240 case EQ:
4241 case NE:
4242 case GT:
4243 case LE:
4244 case LT:
4245 case GE:
4246 case GEU:
4247 case LTU:
4248 case GTU:
4249 case LEU:
4250 case UNORDERED:
4251 case ORDERED:
4252 case UNEQ:
4253 case UNGE:
4254 case UNLT:
4255 case UNGT:
4256 case UNLE:
4257 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4258
4259 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4260 case XOR:
4261 return 0;
4262
4263 case IOR:
4264 if (TARGET_THUMB2)
4265 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4266 return 0;
4267
4268 case AND:
4269 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4270
4271 default:
4272 gcc_unreachable ();
4273 }
4274 }
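/* For illustration: with CODE == PLUS, the constant -1 (0xffffffff) is not
   a valid immediate by itself, but const_ok_for_op accepts it because the
   negated value 1 is, so the addition can be emitted as a subtract.
   Similarly for AND, 0xffffff00 is accepted because its complement 0xff is
   valid and a bic can be used instead.  */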
4275
4276 /* Return true if I is a valid di mode constant for the operation CODE. */
4277 int
4278 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4279 {
4280 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4281 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4282 rtx hi = GEN_INT (hi_val);
4283 rtx lo = GEN_INT (lo_val);
4284
4285 if (TARGET_THUMB1)
4286 return 0;
4287
4288 switch (code)
4289 {
4290 case AND:
4291 case IOR:
4292 case XOR:
4293 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4294 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4295 case PLUS:
4296 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4297
4298 default:
4299 return 0;
4300 }
4301 }
4302
4303 /* Emit a sequence of insns to handle a large constant.
4304 CODE is the code of the operation required, it can be any of SET, PLUS,
4305 IOR, AND, XOR, MINUS;
4306 MODE is the mode in which the operation is being performed;
4307 VAL is the integer to operate on;
4308 SOURCE is the other operand (a register, or a null-pointer for SET);
4309 SUBTARGETS means it is safe to create scratch registers if that will
4310 either produce a simpler sequence, or we will want to cse the values.
4311 Return value is the number of insns emitted. */
4312
4313 /* ??? Tweak this for thumb2. */
4314 int
4315 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4316 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4317 {
4318 rtx cond;
4319
4320 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4321 cond = COND_EXEC_TEST (PATTERN (insn));
4322 else
4323 cond = NULL_RTX;
4324
4325 if (subtargets || code == SET
4326 || (REG_P (target) && REG_P (source)
4327 && REGNO (target) != REGNO (source)))
4328 {
4329 /* After arm_reorg has been called, we can't fix up expensive
4330 constants by pushing them into memory so we must synthesize
4331 them in-line, regardless of the cost. This is only likely to
4332 be more costly on chips that have load delay slots and we are
4333 compiling without running the scheduler (so no splitting
4334 occurred before the final instruction emission).
4335
4336 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4337 */
4338 if (!cfun->machine->after_arm_reorg
4339 && !cond
4340 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4341 1, 0)
4342 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4343 + (code != SET))))
4344 {
4345 if (code == SET)
4346 {
4347 /* Currently SET is the only monadic value for CODE, all
4348 the rest are dyadic. */
4349 if (TARGET_USE_MOVT)
4350 arm_emit_movpair (target, GEN_INT (val));
4351 else
4352 emit_set_insn (target, GEN_INT (val));
4353
4354 return 1;
4355 }
4356 else
4357 {
4358 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4359
4360 if (TARGET_USE_MOVT)
4361 arm_emit_movpair (temp, GEN_INT (val));
4362 else
4363 emit_set_insn (temp, GEN_INT (val));
4364
4365 /* For MINUS, the value is the minuend (VAL - source), since we never
4366 have subtraction of a constant. */
4367 if (code == MINUS)
4368 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4369 else
4370 emit_set_insn (target,
4371 gen_rtx_fmt_ee (code, mode, source, temp));
4372 return 2;
4373 }
4374 }
4375 }
4376
4377 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4378 1);
4379 }
4380
4381 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4382 ARM/THUMB2 immediates and add up to VAL.
4383 The function return value gives the number of insns required. */
4384 static int
4385 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4386 struct four_ints *return_sequence)
4387 {
4388 int best_consecutive_zeros = 0;
4389 int i;
4390 int best_start = 0;
4391 int insns1, insns2;
4392 struct four_ints tmp_sequence;
4393
4394 /* If we aren't targeting ARM, the best place to start is always at
4395 the bottom, otherwise look more closely. */
4396 if (TARGET_ARM)
4397 {
4398 for (i = 0; i < 32; i += 2)
4399 {
4400 int consecutive_zeros = 0;
4401
4402 if (!(val & (3 << i)))
4403 {
4404 while ((i < 32) && !(val & (3 << i)))
4405 {
4406 consecutive_zeros += 2;
4407 i += 2;
4408 }
4409 if (consecutive_zeros > best_consecutive_zeros)
4410 {
4411 best_consecutive_zeros = consecutive_zeros;
4412 best_start = i - consecutive_zeros;
4413 }
4414 i -= 2;
4415 }
4416 }
4417 }
4418
4419 /* So long as it won't require any more insns to do so, it's
4420 desirable to emit a small constant (in bits 0...9) in the last
4421 insn. This way there is more chance that it can be combined with
4422 a later addressing insn to form a pre-indexed load or store
4423 operation. Consider:
4424
4425 *((volatile int *)0xe0000100) = 1;
4426 *((volatile int *)0xe0000110) = 2;
4427
4428 We want this to wind up as:
4429
4430 mov rA, #0xe0000000
4431 mov rB, #1
4432 str rB, [rA, #0x100]
4433 mov rB, #2
4434 str rB, [rA, #0x110]
4435
4436 rather than having to synthesize both large constants from scratch.
4437
4438 Therefore, we calculate how many insns would be required to emit
4439 the constant starting from `best_start', and also starting from
4440 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4441 yield a shorter sequence, we may as well use zero. */
4442 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4443 if (best_start != 0
4444 && ((HOST_WIDE_INT_1U << best_start) < val))
4445 {
4446 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4447 if (insns2 <= insns1)
4448 {
4449 *return_sequence = tmp_sequence;
4450 insns1 = insns2;
4451 }
4452 }
4453
4454 return insns1;
4455 }
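/* For illustration: a SET of 0x00ffff00 is not a single valid ARM-mode
   immediate, but the routine above can return the two-element sequence
   { 0x00ff0000, 0x0000ff00 } (both valid rotated 8-bit immediates), which
   the caller then materializes with two instructions, e.g. a mov of the
   first piece followed by an add/orr of the second.  */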
4456
4457 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4458 static int
4459 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4460 struct four_ints *return_sequence, int i)
4461 {
4462 int remainder = val & 0xffffffff;
4463 int insns = 0;
4464
4465 /* Try and find a way of doing the job in either two or three
4466 instructions.
4467
4468 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4469 location. We start at position I. This may be the MSB, or
4470 optimal_immediate_sequence may have positioned it at the largest block
4471 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4472 wrapping around to the top of the word when we drop off the bottom.
4473 In the worst case this code should produce no more than four insns.
4474
4475 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4476 constants, shifted to any arbitrary location. We should always start
4477 at the MSB. */
4478 do
4479 {
4480 int end;
4481 unsigned int b1, b2, b3, b4;
4482 unsigned HOST_WIDE_INT result;
4483 int loc;
4484
4485 gcc_assert (insns < 4);
4486
4487 if (i <= 0)
4488 i += 32;
4489
4490 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4491 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4492 {
4493 loc = i;
4494 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4495 /* We can use addw/subw for the last 12 bits. */
4496 result = remainder;
4497 else
4498 {
4499 /* Use an 8-bit shifted/rotated immediate. */
4500 end = i - 8;
4501 if (end < 0)
4502 end += 32;
4503 result = remainder & ((0x0ff << end)
4504 | ((i < end) ? (0xff >> (32 - end))
4505 : 0));
4506 i -= 8;
4507 }
4508 }
4509 else
4510 {
4511 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4512 arbitrary shifts. */
4513 i -= TARGET_ARM ? 2 : 1;
4514 continue;
4515 }
4516
4517 /* Next, see if we can do a better job with a thumb2 replicated
4518 constant.
4519
4520 We do it this way around to catch the cases like 0x01F001E0 where
4521 two 8-bit immediates would work, but a replicated constant would
4522 make it worse.
4523
4524 TODO: 16-bit constants that don't clear all the bits, but still win.
4525 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4526 if (TARGET_THUMB2)
4527 {
4528 b1 = (remainder & 0xff000000) >> 24;
4529 b2 = (remainder & 0x00ff0000) >> 16;
4530 b3 = (remainder & 0x0000ff00) >> 8;
4531 b4 = remainder & 0xff;
4532
4533 if (loc > 24)
4534 {
4535 /* The 8-bit immediate already found clears b1 (and maybe b2),
4536 but must leave b3 and b4 alone. */
4537
4538 /* First try to find a 32-bit replicated constant that clears
4539 almost everything. We can assume that we can't do it in one,
4540 or else we wouldn't be here. */
4541 unsigned int tmp = b1 & b2 & b3 & b4;
4542 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4543 + (tmp << 24);
4544 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4545 + (tmp == b3) + (tmp == b4);
4546 if (tmp
4547 && (matching_bytes >= 3
4548 || (matching_bytes == 2
4549 && const_ok_for_op (remainder & ~tmp2, code))))
4550 {
4551 /* At least 3 of the bytes match, and the fourth has at
4552 least as many bits set, or two of the bytes match
4553 and it will only require one more insn to finish. */
4554 result = tmp2;
4555 i = tmp != b1 ? 32
4556 : tmp != b2 ? 24
4557 : tmp != b3 ? 16
4558 : 8;
4559 }
4560
4561 /* Second, try to find a 16-bit replicated constant that can
4562 leave three of the bytes clear. If b2 or b4 is already
4563 zero, then we can. If the 8-bit from above would not
4564 clear b2 anyway, then we still win. */
4565 else if (b1 == b3 && (!b2 || !b4
4566 || (remainder & 0x00ff0000 & ~result)))
4567 {
4568 result = remainder & 0xff00ff00;
4569 i = 24;
4570 }
4571 }
4572 else if (loc > 16)
4573 {
4574 /* The 8-bit immediate already found clears b2 (and maybe b3)
4575 and we don't get here unless b1 is already clear, but it will
4576 leave b4 unchanged. */
4577
4578 /* If we can clear b2 and b4 at once, then we win, since the
4579 8-bits couldn't possibly reach that far. */
4580 if (b2 == b4)
4581 {
4582 result = remainder & 0x00ff00ff;
4583 i = 16;
4584 }
4585 }
4586 }
4587
4588 return_sequence->i[insns++] = result;
4589 remainder &= ~result;
4590
4591 if (code == SET || code == MINUS)
4592 code = PLUS;
4593 }
4594 while (remainder);
4595
4596 return insns;
4597 }
4598
4599 /* Emit an instruction with the indicated PATTERN. If COND is
4600 non-NULL, conditionalize the execution of the instruction on COND
4601 being true. */
4602
4603 static void
4604 emit_constant_insn (rtx cond, rtx pattern)
4605 {
4606 if (cond)
4607 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4608 emit_insn (pattern);
4609 }
4610
4611 /* As above, but extra parameter GENERATE which, if clear, suppresses
4612 RTL generation. */
4613
4614 static int
4615 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4616 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4617 int subtargets, int generate)
4618 {
4619 int can_invert = 0;
4620 int can_negate = 0;
4621 int final_invert = 0;
4622 int i;
4623 int set_sign_bit_copies = 0;
4624 int clear_sign_bit_copies = 0;
4625 int clear_zero_bit_copies = 0;
4626 int set_zero_bit_copies = 0;
4627 int insns = 0, neg_insns, inv_insns;
4628 unsigned HOST_WIDE_INT temp1, temp2;
4629 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4630 struct four_ints *immediates;
4631 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4632
4633 /* Find out which operations are safe for a given CODE. Also do a quick
4634 check for degenerate cases; these can occur when DImode operations
4635 are split. */
4636 switch (code)
4637 {
4638 case SET:
4639 can_invert = 1;
4640 break;
4641
4642 case PLUS:
4643 can_negate = 1;
4644 break;
4645
4646 case IOR:
4647 if (remainder == 0xffffffff)
4648 {
4649 if (generate)
4650 emit_constant_insn (cond,
4651 gen_rtx_SET (target,
4652 GEN_INT (ARM_SIGN_EXTEND (val))));
4653 return 1;
4654 }
4655
4656 if (remainder == 0)
4657 {
4658 if (reload_completed && rtx_equal_p (target, source))
4659 return 0;
4660
4661 if (generate)
4662 emit_constant_insn (cond, gen_rtx_SET (target, source));
4663 return 1;
4664 }
4665 break;
4666
4667 case AND:
4668 if (remainder == 0)
4669 {
4670 if (generate)
4671 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4672 return 1;
4673 }
4674 if (remainder == 0xffffffff)
4675 {
4676 if (reload_completed && rtx_equal_p (target, source))
4677 return 0;
4678 if (generate)
4679 emit_constant_insn (cond, gen_rtx_SET (target, source));
4680 return 1;
4681 }
4682 can_invert = 1;
4683 break;
4684
4685 case XOR:
4686 if (remainder == 0)
4687 {
4688 if (reload_completed && rtx_equal_p (target, source))
4689 return 0;
4690 if (generate)
4691 emit_constant_insn (cond, gen_rtx_SET (target, source));
4692 return 1;
4693 }
4694
4695 if (remainder == 0xffffffff)
4696 {
4697 if (generate)
4698 emit_constant_insn (cond,
4699 gen_rtx_SET (target,
4700 gen_rtx_NOT (mode, source)));
4701 return 1;
4702 }
4703 final_invert = 1;
4704 break;
4705
4706 case MINUS:
4707 /* We treat MINUS as (val - source), since (source - val) is always
4708 passed as (source + (-val)). */
4709 if (remainder == 0)
4710 {
4711 if (generate)
4712 emit_constant_insn (cond,
4713 gen_rtx_SET (target,
4714 gen_rtx_NEG (mode, source)));
4715 return 1;
4716 }
4717 if (const_ok_for_arm (val))
4718 {
4719 if (generate)
4720 emit_constant_insn (cond,
4721 gen_rtx_SET (target,
4722 gen_rtx_MINUS (mode, GEN_INT (val),
4723 source)));
4724 return 1;
4725 }
4726
4727 break;
4728
4729 default:
4730 gcc_unreachable ();
4731 }
4732
4733 /* If we can do it in one insn get out quickly. */
4734 if (const_ok_for_op (val, code))
4735 {
4736 if (generate)
4737 emit_constant_insn (cond,
4738 gen_rtx_SET (target,
4739 (source
4740 ? gen_rtx_fmt_ee (code, mode, source,
4741 GEN_INT (val))
4742 : GEN_INT (val))));
4743 return 1;
4744 }
4745
4746 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4747 insn. */
4748 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4749 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4750 {
4751 if (generate)
4752 {
4753 if (mode == SImode && i == 16)
4754 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4755 smaller insn. */
4756 emit_constant_insn (cond,
4757 gen_zero_extendhisi2
4758 (target, gen_lowpart (HImode, source)));
4759 else
4760 /* Extz only supports SImode, but we can coerce the operands
4761 into that mode. */
4762 emit_constant_insn (cond,
4763 gen_extzv_t2 (gen_lowpart (SImode, target),
4764 gen_lowpart (SImode, source),
4765 GEN_INT (i), const0_rtx));
4766 }
4767
4768 return 1;
4769 }
4770
4771 /* Calculate a few attributes that may be useful for specific
4772 optimizations. */
4773 /* Count number of leading zeros. */
4774 for (i = 31; i >= 0; i--)
4775 {
4776 if ((remainder & (1 << i)) == 0)
4777 clear_sign_bit_copies++;
4778 else
4779 break;
4780 }
4781
4782 /* Count number of leading 1's. */
4783 for (i = 31; i >= 0; i--)
4784 {
4785 if ((remainder & (1 << i)) != 0)
4786 set_sign_bit_copies++;
4787 else
4788 break;
4789 }
4790
4791 /* Count number of trailing zeros. */
4792 for (i = 0; i <= 31; i++)
4793 {
4794 if ((remainder & (1 << i)) == 0)
4795 clear_zero_bit_copies++;
4796 else
4797 break;
4798 }
4799
4800 /* Count number of trailing 1's. */
4801 for (i = 0; i <= 31; i++)
4802 {
4803 if ((remainder & (1 << i)) != 0)
4804 set_zero_bit_copies++;
4805 else
4806 break;
4807 }
4808
4809 switch (code)
4810 {
4811 case SET:
4812 /* See if we can do this by sign_extending a constant that is known
4813 to be negative. This is a good way of doing it, since the shift
4814 may well merge into a subsequent insn. */
4815 if (set_sign_bit_copies > 1)
4816 {
4817 if (const_ok_for_arm
4818 (temp1 = ARM_SIGN_EXTEND (remainder
4819 << (set_sign_bit_copies - 1))))
4820 {
4821 if (generate)
4822 {
4823 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4824 emit_constant_insn (cond,
4825 gen_rtx_SET (new_src, GEN_INT (temp1)));
4826 emit_constant_insn (cond,
4827 gen_ashrsi3 (target, new_src,
4828 GEN_INT (set_sign_bit_copies - 1)));
4829 }
4830 return 2;
4831 }
4832 /* For an inverted constant, we will need to set the low bits,
4833 these will be shifted out of harm's way. */
4834 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4835 if (const_ok_for_arm (~temp1))
4836 {
4837 if (generate)
4838 {
4839 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4840 emit_constant_insn (cond,
4841 gen_rtx_SET (new_src, GEN_INT (temp1)));
4842 emit_constant_insn (cond,
4843 gen_ashrsi3 (target, new_src,
4844 GEN_INT (set_sign_bit_copies - 1)));
4845 }
4846 return 2;
4847 }
4848 }
4849
4850 /* See if we can calculate the value as the difference between two
4851 valid immediates. */
4852 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4853 {
4854 int topshift = clear_sign_bit_copies & ~1;
4855
4856 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4857 & (0xff000000 >> topshift));
4858
4859 /* If temp1 is zero, then that means the 9 most significant
4860 bits of remainder were 1 and we've caused it to overflow.
4861 When topshift is 0 we don't need to do anything since we
4862 can borrow from 'bit 32'. */
4863 if (temp1 == 0 && topshift != 0)
4864 temp1 = 0x80000000 >> (topshift - 1);
4865
4866 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4867
4868 if (const_ok_for_arm (temp2))
4869 {
4870 if (generate)
4871 {
4872 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4873 emit_constant_insn (cond,
4874 gen_rtx_SET (new_src, GEN_INT (temp1)));
4875 emit_constant_insn (cond,
4876 gen_addsi3 (target, new_src,
4877 GEN_INT (-temp2)));
4878 }
4879
4880 return 2;
4881 }
4882 }
4883
4884 /* See if we can generate this by setting the bottom (or the top)
4885 16 bits, and then shifting these into the other half of the
4886 word. We only look for the simplest cases, to do more would cost
4887 too much. Be careful, however, not to generate this when the
4888 alternative would take fewer insns. */
4889 if (val & 0xffff0000)
4890 {
4891 temp1 = remainder & 0xffff0000;
4892 temp2 = remainder & 0x0000ffff;
4893
4894 /* Overlaps outside this range are best done using other methods. */
4895 for (i = 9; i < 24; i++)
4896 {
4897 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4898 && !const_ok_for_arm (temp2))
4899 {
4900 rtx new_src = (subtargets
4901 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4902 : target);
4903 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4904 source, subtargets, generate);
4905 source = new_src;
4906 if (generate)
4907 emit_constant_insn
4908 (cond,
4909 gen_rtx_SET
4910 (target,
4911 gen_rtx_IOR (mode,
4912 gen_rtx_ASHIFT (mode, source,
4913 GEN_INT (i)),
4914 source)));
4915 return insns + 1;
4916 }
4917 }
4918
4919 /* Don't duplicate cases already considered. */
4920 for (i = 17; i < 24; i++)
4921 {
4922 if (((temp1 | (temp1 >> i)) == remainder)
4923 && !const_ok_for_arm (temp1))
4924 {
4925 rtx new_src = (subtargets
4926 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4927 : target);
4928 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4929 source, subtargets, generate);
4930 source = new_src;
4931 if (generate)
4932 emit_constant_insn
4933 (cond,
4934 gen_rtx_SET (target,
4935 gen_rtx_IOR
4936 (mode,
4937 gen_rtx_LSHIFTRT (mode, source,
4938 GEN_INT (i)),
4939 source)));
4940 return insns + 1;
4941 }
4942 }
4943 }
4944 break;
4945
4946 case IOR:
4947 case XOR:
4948 /* If we have IOR or XOR, and the constant can be loaded in a
4949 single instruction, and we can find a temporary to put it in,
4950 then this can be done in two instructions instead of 3-4. */
4951 if (subtargets
4952 /* TARGET can't be NULL if SUBTARGETS is 0 */
4953 || (reload_completed && !reg_mentioned_p (target, source)))
4954 {
4955 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4956 {
4957 if (generate)
4958 {
4959 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4960
4961 emit_constant_insn (cond,
4962 gen_rtx_SET (sub, GEN_INT (val)));
4963 emit_constant_insn (cond,
4964 gen_rtx_SET (target,
4965 gen_rtx_fmt_ee (code, mode,
4966 source, sub)));
4967 }
4968 return 2;
4969 }
4970 }
4971
4972 if (code == XOR)
4973 break;
4974
4975 	  /* Convert
4976 	     x = y | constant (which is composed of set_sign_bit_copies leading 1s
4977 	     followed by 0s, e.g. 0xfff00000) into
4978 	     x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4979 
4980 	     This can be done in 2 instructions by using shifts with mov or mvn.
4981 	     e.g. for
4982 	     x = x | 0xfff00000;
4983 	     we generate
4984 	     mvn	r0, r0, asl #12
4985 	     mvn	r0, r0, lsr #12  */
4986 if (set_sign_bit_copies > 8
4987 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4988 {
4989 if (generate)
4990 {
4991 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4992 rtx shift = GEN_INT (set_sign_bit_copies);
4993
4994 emit_constant_insn
4995 (cond,
4996 gen_rtx_SET (sub,
4997 gen_rtx_NOT (mode,
4998 gen_rtx_ASHIFT (mode,
4999 source,
5000 shift))));
5001 emit_constant_insn
5002 (cond,
5003 gen_rtx_SET (target,
5004 gen_rtx_NOT (mode,
5005 gen_rtx_LSHIFTRT (mode, sub,
5006 shift))));
5007 }
5008 return 2;
5009 }
5010
5011 	  /* Convert
5012 	     x = y | constant (which has set_zero_bit_copies trailing ones)
5013 	     to
5014 	     x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5015 
5016 	     E.g. for r0 = r0 | 0xfff we generate
5017 	       mvn	r0, r0, lsr #12
5018 	       mvn	r0, r0, asl #12
5019 
5020 	  */
5021 if (set_zero_bit_copies > 8
5022 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5023 {
5024 if (generate)
5025 {
5026 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5027 rtx shift = GEN_INT (set_zero_bit_copies);
5028
5029 emit_constant_insn
5030 (cond,
5031 gen_rtx_SET (sub,
5032 gen_rtx_NOT (mode,
5033 gen_rtx_LSHIFTRT (mode,
5034 source,
5035 shift))));
5036 emit_constant_insn
5037 (cond,
5038 gen_rtx_SET (target,
5039 gen_rtx_NOT (mode,
5040 gen_rtx_ASHIFT (mode, sub,
5041 shift))));
5042 }
5043 return 2;
5044 }
5045
5046 /* This will never be reached for Thumb2 because orn is a valid
5047 instruction. This is for Thumb1 and the ARM 32 bit cases.
5048
5049 x = y | constant (such that ~constant is a valid constant)
5050 Transform this to
5051 x = ~(~y & ~constant).
5052 */
5053 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5054 {
5055 if (generate)
5056 {
5057 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5058 emit_constant_insn (cond,
5059 gen_rtx_SET (sub,
5060 gen_rtx_NOT (mode, source)));
5061 source = sub;
5062 if (subtargets)
5063 sub = gen_reg_rtx (mode);
5064 emit_constant_insn (cond,
5065 gen_rtx_SET (sub,
5066 gen_rtx_AND (mode, source,
5067 GEN_INT (temp1))));
5068 emit_constant_insn (cond,
5069 gen_rtx_SET (target,
5070 gen_rtx_NOT (mode, sub)));
5071 }
5072 return 3;
5073 }
5074 break;
5075
5076 case AND:
5077       /* See if two shifts will do the work of two or more insns.  */
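      /* Illustrative sketch (not from the original source): for
	 x &= 0xffff we have clear_sign_bit_copies == 16, so (assuming no
	 cheaper single-instruction form such as uxth or a movw-based
	 sequence was usable earlier) we can emit
	     lsl	rT, rS, #16
	     lsr	rD, rT, #16
	 which clears the top 16 bits in two instructions.  */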
5078 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5079 {
5080 HOST_WIDE_INT shift_mask = ((0xffffffff
5081 << (32 - clear_sign_bit_copies))
5082 & 0xffffffff);
5083
5084 if ((remainder | shift_mask) != 0xffffffff)
5085 {
5086 HOST_WIDE_INT new_val
5087 = ARM_SIGN_EXTEND (remainder | shift_mask);
5088
5089 if (generate)
5090 {
5091 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5092 insns = arm_gen_constant (AND, SImode, cond, new_val,
5093 new_src, source, subtargets, 1);
5094 source = new_src;
5095 }
5096 else
5097 {
5098 rtx targ = subtargets ? NULL_RTX : target;
5099 insns = arm_gen_constant (AND, mode, cond, new_val,
5100 targ, source, subtargets, 0);
5101 }
5102 }
5103
5104 if (generate)
5105 {
5106 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5107 rtx shift = GEN_INT (clear_sign_bit_copies);
5108
5109 emit_insn (gen_ashlsi3 (new_src, source, shift));
5110 emit_insn (gen_lshrsi3 (target, new_src, shift));
5111 }
5112
5113 return insns + 2;
5114 }
5115
5116 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5117 {
5118 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5119
5120 if ((remainder | shift_mask) != 0xffffffff)
5121 {
5122 HOST_WIDE_INT new_val
5123 = ARM_SIGN_EXTEND (remainder | shift_mask);
5124 if (generate)
5125 {
5126 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5127
5128 insns = arm_gen_constant (AND, mode, cond, new_val,
5129 new_src, source, subtargets, 1);
5130 source = new_src;
5131 }
5132 else
5133 {
5134 rtx targ = subtargets ? NULL_RTX : target;
5135
5136 insns = arm_gen_constant (AND, mode, cond, new_val,
5137 targ, source, subtargets, 0);
5138 }
5139 }
5140
5141 if (generate)
5142 {
5143 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5144 rtx shift = GEN_INT (clear_zero_bit_copies);
5145
5146 emit_insn (gen_lshrsi3 (new_src, source, shift));
5147 emit_insn (gen_ashlsi3 (target, new_src, shift));
5148 }
5149
5150 return insns + 2;
5151 }
5152
5153 break;
5154
5155 default:
5156 break;
5157 }
5158
5159 /* Calculate what the instruction sequences would be if we generated it
5160 normally, negated, or inverted. */
5161 if (code == AND)
5162 /* AND cannot be split into multiple insns, so invert and use BIC. */
5163 insns = 99;
5164 else
5165 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5166
5167 if (can_negate)
5168 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5169 &neg_immediates);
5170 else
5171 neg_insns = 99;
5172
5173 if (can_invert || final_invert)
5174 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5175 &inv_immediates);
5176 else
5177 inv_insns = 99;
5178
5179 immediates = &pos_immediates;
5180
5181 /* Is the negated immediate sequence more efficient? */
5182 if (neg_insns < insns && neg_insns <= inv_insns)
5183 {
5184 insns = neg_insns;
5185 immediates = &neg_immediates;
5186 }
5187 else
5188 can_negate = 0;
5189
5190 /* Is the inverted immediate sequence more efficient?
5191 We must allow for an extra NOT instruction for XOR operations, although
5192 there is some chance that the final 'mvn' will get optimized later. */
5193 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5194 {
5195 insns = inv_insns;
5196 immediates = &inv_immediates;
5197 }
5198 else
5199 {
5200 can_invert = 0;
5201 final_invert = 0;
5202 }
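  /* Illustrative sketch (not from the original source): for a PLUS of
     0xffffff00 (-256), the constant itself is not a valid ARM immediate
     but its negation 0x100 is, so (if an earlier shortcut has not
     already handled it) the negated sequence wins above and the loop
     below emits a single (plus source -256), i.e.
	 sub	rD, rS, #256
     rather than synthesizing 0xffffff00 piecewise.  */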
5203
5204 /* Now output the chosen sequence as instructions. */
5205 if (generate)
5206 {
5207 for (i = 0; i < insns; i++)
5208 {
5209 rtx new_src, temp1_rtx;
5210
5211 temp1 = immediates->i[i];
5212
5213 if (code == SET || code == MINUS)
5214 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5215 else if ((final_invert || i < (insns - 1)) && subtargets)
5216 new_src = gen_reg_rtx (mode);
5217 else
5218 new_src = target;
5219
5220 if (can_invert)
5221 temp1 = ~temp1;
5222 else if (can_negate)
5223 temp1 = -temp1;
5224
5225 temp1 = trunc_int_for_mode (temp1, mode);
5226 temp1_rtx = GEN_INT (temp1);
5227
5228 if (code == SET)
5229 ;
5230 else if (code == MINUS)
5231 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5232 else
5233 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5234
5235 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5236 source = new_src;
5237
5238 if (code == SET)
5239 {
5240 can_negate = can_invert;
5241 can_invert = 0;
5242 code = PLUS;
5243 }
5244 else if (code == MINUS)
5245 code = PLUS;
5246 }
5247 }
5248
5249 if (final_invert)
5250 {
5251 if (generate)
5252 emit_constant_insn (cond, gen_rtx_SET (target,
5253 gen_rtx_NOT (mode, source)));
5254 insns++;
5255 }
5256
5257 return insns;
5258 }
5259
5260 /* Canonicalize a comparison so that we are more likely to recognize it.
5261 This can be done for a few constant compares, where we can make the
5262 immediate value easier to load. */
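/* Illustrative sketch (not from the original source): neither 0x1fff nor
   -0x1fff is a valid ARM immediate, but 0x2000 is, so a comparison such
   as (GT x 0x1fff) is canonicalized below to (GE x 0x2000), which can
   then be implemented with a single cmp against #0x2000.  */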
5263
5264 static void
5265 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5266 bool op0_preserve_value)
5267 {
5268 machine_mode mode;
5269 unsigned HOST_WIDE_INT i, maxval;
5270
5271 mode = GET_MODE (*op0);
5272 if (mode == VOIDmode)
5273 mode = GET_MODE (*op1);
5274
5275 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5276
5277 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5278 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5279 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5280 for GTU/LEU in Thumb mode. */
5281 if (mode == DImode)
5282 {
5283
5284 if (*code == GT || *code == LE
5285 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5286 {
5287 /* Missing comparison. First try to use an available
5288 comparison. */
5289 if (CONST_INT_P (*op1))
5290 {
5291 i = INTVAL (*op1);
5292 switch (*code)
5293 {
5294 case GT:
5295 case LE:
5296 if (i != maxval
5297 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5298 {
5299 *op1 = GEN_INT (i + 1);
5300 *code = *code == GT ? GE : LT;
5301 return;
5302 }
5303 break;
5304 case GTU:
5305 case LEU:
5306 if (i != ~((unsigned HOST_WIDE_INT) 0)
5307 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5308 {
5309 *op1 = GEN_INT (i + 1);
5310 *code = *code == GTU ? GEU : LTU;
5311 return;
5312 }
5313 break;
5314 default:
5315 gcc_unreachable ();
5316 }
5317 }
5318
5319 /* If that did not work, reverse the condition. */
5320 if (!op0_preserve_value)
5321 {
5322 std::swap (*op0, *op1);
5323 *code = (int)swap_condition ((enum rtx_code)*code);
5324 }
5325 }
5326 return;
5327 }
5328
5329 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5330 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5331 to facilitate possible combining with a cmp into 'ands'. */
5332 if (mode == SImode
5333 && GET_CODE (*op0) == ZERO_EXTEND
5334 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5335 && GET_MODE (XEXP (*op0, 0)) == QImode
5336 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5337 && subreg_lowpart_p (XEXP (*op0, 0))
5338 && *op1 == const0_rtx)
5339 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5340 GEN_INT (255));
5341
5342 /* Comparisons smaller than DImode. Only adjust comparisons against
5343 an out-of-range constant. */
5344 if (!CONST_INT_P (*op1)
5345 || const_ok_for_arm (INTVAL (*op1))
5346 || const_ok_for_arm (- INTVAL (*op1)))
5347 return;
5348
5349 i = INTVAL (*op1);
5350
5351 switch (*code)
5352 {
5353 case EQ:
5354 case NE:
5355 return;
5356
5357 case GT:
5358 case LE:
5359 if (i != maxval
5360 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5361 {
5362 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5363 *code = *code == GT ? GE : LT;
5364 return;
5365 }
5366 break;
5367
5368 case GE:
5369 case LT:
5370 if (i != ~maxval
5371 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5372 {
5373 *op1 = GEN_INT (i - 1);
5374 *code = *code == GE ? GT : LE;
5375 return;
5376 }
5377 break;
5378
5379 case GTU:
5380 case LEU:
5381 if (i != ~((unsigned HOST_WIDE_INT) 0)
5382 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5383 {
5384 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5385 *code = *code == GTU ? GEU : LTU;
5386 return;
5387 }
5388 break;
5389
5390 case GEU:
5391 case LTU:
5392 if (i != 0
5393 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5394 {
5395 *op1 = GEN_INT (i - 1);
5396 *code = *code == GEU ? GTU : LEU;
5397 return;
5398 }
5399 break;
5400
5401 default:
5402 gcc_unreachable ();
5403 }
5404 }
5405
5406
5407 /* Define how to find the value returned by a function. */
5408
5409 static rtx
5410 arm_function_value (const_tree type, const_tree func,
5411 bool outgoing ATTRIBUTE_UNUSED)
5412 {
5413 machine_mode mode;
5414 int unsignedp ATTRIBUTE_UNUSED;
5415 rtx r ATTRIBUTE_UNUSED;
5416
5417 mode = TYPE_MODE (type);
5418
5419 if (TARGET_AAPCS_BASED)
5420 return aapcs_allocate_return_reg (mode, type, func);
5421
5422 /* Promote integer types. */
5423 if (INTEGRAL_TYPE_P (type))
5424 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5425
5426 /* Promotes small structs returned in a register to full-word size
5427 for big-endian AAPCS. */
5428 if (arm_return_in_msb (type))
5429 {
5430 HOST_WIDE_INT size = int_size_in_bytes (type);
5431 if (size % UNITS_PER_WORD != 0)
5432 {
5433 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5434 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5435 }
5436 }
5437
5438 return arm_libcall_value_1 (mode);
5439 }
5440
5441 /* libcall hashtable helpers. */
5442
5443 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5444 {
5445 static inline hashval_t hash (const rtx_def *);
5446 static inline bool equal (const rtx_def *, const rtx_def *);
5447 static inline void remove (rtx_def *);
5448 };
5449
5450 inline bool
5451 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5452 {
5453 return rtx_equal_p (p1, p2);
5454 }
5455
5456 inline hashval_t
5457 libcall_hasher::hash (const rtx_def *p1)
5458 {
5459 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5460 }
5461
5462 typedef hash_table<libcall_hasher> libcall_table_type;
5463
5464 static void
5465 add_libcall (libcall_table_type *htab, rtx libcall)
5466 {
5467 *htab->find_slot (libcall, INSERT) = libcall;
5468 }
5469
5470 static bool
5471 arm_libcall_uses_aapcs_base (const_rtx libcall)
5472 {
5473 static bool init_done = false;
5474 static libcall_table_type *libcall_htab = NULL;
5475
5476 if (!init_done)
5477 {
5478 init_done = true;
5479
5480 libcall_htab = new libcall_table_type (31);
5481 add_libcall (libcall_htab,
5482 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5483 add_libcall (libcall_htab,
5484 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5485 add_libcall (libcall_htab,
5486 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5487 add_libcall (libcall_htab,
5488 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5489
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5492 add_libcall (libcall_htab,
5493 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5494 add_libcall (libcall_htab,
5495 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5496 add_libcall (libcall_htab,
5497 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5498
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5501 add_libcall (libcall_htab,
5502 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5503 add_libcall (libcall_htab,
5504 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5505 add_libcall (libcall_htab,
5506 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5507 add_libcall (libcall_htab,
5508 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5509 add_libcall (libcall_htab,
5510 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5511 add_libcall (libcall_htab,
5512 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5513 add_libcall (libcall_htab,
5514 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5515
5516 /* Values from double-precision helper functions are returned in core
5517 registers if the selected core only supports single-precision
5518 arithmetic, even if we are using the hard-float ABI. The same is
5519 true for single-precision helpers, but we will never be using the
5520 hard-float ABI on a CPU which doesn't support single-precision
5521 operations in hardware. */
5522 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5523 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5524 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5525 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5526 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5527 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5528 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5529 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5530 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5531 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5532 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5533 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5534 SFmode));
5535 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5536 DFmode));
5537 add_libcall (libcall_htab,
5538 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5539 }
5540
5541 return libcall && libcall_htab->find (libcall) != NULL;
5542 }
5543
5544 static rtx
5545 arm_libcall_value_1 (machine_mode mode)
5546 {
5547 if (TARGET_AAPCS_BASED)
5548 return aapcs_libcall_value (mode);
5549 else if (TARGET_IWMMXT_ABI
5550 && arm_vector_mode_supported_p (mode))
5551 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5552 else
5553 return gen_rtx_REG (mode, ARG_REGISTER (1));
5554 }
5555
5556 /* Define how to find the value returned by a library function
5557 assuming the value has mode MODE. */
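/* Illustrative sketch (not from the original source): on a core with a
   single-precision-only FPU used with the hard-float ABI, DFmode
   arithmetic goes through the __aeabi_* helpers recorded above, and
   their double results come back in r0/r1 rather than in a VFP
   register; arm_libcall_uses_aapcs_base is what detects that case.  */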
5558
5559 static rtx
5560 arm_libcall_value (machine_mode mode, const_rtx libcall)
5561 {
5562 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5563 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5564 {
5565 /* The following libcalls return their result in integer registers,
5566 even though they return a floating point value. */
5567 if (arm_libcall_uses_aapcs_base (libcall))
5568 return gen_rtx_REG (mode, ARG_REGISTER(1));
5569
5570 }
5571
5572 return arm_libcall_value_1 (mode);
5573 }
5574
5575 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5576
5577 static bool
5578 arm_function_value_regno_p (const unsigned int regno)
5579 {
5580 if (regno == ARG_REGISTER (1)
5581 || (TARGET_32BIT
5582 && TARGET_AAPCS_BASED
5583 && TARGET_HARD_FLOAT
5584 && regno == FIRST_VFP_REGNUM)
5585 || (TARGET_IWMMXT_ABI
5586 && regno == FIRST_IWMMXT_REGNUM))
5587 return true;
5588
5589 return false;
5590 }
5591
5592 /* Determine the amount of memory needed to store the possible return
5593 registers of an untyped call. */
5594 int
5595 arm_apply_result_size (void)
5596 {
5597 int size = 16;
5598
5599 if (TARGET_32BIT)
5600 {
5601 if (TARGET_HARD_FLOAT_ABI)
5602 size += 32;
5603 if (TARGET_IWMMXT_ABI)
5604 size += 8;
5605 }
5606
5607 return size;
5608 }
5609
5610 /* Decide whether TYPE should be returned in memory (true)
5611 or in a register (false). FNTYPE is the type of the function making
5612 the call. */
5613 static bool
5614 arm_return_in_memory (const_tree type, const_tree fntype)
5615 {
5616 HOST_WIDE_INT size;
5617
5618 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5619
5620 if (TARGET_AAPCS_BASED)
5621 {
5622       /* Simple, non-aggregate types (i.e. not including vectors and
5623 complex) are always returned in a register (or registers).
5624 We don't care about which register here, so we can short-cut
5625 some of the detail. */
5626 if (!AGGREGATE_TYPE_P (type)
5627 && TREE_CODE (type) != VECTOR_TYPE
5628 && TREE_CODE (type) != COMPLEX_TYPE)
5629 return false;
5630
5631 /* Any return value that is no larger than one word can be
5632 returned in r0. */
5633 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5634 return false;
5635
5636 /* Check any available co-processors to see if they accept the
5637 type as a register candidate (VFP, for example, can return
5638 some aggregates in consecutive registers). These aren't
5639 available if the call is variadic. */
5640 if (aapcs_select_return_coproc (type, fntype) >= 0)
5641 return false;
5642
5643 /* Vector values should be returned using ARM registers, not
5644 memory (unless they're over 16 bytes, which will break since
5645 we only have four call-clobbered registers to play with). */
5646 if (TREE_CODE (type) == VECTOR_TYPE)
5647 return (size < 0 || size > (4 * UNITS_PER_WORD));
5648
5649 /* The rest go in memory. */
5650 return true;
5651 }
5652
5653 if (TREE_CODE (type) == VECTOR_TYPE)
5654 return (size < 0 || size > (4 * UNITS_PER_WORD));
5655
5656   if (!AGGREGATE_TYPE_P (type)
5657       && (TREE_CODE (type) != VECTOR_TYPE))
5658 /* All simple types are returned in registers. */
5659 return false;
5660
5661 if (arm_abi != ARM_ABI_APCS)
5662 {
5663 /* ATPCS and later return aggregate types in memory only if they are
5664 larger than a word (or are variable size). */
5665 return (size < 0 || size > UNITS_PER_WORD);
5666 }
5667
5668 /* For the arm-wince targets we choose to be compatible with Microsoft's
5669 ARM and Thumb compilers, which always return aggregates in memory. */
5670 #ifndef ARM_WINCE
5671 /* All structures/unions bigger than one word are returned in memory.
5672 Also catch the case where int_size_in_bytes returns -1. In this case
5673 the aggregate is either huge or of variable size, and in either case
5674 we will want to return it via memory and not in a register. */
5675 if (size < 0 || size > UNITS_PER_WORD)
5676 return true;
5677
5678 if (TREE_CODE (type) == RECORD_TYPE)
5679 {
5680 tree field;
5681
5682 /* For a struct the APCS says that we only return in a register
5683 if the type is 'integer like' and every addressable element
5684 has an offset of zero. For practical purposes this means
5685 that the structure can have at most one non bit-field element
5686 and that this element must be the first one in the structure. */
5687
5688 /* Find the first field, ignoring non FIELD_DECL things which will
5689 have been created by C++. */
5690 for (field = TYPE_FIELDS (type);
5691 field && TREE_CODE (field) != FIELD_DECL;
5692 field = DECL_CHAIN (field))
5693 continue;
5694
5695 if (field == NULL)
5696 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5697
5698 /* Check that the first field is valid for returning in a register. */
5699
5700 /* ... Floats are not allowed */
5701 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5702 return true;
5703
5704 /* ... Aggregates that are not themselves valid for returning in
5705 a register are not allowed. */
5706 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5707 return true;
5708
5709 /* Now check the remaining fields, if any. Only bitfields are allowed,
5710 since they are not addressable. */
5711 for (field = DECL_CHAIN (field);
5712 field;
5713 field = DECL_CHAIN (field))
5714 {
5715 if (TREE_CODE (field) != FIELD_DECL)
5716 continue;
5717
5718 if (!DECL_BIT_FIELD_TYPE (field))
5719 return true;
5720 }
5721
5722 return false;
5723 }
5724
5725 if (TREE_CODE (type) == UNION_TYPE)
5726 {
5727 tree field;
5728
5729 /* Unions can be returned in registers if every element is
5730 integral, or can be returned in an integer register. */
5731 for (field = TYPE_FIELDS (type);
5732 field;
5733 field = DECL_CHAIN (field))
5734 {
5735 if (TREE_CODE (field) != FIELD_DECL)
5736 continue;
5737
5738 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5739 return true;
5740
5741 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5742 return true;
5743 }
5744
5745 return false;
5746 }
5747 #endif /* not ARM_WINCE */
5748
5749 /* Return all other types in memory. */
5750 return true;
5751 }
5752
5753 const struct pcs_attribute_arg
5754 {
5755 const char *arg;
5756 enum arm_pcs value;
5757 } pcs_attribute_args[] =
5758 {
5759 {"aapcs", ARM_PCS_AAPCS},
5760 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5761 #if 0
5762 /* We could recognize these, but changes would be needed elsewhere
5763 * to implement them. */
5764 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5765 {"atpcs", ARM_PCS_ATPCS},
5766 {"apcs", ARM_PCS_APCS},
5767 #endif
5768 {NULL, ARM_PCS_UNKNOWN}
5769 };
5770
5771 static enum arm_pcs
5772 arm_pcs_from_attribute (tree attr)
5773 {
5774 const struct pcs_attribute_arg *ptr;
5775 const char *arg;
5776
5777 /* Get the value of the argument. */
5778 if (TREE_VALUE (attr) == NULL_TREE
5779 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5780 return ARM_PCS_UNKNOWN;
5781
5782 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5783
5784 /* Check it against the list of known arguments. */
5785 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5786 if (streq (arg, ptr->arg))
5787 return ptr->value;
5788
5789   /* An unrecognized PCS variant.  */
5790 return ARM_PCS_UNKNOWN;
5791 }
5792
5793 /* Get the PCS variant to use for this call. TYPE is the function's type
5794    specification, DECL is the specific declaration.  DECL may be null if
5795 the call could be indirect or if this is a library call. */
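/* A hedged usage sketch (not from the original source): declaring

       double f (double) __attribute__ ((pcs ("aapcs")));

   should make calls to f use the base variant, i.e. pass and return the
   double in core registers even when the default for the translation
   unit is "aapcs-vfp".  */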
5796 static enum arm_pcs
5797 arm_get_pcs_model (const_tree type, const_tree decl)
5798 {
5799 bool user_convention = false;
5800 enum arm_pcs user_pcs = arm_pcs_default;
5801 tree attr;
5802
5803 gcc_assert (type);
5804
5805 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5806 if (attr)
5807 {
5808 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5809 user_convention = true;
5810 }
5811
5812 if (TARGET_AAPCS_BASED)
5813 {
5814 /* Detect varargs functions. These always use the base rules
5815 (no argument is ever a candidate for a co-processor
5816 register). */
5817 bool base_rules = stdarg_p (type);
5818
5819 if (user_convention)
5820 {
5821 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5822 sorry ("non-AAPCS derived PCS variant");
5823 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5824 error ("variadic functions must use the base AAPCS variant");
5825 }
5826
5827 if (base_rules)
5828 return ARM_PCS_AAPCS;
5829 else if (user_convention)
5830 return user_pcs;
5831 else if (decl && flag_unit_at_a_time)
5832 {
5833 /* Local functions never leak outside this compilation unit,
5834 so we are free to use whatever conventions are
5835 appropriate. */
5836 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5837 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5838 if (i && i->local)
5839 return ARM_PCS_AAPCS_LOCAL;
5840 }
5841 }
5842 else if (user_convention && user_pcs != arm_pcs_default)
5843 sorry ("PCS variant");
5844
5845 /* For everything else we use the target's default. */
5846 return arm_pcs_default;
5847 }
5848
5849
5850 static void
5851 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
5852 const_tree fntype ATTRIBUTE_UNUSED,
5853 rtx libcall ATTRIBUTE_UNUSED,
5854 const_tree fndecl ATTRIBUTE_UNUSED)
5855 {
5856 /* Record the unallocated VFP registers. */
5857 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5858 pcum->aapcs_vfp_reg_alloc = 0;
5859 }
5860
5861 /* Walk down the type tree of TYPE counting consecutive base elements.
5862 If *MODEP is VOIDmode, then set it to the first valid floating point
5863 type. If a non-floating point type is found, or if a floating point
5864 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5865 otherwise return the count in the sub-tree. */
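/* Illustrative sketch (not from the original source): for

       struct pt { float x, y, z; };

   this returns 3 with *MODEP set to SFmode, i.e. a homogeneous aggregate
   of three single-precision elements; for struct { float f; int i; } it
   returns -1 because the int member does not match.  */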
5866 static int
5867 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5868 {
5869 machine_mode mode;
5870 HOST_WIDE_INT size;
5871
5872 switch (TREE_CODE (type))
5873 {
5874 case REAL_TYPE:
5875 mode = TYPE_MODE (type);
5876 if (mode != DFmode && mode != SFmode && mode != HFmode)
5877 return -1;
5878
5879 if (*modep == VOIDmode)
5880 *modep = mode;
5881
5882 if (*modep == mode)
5883 return 1;
5884
5885 break;
5886
5887 case COMPLEX_TYPE:
5888 mode = TYPE_MODE (TREE_TYPE (type));
5889 if (mode != DFmode && mode != SFmode)
5890 return -1;
5891
5892 if (*modep == VOIDmode)
5893 *modep = mode;
5894
5895 if (*modep == mode)
5896 return 2;
5897
5898 break;
5899
5900 case VECTOR_TYPE:
5901 /* Use V2SImode and V4SImode as representatives of all 64-bit
5902 and 128-bit vector types, whether or not those modes are
5903 supported with the present options. */
5904 size = int_size_in_bytes (type);
5905 switch (size)
5906 {
5907 case 8:
5908 mode = V2SImode;
5909 break;
5910 case 16:
5911 mode = V4SImode;
5912 break;
5913 default:
5914 return -1;
5915 }
5916
5917 if (*modep == VOIDmode)
5918 *modep = mode;
5919
5920 /* Vector modes are considered to be opaque: two vectors are
5921 equivalent for the purposes of being homogeneous aggregates
5922 if they are the same size. */
5923 if (*modep == mode)
5924 return 1;
5925
5926 break;
5927
5928 case ARRAY_TYPE:
5929 {
5930 int count;
5931 tree index = TYPE_DOMAIN (type);
5932
5933 /* Can't handle incomplete types nor sizes that are not
5934 fixed. */
5935 if (!COMPLETE_TYPE_P (type)
5936 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5937 return -1;
5938
5939 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5940 if (count == -1
5941 || !index
5942 || !TYPE_MAX_VALUE (index)
5943 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5944 || !TYPE_MIN_VALUE (index)
5945 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5946 || count < 0)
5947 return -1;
5948
5949 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5950 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5951
5952 /* There must be no padding. */
5953 if (wi::to_wide (TYPE_SIZE (type))
5954 != count * GET_MODE_BITSIZE (*modep))
5955 return -1;
5956
5957 return count;
5958 }
5959
5960 case RECORD_TYPE:
5961 {
5962 int count = 0;
5963 int sub_count;
5964 tree field;
5965
5966 /* Can't handle incomplete types nor sizes that are not
5967 fixed. */
5968 if (!COMPLETE_TYPE_P (type)
5969 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5970 return -1;
5971
5972 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5973 {
5974 if (TREE_CODE (field) != FIELD_DECL)
5975 continue;
5976
5977 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5978 if (sub_count < 0)
5979 return -1;
5980 count += sub_count;
5981 }
5982
5983 /* There must be no padding. */
5984 if (wi::to_wide (TYPE_SIZE (type))
5985 != count * GET_MODE_BITSIZE (*modep))
5986 return -1;
5987
5988 return count;
5989 }
5990
5991 case UNION_TYPE:
5992 case QUAL_UNION_TYPE:
5993 {
5994 /* These aren't very interesting except in a degenerate case. */
5995 int count = 0;
5996 int sub_count;
5997 tree field;
5998
5999 /* Can't handle incomplete types nor sizes that are not
6000 fixed. */
6001 if (!COMPLETE_TYPE_P (type)
6002 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6003 return -1;
6004
6005 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6006 {
6007 if (TREE_CODE (field) != FIELD_DECL)
6008 continue;
6009
6010 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6011 if (sub_count < 0)
6012 return -1;
6013 count = count > sub_count ? count : sub_count;
6014 }
6015
6016 /* There must be no padding. */
6017 if (wi::to_wide (TYPE_SIZE (type))
6018 != count * GET_MODE_BITSIZE (*modep))
6019 return -1;
6020
6021 return count;
6022 }
6023
6024 default:
6025 break;
6026 }
6027
6028 return -1;
6029 }
6030
6031 /* Return true if PCS_VARIANT should use VFP registers. */
6032 static bool
6033 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6034 {
6035 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6036 {
6037 static bool seen_thumb1_vfp = false;
6038
6039 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6040 {
6041 sorry ("Thumb-1 hard-float VFP ABI");
6042 /* sorry() is not immediately fatal, so only display this once. */
6043 seen_thumb1_vfp = true;
6044 }
6045
6046 return true;
6047 }
6048
6049 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6050 return false;
6051
6052   return (TARGET_32BIT && TARGET_HARD_FLOAT
6053 	  && (TARGET_VFP_DOUBLE || !is_double));
6054 }
6055
6056 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6057 suitable for passing or returning in VFP registers for the PCS
6058 variant selected. If it is, then *BASE_MODE is updated to contain
6059 a machine mode describing each element of the argument's type and
6060 *COUNT to hold the number of such elements. */
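/* Illustrative sketch (not from the original source): for a type such as

       struct dpair { double a, b; };

   *BASE_MODE becomes DFmode and *COUNT 2, so under the VFP variant the
   value can be passed or returned in d0-d1.  */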
6061 static bool
6062 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6063 machine_mode mode, const_tree type,
6064 machine_mode *base_mode, int *count)
6065 {
6066 machine_mode new_mode = VOIDmode;
6067
6068 /* If we have the type information, prefer that to working things
6069 out from the mode. */
6070 if (type)
6071 {
6072 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6073
6074 if (ag_count > 0 && ag_count <= 4)
6075 *count = ag_count;
6076 else
6077 return false;
6078 }
6079 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6080 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6081 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6082 {
6083 *count = 1;
6084 new_mode = mode;
6085 }
6086 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6087 {
6088 *count = 2;
6089 new_mode = (mode == DCmode ? DFmode : SFmode);
6090 }
6091 else
6092 return false;
6093
6094
6095 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6096 return false;
6097
6098 *base_mode = new_mode;
6099 return true;
6100 }
6101
6102 static bool
6103 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6104 machine_mode mode, const_tree type)
6105 {
6106 int count ATTRIBUTE_UNUSED;
6107 machine_mode ag_mode ATTRIBUTE_UNUSED;
6108
6109 if (!use_vfp_abi (pcs_variant, false))
6110 return false;
6111 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6112 &ag_mode, &count);
6113 }
6114
6115 static bool
6116 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6117 const_tree type)
6118 {
6119 if (!use_vfp_abi (pcum->pcs_variant, false))
6120 return false;
6121
6122 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6123 &pcum->aapcs_vfp_rmode,
6124 &pcum->aapcs_vfp_rcount);
6125 }
6126
6127 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6128 for the behaviour of this function. */
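/* Illustrative sketch (not from the original source): because allocation
   scans aapcs_vfp_regs_free from the bottom, the AAPCS back-filling rule
   falls out naturally; e.g. for f (float a, double b, float c) under the
   VFP variant, a lands in s0, b in d1 (s2/s3, the first aligned free
   pair), and c back-fills s1.  */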
6129
6130 static bool
6131 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6132 const_tree type ATTRIBUTE_UNUSED)
6133 {
6134 int rmode_size
6135 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6136 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6137 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6138 int regno;
6139
6140 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6141 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6142 {
6143 pcum->aapcs_vfp_reg_alloc = mask << regno;
6144 if (mode == BLKmode
6145 || (mode == TImode && ! TARGET_NEON)
6146 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6147 {
6148 int i;
6149 int rcount = pcum->aapcs_vfp_rcount;
6150 int rshift = shift;
6151 machine_mode rmode = pcum->aapcs_vfp_rmode;
6152 rtx par;
6153 if (!TARGET_NEON)
6154 {
6155 /* Avoid using unsupported vector modes. */
6156 if (rmode == V2SImode)
6157 rmode = DImode;
6158 else if (rmode == V4SImode)
6159 {
6160 rmode = DImode;
6161 rcount *= 2;
6162 rshift /= 2;
6163 }
6164 }
6165 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6166 for (i = 0; i < rcount; i++)
6167 {
6168 rtx tmp = gen_rtx_REG (rmode,
6169 FIRST_VFP_REGNUM + regno + i * rshift);
6170 tmp = gen_rtx_EXPR_LIST
6171 (VOIDmode, tmp,
6172 GEN_INT (i * GET_MODE_SIZE (rmode)));
6173 XVECEXP (par, 0, i) = tmp;
6174 }
6175
6176 pcum->aapcs_reg = par;
6177 }
6178 else
6179 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6180 return true;
6181 }
6182 return false;
6183 }
6184
6185 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6186 comment there for the behaviour of this function. */
6187
6188 static rtx
6189 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6190 machine_mode mode,
6191 const_tree type ATTRIBUTE_UNUSED)
6192 {
6193 if (!use_vfp_abi (pcs_variant, false))
6194 return NULL;
6195
6196 if (mode == BLKmode
6197 || (GET_MODE_CLASS (mode) == MODE_INT
6198 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6199 && !TARGET_NEON))
6200 {
6201 int count;
6202 machine_mode ag_mode;
6203 int i;
6204 rtx par;
6205 int shift;
6206
6207 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6208 &ag_mode, &count);
6209
6210 if (!TARGET_NEON)
6211 {
6212 if (ag_mode == V2SImode)
6213 ag_mode = DImode;
6214 else if (ag_mode == V4SImode)
6215 {
6216 ag_mode = DImode;
6217 count *= 2;
6218 }
6219 }
6220       shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6221 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6222 for (i = 0; i < count; i++)
6223 {
6224 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6225 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6226 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6227 XVECEXP (par, 0, i) = tmp;
6228 }
6229
6230 return par;
6231 }
6232
6233 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6234 }
6235
6236 static void
6237 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6238 machine_mode mode ATTRIBUTE_UNUSED,
6239 const_tree type ATTRIBUTE_UNUSED)
6240 {
6241 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6242 pcum->aapcs_vfp_reg_alloc = 0;
6243 return;
6244 }
6245
6246 #define AAPCS_CP(X) \
6247 { \
6248 aapcs_ ## X ## _cum_init, \
6249 aapcs_ ## X ## _is_call_candidate, \
6250 aapcs_ ## X ## _allocate, \
6251 aapcs_ ## X ## _is_return_candidate, \
6252 aapcs_ ## X ## _allocate_return_reg, \
6253 aapcs_ ## X ## _advance \
6254 }
6255
6256 /* Table of co-processors that can be used to pass arguments in
6257    registers.  Ideally no argument should be a candidate for more than
6258 one co-processor table entry, but the table is processed in order
6259 and stops after the first match. If that entry then fails to put
6260 the argument into a co-processor register, the argument will go on
6261 the stack. */
6262 static struct
6263 {
6264 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6265 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6266
6267 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6268 BLKmode) is a candidate for this co-processor's registers; this
6269 function should ignore any position-dependent state in
6270 CUMULATIVE_ARGS and only use call-type dependent information. */
6271 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6272
6273 /* Return true if the argument does get a co-processor register; it
6274 should set aapcs_reg to an RTX of the register allocated as is
6275 required for a return from FUNCTION_ARG. */
6276 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6277
6278 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6279 be returned in this co-processor's registers. */
6280 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6281
6282 /* Allocate and return an RTX element to hold the return type of a call. This
6283 routine must not fail and will only be called if is_return_candidate
6284 returned true with the same parameters. */
6285 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6286
6287 /* Finish processing this argument and prepare to start processing
6288 the next one. */
6289 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6290 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6291 {
6292 AAPCS_CP(vfp)
6293 };
6294
6295 #undef AAPCS_CP
6296
6297 static int
6298 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6299 const_tree type)
6300 {
6301 int i;
6302
6303 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6304 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6305 return i;
6306
6307 return -1;
6308 }
6309
6310 static int
6311 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6312 {
6313 /* We aren't passed a decl, so we can't check that a call is local.
6314 However, it isn't clear that that would be a win anyway, since it
6315 might limit some tail-calling opportunities. */
6316 enum arm_pcs pcs_variant;
6317
6318 if (fntype)
6319 {
6320 const_tree fndecl = NULL_TREE;
6321
6322 if (TREE_CODE (fntype) == FUNCTION_DECL)
6323 {
6324 fndecl = fntype;
6325 fntype = TREE_TYPE (fntype);
6326 }
6327
6328 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6329 }
6330 else
6331 pcs_variant = arm_pcs_default;
6332
6333 if (pcs_variant != ARM_PCS_AAPCS)
6334 {
6335 int i;
6336
6337 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6338 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6339 TYPE_MODE (type),
6340 type))
6341 return i;
6342 }
6343 return -1;
6344 }
6345
6346 static rtx
6347 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6348 const_tree fntype)
6349 {
6350 /* We aren't passed a decl, so we can't check that a call is local.
6351 However, it isn't clear that that would be a win anyway, since it
6352 might limit some tail-calling opportunities. */
6353 enum arm_pcs pcs_variant;
6354 int unsignedp ATTRIBUTE_UNUSED;
6355
6356 if (fntype)
6357 {
6358 const_tree fndecl = NULL_TREE;
6359
6360 if (TREE_CODE (fntype) == FUNCTION_DECL)
6361 {
6362 fndecl = fntype;
6363 fntype = TREE_TYPE (fntype);
6364 }
6365
6366 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6367 }
6368 else
6369 pcs_variant = arm_pcs_default;
6370
6371 /* Promote integer types. */
6372 if (type && INTEGRAL_TYPE_P (type))
6373 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6374
6375 if (pcs_variant != ARM_PCS_AAPCS)
6376 {
6377 int i;
6378
6379 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6380 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6381 type))
6382 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6383 mode, type);
6384 }
6385
6386 /* Promotes small structs returned in a register to full-word size
6387 for big-endian AAPCS. */
6388 if (type && arm_return_in_msb (type))
6389 {
6390 HOST_WIDE_INT size = int_size_in_bytes (type);
6391 if (size % UNITS_PER_WORD != 0)
6392 {
6393 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6394 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6395 }
6396 }
6397
6398 return gen_rtx_REG (mode, R0_REGNUM);
6399 }
6400
6401 static rtx
6402 aapcs_libcall_value (machine_mode mode)
6403 {
6404 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6405 && GET_MODE_SIZE (mode) <= 4)
6406 mode = SImode;
6407
6408 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6409 }
6410
6411 /* Lay out a function argument using the AAPCS rules. The rule
6412 numbers referred to here are those in the AAPCS. */
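/* Illustrative sketch (not from the original source): under the base
   (integer-register) variant, for f (int a, double b) the int takes r0,
   rule C3 rounds the NCRN up from 1 to 2 for the doubleword-aligned
   double, and rule C4 then assigns it to the r2/r3 pair; r1 is left
   unused.  */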
6413 static void
6414 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6415 const_tree type, bool named)
6416 {
6417 int nregs, nregs2;
6418 int ncrn;
6419
6420 /* We only need to do this once per argument. */
6421 if (pcum->aapcs_arg_processed)
6422 return;
6423
6424 pcum->aapcs_arg_processed = true;
6425
6426 /* Special case: if named is false then we are handling an incoming
6427 anonymous argument which is on the stack. */
6428 if (!named)
6429 return;
6430
6431 /* Is this a potential co-processor register candidate? */
6432 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6433 {
6434 int slot = aapcs_select_call_coproc (pcum, mode, type);
6435 pcum->aapcs_cprc_slot = slot;
6436
6437 /* We don't have to apply any of the rules from part B of the
6438 preparation phase, these are handled elsewhere in the
6439 compiler. */
6440
6441 if (slot >= 0)
6442 {
6443 /* A Co-processor register candidate goes either in its own
6444 class of registers or on the stack. */
6445 if (!pcum->aapcs_cprc_failed[slot])
6446 {
6447 /* C1.cp - Try to allocate the argument to co-processor
6448 registers. */
6449 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6450 return;
6451
6452 /* C2.cp - Put the argument on the stack and note that we
6453 can't assign any more candidates in this slot. We also
6454 need to note that we have allocated stack space, so that
6455 we won't later try to split a non-cprc candidate between
6456 core registers and the stack. */
6457 pcum->aapcs_cprc_failed[slot] = true;
6458 pcum->can_split = false;
6459 }
6460
6461 /* We didn't get a register, so this argument goes on the
6462 stack. */
6463 gcc_assert (pcum->can_split == false);
6464 return;
6465 }
6466 }
6467
6468 /* C3 - For double-word aligned arguments, round the NCRN up to the
6469 next even number. */
6470 ncrn = pcum->aapcs_ncrn;
6471 if (ncrn & 1)
6472 {
6473 int res = arm_needs_doubleword_align (mode, type);
6474 /* Only warn during RTL expansion of call stmts, otherwise we would
6475 warn e.g. during gimplification even on functions that will be
6476 always inlined, and we'd warn multiple times. Don't warn when
6477 called in expand_function_start either, as we warn instead in
6478 arm_function_arg_boundary in that case. */
6479 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6480 inform (input_location, "parameter passing for argument of type "
6481 "%qT changed in GCC 7.1", type);
6482 else if (res > 0)
6483 ncrn++;
6484 }
6485
6486   nregs = ARM_NUM_REGS2 (mode, type);
6487
6488 /* Sigh, this test should really assert that nregs > 0, but a GCC
6489 extension allows empty structs and then gives them empty size; it
6490 then allows such a structure to be passed by value. For some of
6491 the code below we have to pretend that such an argument has
6492 non-zero size so that we 'locate' it correctly either in
6493 registers or on the stack. */
6494 gcc_assert (nregs >= 0);
6495
6496 nregs2 = nregs ? nregs : 1;
6497
6498 /* C4 - Argument fits entirely in core registers. */
6499 if (ncrn + nregs2 <= NUM_ARG_REGS)
6500 {
6501 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6502 pcum->aapcs_next_ncrn = ncrn + nregs;
6503 return;
6504 }
6505
6506 /* C5 - Some core registers left and there are no arguments already
6507 on the stack: split this argument between the remaining core
6508 registers and the stack. */
6509 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6510 {
6511 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6512 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6513 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6514 return;
6515 }
6516
6517 /* C6 - NCRN is set to 4. */
6518 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6519
6520   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6521 return;
6522 }
6523
6524 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6525 for a call to a function whose data type is FNTYPE.
6526 For a library call, FNTYPE is NULL. */
6527 void
6528 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6529 rtx libname,
6530 tree fndecl ATTRIBUTE_UNUSED)
6531 {
6532   /* Determine the PCS variant (calling convention) to use for this call.  */
6533 if (fntype)
6534 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6535 else
6536 pcum->pcs_variant = arm_pcs_default;
6537
6538 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6539 {
6540 if (arm_libcall_uses_aapcs_base (libname))
6541 pcum->pcs_variant = ARM_PCS_AAPCS;
6542
6543 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6544 pcum->aapcs_reg = NULL_RTX;
6545 pcum->aapcs_partial = 0;
6546 pcum->aapcs_arg_processed = false;
6547 pcum->aapcs_cprc_slot = -1;
6548 pcum->can_split = true;
6549
6550 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6551 {
6552 int i;
6553
6554 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6555 {
6556 pcum->aapcs_cprc_failed[i] = false;
6557 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6558 }
6559 }
6560 return;
6561 }
6562
6563 /* Legacy ABIs */
6564
6565 /* On the ARM, the offset starts at 0. */
6566 pcum->nregs = 0;
6567 pcum->iwmmxt_nregs = 0;
6568 pcum->can_split = true;
6569
6570 /* Varargs vectors are treated the same as long long.
6571 named_count avoids having to change the way arm handles 'named' */
6572 pcum->named_count = 0;
6573 pcum->nargs = 0;
6574
6575 if (TARGET_REALLY_IWMMXT && fntype)
6576 {
6577 tree fn_arg;
6578
6579 for (fn_arg = TYPE_ARG_TYPES (fntype);
6580 fn_arg;
6581 fn_arg = TREE_CHAIN (fn_arg))
6582 pcum->named_count += 1;
6583
6584 if (! pcum->named_count)
6585 pcum->named_count = INT_MAX;
6586 }
6587 }
6588
6589 /* Return 1 if double word alignment is required for argument passing.
6590 Return -1 if double word alignment used to be required for argument
6591 passing before PR77728 ABI fix, but is not required anymore.
6592    Return 0 if double word alignment is not required and wasn't required
6593 before either. */
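/* Illustrative sketch (not from the original source): a plain int has
   32-bit alignment and yields 0; a long long, or a struct whose first
   FIELD_DECL is a long long, has 64-bit alignment and yields 1, so the
   argument gets an even-numbered register pair or an 8-byte-aligned
   stack slot.  */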
6594 static int
6595 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6596 {
6597 if (!type)
6598 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6599
6600 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6601 if (!AGGREGATE_TYPE_P (type))
6602 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6603
6604 /* Array types: Use member alignment of element type. */
6605 if (TREE_CODE (type) == ARRAY_TYPE)
6606 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6607
6608 int ret = 0;
6609 /* Record/aggregate types: Use greatest member alignment of any member. */
6610 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6611 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6612 {
6613 if (TREE_CODE (field) == FIELD_DECL)
6614 return 1;
6615 else
6616 	/* Before the PR77728 fix, we also incorrectly considered
6617 	   other aggregate fields, such as VAR_DECLs and TYPE_DECLs.
6618 	   Make sure we can warn about that with -Wpsabi.  */
6619 ret = -1;
6620 }
6621
6622 return ret;
6623 }
6624
6625
6626 /* Determine where to put an argument to a function.
6627 Value is zero to push the argument on the stack,
6628 or a hard register in which to store the argument.
6629
6630 MODE is the argument's machine mode.
6631 TYPE is the data type of the argument (as a tree).
6632 This is null for libcalls where that information may
6633 not be available.
6634 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6635 the preceding args and about the function being called.
6636 NAMED is nonzero if this argument is a named parameter
6637 (otherwise it is an extra parameter matching an ellipsis).
6638
6639 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6640 other arguments are passed on the stack. If (NAMED == 0) (which happens
6641 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6642    defined), say it is passed on the stack (function_prologue will
6643    indeed arrange for it to be passed on the stack if necessary).  */
6644
6645 static rtx
6646 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6647 const_tree type, bool named)
6648 {
6649 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6650 int nregs;
6651
6652 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6653 a call insn (op3 of a call_value insn). */
6654 if (mode == VOIDmode)
6655 return const0_rtx;
6656
6657 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6658 {
6659 aapcs_layout_arg (pcum, mode, type, named);
6660 return pcum->aapcs_reg;
6661 }
6662
6663 /* Varargs vectors are treated the same as long long.
6664 named_count avoids having to change the way arm handles 'named' */
6665 if (TARGET_IWMMXT_ABI
6666 && arm_vector_mode_supported_p (mode)
6667 && pcum->named_count > pcum->nargs + 1)
6668 {
6669 if (pcum->iwmmxt_nregs <= 9)
6670 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6671 else
6672 {
6673 pcum->can_split = false;
6674 return NULL_RTX;
6675 }
6676 }
6677
6678 /* Put doubleword aligned quantities in even register pairs. */
6679 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6680 {
6681 int res = arm_needs_doubleword_align (mode, type);
6682 if (res < 0 && warn_psabi)
6683 inform (input_location, "parameter passing for argument of type "
6684 "%qT changed in GCC 7.1", type);
6685 else if (res > 0)
6686 pcum->nregs++;
6687 }
6688
6689 /* Only allow splitting an arg between regs and memory if all preceding
6690 args were allocated to regs. For args passed by reference we only count
6691 the reference pointer. */
6692 if (pcum->can_split)
6693 nregs = 1;
6694 else
6695 nregs = ARM_NUM_REGS2 (mode, type);
6696
6697 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6698 return NULL_RTX;
6699
6700 return gen_rtx_REG (mode, pcum->nregs);
6701 }
6702
6703 static unsigned int
6704 arm_function_arg_boundary (machine_mode mode, const_tree type)
6705 {
6706 if (!ARM_DOUBLEWORD_ALIGN)
6707 return PARM_BOUNDARY;
6708
6709 int res = arm_needs_doubleword_align (mode, type);
6710 if (res < 0 && warn_psabi)
6711 inform (input_location, "parameter passing for argument of type %qT "
6712 "changed in GCC 7.1", type);
6713
6714 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6715 }
6716
6717 static int
6718 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6719 tree type, bool named)
6720 {
6721 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6722 int nregs = pcum->nregs;
6723
6724 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6725 {
6726 aapcs_layout_arg (pcum, mode, type, named);
6727 return pcum->aapcs_partial;
6728 }
6729
6730 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6731 return 0;
6732
6733 if (NUM_ARG_REGS > nregs
6734 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6735 && pcum->can_split)
6736 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6737
6738 return 0;
6739 }
6740
6741 /* Update the data in PCUM to advance over an argument
6742 of mode MODE and data type TYPE.
6743 (TYPE is null for libcalls where that information may not be available.) */
6744
6745 static void
6746 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6747 const_tree type, bool named)
6748 {
6749 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6750
6751 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6752 {
6753 aapcs_layout_arg (pcum, mode, type, named);
6754
6755 if (pcum->aapcs_cprc_slot >= 0)
6756 {
6757 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6758 type);
6759 pcum->aapcs_cprc_slot = -1;
6760 }
6761
6762 /* Generic stuff. */
6763 pcum->aapcs_arg_processed = false;
6764 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6765 pcum->aapcs_reg = NULL_RTX;
6766 pcum->aapcs_partial = 0;
6767 }
6768 else
6769 {
6770 pcum->nargs += 1;
6771 if (arm_vector_mode_supported_p (mode)
6772 && pcum->named_count > pcum->nargs
6773 && TARGET_IWMMXT_ABI)
6774 pcum->iwmmxt_nregs += 1;
6775 else
6776 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6777 }
6778 }
6779
6780 /* Variable sized types are passed by reference. This is a GCC
6781 extension to the ARM ABI. */
6782
6783 static bool
6784 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6785 machine_mode mode ATTRIBUTE_UNUSED,
6786 const_tree type, bool named ATTRIBUTE_UNUSED)
6787 {
6788 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6789 }
6790 \f
6791 /* Encode the current state of the #pragma [no_]long_calls. */
6792 typedef enum
6793 {
6794 OFF, /* No #pragma [no_]long_calls is in effect. */
6795 LONG, /* #pragma long_calls is in effect. */
6796 SHORT /* #pragma no_long_calls is in effect. */
6797 } arm_pragma_enum;
6798
6799 static arm_pragma_enum arm_pragma_long_calls = OFF;
6800
6801 void
6802 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6803 {
6804 arm_pragma_long_calls = LONG;
6805 }
6806
6807 void
6808 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6809 {
6810 arm_pragma_long_calls = SHORT;
6811 }
6812
6813 void
6814 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6815 {
6816 arm_pragma_long_calls = OFF;
6817 }
6818 \f
6819 /* Handle an attribute requiring a FUNCTION_DECL;
6820 arguments as in struct attribute_spec.handler. */
6821 static tree
6822 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6823 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6824 {
6825 if (TREE_CODE (*node) != FUNCTION_DECL)
6826 {
6827 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6828 name);
6829 *no_add_attrs = true;
6830 }
6831
6832 return NULL_TREE;
6833 }
6834
6835 /* Handle an "interrupt" or "isr" attribute;
6836 arguments as in struct attribute_spec.handler. */
6837 static tree
6838 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6839 bool *no_add_attrs)
6840 {
6841 if (DECL_P (*node))
6842 {
6843 if (TREE_CODE (*node) != FUNCTION_DECL)
6844 {
6845 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6846 name);
6847 *no_add_attrs = true;
6848 }
6849 /* FIXME: the argument if any is checked for type attributes;
6850 should it be checked for decl ones? */
6851 }
6852 else
6853 {
6854 if (TREE_CODE (*node) == FUNCTION_TYPE
6855 || TREE_CODE (*node) == METHOD_TYPE)
6856 {
6857 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6858 {
6859 warning (OPT_Wattributes, "%qE attribute ignored",
6860 name);
6861 *no_add_attrs = true;
6862 }
6863 }
6864 else if (TREE_CODE (*node) == POINTER_TYPE
6865 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6866 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6867 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6868 {
6869 *node = build_variant_type_copy (*node);
6870 TREE_TYPE (*node) = build_type_attribute_variant
6871 (TREE_TYPE (*node),
6872 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6873 *no_add_attrs = true;
6874 }
6875 else
6876 {
6877 /* Possibly pass this attribute on from the type to a decl. */
6878 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6879 | (int) ATTR_FLAG_FUNCTION_NEXT
6880 | (int) ATTR_FLAG_ARRAY_NEXT))
6881 {
6882 *no_add_attrs = true;
6883 return tree_cons (name, args, NULL_TREE);
6884 }
6885 else
6886 {
6887 warning (OPT_Wattributes, "%qE attribute ignored",
6888 name);
6889 }
6890 }
6891 }
6892
6893 return NULL_TREE;
6894 }
6895
6896 /* Handle a "pcs" attribute; arguments as in struct
6897 attribute_spec.handler. */
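/* Illustrative usage (hypothetical user declaration; "aapcs" and
   "aapcs-vfp" are the strings arm_pcs_from_attribute understands):

     double f (double) __attribute__ ((pcs ("aapcs")));

   Anything else yields ARM_PCS_UNKNOWN and the attribute is dropped.  */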
6898 static tree
6899 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6900 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6901 {
6902 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6903 {
6904 warning (OPT_Wattributes, "%qE attribute ignored", name);
6905 *no_add_attrs = true;
6906 }
6907 return NULL_TREE;
6908 }
6909
6910 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6911 /* Handle the "notshared" attribute. This attribute is another way of
6912 requesting hidden visibility. ARM's compiler supports
6913 "__declspec(notshared)"; we support the same thing via an
6914 attribute. */
6915
6916 static tree
6917 arm_handle_notshared_attribute (tree *node,
6918 tree name ATTRIBUTE_UNUSED,
6919 tree args ATTRIBUTE_UNUSED,
6920 int flags ATTRIBUTE_UNUSED,
6921 bool *no_add_attrs)
6922 {
6923 tree decl = TYPE_NAME (*node);
6924
6925 if (decl)
6926 {
6927 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6928 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6929 *no_add_attrs = false;
6930 }
6931 return NULL_TREE;
6932 }
6933 #endif
6934
6935 /* This function returns true if a function with declaration FNDECL and type
6936 FNTYPE uses the stack to pass arguments or to return its value, and false
6937 otherwise. This is used for functions with the attributes
6938 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6939 diagnostic messages if the stack is used. NAME is the name of the attribute
6940 used. */
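/* For instance (illustrative, AAPCS base variant where r0-r3 carry the
   first four integer arguments):

     int __attribute__ ((cmse_nonsecure_entry)) f (int, int, int, int);
       -- accepted, everything fits in registers
     int __attribute__ ((cmse_nonsecure_entry)) g (int, int, int, int, int);
       -- rejected, the fifth argument would be passed on the stack  */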
6941
6942 static bool
6943 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6944 {
6945 function_args_iterator args_iter;
6946 CUMULATIVE_ARGS args_so_far_v;
6947 cumulative_args_t args_so_far;
6948 bool first_param = true;
6949 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6950
6951 /* Error out if any argument is passed on the stack. */
6952 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6953 args_so_far = pack_cumulative_args (&args_so_far_v);
6954 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6955 {
6956 rtx arg_rtx;
6957 machine_mode arg_mode = TYPE_MODE (arg_type);
6958
6959 prev_arg_type = arg_type;
6960 if (VOID_TYPE_P (arg_type))
6961 continue;
6962
6963 if (!first_param)
6964 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6965 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6966 if (!arg_rtx
6967 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6968 {
6969 error ("%qE attribute not available to functions with arguments "
6970 "passed on the stack", name);
6971 return true;
6972 }
6973 first_param = false;
6974 }
6975
6976 /* Error out for variadic functions since we cannot control how many
6977 arguments will be passed and thus the stack could be used. stdarg_p () is
6978 not used for this check to avoid walking the argument list twice. */
6979 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6980 {
6981 error ("%qE attribute not available to functions with variable number "
6982 "of arguments", name);
6983 return true;
6984 }
6985
6986 /* Error out if return value is passed on the stack. */
6987 ret_type = TREE_TYPE (fntype);
6988 if (arm_return_in_memory (ret_type, fntype))
6989 {
6990 error ("%qE attribute not available to functions that return value on "
6991 "the stack", name);
6992 return true;
6993 }
6994 return false;
6995 }
6996
6997 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6998 function will check whether the attribute is allowed here and will add the
6999 attribute to the function declaration tree or otherwise issue a warning. */
7000
7001 static tree
7002 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7003 tree /* args */,
7004 int /* flags */,
7005 bool *no_add_attrs)
7006 {
7007 tree fndecl;
7008
7009 if (!use_cmse)
7010 {
7011 *no_add_attrs = true;
7012 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7013 name);
7014 return NULL_TREE;
7015 }
7016
7017 /* Ignore attribute for function types. */
7018 if (TREE_CODE (*node) != FUNCTION_DECL)
7019 {
7020 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7021 name);
7022 *no_add_attrs = true;
7023 return NULL_TREE;
7024 }
7025
7026 fndecl = *node;
7027
7028 /* Warn for static linkage functions. */
7029 if (!TREE_PUBLIC (fndecl))
7030 {
7031 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7032 "with static linkage", name);
7033 *no_add_attrs = true;
7034 return NULL_TREE;
7035 }
7036
7037 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7038 TREE_TYPE (fndecl));
7039 return NULL_TREE;
7040 }
7041
7042
7043 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7044 function will check whether the attribute is allowed here and will add the
7045 attribute to the function type tree or otherwise issue a diagnostic. The
7046 reason we check this at declaration time is to only allow the use of the
7047 attribute with declarations of function pointers and not function
7048 declarations. This function checks NODE is of the expected type and issues
7049 diagnostics otherwise using NAME. If it is not of the expected type
7050 *NO_ADD_ATTRS will be set to true. */
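/* A sketch of the intended usage (illustrative, hypothetical names):

     void (*ns_fp) (void) __attribute__ ((cmse_nonsecure_call));

   i.e. the attribute decorates a declaration whose type is (a pointer to)
   a function type; putting it directly on an ordinary function
   declaration is diagnosed and the attribute dropped.  */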
7051
7052 static tree
7053 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7054 tree /* args */,
7055 int /* flags */,
7056 bool *no_add_attrs)
7057 {
7058 tree decl = NULL_TREE, fntype = NULL_TREE;
7059 tree type;
7060
7061 if (!use_cmse)
7062 {
7063 *no_add_attrs = true;
7064 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7065 name);
7066 return NULL_TREE;
7067 }
7068
7069 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7070 {
7071 decl = *node;
7072 fntype = TREE_TYPE (decl);
7073 }
7074
7075 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7076 fntype = TREE_TYPE (fntype);
7077
7078 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7079 {
7080 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7081 "function pointer", name);
7082 *no_add_attrs = true;
7083 return NULL_TREE;
7084 }
7085
7086 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7087
7088 if (*no_add_attrs)
7089 return NULL_TREE;
7090
7091 /* Prevent trees being shared among function types with and without
7092 cmse_nonsecure_call attribute. */
7093 type = TREE_TYPE (decl);
7094
7095 type = build_distinct_type_copy (type);
7096 TREE_TYPE (decl) = type;
7097 fntype = type;
7098
7099 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7100 {
7101 type = fntype;
7102 fntype = TREE_TYPE (fntype);
7103 fntype = build_distinct_type_copy (fntype);
7104 TREE_TYPE (type) = fntype;
7105 }
7106
7107 /* Construct a type attribute and add it to the function type. */
7108 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7109 TYPE_ATTRIBUTES (fntype));
7110 TYPE_ATTRIBUTES (fntype) = attrs;
7111 return NULL_TREE;
7112 }
7113
7114 /* Return 0 if the attributes for two types are incompatible, 1 if they
7115 are compatible, and 2 if they are nearly compatible (which causes a
7116 warning to be generated). */
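/* For example (illustrative):

     extern void far_fn (void) __attribute__ ((long_call));
     void (*fp) (void) = far_fn;

   is flagged, because only one of the two function types carries a
   call-type attribute and this routine then reports them incompatible.  */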
7117 static int
7118 arm_comp_type_attributes (const_tree type1, const_tree type2)
7119 {
7120 int l1, l2, s1, s2;
7121
7122 /* Check for mismatch of non-default calling convention. */
7123 if (TREE_CODE (type1) != FUNCTION_TYPE)
7124 return 1;
7125
7126 /* Check for mismatched call attributes. */
7127 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7128 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7129 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7130 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7131
7132 /* Only bother to check if an attribute is defined. */
7133 if (l1 | l2 | s1 | s2)
7134 {
7135 /* If one type has an attribute, the other must have the same attribute. */
7136 if ((l1 != l2) || (s1 != s2))
7137 return 0;
7138
7139 /* Disallow mixed attributes. */
7140 if ((l1 & s2) || (l2 & s1))
7141 return 0;
7142 }
7143
7144 /* Check for mismatched ISR attribute. */
7145 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7146 if (! l1)
7147 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7148 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7149 if (! l2)
7150 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7151 if (l1 != l2)
7152 return 0;
7153
7154 l1 = lookup_attribute ("cmse_nonsecure_call",
7155 TYPE_ATTRIBUTES (type1)) != NULL;
7156 l2 = lookup_attribute ("cmse_nonsecure_call",
7157 TYPE_ATTRIBUTES (type2)) != NULL;
7158
7159 if (l1 != l2)
7160 return 0;
7161
7162 return 1;
7163 }
7164
7165 /* Assigns default attributes to newly defined type. This is used to
7166 set short_call/long_call attributes for function types of
7167 functions defined inside corresponding #pragma scopes. */
7168 static void
7169 arm_set_default_type_attributes (tree type)
7170 {
7171 /* Add __attribute__ ((long_call)) to all functions when inside
7172 #pragma long_calls, or __attribute__ ((short_call)) when inside
7173 #pragma no_long_calls. */
7174 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7175 {
7176 tree type_attr_list, attr_name;
7177 type_attr_list = TYPE_ATTRIBUTES (type);
7178
7179 if (arm_pragma_long_calls == LONG)
7180 attr_name = get_identifier ("long_call");
7181 else if (arm_pragma_long_calls == SHORT)
7182 attr_name = get_identifier ("short_call");
7183 else
7184 return;
7185
7186 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7187 TYPE_ATTRIBUTES (type) = type_attr_list;
7188 }
7189 }
7190 \f
7191 /* Return true if DECL is known to be linked into section SECTION. */
7192
7193 static bool
7194 arm_function_in_section_p (tree decl, section *section)
7195 {
7196 /* We can only be certain about the prevailing symbol definition. */
7197 if (!decl_binds_to_current_def_p (decl))
7198 return false;
7199
7200 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7201 if (!DECL_SECTION_NAME (decl))
7202 {
7203 /* Make sure that we will not create a unique section for DECL. */
7204 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7205 return false;
7206 }
7207
7208 return function_section (decl) == section;
7209 }
7210
7211 /* Return nonzero if a 32-bit "long_call" should be generated for
7212 a call from the current function to DECL. We generate a long_call
7213 if the function:
7214
7215 a. has an __attribute__ ((long_call))
7216 or b. is within the scope of a #pragma long_calls
7217 or c. the -mlong-calls command line switch has been specified
7218
7219 However we do not generate a long call if the function:
7220
7221 d. has an __attribute__ ((short_call))
7222 or e. is inside the scope of a #pragma no_long_calls
7223 or f. is defined in the same section as the current function. */
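/* Illustrative declarations for cases a. and d. above (hypothetical
   user code):

     void far_fn (void) __attribute__ ((long_call));    -- case a
     void near_fn (void) __attribute__ ((short_call));  -- case d

   Cases b. and e. come from the #pragma handlers earlier in this file,
   and case c. from the -mlong-calls option.  */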
7224
7225 bool
7226 arm_is_long_call_p (tree decl)
7227 {
7228 tree attrs;
7229
7230 if (!decl)
7231 return TARGET_LONG_CALLS;
7232
7233 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7234 if (lookup_attribute ("short_call", attrs))
7235 return false;
7236
7237 /* For "f", be conservative, and only cater for cases in which the
7238 whole of the current function is placed in the same section. */
7239 if (!flag_reorder_blocks_and_partition
7240 && TREE_CODE (decl) == FUNCTION_DECL
7241 && arm_function_in_section_p (decl, current_function_section ()))
7242 return false;
7243
7244 if (lookup_attribute ("long_call", attrs))
7245 return true;
7246
7247 return TARGET_LONG_CALLS;
7248 }
7249
7250 /* Return nonzero if it is ok to make a tail-call to DECL. */
7251 static bool
7252 arm_function_ok_for_sibcall (tree decl, tree exp)
7253 {
7254 unsigned long func_type;
7255
7256 if (cfun->machine->sibcall_blocked)
7257 return false;
7258
7259 /* Never tailcall something if we are generating code for Thumb-1. */
7260 if (TARGET_THUMB1)
7261 return false;
7262
7263 /* The PIC register is live on entry to VxWorks PLT entries, so we
7264 must make the call before restoring the PIC register. */
7265 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7266 return false;
7267
7268 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7269 may be used both as target of the call and base register for restoring
7270 the VFP registers. */
7271 if (TARGET_APCS_FRAME && TARGET_ARM
7272 && TARGET_HARD_FLOAT
7273 && decl && arm_is_long_call_p (decl))
7274 return false;
7275
7276 /* If we are interworking and the function is not declared static
7277 then we can't tail-call it unless we know that it exists in this
7278 compilation unit (since it might be a Thumb routine). */
7279 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7280 && !TREE_ASM_WRITTEN (decl))
7281 return false;
7282
7283 func_type = arm_current_func_type ();
7284 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7285 if (IS_INTERRUPT (func_type))
7286 return false;
7287
7288 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7289 generated for entry functions themselves. */
7290 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7291 return false;
7292
7293 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7294 this would complicate matters for later code generation. */
7295 if (TREE_CODE (exp) == CALL_EXPR)
7296 {
7297 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7298 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7299 return false;
7300 }
7301
7302 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7303 {
7304 /* Check that the return value locations are the same. For
7305 example that we aren't returning a value from the sibling in
7306 a VFP register but then need to transfer it to a core
7307 register. */
7308 rtx a, b;
7309 tree decl_or_type = decl;
7310
7311 /* If it is an indirect function pointer, get the function type. */
7312 if (!decl)
7313 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7314
7315 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7316 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7317 cfun->decl, false);
7318 if (!rtx_equal_p (a, b))
7319 return false;
7320 }
7321
7322 /* Never tailcall if function may be called with a misaligned SP. */
7323 if (IS_STACKALIGN (func_type))
7324 return false;
7325
7326 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7327 references should become a NOP. Don't convert such calls into
7328 sibling calls. */
7329 if (TARGET_AAPCS_BASED
7330 && arm_abi == ARM_ABI_AAPCS
7331 && decl
7332 && DECL_WEAK (decl))
7333 return false;
7334
7335 /* We cannot do a tailcall for an indirect call by descriptor if all the
7336 argument registers are used because the only register left to load the
7337 address is IP and it will already contain the static chain. */
7338 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7339 {
7340 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7341 CUMULATIVE_ARGS cum;
7342 cumulative_args_t cum_v;
7343
7344 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7345 cum_v = pack_cumulative_args (&cum);
7346
7347 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7348 {
7349 tree type = TREE_VALUE (t);
7350 if (!VOID_TYPE_P (type))
7351 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7352 }
7353
7354 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7355 return false;
7356 }
7357
7358 /* Everything else is ok. */
7359 return true;
7360 }
7361
7362 \f
7363 /* Addressing mode support functions. */
7364
7365 /* Return nonzero if X is a legitimate immediate operand when compiling
7366 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7367 int
7368 legitimate_pic_operand_p (rtx x)
7369 {
7370 if (GET_CODE (x) == SYMBOL_REF
7371 || (GET_CODE (x) == CONST
7372 && GET_CODE (XEXP (x, 0)) == PLUS
7373 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7374 return 0;
7375
7376 return 1;
7377 }
7378
7379 /* Record that the current function needs a PIC register. Initialize
7380 cfun->machine->pic_reg if we have not already done so. */
7381
7382 static void
7383 require_pic_register (void)
7384 {
7385 /* A lot of the logic here is made obscure by the fact that this
7386 routine gets called as part of the rtx cost estimation process.
7387 We don't want those calls to affect any assumptions about the real
7388 function; and further, we can't call entry_of_function() until we
7389 start the real expansion process. */
7390 if (!crtl->uses_pic_offset_table)
7391 {
7392 gcc_assert (can_create_pseudo_p ());
7393 if (arm_pic_register != INVALID_REGNUM
7394 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7395 {
7396 if (!cfun->machine->pic_reg)
7397 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7398
7399 /* Play games to avoid marking the function as needing pic
7400 if we are being called as part of the cost-estimation
7401 process. */
7402 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7403 crtl->uses_pic_offset_table = 1;
7404 }
7405 else
7406 {
7407 rtx_insn *seq, *insn;
7408
7409 if (!cfun->machine->pic_reg)
7410 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7411
7412 /* Play games to avoid marking the function as needing pic
7413 if we are being called as part of the cost-estimation
7414 process. */
7415 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7416 {
7417 crtl->uses_pic_offset_table = 1;
7418 start_sequence ();
7419
7420 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7421 && arm_pic_register > LAST_LO_REGNUM)
7422 emit_move_insn (cfun->machine->pic_reg,
7423 gen_rtx_REG (Pmode, arm_pic_register));
7424 else
7425 arm_load_pic_register (0UL);
7426
7427 seq = get_insns ();
7428 end_sequence ();
7429
7430 for (insn = seq; insn; insn = NEXT_INSN (insn))
7431 if (INSN_P (insn))
7432 INSN_LOCATION (insn) = prologue_location;
7433
7434 /* We can be called during expansion of PHI nodes, where
7435 we can't yet emit instructions directly in the final
7436 insn stream. Queue the insns on the entry edge, they will
7437 be committed after everything else is expanded. */
7438 insert_insn_on_edge (seq,
7439 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7440 }
7441 }
7442 }
7443 }
7444
7445 rtx
7446 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7447 {
7448 if (GET_CODE (orig) == SYMBOL_REF
7449 || GET_CODE (orig) == LABEL_REF)
7450 {
7451 if (reg == 0)
7452 {
7453 gcc_assert (can_create_pseudo_p ());
7454 reg = gen_reg_rtx (Pmode);
7455 }
7456
7457 /* VxWorks does not impose a fixed gap between segments; the run-time
7458 gap can be different from the object-file gap. We therefore can't
7459 use GOTOFF unless we are absolutely sure that the symbol is in the
7460 same segment as the GOT. Unfortunately, the flexibility of linker
7461 scripts means that we can't be sure of that in general, so assume
7462 that GOTOFF is never valid on VxWorks. */
7463 /* References to weak symbols cannot be resolved locally: they
7464 may be overridden by a non-weak definition at link time. */
7465 rtx_insn *insn;
7466 if ((GET_CODE (orig) == LABEL_REF
7467 || (GET_CODE (orig) == SYMBOL_REF
7468 && SYMBOL_REF_LOCAL_P (orig)
7469 && (SYMBOL_REF_DECL (orig)
7470 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7471 && NEED_GOT_RELOC
7472 && arm_pic_data_is_text_relative)
7473 insn = arm_pic_static_addr (orig, reg);
7474 else
7475 {
7476 rtx pat;
7477 rtx mem;
7478
7479 /* If this function doesn't have a pic register, create one now. */
7480 require_pic_register ();
7481
7482 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7483
7484 /* Make the MEM as close to a constant as possible. */
7485 mem = SET_SRC (pat);
7486 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7487 MEM_READONLY_P (mem) = 1;
7488 MEM_NOTRAP_P (mem) = 1;
7489
7490 insn = emit_insn (pat);
7491 }
7492
7493 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7494 by the loop optimizer. */
7495 set_unique_reg_note (insn, REG_EQUAL, orig);
7496
7497 return reg;
7498 }
7499 else if (GET_CODE (orig) == CONST)
7500 {
7501 rtx base, offset;
7502
7503 if (GET_CODE (XEXP (orig, 0)) == PLUS
7504 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7505 return orig;
7506
7507 /* Handle the case where we have: const (UNSPEC_TLS). */
7508 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7509 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7510 return orig;
7511
7512 /* Handle the case where we have:
7513 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7514 CONST_INT. */
7515 if (GET_CODE (XEXP (orig, 0)) == PLUS
7516 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7517 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7518 {
7519 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7520 return orig;
7521 }
7522
7523 if (reg == 0)
7524 {
7525 gcc_assert (can_create_pseudo_p ());
7526 reg = gen_reg_rtx (Pmode);
7527 }
7528
7529 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7530
7531 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7532 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7533 base == reg ? 0 : reg);
7534
7535 if (CONST_INT_P (offset))
7536 {
7537 /* The base register doesn't really matter, we only want to
7538 test the index for the appropriate mode. */
7539 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7540 {
7541 gcc_assert (can_create_pseudo_p ());
7542 offset = force_reg (Pmode, offset);
7543 }
7544
7545 if (CONST_INT_P (offset))
7546 return plus_constant (Pmode, base, INTVAL (offset));
7547 }
7548
7549 if (GET_MODE_SIZE (mode) > 4
7550 && (GET_MODE_CLASS (mode) == MODE_INT
7551 || TARGET_SOFT_FLOAT))
7552 {
7553 emit_insn (gen_addsi3 (reg, base, offset));
7554 return reg;
7555 }
7556
7557 return gen_rtx_PLUS (Pmode, base, offset);
7558 }
7559
7560 return orig;
7561 }
7562
7563
7564 /* Find a spare register to use during the prolog of a function. */
7565
7566 static int
7567 thumb_find_work_register (unsigned long pushed_regs_mask)
7568 {
7569 int reg;
7570
7571 /* Check the argument registers first as these are call-used. The
7572 register allocation order means that sometimes r3 might be used
7573 but earlier argument registers might not, so check them all. */
7574 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7575 if (!df_regs_ever_live_p (reg))
7576 return reg;
7577
7578 /* Before going on to check the call-saved registers we can try a couple
7579 more ways of deducing that r3 is available. The first is when we are
7580 pushing anonymous arguments onto the stack and we have less than 4
7581 registers worth of fixed arguments(*). In this case r3 will be part of
7582 the variable argument list and so we can be sure that it will be
7583 pushed right at the start of the function. Hence it will be available
7584 for the rest of the prologue.
7585 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7586 if (cfun->machine->uses_anonymous_args
7587 && crtl->args.pretend_args_size > 0)
7588 return LAST_ARG_REGNUM;
7589
7590 /* The other case is when we have fixed arguments but less than 4 registers
7591 worth. In this case r3 might be used in the body of the function, but
7592 it is not being used to convey an argument into the function. In theory
7593 we could just check crtl->args.size to see how many bytes are
7594 being passed in argument registers, but it seems that it is unreliable.
7595 Sometimes it will have the value 0 when in fact arguments are being
7596 passed. (See testcase execute/20021111-1.c for an example). So we also
7597 check the args_info.nregs field as well. The problem with this field is
7598 that it makes no allowances for arguments that are passed to the
7599 function but which are not used. Hence we could miss an opportunity
7600 when a function has an unused argument in r3. But it is better to be
7601 safe than to be sorry. */
7602 if (! cfun->machine->uses_anonymous_args
7603 && crtl->args.size >= 0
7604 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7605 && (TARGET_AAPCS_BASED
7606 ? crtl->args.info.aapcs_ncrn < 4
7607 : crtl->args.info.nregs < 4))
7608 return LAST_ARG_REGNUM;
7609
7610 /* Otherwise look for a call-saved register that is going to be pushed. */
7611 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7612 if (pushed_regs_mask & (1 << reg))
7613 return reg;
7614
7615 if (TARGET_THUMB2)
7616 {
7617 /* Thumb-2 can use high regs. */
7618 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7619 if (pushed_regs_mask & (1 << reg))
7620 return reg;
7621 }
7622 /* Something went wrong - thumb_compute_save_reg_mask()
7623 should have arranged for a suitable register to be pushed. */
7624 gcc_unreachable ();
7625 }
7626
7627 static GTY(()) int pic_labelno;
7628
7629 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7630 low register. */
7631
7632 void
7633 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7634 {
7635 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7636
7637 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7638 return;
7639
7640 gcc_assert (flag_pic);
7641
7642 pic_reg = cfun->machine->pic_reg;
7643 if (TARGET_VXWORKS_RTP)
7644 {
7645 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7646 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7647 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7648
7649 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7650
7651 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7652 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7653 }
7654 else
7655 {
7656 /* We use an UNSPEC rather than a LABEL_REF because this label
7657 never appears in the code stream. */
7658
7659 labelno = GEN_INT (pic_labelno++);
7660 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7661 l1 = gen_rtx_CONST (VOIDmode, l1);
7662
7663 /* On the ARM the PC register contains 'dot + 8' at the time of the
7664 addition, on the Thumb it is 'dot + 4'. */
7665 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
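/* Illustrative only: for TARGET_ARM the code eventually emitted looks
   roughly like
     ldr   rPIC, .LPICOFF
   .LPICn:
     add   rPIC, pc, rPIC
   with .LPICOFF holding _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8), which is
   why 8 (or 4 for Thumb) is folded into the offset here.  */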
7666 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7667 UNSPEC_GOTSYM_OFF);
7668 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7669
7670 if (TARGET_32BIT)
7671 {
7672 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7673 }
7674 else /* TARGET_THUMB1 */
7675 {
7676 if (arm_pic_register != INVALID_REGNUM
7677 && REGNO (pic_reg) > LAST_LO_REGNUM)
7678 {
7679 /* We will have pushed the pic register, so we should always be
7680 able to find a work register. */
7681 pic_tmp = gen_rtx_REG (SImode,
7682 thumb_find_work_register (saved_regs));
7683 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7684 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7685 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7686 }
7687 else if (arm_pic_register != INVALID_REGNUM
7688 && arm_pic_register > LAST_LO_REGNUM
7689 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7690 {
7691 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7692 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7693 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7694 }
7695 else
7696 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7697 }
7698 }
7699
7700 /* Need to emit this whether or not we obey regdecls,
7701 since setjmp/longjmp can cause life info to screw up. */
7702 emit_use (pic_reg);
7703 }
7704
7705 /* Generate code to load the address of a static var when flag_pic is set. */
7706 static rtx_insn *
7707 arm_pic_static_addr (rtx orig, rtx reg)
7708 {
7709 rtx l1, labelno, offset_rtx;
7710
7711 gcc_assert (flag_pic);
7712
7713 /* We use an UNSPEC rather than a LABEL_REF because this label
7714 never appears in the code stream. */
7715 labelno = GEN_INT (pic_labelno++);
7716 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7717 l1 = gen_rtx_CONST (VOIDmode, l1);
7718
7719 /* On the ARM the PC register contains 'dot + 8' at the time of the
7720 addition, on the Thumb it is 'dot + 4'. */
7721 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7722 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7723 UNSPEC_SYMBOL_OFFSET);
7724 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7725
7726 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7727 }
7728
7729 /* Return nonzero if X is valid as an ARM state addressing register. */
7730 static int
7731 arm_address_register_rtx_p (rtx x, int strict_p)
7732 {
7733 int regno;
7734
7735 if (!REG_P (x))
7736 return 0;
7737
7738 regno = REGNO (x);
7739
7740 if (strict_p)
7741 return ARM_REGNO_OK_FOR_BASE_P (regno);
7742
7743 return (regno <= LAST_ARM_REGNUM
7744 || regno >= FIRST_PSEUDO_REGISTER
7745 || regno == FRAME_POINTER_REGNUM
7746 || regno == ARG_POINTER_REGNUM);
7747 }
7748
7749 /* Return TRUE if this rtx is the difference of a symbol and a label,
7750 and will reduce to a PC-relative relocation in the object file.
7751 Expressions like this can be left alone when generating PIC, rather
7752 than forced through the GOT. */
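/* e.g. (illustrative) a pool constant of the form
     (minus (symbol_ref "sym") (label_ref Ln))
   assembles to the PC-relative difference  sym - .Ln  and needs no GOT
   entry.  */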
7753 static int
7754 pcrel_constant_p (rtx x)
7755 {
7756 if (GET_CODE (x) == MINUS)
7757 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7758
7759 return FALSE;
7760 }
7761
7762 /* Return true if X will surely end up in an index register after the next
7763 splitting pass. */
7764 static bool
7765 will_be_in_index_register (const_rtx x)
7766 {
7767 /* arm.md: calculate_pic_address will split this into a register. */
7768 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7769 }
7770
7771 /* Return nonzero if X is a valid ARM state address operand. */
7772 int
7773 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7774 int strict_p)
7775 {
7776 bool use_ldrd;
7777 enum rtx_code code = GET_CODE (x);
7778
7779 if (arm_address_register_rtx_p (x, strict_p))
7780 return 1;
7781
7782 use_ldrd = (TARGET_LDRD
7783 && (mode == DImode || mode == DFmode));
7784
7785 if (code == POST_INC || code == PRE_DEC
7786 || ((code == PRE_INC || code == POST_DEC)
7787 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7788 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7789
7790 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7791 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7792 && GET_CODE (XEXP (x, 1)) == PLUS
7793 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7794 {
7795 rtx addend = XEXP (XEXP (x, 1), 1);
7796
7797 /* Don't allow ldrd post increment by register because it's hard
7798 to fix up invalid register choices. */
7799 if (use_ldrd
7800 && GET_CODE (x) == POST_MODIFY
7801 && REG_P (addend))
7802 return 0;
7803
7804 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7805 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7806 }
7807
7808 /* After reload constants split into minipools will have addresses
7809 from a LABEL_REF. */
7810 else if (reload_completed
7811 && (code == LABEL_REF
7812 || (code == CONST
7813 && GET_CODE (XEXP (x, 0)) == PLUS
7814 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7815 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7816 return 1;
7817
7818 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7819 return 0;
7820
7821 else if (code == PLUS)
7822 {
7823 rtx xop0 = XEXP (x, 0);
7824 rtx xop1 = XEXP (x, 1);
7825
7826 return ((arm_address_register_rtx_p (xop0, strict_p)
7827 && ((CONST_INT_P (xop1)
7828 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7829 || (!strict_p && will_be_in_index_register (xop1))))
7830 || (arm_address_register_rtx_p (xop1, strict_p)
7831 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7832 }
7833
7834 #if 0
7835 /* Reload currently can't handle MINUS, so disable this for now */
7836 else if (GET_CODE (x) == MINUS)
7837 {
7838 rtx xop0 = XEXP (x, 0);
7839 rtx xop1 = XEXP (x, 1);
7840
7841 return (arm_address_register_rtx_p (xop0, strict_p)
7842 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7843 }
7844 #endif
7845
7846 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7847 && code == SYMBOL_REF
7848 && CONSTANT_POOL_ADDRESS_P (x)
7849 && ! (flag_pic
7850 && symbol_mentioned_p (get_pool_constant (x))
7851 && ! pcrel_constant_p (get_pool_constant (x))))
7852 return 1;
7853
7854 return 0;
7855 }
7856
7857 /* Return true if we can avoid creating a constant pool entry for x. */
7858 static bool
7859 can_avoid_literal_pool_for_label_p (rtx x)
7860 {
7861 /* Normally we can assign constant values to target registers without
7862 the help of a constant pool. But there are cases where we have to use a
7863 constant pool, such as:
7864 1) assigning a label to a register;
7865 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7866
7867 Constant pool access in format:
7868 (set (reg r0) (mem (symbol_ref (".LC0"))))
7869 will cause the use of the literal pool (later, in function arm_reorg).
7870 So here we mark such a format as invalid, and the compiler will then
7871 adjust it into:
7872 (set (reg r0) (symbol_ref (".LC0")))
7873 (set (reg r0) (mem (reg r0))).
7874 No extra register is required, and (mem (reg r0)) won't cause the use
7875 of literal pools. */
7876 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7877 && CONSTANT_POOL_ADDRESS_P (x))
7878 return 1;
7879 return 0;
7880 }
7881
7882
7883 /* Return nonzero if X is a valid Thumb-2 address operand. */
7884 static int
7885 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7886 {
7887 bool use_ldrd;
7888 enum rtx_code code = GET_CODE (x);
7889
7890 if (arm_address_register_rtx_p (x, strict_p))
7891 return 1;
7892
7893 use_ldrd = (TARGET_LDRD
7894 && (mode == DImode || mode == DFmode));
7895
7896 if (code == POST_INC || code == PRE_DEC
7897 || ((code == PRE_INC || code == POST_DEC)
7898 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7899 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7900
7901 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7902 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7903 && GET_CODE (XEXP (x, 1)) == PLUS
7904 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7905 {
7906 /* Thumb-2 only has autoincrement by constant. */
7907 rtx addend = XEXP (XEXP (x, 1), 1);
7908 HOST_WIDE_INT offset;
7909
7910 if (!CONST_INT_P (addend))
7911 return 0;
7912
7913 offset = INTVAL (addend);
7914 if (GET_MODE_SIZE (mode) <= 4)
7915 return (offset > -256 && offset < 256);
7916
7917 return (use_ldrd && offset > -1024 && offset < 1024
7918 && (offset & 3) == 0);
7919 }
7920
7921 /* After reload constants split into minipools will have addresses
7922 from a LABEL_REF. */
7923 else if (reload_completed
7924 && (code == LABEL_REF
7925 || (code == CONST
7926 && GET_CODE (XEXP (x, 0)) == PLUS
7927 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7928 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7929 return 1;
7930
7931 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7932 return 0;
7933
7934 else if (code == PLUS)
7935 {
7936 rtx xop0 = XEXP (x, 0);
7937 rtx xop1 = XEXP (x, 1);
7938
7939 return ((arm_address_register_rtx_p (xop0, strict_p)
7940 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7941 || (!strict_p && will_be_in_index_register (xop1))))
7942 || (arm_address_register_rtx_p (xop1, strict_p)
7943 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7944 }
7945
7946 else if (can_avoid_literal_pool_for_label_p (x))
7947 return 0;
7948
7949 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7950 && code == SYMBOL_REF
7951 && CONSTANT_POOL_ADDRESS_P (x)
7952 && ! (flag_pic
7953 && symbol_mentioned_p (get_pool_constant (x))
7954 && ! pcrel_constant_p (get_pool_constant (x))))
7955 return 1;
7956
7957 return 0;
7958 }
7959
7960 /* Return nonzero if INDEX is valid for an address index operand in
7961 ARM state. */
7962 static int
7963 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7964 int strict_p)
7965 {
7966 HOST_WIDE_INT range;
7967 enum rtx_code code = GET_CODE (index);
7968
7969 /* Standard coprocessor addressing modes. */
7970 if (TARGET_HARD_FLOAT
7971 && (mode == SFmode || mode == DFmode))
7972 return (code == CONST_INT && INTVAL (index) < 1024
7973 && INTVAL (index) > -1024
7974 && (INTVAL (index) & 3) == 0);
7975
7976 /* For quad modes, we restrict the constant offset to be slightly less
7977 than what the instruction format permits. We do this because for
7978 quad mode moves, we will actually decompose them into two separate
7979 double-mode reads or writes. INDEX must therefore be a valid
7980 (double-mode) offset and so should INDEX+8. */
7981 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7982 return (code == CONST_INT
7983 && INTVAL (index) < 1016
7984 && INTVAL (index) > -1024
7985 && (INTVAL (index) & 3) == 0);
7986
7987 /* We have no such constraint on double mode offsets, so we permit the
7988 full range of the instruction format. */
7989 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7990 return (code == CONST_INT
7991 && INTVAL (index) < 1024
7992 && INTVAL (index) > -1024
7993 && (INTVAL (index) & 3) == 0);
7994
7995 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7996 return (code == CONST_INT
7997 && INTVAL (index) < 1024
7998 && INTVAL (index) > -1024
7999 && (INTVAL (index) & 3) == 0);
8000
8001 if (arm_address_register_rtx_p (index, strict_p)
8002 && (GET_MODE_SIZE (mode) <= 4))
8003 return 1;
8004
8005 if (mode == DImode || mode == DFmode)
8006 {
8007 if (code == CONST_INT)
8008 {
8009 HOST_WIDE_INT val = INTVAL (index);
8010
8011 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8012 If vldr is selected it uses arm_coproc_mem_operand. */
8013 if (TARGET_LDRD)
8014 return val > -256 && val < 256;
8015 else
8016 return val > -4096 && val < 4092;
8017 }
8018
8019 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8020 }
8021
8022 if (GET_MODE_SIZE (mode) <= 4
8023 && ! (arm_arch4
8024 && (mode == HImode
8025 || mode == HFmode
8026 || (mode == QImode && outer == SIGN_EXTEND))))
8027 {
8028 if (code == MULT)
8029 {
8030 rtx xiop0 = XEXP (index, 0);
8031 rtx xiop1 = XEXP (index, 1);
8032
8033 return ((arm_address_register_rtx_p (xiop0, strict_p)
8034 && power_of_two_operand (xiop1, SImode))
8035 || (arm_address_register_rtx_p (xiop1, strict_p)
8036 && power_of_two_operand (xiop0, SImode)));
8037 }
8038 else if (code == LSHIFTRT || code == ASHIFTRT
8039 || code == ASHIFT || code == ROTATERT)
8040 {
8041 rtx op = XEXP (index, 1);
8042
8043 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8044 && CONST_INT_P (op)
8045 && INTVAL (op) > 0
8046 && INTVAL (op) <= 31);
8047 }
8048 }
8049
8050 /* For ARM v4 we may be doing a sign-extend operation during the
8051 load. */
8052 if (arm_arch4)
8053 {
8054 if (mode == HImode
8055 || mode == HFmode
8056 || (outer == SIGN_EXTEND && mode == QImode))
8057 range = 256;
8058 else
8059 range = 4096;
8060 }
8061 else
8062 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8063
8064 return (code == CONST_INT
8065 && INTVAL (index) < range
8066 && INTVAL (index) > -range);
8067 }
8068
8069 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8070 index operand. i.e. 1, 2, 4 or 8. */
8071 static bool
8072 thumb2_index_mul_operand (rtx op)
8073 {
8074 HOST_WIDE_INT val;
8075
8076 if (!CONST_INT_P (op))
8077 return false;
8078
8079 val = INTVAL (op);
8080 return (val == 1 || val == 2 || val == 4 || val == 8);
8081 }
8082
8083 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8084 static int
8085 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8086 {
8087 enum rtx_code code = GET_CODE (index);
8088
8089 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8090 /* Standard coprocessor addressing modes. */
8091 if (TARGET_HARD_FLOAT
8092 && (mode == SFmode || mode == DFmode))
8093 return (code == CONST_INT && INTVAL (index) < 1024
8094 /* Thumb-2 allows only a > -256 index range for its core register
8095 load/stores. Since we allow SF/DF in core registers, we have
8096 to use the intersection between -256~4096 (core) and -1024~1024
8097 (coprocessor). */
8098 && INTVAL (index) > -256
8099 && (INTVAL (index) & 3) == 0);
8100
8101 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8102 {
8103 /* For DImode assume values will usually live in core regs
8104 and only allow LDRD addressing modes. */
8105 if (!TARGET_LDRD || mode != DImode)
8106 return (code == CONST_INT
8107 && INTVAL (index) < 1024
8108 && INTVAL (index) > -1024
8109 && (INTVAL (index) & 3) == 0);
8110 }
8111
8112 /* For quad modes, we restrict the constant offset to be slightly less
8113 than what the instruction format permits. We do this because for
8114 quad mode moves, we will actually decompose them into two separate
8115 double-mode reads or writes. INDEX must therefore be a valid
8116 (double-mode) offset and so should INDEX+8. */
8117 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8118 return (code == CONST_INT
8119 && INTVAL (index) < 1016
8120 && INTVAL (index) > -1024
8121 && (INTVAL (index) & 3) == 0);
8122
8123 /* We have no such constraint on double mode offsets, so we permit the
8124 full range of the instruction format. */
8125 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8126 return (code == CONST_INT
8127 && INTVAL (index) < 1024
8128 && INTVAL (index) > -1024
8129 && (INTVAL (index) & 3) == 0);
8130
8131 if (arm_address_register_rtx_p (index, strict_p)
8132 && (GET_MODE_SIZE (mode) <= 4))
8133 return 1;
8134
8135 if (mode == DImode || mode == DFmode)
8136 {
8137 if (code == CONST_INT)
8138 {
8139 HOST_WIDE_INT val = INTVAL (index);
8140 /* Thumb-2 ldrd only has reg+const addressing modes.
8141 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8142 If vldr is selected it uses arm_coproc_mem_operand. */
8143 if (TARGET_LDRD)
8144 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8145 else
8146 return IN_RANGE (val, -255, 4095 - 4);
8147 }
8148 else
8149 return 0;
8150 }
8151
8152 if (code == MULT)
8153 {
8154 rtx xiop0 = XEXP (index, 0);
8155 rtx xiop1 = XEXP (index, 1);
8156
8157 return ((arm_address_register_rtx_p (xiop0, strict_p)
8158 && thumb2_index_mul_operand (xiop1))
8159 || (arm_address_register_rtx_p (xiop1, strict_p)
8160 && thumb2_index_mul_operand (xiop0)));
8161 }
8162 else if (code == ASHIFT)
8163 {
8164 rtx op = XEXP (index, 1);
8165
8166 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8167 && CONST_INT_P (op)
8168 && INTVAL (op) > 0
8169 && INTVAL (op) <= 3);
8170 }
8171
8172 return (code == CONST_INT
8173 && INTVAL (index) < 4096
8174 && INTVAL (index) > -256);
8175 }
8176
8177 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8178 static int
8179 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8180 {
8181 int regno;
8182
8183 if (!REG_P (x))
8184 return 0;
8185
8186 regno = REGNO (x);
8187
8188 if (strict_p)
8189 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8190
8191 return (regno <= LAST_LO_REGNUM
8192 || regno > LAST_VIRTUAL_REGISTER
8193 || regno == FRAME_POINTER_REGNUM
8194 || (GET_MODE_SIZE (mode) >= 4
8195 && (regno == STACK_POINTER_REGNUM
8196 || regno >= FIRST_PSEUDO_REGISTER
8197 || x == hard_frame_pointer_rtx
8198 || x == arg_pointer_rtx)));
8199 }
8200
8201 /* Return nonzero if x is a legitimate index register. This is the case
8202 for any base register that can access a QImode object. */
8203 inline static int
8204 thumb1_index_register_rtx_p (rtx x, int strict_p)
8205 {
8206 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8207 }
8208
8209 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8210
8211 The AP may be eliminated to either the SP or the FP, so we use the
8212 least common denominator, e.g. SImode, and offsets from 0 to 64.
8213
8214 ??? Verify whether the above is the right approach.
8215
8216 ??? Also, the FP may be eliminated to the SP, so perhaps that
8217 needs special handling also.
8218
8219 ??? Look at how the mips16 port solves this problem. It probably uses
8220 better ways to solve some of these problems.
8221
8222 Although it is not incorrect, we don't accept QImode and HImode
8223 addresses based on the frame pointer or arg pointer until the
8224 reload pass starts. This is so that eliminating such addresses
8225 into stack based ones won't produce impossible code. */
8226 int
8227 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8228 {
8229 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8230 return 0;
8231
8232 /* ??? Not clear if this is right. Experiment. */
8233 if (GET_MODE_SIZE (mode) < 4
8234 && !(reload_in_progress || reload_completed)
8235 && (reg_mentioned_p (frame_pointer_rtx, x)
8236 || reg_mentioned_p (arg_pointer_rtx, x)
8237 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8238 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8239 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8240 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8241 return 0;
8242
8243 /* Accept any base register. SP only in SImode or larger. */
8244 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8245 return 1;
8246
8247 /* This is PC relative data before arm_reorg runs. */
8248 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8249 && GET_CODE (x) == SYMBOL_REF
8250 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8251 return 1;
8252
8253 /* This is PC relative data after arm_reorg runs. */
8254 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8255 && reload_completed
8256 && (GET_CODE (x) == LABEL_REF
8257 || (GET_CODE (x) == CONST
8258 && GET_CODE (XEXP (x, 0)) == PLUS
8259 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8260 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8261 return 1;
8262
8263 /* Post-inc indexing only supported for SImode and larger. */
8264 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8265 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8266 return 1;
8267
8268 else if (GET_CODE (x) == PLUS)
8269 {
8270 /* REG+REG address can be any two index registers. */
8271 /* We disallow FRAME+REG addressing since we know that FRAME
8272 will be replaced with STACK, and SP relative addressing only
8273 permits SP+OFFSET. */
8274 if (GET_MODE_SIZE (mode) <= 4
8275 && XEXP (x, 0) != frame_pointer_rtx
8276 && XEXP (x, 1) != frame_pointer_rtx
8277 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8278 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8279 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8280 return 1;
8281
8282 /* REG+const has 5-7 bit offset for non-SP registers. */
8283 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8284 || XEXP (x, 0) == arg_pointer_rtx)
8285 && CONST_INT_P (XEXP (x, 1))
8286 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8287 return 1;
8288
8289 /* REG+const has 10-bit offset for SP, but only SImode and
8290 larger are supported. */
8291 /* ??? Should probably check for DI/DFmode overflow here
8292 just like GO_IF_LEGITIMATE_OFFSET does. */
8293 else if (REG_P (XEXP (x, 0))
8294 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8295 && GET_MODE_SIZE (mode) >= 4
8296 && CONST_INT_P (XEXP (x, 1))
8297 && INTVAL (XEXP (x, 1)) >= 0
8298 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8299 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8300 return 1;
8301
8302 else if (REG_P (XEXP (x, 0))
8303 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8304 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8305 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8306 && REGNO (XEXP (x, 0))
8307 <= LAST_VIRTUAL_POINTER_REGISTER))
8308 && GET_MODE_SIZE (mode) >= 4
8309 && CONST_INT_P (XEXP (x, 1))
8310 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8311 return 1;
8312 }
8313
8314 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8315 && GET_MODE_SIZE (mode) == 4
8316 && GET_CODE (x) == SYMBOL_REF
8317 && CONSTANT_POOL_ADDRESS_P (x)
8318 && ! (flag_pic
8319 && symbol_mentioned_p (get_pool_constant (x))
8320 && ! pcrel_constant_p (get_pool_constant (x))))
8321 return 1;
8322
8323 return 0;
8324 }
8325
8326 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8327 instruction of mode MODE. */
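/* For instance (illustrative): this matches the Thumb-1 immediate forms
   ldrb/strb #0..#31, ldrh/strh #0..#62 in steps of 2, and ldr/str
   #0..#124 in steps of 4.  */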
8328 int
8329 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8330 {
8331 switch (GET_MODE_SIZE (mode))
8332 {
8333 case 1:
8334 return val >= 0 && val < 32;
8335
8336 case 2:
8337 return val >= 0 && val < 64 && (val & 1) == 0;
8338
8339 default:
8340 return (val >= 0
8341 && (val + GET_MODE_SIZE (mode)) <= 128
8342 && (val & 3) == 0);
8343 }
8344 }
8345
8346 bool
8347 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8348 {
8349 if (TARGET_ARM)
8350 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8351 else if (TARGET_THUMB2)
8352 return thumb2_legitimate_address_p (mode, x, strict_p);
8353 else /* if (TARGET_THUMB1) */
8354 return thumb1_legitimate_address_p (mode, x, strict_p);
8355 }
8356
8357 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8358
8359 Given an rtx X being reloaded into a reg required to be
8360 in class CLASS, return the class of reg to actually use.
8361 In general this is just CLASS, but for the Thumb core registers and
8362 immediate constants we prefer a LO_REGS class or a subset. */
8363
8364 static reg_class_t
8365 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8366 {
8367 if (TARGET_32BIT)
8368 return rclass;
8369 else
8370 {
8371 if (rclass == GENERAL_REGS)
8372 return LO_REGS;
8373 else
8374 return rclass;
8375 }
8376 }
8377
8378 /* Build the SYMBOL_REF for __tls_get_addr. */
8379
8380 static GTY(()) rtx tls_get_addr_libfunc;
8381
8382 static rtx
8383 get_tls_get_addr (void)
8384 {
8385 if (!tls_get_addr_libfunc)
8386 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8387 return tls_get_addr_libfunc;
8388 }
8389
8390 rtx
8391 arm_load_tp (rtx target)
8392 {
8393 if (!target)
8394 target = gen_reg_rtx (SImode);
8395
8396 if (TARGET_HARD_TP)
8397 {
8398 /* Can return in any reg. */
8399 emit_insn (gen_load_tp_hard (target));
8400 }
8401 else
8402 {
8403 /* Always returned in r0. Immediately copy the result into a pseudo,
8404 otherwise other uses of r0 (e.g. setting up function arguments) may
8405 clobber the value. */
8406
8407 rtx tmp;
8408
8409 emit_insn (gen_load_tp_soft ());
8410
8411 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8412 emit_move_insn (target, tmp);
8413 }
8414 return target;
8415 }
8416
8417 static rtx
8418 load_tls_operand (rtx x, rtx reg)
8419 {
8420 rtx tmp;
8421
8422 if (reg == NULL_RTX)
8423 reg = gen_reg_rtx (SImode);
8424
8425 tmp = gen_rtx_CONST (SImode, x);
8426
8427 emit_move_insn (reg, tmp);
8428
8429 return reg;
8430 }
8431
8432 static rtx_insn *
8433 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8434 {
8435 rtx label, labelno, sum;
8436
8437 gcc_assert (reloc != TLS_DESCSEQ);
8438 start_sequence ();
8439
8440 labelno = GEN_INT (pic_labelno++);
8441 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8442 label = gen_rtx_CONST (VOIDmode, label);
8443
8444 sum = gen_rtx_UNSPEC (Pmode,
8445 gen_rtvec (4, x, GEN_INT (reloc), label,
8446 GEN_INT (TARGET_ARM ? 8 : 4)),
8447 UNSPEC_TLS);
8448 reg = load_tls_operand (sum, reg);
8449
8450 if (TARGET_ARM)
8451 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8452 else
8453 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8454
8455 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8456 LCT_PURE, /* LCT_CONST? */
8457 Pmode, reg, Pmode);
8458
8459 rtx_insn *insns = get_insns ();
8460 end_sequence ();
8461
8462 return insns;
8463 }
8464
8465 static rtx
8466 arm_tls_descseq_addr (rtx x, rtx reg)
8467 {
8468 rtx labelno = GEN_INT (pic_labelno++);
8469 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8470 rtx sum = gen_rtx_UNSPEC (Pmode,
8471 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8472 gen_rtx_CONST (VOIDmode, label),
8473 GEN_INT (!TARGET_ARM)),
8474 UNSPEC_TLS);
8475 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8476
8477 emit_insn (gen_tlscall (x, labelno));
8478 if (!reg)
8479 reg = gen_reg_rtx (SImode);
8480 else
8481 gcc_assert (REGNO (reg) != R0_REGNUM);
8482
8483 emit_move_insn (reg, reg0);
8484
8485 return reg;
8486 }
8487
8488 rtx
8489 legitimize_tls_address (rtx x, rtx reg)
8490 {
8491 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8492 rtx_insn *insns;
8493 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8494
8495 switch (model)
8496 {
8497 case TLS_MODEL_GLOBAL_DYNAMIC:
8498 if (TARGET_GNU2_TLS)
8499 {
8500 reg = arm_tls_descseq_addr (x, reg);
8501
8502 tp = arm_load_tp (NULL_RTX);
8503
8504 dest = gen_rtx_PLUS (Pmode, tp, reg);
8505 }
8506 else
8507 {
8508 /* Original scheme */
8509 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8510 dest = gen_reg_rtx (Pmode);
8511 emit_libcall_block (insns, dest, ret, x);
8512 }
8513 return dest;
8514
8515 case TLS_MODEL_LOCAL_DYNAMIC:
8516 if (TARGET_GNU2_TLS)
8517 {
8518 reg = arm_tls_descseq_addr (x, reg);
8519
8520 tp = arm_load_tp (NULL_RTX);
8521
8522 dest = gen_rtx_PLUS (Pmode, tp, reg);
8523 }
8524 else
8525 {
8526 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8527
8528 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8529 share the LDM result with other LD model accesses. */
8530 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8531 UNSPEC_TLS);
8532 dest = gen_reg_rtx (Pmode);
8533 emit_libcall_block (insns, dest, ret, eqv);
8534
8535 /* Load the addend. */
8536 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8537 GEN_INT (TLS_LDO32)),
8538 UNSPEC_TLS);
8539 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8540 dest = gen_rtx_PLUS (Pmode, dest, addend);
8541 }
8542 return dest;
8543
8544 case TLS_MODEL_INITIAL_EXEC:
8545 labelno = GEN_INT (pic_labelno++);
8546 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8547 label = gen_rtx_CONST (VOIDmode, label);
8548 sum = gen_rtx_UNSPEC (Pmode,
8549 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8550 GEN_INT (TARGET_ARM ? 8 : 4)),
8551 UNSPEC_TLS);
8552 reg = load_tls_operand (sum, reg);
8553
8554 if (TARGET_ARM)
8555 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8556 else if (TARGET_THUMB2)
8557 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8558 else
8559 {
8560 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8561 emit_move_insn (reg, gen_const_mem (SImode, reg));
8562 }
8563
8564 tp = arm_load_tp (NULL_RTX);
8565
8566 return gen_rtx_PLUS (Pmode, tp, reg);
8567
8568 case TLS_MODEL_LOCAL_EXEC:
8569 tp = arm_load_tp (NULL_RTX);
8570
8571 reg = gen_rtx_UNSPEC (Pmode,
8572 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8573 UNSPEC_TLS);
8574 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8575
8576 return gen_rtx_PLUS (Pmode, tp, reg);
8577
8578 default:
8579 abort ();
8580 }
8581 }
8582
8583 /* Try machine-dependent ways of modifying an illegitimate address
8584 to be legitimate. If we find one, return the new, valid address. */
8585 rtx
8586 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8587 {
8588 if (arm_tls_referenced_p (x))
8589 {
8590 rtx addend = NULL;
8591
8592 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8593 {
8594 addend = XEXP (XEXP (x, 0), 1);
8595 x = XEXP (XEXP (x, 0), 0);
8596 }
8597
8598 if (GET_CODE (x) != SYMBOL_REF)
8599 return x;
8600
8601 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8602
8603 x = legitimize_tls_address (x, NULL_RTX);
8604
8605 if (addend)
8606 {
8607 x = gen_rtx_PLUS (SImode, x, addend);
8608 orig_x = x;
8609 }
8610 else
8611 return x;
8612 }
8613
8614 if (!TARGET_ARM)
8615 {
8616 /* TODO: legitimize_address for Thumb2. */
8617 if (TARGET_THUMB2)
8618 return x;
8619 return thumb_legitimize_address (x, orig_x, mode);
8620 }
8621
8622 if (GET_CODE (x) == PLUS)
8623 {
8624 rtx xop0 = XEXP (x, 0);
8625 rtx xop1 = XEXP (x, 1);
8626
8627 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8628 xop0 = force_reg (SImode, xop0);
8629
8630 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8631 && !symbol_mentioned_p (xop1))
8632 xop1 = force_reg (SImode, xop1);
8633
8634 if (ARM_BASE_REGISTER_RTX_P (xop0)
8635 && CONST_INT_P (xop1))
8636 {
8637 HOST_WIDE_INT n, low_n;
8638 rtx base_reg, val;
8639 n = INTVAL (xop1);
8640
8641 /* VFP addressing modes actually allow greater offsets, but for
8642 now we just stick with the lowest common denominator. */
8643 if (mode == DImode || mode == DFmode)
8644 {
8645 low_n = n & 0x0f;
8646 n &= ~0x0f;
8647 if (low_n > 4)
8648 {
8649 n += 16;
8650 low_n -= 16;
8651 }
8652 }
8653 else
8654 {
8655 low_n = ((mode) == TImode ? 0
8656 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8657 n -= low_n;
8658 }
8659
8660 base_reg = gen_reg_rtx (SImode);
8661 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8662 emit_move_insn (base_reg, val);
8663 x = plus_constant (Pmode, base_reg, low_n);
8664 }
8665 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8666 x = gen_rtx_PLUS (SImode, xop0, xop1);
8667 }
8668
8669 /* XXX We don't allow MINUS any more -- see comment in
8670 arm_legitimate_address_outer_p (). */
8671 else if (GET_CODE (x) == MINUS)
8672 {
8673 rtx xop0 = XEXP (x, 0);
8674 rtx xop1 = XEXP (x, 1);
8675
8676 if (CONSTANT_P (xop0))
8677 xop0 = force_reg (SImode, xop0);
8678
8679 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8680 xop1 = force_reg (SImode, xop1);
8681
8682 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8683 x = gen_rtx_MINUS (SImode, xop0, xop1);
8684 }
8685
8686 /* Make sure to take full advantage of the pre-indexed addressing mode
8687 with absolute addresses which often allows for the base register to
8688 be factorized for multiple adjacent memory references, and it might
8689 even allow for the minipool to be avoided entirely. */
8690 else if (CONST_INT_P (x) && optimize > 0)
8691 {
8692 unsigned int bits;
8693 HOST_WIDE_INT mask, base, index;
8694 rtx base_reg;
8695
8696 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8697 use an 8-bit index. So let's use a 12-bit index for SImode only and
8698 hope that arm_gen_constant will enable ldrb to use more bits. */
8699 bits = (mode == SImode) ? 12 : 8;
8700 mask = (1 << bits) - 1;
8701 base = INTVAL (x) & ~mask;
8702 index = INTVAL (x) & mask;
8703 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8704 {
8705 /* It'll most probably be more efficient to generate the base
8706 with more bits set and use a negative index instead. */
8707 base |= mask;
8708 index -= mask;
8709 }
8710 base_reg = force_reg (SImode, GEN_INT (base));
8711 x = plus_constant (Pmode, base_reg, index);
8712 }
8713
8714 if (flag_pic)
8715 {
8716 /* We need to find and carefully transform any SYMBOL and LABEL
8717 references; so go back to the original address expression. */
8718 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8719
8720 if (new_x != orig_x)
8721 x = new_x;
8722 }
8723
8724 return x;
8725 }
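
/* Illustrative sketch only: the absolute-address splitting performed
   above, restated on plain integers with hypothetical names (the real
   code operates on RTL and uses bit_count/GEN_INT).  BITS is 12 for
   SImode accesses and 8 otherwise, as in the comment above.  */
static inline void
split_absolute_address_example (unsigned long long addr, int bits,
				long long *base, long long *index)
{
  long long mask = (1LL << bits) - 1;

  *base = (long long) (addr & ~(unsigned long long) mask);
  *index = (long long) (addr & (unsigned long long) mask);

  /* Population count of the low 32 bits of the candidate base.  */
  int popcount = 0;
  for (unsigned long long t = (unsigned long long) *base & 0xffffffffULL;
       t != 0; t &= t - 1)
    popcount++;

  if (popcount > (32 - bits) / 2)
    {
      /* Building the base would need many set bits; round it up and use a
	 negative index instead.  E.g. addr = 0xfffff004, bits = 12: the
	 base becomes 0xffffffff (cheap to materialise) and the index
	 becomes -4091, rather than base = 0xfffff000, index = 4.  */
      *base |= mask;
      *index -= mask;
    }
}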
8726
8727
8728 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8729 to be legitimate. If we find one, return the new, valid address. */
8730 rtx
8731 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8732 {
8733 if (GET_CODE (x) == PLUS
8734 && CONST_INT_P (XEXP (x, 1))
8735 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8736 || INTVAL (XEXP (x, 1)) < 0))
8737 {
8738 rtx xop0 = XEXP (x, 0);
8739 rtx xop1 = XEXP (x, 1);
8740 HOST_WIDE_INT offset = INTVAL (xop1);
8741
8742 /* Try to fold the offset into a biasing of the base register and
8743 then offsetting that. Don't do this when optimizing for space
8744 since it can cause too many CSEs. */
8745 if (optimize_size && offset >= 0
8746 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8747 {
8748 HOST_WIDE_INT delta;
8749
8750 if (offset >= 256)
8751 delta = offset - (256 - GET_MODE_SIZE (mode));
8752 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8753 delta = 31 * GET_MODE_SIZE (mode);
8754 else
8755 delta = offset & (~31 * GET_MODE_SIZE (mode));
8756
8757 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8758 NULL_RTX);
8759 x = plus_constant (Pmode, xop0, delta);
8760 }
8761 else if (offset < 0 && offset > -256)
8762 /* Small negative offsets are best done with a subtract before the
8763 dereference; forcing these into a register normally takes two
8764 instructions. */
8765 x = force_operand (x, NULL_RTX);
8766 else
8767 {
8768 /* For the remaining cases, force the constant into a register. */
8769 xop1 = force_reg (SImode, xop1);
8770 x = gen_rtx_PLUS (SImode, xop0, xop1);
8771 }
8772 }
8773 else if (GET_CODE (x) == PLUS
8774 && s_register_operand (XEXP (x, 1), SImode)
8775 && !s_register_operand (XEXP (x, 0), SImode))
8776 {
8777 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8778
8779 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8780 }
8781
8782 if (flag_pic)
8783 {
8784 /* We need to find and carefully transform any SYMBOL and LABEL
8785 references; so go back to the original address expression. */
8786 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8787
8788 if (new_x != orig_x)
8789 x = new_x;
8790 }
8791
8792 return x;
8793 }
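
/* Illustrative sketch only: the offset-biasing arithmetic used in the
   -Os path above, on plain integers with hypothetical names.  It applies
   when 0 <= OFFSET < 256 + 31 * MODE_SIZE but OFFSET does not fit the
   scaled immediate field directly; the base register is biased by
   OFFSET - DELTA and DELTA is kept as the in-range immediate.  */
static inline long
thumb_bias_delta_example (long offset, long mode_size)
{
  long delta;

  if (offset >= 256)
    delta = offset - (256 - mode_size);
  else if (offset < 32 * mode_size + 8)
    delta = 31 * mode_size;
  else
    delta = offset & (~31 * mode_size);

  /* E.g. a word access (MODE_SIZE = 4) at OFFSET = 300: DELTA = 48, so
     the base is biased by 252 and the load uses immediate offset 48,
     which fits the 0..124 range of the scaled 5-bit field.  */
  return delta;
}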
8794
8795 /* Return TRUE if X contains any TLS symbol references. */
8796
8797 bool
8798 arm_tls_referenced_p (rtx x)
8799 {
8800 if (! TARGET_HAVE_TLS)
8801 return false;
8802
8803 subrtx_iterator::array_type array;
8804 FOR_EACH_SUBRTX (iter, array, x, ALL)
8805 {
8806 const_rtx x = *iter;
8807 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8808 {
8809 /* ARM currently does not provide relocations to encode TLS variables
8810 into AArch32 instructions, only data, so there is currently no way
8811 to implement these if the literal pool is disabled. */
8812 if (arm_disable_literal_pool)
8813 sorry ("accessing thread-local storage is not currently supported "
8814 "with -mpure-code or -mslow-flash-data");
8815
8816 return true;
8817 }
8818
8819 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8820 TLS offsets, not real symbol references. */
8821 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8822 iter.skip_subrtxes ();
8823 }
8824 return false;
8825 }
8826
8827 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8828
8829 On the ARM, allow any integer (invalid ones are removed later by insn
8830 patterns), nice doubles and symbol_refs which refer to the function's
8831 constant pool XXX.
8832
8833 When generating pic allow anything. */
8834
8835 static bool
8836 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8837 {
8838 return flag_pic || !label_mentioned_p (x);
8839 }
8840
8841 static bool
8842 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8843 {
8844 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8845 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
8846 for ARMv8-M Baseline or later, the result is valid. */
8847 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8848 x = XEXP (x, 0);
8849
8850 return (CONST_INT_P (x)
8851 || CONST_DOUBLE_P (x)
8852 || CONSTANT_ADDRESS_P (x)
8853 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8854 || flag_pic);
8855 }
8856
8857 static bool
8858 arm_legitimate_constant_p (machine_mode mode, rtx x)
8859 {
8860 return (!arm_cannot_force_const_mem (mode, x)
8861 && (TARGET_32BIT
8862 ? arm_legitimate_constant_p_1 (mode, x)
8863 : thumb_legitimate_constant_p (mode, x)));
8864 }
8865
8866 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8867
8868 static bool
8869 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8870 {
8871 rtx base, offset;
8872
8873 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8874 {
8875 split_const (x, &base, &offset);
8876 if (GET_CODE (base) == SYMBOL_REF
8877 && !offset_within_block_p (base, INTVAL (offset)))
8878 return true;
8879 }
8880 return arm_tls_referenced_p (x);
8881 }
8882 \f
8883 #define REG_OR_SUBREG_REG(X) \
8884 (REG_P (X) \
8885 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8886
8887 #define REG_OR_SUBREG_RTX(X) \
8888 (REG_P (X) ? (X) : SUBREG_REG (X))
8889
8890 static inline int
8891 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8892 {
8893 machine_mode mode = GET_MODE (x);
8894 int total, words;
8895
8896 switch (code)
8897 {
8898 case ASHIFT:
8899 case ASHIFTRT:
8900 case LSHIFTRT:
8901 case ROTATERT:
8902 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8903
8904 case PLUS:
8905 case MINUS:
8906 case COMPARE:
8907 case NEG:
8908 case NOT:
8909 return COSTS_N_INSNS (1);
8910
8911 case MULT:
8912 if (arm_arch6m && arm_m_profile_small_mul)
8913 return COSTS_N_INSNS (32);
8914
8915 if (CONST_INT_P (XEXP (x, 1)))
8916 {
8917 int cycles = 0;
8918 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8919
8920 while (i)
8921 {
8922 i >>= 2;
8923 cycles++;
8924 }
8925 return COSTS_N_INSNS (2) + cycles;
8926 }
8927 return COSTS_N_INSNS (1) + 16;
8928
8929 case SET:
8930 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8931 the mode. */
8932 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8933 return (COSTS_N_INSNS (words)
8934 + 4 * ((MEM_P (SET_SRC (x)))
8935 + MEM_P (SET_DEST (x))));
8936
8937 case CONST_INT:
8938 if (outer == SET)
8939 {
8940 if (UINTVAL (x) < 256
8941 /* 16-bit constant. */
8942 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8943 return 0;
8944 if (thumb_shiftable_const (INTVAL (x)))
8945 return COSTS_N_INSNS (2);
8946 return COSTS_N_INSNS (3);
8947 }
8948 else if ((outer == PLUS || outer == COMPARE)
8949 && INTVAL (x) < 256 && INTVAL (x) > -256)
8950 return 0;
8951 else if ((outer == IOR || outer == XOR || outer == AND)
8952 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8953 return COSTS_N_INSNS (1);
8954 else if (outer == AND)
8955 {
8956 int i;
8957 /* This duplicates the tests in the andsi3 expander. */
8958 for (i = 9; i <= 31; i++)
8959 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8960 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8961 return COSTS_N_INSNS (2);
8962 }
8963 else if (outer == ASHIFT || outer == ASHIFTRT
8964 || outer == LSHIFTRT)
8965 return 0;
8966 return COSTS_N_INSNS (2);
8967
8968 case CONST:
8969 case CONST_DOUBLE:
8970 case LABEL_REF:
8971 case SYMBOL_REF:
8972 return COSTS_N_INSNS (3);
8973
8974 case UDIV:
8975 case UMOD:
8976 case DIV:
8977 case MOD:
8978 return 100;
8979
8980 case TRUNCATE:
8981 return 99;
8982
8983 case AND:
8984 case XOR:
8985 case IOR:
8986 /* XXX guess. */
8987 return 8;
8988
8989 case MEM:
8990 /* XXX another guess. */
8991 /* Memory costs quite a lot for the first word, but subsequent words
8992 load at the equivalent of a single insn each. */
8993 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8994 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8995 ? 4 : 0));
8996
8997 case IF_THEN_ELSE:
8998 /* XXX a guess. */
8999 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9000 return 14;
9001 return 2;
9002
9003 case SIGN_EXTEND:
9004 case ZERO_EXTEND:
9005 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9006 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9007
9008 if (mode == SImode)
9009 return total;
9010
9011 if (arm_arch6)
9012 return total + COSTS_N_INSNS (1);
9013
9014 /* Assume a two-shift sequence. Increase the cost slightly so
9015 we prefer actual shifts over an extend operation. */
9016 return total + 1 + COSTS_N_INSNS (2);
9017
9018 default:
9019 return 99;
9020 }
9021 }
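
/* Illustrative sketch only (hypothetical name): the multiply-by-constant
   cycle estimate used in the MULT case above -- roughly one cycle per two
   significant bits of the constant, modelling an early-terminating
   multiplier.  */
static inline int
thumb1_mul_const_cycles_example (unsigned long long multiplier)
{
  int cycles = 0;

  while (multiplier)
    {
      multiplier >>= 2;
      cycles++;
    }

  /* E.g. multiplier = 0x35 (six significant bits) gives 3 cycles, so the
     MULT is costed as COSTS_N_INSNS (2) + 3.  */
  return cycles;
}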
9022
9023 /* Estimates the size cost of thumb1 instructions.
9024 For now most of the code is copied from thumb1_rtx_costs. We need more
9025 fine-grained tuning when we have more related test cases. */
9026 static inline int
9027 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9028 {
9029 machine_mode mode = GET_MODE (x);
9030 int words, cost;
9031
9032 switch (code)
9033 {
9034 case ASHIFT:
9035 case ASHIFTRT:
9036 case LSHIFTRT:
9037 case ROTATERT:
9038 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9039
9040 case PLUS:
9041 case MINUS:
9042 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9043 defined by RTL expansion, especially for the expansion of
9044 multiplication. */
9045 if ((GET_CODE (XEXP (x, 0)) == MULT
9046 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9047 || (GET_CODE (XEXP (x, 1)) == MULT
9048 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9049 return COSTS_N_INSNS (2);
9050 /* Fall through. */
9051 case COMPARE:
9052 case NEG:
9053 case NOT:
9054 return COSTS_N_INSNS (1);
9055
9056 case MULT:
9057 if (CONST_INT_P (XEXP (x, 1)))
9058 {
9059 /* Thumb1 mul instruction can't operate on const. We must load it
9060 into a register first. */
9061 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9062 /* For the targets which have a very small and high-latency multiply
9063 unit, we prefer to synthesize the mult with up to 5 instructions,
9064 giving a good balance between size and performance. */
9065 if (arm_arch6m && arm_m_profile_small_mul)
9066 return COSTS_N_INSNS (5);
9067 else
9068 return COSTS_N_INSNS (1) + const_size;
9069 }
9070 return COSTS_N_INSNS (1);
9071
9072 case SET:
9073 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9074 the mode. */
9075 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9076 cost = COSTS_N_INSNS (words);
9077 if (satisfies_constraint_J (SET_SRC (x))
9078 || satisfies_constraint_K (SET_SRC (x))
9079 /* Too big an immediate for a 2-byte mov, using MOVT. */
9080 || (CONST_INT_P (SET_SRC (x))
9081 && UINTVAL (SET_SRC (x)) >= 256
9082 && TARGET_HAVE_MOVT
9083 && satisfies_constraint_j (SET_SRC (x)))
9084 /* thumb1_movdi_insn. */
9085 || ((words > 1) && MEM_P (SET_SRC (x))))
9086 cost += COSTS_N_INSNS (1);
9087 return cost;
9088
9089 case CONST_INT:
9090 if (outer == SET)
9091 {
9092 if (UINTVAL (x) < 256)
9093 return COSTS_N_INSNS (1);
9094 /* movw is 4 bytes long. */
9095 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9096 return COSTS_N_INSNS (2);
9097 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9098 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9099 return COSTS_N_INSNS (2);
9100 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9101 if (thumb_shiftable_const (INTVAL (x)))
9102 return COSTS_N_INSNS (2);
9103 return COSTS_N_INSNS (3);
9104 }
9105 else if ((outer == PLUS || outer == COMPARE)
9106 && INTVAL (x) < 256 && INTVAL (x) > -256)
9107 return 0;
9108 else if ((outer == IOR || outer == XOR || outer == AND)
9109 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9110 return COSTS_N_INSNS (1);
9111 else if (outer == AND)
9112 {
9113 int i;
9114 /* This duplicates the tests in the andsi3 expander. */
9115 for (i = 9; i <= 31; i++)
9116 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9117 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9118 return COSTS_N_INSNS (2);
9119 }
9120 else if (outer == ASHIFT || outer == ASHIFTRT
9121 || outer == LSHIFTRT)
9122 return 0;
9123 return COSTS_N_INSNS (2);
9124
9125 case CONST:
9126 case CONST_DOUBLE:
9127 case LABEL_REF:
9128 case SYMBOL_REF:
9129 return COSTS_N_INSNS (3);
9130
9131 case UDIV:
9132 case UMOD:
9133 case DIV:
9134 case MOD:
9135 return 100;
9136
9137 case TRUNCATE:
9138 return 99;
9139
9140 case AND:
9141 case XOR:
9142 case IOR:
9143 return COSTS_N_INSNS (1);
9144
9145 case MEM:
9146 return (COSTS_N_INSNS (1)
9147 + COSTS_N_INSNS (1)
9148 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9149 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9150 ? COSTS_N_INSNS (1) : 0));
9151
9152 case IF_THEN_ELSE:
9153 /* XXX a guess. */
9154 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9155 return 14;
9156 return 2;
9157
9158 case ZERO_EXTEND:
9159 /* XXX still guessing. */
9160 switch (GET_MODE (XEXP (x, 0)))
9161 {
9162 case E_QImode:
9163 return (1 + (mode == DImode ? 4 : 0)
9164 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9165
9166 case E_HImode:
9167 return (4 + (mode == DImode ? 4 : 0)
9168 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9169
9170 case E_SImode:
9171 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9172
9173 default:
9174 return 99;
9175 }
9176
9177 default:
9178 return 99;
9179 }
9180 }
9181
9182 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9183 operand, then return the operand that is being shifted. If the shift
9184 is not by a constant, then set SHIFT_REG to point to the operand.
9185 Return NULL if OP is not a shifter operand. */
9186 static rtx
9187 shifter_op_p (rtx op, rtx *shift_reg)
9188 {
9189 enum rtx_code code = GET_CODE (op);
9190
9191 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9192 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9193 return XEXP (op, 0);
9194 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9195 return XEXP (op, 0);
9196 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9197 || code == ASHIFTRT)
9198 {
9199 if (!CONST_INT_P (XEXP (op, 1)))
9200 *shift_reg = XEXP (op, 1);
9201 return XEXP (op, 0);
9202 }
9203
9204 return NULL;
9205 }
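
/* Illustrative sketch only (hypothetical name): why the MULT case above is
   treated as a shift.  A multiplication by a power of two greater than one
   is equivalent to a left shift by exact_log2 of the factor; exact_log2
   returns -1 for non-powers-of-two, and the "> 0" test above also rejects
   multiplication by 1.  */
static inline int
mult_as_shift_example (long long factor)
{
  int shift = -1;

  if (factor > 1 && (factor & (factor - 1)) == 0)
    for (shift = 0; (1LL << shift) != factor; shift++)
      ;

  /* E.g. factor = 8 gives shift = 3: x * 8 is costed like x << 3.  */
  return shift;
}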
9206
9207 static bool
9208 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9209 {
9210 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9211 rtx_code code = GET_CODE (x);
9212 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9213
9214 switch (XINT (x, 1))
9215 {
9216 case UNSPEC_UNALIGNED_LOAD:
9217 /* We can only do unaligned loads into the integer unit, and we can't
9218 use LDM or LDRD. */
9219 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9220 if (speed_p)
9221 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9222 + extra_cost->ldst.load_unaligned);
9223
9224 #ifdef NOT_YET
9225 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9226 ADDR_SPACE_GENERIC, speed_p);
9227 #endif
9228 return true;
9229
9230 case UNSPEC_UNALIGNED_STORE:
9231 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9232 if (speed_p)
9233 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9234 + extra_cost->ldst.store_unaligned);
9235
9236 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9237 #ifdef NOT_YET
9238 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9239 ADDR_SPACE_GENERIC, speed_p);
9240 #endif
9241 return true;
9242
9243 case UNSPEC_VRINTZ:
9244 case UNSPEC_VRINTP:
9245 case UNSPEC_VRINTM:
9246 case UNSPEC_VRINTR:
9247 case UNSPEC_VRINTX:
9248 case UNSPEC_VRINTA:
9249 if (speed_p)
9250 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9251
9252 return true;
9253 default:
9254 *cost = COSTS_N_INSNS (2);
9255 break;
9256 }
9257 return true;
9258 }
9259
9260 /* Cost of a libcall. We assume one insn per argument, an amount for the
9261 call (one insn for -Os) and then one for processing the result. */
9262 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9263
9264 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9265 do \
9266 { \
9267 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9268 if (shift_op != NULL \
9269 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9270 { \
9271 if (shift_reg) \
9272 { \
9273 if (speed_p) \
9274 *cost += extra_cost->alu.arith_shift_reg; \
9275 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9276 ASHIFT, 1, speed_p); \
9277 } \
9278 else if (speed_p) \
9279 *cost += extra_cost->alu.arith_shift; \
9280 \
9281 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9282 ASHIFT, 0, speed_p) \
9283 + rtx_cost (XEXP (x, 1 - IDX), \
9284 GET_MODE (shift_op), \
9285 OP, 1, speed_p)); \
9286 return true; \
9287 } \
9288 } \
9289 while (0)
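
/* Illustrative sketch only: LIBCALL_COST written out as a function, using
   the same speed_p convention as the callers below.  For a two-argument
   libcall this evaluates to COSTS_N_INSNS (20) when optimizing for speed
   and COSTS_N_INSNS (4) when optimizing for size.  */
static inline int
libcall_cost_example (int nargs, bool speed_p)
{
  /* One insn per argument, plus 18 (speed) or 2 (size) for making the
     call and processing the result.  */
  return COSTS_N_INSNS (nargs + (speed_p ? 18 : 2));
}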
9290
9291 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9292 considering the costs of the addressing mode and memory access
9293 separately. */
9294 static bool
9295 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9296 int *cost, bool speed_p)
9297 {
9298 machine_mode mode = GET_MODE (x);
9299
9300 *cost = COSTS_N_INSNS (1);
9301
9302 if (flag_pic
9303 && GET_CODE (XEXP (x, 0)) == PLUS
9304 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9305 /* This will be split into two instructions. Add the cost of the
9306 additional instruction here. The cost of the memory access is computed
9307 below. See arm.md:calculate_pic_address. */
9308 *cost += COSTS_N_INSNS (1);
9309
9310 /* Calculate cost of the addressing mode. */
9311 if (speed_p)
9312 {
9313 arm_addr_mode_op op_type;
9314 switch (GET_CODE (XEXP (x, 0)))
9315 {
9316 default:
9317 case REG:
9318 op_type = AMO_DEFAULT;
9319 break;
9320 case MINUS:
9321 /* MINUS does not appear in RTL, but the architecture supports it,
9322 so handle this case defensively. */
9323 /* fall through */
9324 case PLUS:
9325 op_type = AMO_NO_WB;
9326 break;
9327 case PRE_INC:
9328 case PRE_DEC:
9329 case POST_INC:
9330 case POST_DEC:
9331 case PRE_MODIFY:
9332 case POST_MODIFY:
9333 op_type = AMO_WB;
9334 break;
9335 }
9336
9337 if (VECTOR_MODE_P (mode))
9338 *cost += current_tune->addr_mode_costs->vector[op_type];
9339 else if (FLOAT_MODE_P (mode))
9340 *cost += current_tune->addr_mode_costs->fp[op_type];
9341 else
9342 *cost += current_tune->addr_mode_costs->integer[op_type];
9343 }
9344
9345 /* Calculate cost of memory access. */
9346 if (speed_p)
9347 {
9348 if (FLOAT_MODE_P (mode))
9349 {
9350 if (GET_MODE_SIZE (mode) == 8)
9351 *cost += extra_cost->ldst.loadd;
9352 else
9353 *cost += extra_cost->ldst.loadf;
9354 }
9355 else if (VECTOR_MODE_P (mode))
9356 *cost += extra_cost->ldst.loadv;
9357 else
9358 {
9359 /* Integer modes */
9360 if (GET_MODE_SIZE (mode) == 8)
9361 *cost += extra_cost->ldst.ldrd;
9362 else
9363 *cost += extra_cost->ldst.load;
9364 }
9365 }
9366
9367 return true;
9368 }
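
/* Illustrative sketch only (hypothetical name): how the pieces above
   combine.  The total cost of a MEM is one baseline insn, plus one more
   insn when the PIC address will be split, plus -- for speed costing
   only -- an addressing-mode entry selected by the shape of the address
   and a load entry selected by the mode being accessed.  */
static inline int
mem_cost_shape_example (bool speed_p, bool pic_split_p,
			int addr_mode_cost, int access_cost)
{
  int cost = COSTS_N_INSNS (1);

  if (pic_split_p)
    cost += COSTS_N_INSNS (1);

  if (speed_p)
    cost += addr_mode_cost + access_cost;

  return cost;
}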
9369
9370 /* RTX costs. Make an estimate of the cost of executing the operation
9371 X, which is contained within an operation with code OUTER_CODE.
9372 SPEED_P indicates whether the cost desired is the performance cost,
9373 or the size cost. The estimate is stored in COST and the return
9374 value is TRUE if the cost calculation is final, or FALSE if the
9375 caller should recurse through the operands of X to add additional
9376 costs.
9377
9378 We currently make no attempt to model the size savings of Thumb-2
9379 16-bit instructions. At the normal points in compilation where
9380 this code is called we have no measure of whether the condition
9381 flags are live or not, and thus no realistic way to determine what
9382 the size will eventually be. */
9383 static bool
9384 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9385 const struct cpu_cost_table *extra_cost,
9386 int *cost, bool speed_p)
9387 {
9388 machine_mode mode = GET_MODE (x);
9389
9390 *cost = COSTS_N_INSNS (1);
9391
9392 if (TARGET_THUMB1)
9393 {
9394 if (speed_p)
9395 *cost = thumb1_rtx_costs (x, code, outer_code);
9396 else
9397 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9398 return true;
9399 }
9400
9401 switch (code)
9402 {
9403 case SET:
9404 *cost = 0;
9405 /* SET RTXs don't have a mode so we get it from the destination. */
9406 mode = GET_MODE (SET_DEST (x));
9407
9408 if (REG_P (SET_SRC (x))
9409 && REG_P (SET_DEST (x)))
9410 {
9411 /* Assume that most copies can be done with a single insn,
9412 unless we don't have HW FP, in which case everything
9413 larger than word mode will require two insns. */
9414 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9415 && GET_MODE_SIZE (mode) > 4)
9416 || mode == DImode)
9417 ? 2 : 1);
9418 /* Conditional register moves can be encoded
9419 in 16 bits in Thumb mode. */
9420 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9421 *cost >>= 1;
9422
9423 return true;
9424 }
9425
9426 if (CONST_INT_P (SET_SRC (x)))
9427 {
9428 /* Handle CONST_INT here, since the value doesn't have a mode
9429 and we would otherwise be unable to work out the true cost. */
9430 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9431 0, speed_p);
9432 outer_code = SET;
9433 /* Slightly lower the cost of setting a core reg to a constant.
9434 This helps break up chains and allows for better scheduling. */
9435 if (REG_P (SET_DEST (x))
9436 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9437 *cost -= 1;
9438 x = SET_SRC (x);
9439 /* Immediate moves with an immediate in the range [0, 255] can be
9440 encoded in 16 bits in Thumb mode. */
9441 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9442 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9443 *cost >>= 1;
9444 goto const_int_cost;
9445 }
9446
9447 return false;
9448
9449 case MEM:
9450 return arm_mem_costs (x, extra_cost, cost, speed_p);
9451
9452 case PARALLEL:
9453 {
9454 /* Calculations of LDM costs are complex. We assume an initial cost
9455 (ldm_1st) which will load the number of registers mentioned in
9456 ldm_regs_per_insn_1st registers; then each additional
9457 ldm_regs_per_insn_subsequent registers cost one more insn. The
9458 formula for N regs is thus:
9459
9460 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9461 + ldm_regs_per_insn_subsequent - 1)
9462 / ldm_regs_per_insn_subsequent).
9463
9464 Additional costs may also be added for addressing. A similar
9465 formula is used for STM. */
9466
9467 bool is_ldm = load_multiple_operation (x, SImode);
9468 bool is_stm = store_multiple_operation (x, SImode);
9469
9470 if (is_ldm || is_stm)
9471 {
9472 if (speed_p)
9473 {
9474 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9475 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9476 ? extra_cost->ldst.ldm_regs_per_insn_1st
9477 : extra_cost->ldst.stm_regs_per_insn_1st;
9478 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9479 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9480 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9481
9482 *cost += regs_per_insn_1st
9483 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9484 + regs_per_insn_sub - 1)
9485 / regs_per_insn_sub);
9486 return true;
9487 }
9488
9489 }
9490 return false;
9491 }
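      /* A worked instance of the formula above, with hypothetical tuning
	 numbers: for ldm_regs_per_insn_1st = 2 and
	 ldm_regs_per_insn_subsequent = 2, an LDM of N = 7 registers costs
	 ldm_1st + COSTS_N_INSNS ((MAX (7 - 2, 0) + 2 - 1) / 2)
	 = ldm_1st + COSTS_N_INSNS (3).  */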
9492 case DIV:
9493 case UDIV:
9494 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9495 && (mode == SFmode || !TARGET_VFP_SINGLE))
9496 *cost += COSTS_N_INSNS (speed_p
9497 ? extra_cost->fp[mode != SFmode].div : 0);
9498 else if (mode == SImode && TARGET_IDIV)
9499 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9500 else
9501 *cost = LIBCALL_COST (2);
9502
9503 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9504 possible, udiv is preferred. */
9505 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9506 return false; /* All arguments must be in registers. */
9507
9508 case MOD:
9509 /* MOD by a power of 2 can be expanded as:
9510 rsbs r1, r0, #0
9511 and r0, r0, #(n - 1)
9512 and r1, r1, #(n - 1)
9513 rsbpl r0, r1, #0. */
9514 if (CONST_INT_P (XEXP (x, 1))
9515 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9516 && mode == SImode)
9517 {
9518 *cost += COSTS_N_INSNS (3);
9519
9520 if (speed_p)
9521 *cost += 2 * extra_cost->alu.logical
9522 + extra_cost->alu.arith;
9523 return true;
9524 }
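      /* A worked instance of the expansion above, for x % 8 with r0 = -5:
	   rsbs  r1, r0, #0	@ r1 = 5, flags from 0 - (-5): positive (PL)
	   and   r0, r0, #7	@ r0 = 3
	   and   r1, r1, #7	@ r1 = 5
	   rsbpl r0, r1, #0	@ executed (PL), r0 = -5
	 giving -5 % 8 == -5, as required by C's truncated division; for
	 r0 = 5 the first RSBS sets the N flag, the RSBPL is skipped and
	 r0 stays 5.  */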
9525
9526 /* Fall-through. */
9527 case UMOD:
9528 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9529 possible, udiv is preferred. */
9530 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9531 return false; /* All arguments must be in registers. */
9532
9533 case ROTATE:
9534 if (mode == SImode && REG_P (XEXP (x, 1)))
9535 {
9536 *cost += (COSTS_N_INSNS (1)
9537 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9538 if (speed_p)
9539 *cost += extra_cost->alu.shift_reg;
9540 return true;
9541 }
9542 /* Fall through */
9543 case ROTATERT:
9544 case ASHIFT:
9545 case LSHIFTRT:
9546 case ASHIFTRT:
9547 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9548 {
9549 *cost += (COSTS_N_INSNS (2)
9550 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9551 if (speed_p)
9552 *cost += 2 * extra_cost->alu.shift;
9553 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9554 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9555 *cost += 1;
9556 return true;
9557 }
9558 else if (mode == SImode)
9559 {
9560 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9561 /* Slightly disparage register shifts at -Os, but not by much. */
9562 if (!CONST_INT_P (XEXP (x, 1)))
9563 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9564 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9565 return true;
9566 }
9567 else if (GET_MODE_CLASS (mode) == MODE_INT
9568 && GET_MODE_SIZE (mode) < 4)
9569 {
9570 if (code == ASHIFT)
9571 {
9572 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9573 /* Slightly disparage register shifts at -Os, but not by
9574 much. */
9575 if (!CONST_INT_P (XEXP (x, 1)))
9576 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9577 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9578 }
9579 else if (code == LSHIFTRT || code == ASHIFTRT)
9580 {
9581 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9582 {
9583 /* Can use SBFX/UBFX. */
9584 if (speed_p)
9585 *cost += extra_cost->alu.bfx;
9586 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9587 }
9588 else
9589 {
9590 *cost += COSTS_N_INSNS (1);
9591 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9592 if (speed_p)
9593 {
9594 if (CONST_INT_P (XEXP (x, 1)))
9595 *cost += 2 * extra_cost->alu.shift;
9596 else
9597 *cost += (extra_cost->alu.shift
9598 + extra_cost->alu.shift_reg);
9599 }
9600 else
9601 /* Slightly disparage register shifts. */
9602 *cost += !CONST_INT_P (XEXP (x, 1));
9603 }
9604 }
9605 else /* Rotates. */
9606 {
9607 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9608 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9609 if (speed_p)
9610 {
9611 if (CONST_INT_P (XEXP (x, 1)))
9612 *cost += (2 * extra_cost->alu.shift
9613 + extra_cost->alu.log_shift);
9614 else
9615 *cost += (extra_cost->alu.shift
9616 + extra_cost->alu.shift_reg
9617 + extra_cost->alu.log_shift_reg);
9618 }
9619 }
9620 return true;
9621 }
9622
9623 *cost = LIBCALL_COST (2);
9624 return false;
9625
9626 case BSWAP:
9627 if (arm_arch6)
9628 {
9629 if (mode == SImode)
9630 {
9631 if (speed_p)
9632 *cost += extra_cost->alu.rev;
9633
9634 return false;
9635 }
9636 }
9637 else
9638 {
9639 /* No rev instruction available. Look at arm_legacy_rev
9640 and thumb_legacy_rev for the form of RTL used then. */
9641 if (TARGET_THUMB)
9642 {
9643 *cost += COSTS_N_INSNS (9);
9644
9645 if (speed_p)
9646 {
9647 *cost += 6 * extra_cost->alu.shift;
9648 *cost += 3 * extra_cost->alu.logical;
9649 }
9650 }
9651 else
9652 {
9653 *cost += COSTS_N_INSNS (4);
9654
9655 if (speed_p)
9656 {
9657 *cost += 2 * extra_cost->alu.shift;
9658 *cost += extra_cost->alu.arith_shift;
9659 *cost += 2 * extra_cost->alu.logical;
9660 }
9661 }
9662 return true;
9663 }
9664 return false;
9665
9666 case MINUS:
9667 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9668 && (mode == SFmode || !TARGET_VFP_SINGLE))
9669 {
9670 if (GET_CODE (XEXP (x, 0)) == MULT
9671 || GET_CODE (XEXP (x, 1)) == MULT)
9672 {
9673 rtx mul_op0, mul_op1, sub_op;
9674
9675 if (speed_p)
9676 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9677
9678 if (GET_CODE (XEXP (x, 0)) == MULT)
9679 {
9680 mul_op0 = XEXP (XEXP (x, 0), 0);
9681 mul_op1 = XEXP (XEXP (x, 0), 1);
9682 sub_op = XEXP (x, 1);
9683 }
9684 else
9685 {
9686 mul_op0 = XEXP (XEXP (x, 1), 0);
9687 mul_op1 = XEXP (XEXP (x, 1), 1);
9688 sub_op = XEXP (x, 0);
9689 }
9690
9691 /* The first operand of the multiply may be optionally
9692 negated. */
9693 if (GET_CODE (mul_op0) == NEG)
9694 mul_op0 = XEXP (mul_op0, 0);
9695
9696 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9697 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9698 + rtx_cost (sub_op, mode, code, 0, speed_p));
9699
9700 return true;
9701 }
9702
9703 if (speed_p)
9704 *cost += extra_cost->fp[mode != SFmode].addsub;
9705 return false;
9706 }
9707
9708 if (mode == SImode)
9709 {
9710 rtx shift_by_reg = NULL;
9711 rtx shift_op;
9712 rtx non_shift_op;
9713
9714 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9715 if (shift_op == NULL)
9716 {
9717 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9718 non_shift_op = XEXP (x, 0);
9719 }
9720 else
9721 non_shift_op = XEXP (x, 1);
9722
9723 if (shift_op != NULL)
9724 {
9725 if (shift_by_reg != NULL)
9726 {
9727 if (speed_p)
9728 *cost += extra_cost->alu.arith_shift_reg;
9729 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9730 }
9731 else if (speed_p)
9732 *cost += extra_cost->alu.arith_shift;
9733
9734 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9735 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9736 return true;
9737 }
9738
9739 if (arm_arch_thumb2
9740 && GET_CODE (XEXP (x, 1)) == MULT)
9741 {
9742 /* MLS. */
9743 if (speed_p)
9744 *cost += extra_cost->mult[0].add;
9745 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9746 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9747 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9748 return true;
9749 }
9750
9751 if (CONST_INT_P (XEXP (x, 0)))
9752 {
9753 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9754 INTVAL (XEXP (x, 0)), NULL_RTX,
9755 NULL_RTX, 1, 0);
9756 *cost = COSTS_N_INSNS (insns);
9757 if (speed_p)
9758 *cost += insns * extra_cost->alu.arith;
9759 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9760 return true;
9761 }
9762 else if (speed_p)
9763 *cost += extra_cost->alu.arith;
9764
9765 return false;
9766 }
9767
9768 if (GET_MODE_CLASS (mode) == MODE_INT
9769 && GET_MODE_SIZE (mode) < 4)
9770 {
9771 rtx shift_op, shift_reg;
9772 shift_reg = NULL;
9773
9774 /* We check both sides of the MINUS for shifter operands since,
9775 unlike PLUS, it's not commutative. */
9776
9777 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9778 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9779
9780 /* Slightly disparage, as we might need to widen the result. */
9781 *cost += 1;
9782 if (speed_p)
9783 *cost += extra_cost->alu.arith;
9784
9785 if (CONST_INT_P (XEXP (x, 0)))
9786 {
9787 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9788 return true;
9789 }
9790
9791 return false;
9792 }
9793
9794 if (mode == DImode)
9795 {
9796 *cost += COSTS_N_INSNS (1);
9797
9798 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9799 {
9800 rtx op1 = XEXP (x, 1);
9801
9802 if (speed_p)
9803 *cost += 2 * extra_cost->alu.arith;
9804
9805 if (GET_CODE (op1) == ZERO_EXTEND)
9806 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9807 0, speed_p);
9808 else
9809 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9810 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9811 0, speed_p);
9812 return true;
9813 }
9814 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9815 {
9816 if (speed_p)
9817 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9818 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9819 0, speed_p)
9820 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9821 return true;
9822 }
9823 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9824 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9825 {
9826 if (speed_p)
9827 *cost += (extra_cost->alu.arith
9828 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9829 ? extra_cost->alu.arith
9830 : extra_cost->alu.arith_shift));
9831 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9832 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9833 GET_CODE (XEXP (x, 1)), 0, speed_p));
9834 return true;
9835 }
9836
9837 if (speed_p)
9838 *cost += 2 * extra_cost->alu.arith;
9839 return false;
9840 }
9841
9842 /* Vector mode? */
9843
9844 *cost = LIBCALL_COST (2);
9845 return false;
9846
9847 case PLUS:
9848 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9849 && (mode == SFmode || !TARGET_VFP_SINGLE))
9850 {
9851 if (GET_CODE (XEXP (x, 0)) == MULT)
9852 {
9853 rtx mul_op0, mul_op1, add_op;
9854
9855 if (speed_p)
9856 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9857
9858 mul_op0 = XEXP (XEXP (x, 0), 0);
9859 mul_op1 = XEXP (XEXP (x, 0), 1);
9860 add_op = XEXP (x, 1);
9861
9862 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9863 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9864 + rtx_cost (add_op, mode, code, 0, speed_p));
9865
9866 return true;
9867 }
9868
9869 if (speed_p)
9870 *cost += extra_cost->fp[mode != SFmode].addsub;
9871 return false;
9872 }
9873 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9874 {
9875 *cost = LIBCALL_COST (2);
9876 return false;
9877 }
9878
9879 /* Narrow modes can be synthesized in SImode, but the range
9880 of useful sub-operations is limited. Check for shift operations
9881 on one of the operands. Only left shifts can be used in the
9882 narrow modes. */
9883 if (GET_MODE_CLASS (mode) == MODE_INT
9884 && GET_MODE_SIZE (mode) < 4)
9885 {
9886 rtx shift_op, shift_reg;
9887 shift_reg = NULL;
9888
9889 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9890
9891 if (CONST_INT_P (XEXP (x, 1)))
9892 {
9893 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9894 INTVAL (XEXP (x, 1)), NULL_RTX,
9895 NULL_RTX, 1, 0);
9896 *cost = COSTS_N_INSNS (insns);
9897 if (speed_p)
9898 *cost += insns * extra_cost->alu.arith;
9899 /* Slightly penalize a narrow operation as the result may
9900 need widening. */
9901 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9902 return true;
9903 }
9904
9905 /* Slightly penalize a narrow operation as the result may
9906 need widening. */
9907 *cost += 1;
9908 if (speed_p)
9909 *cost += extra_cost->alu.arith;
9910
9911 return false;
9912 }
9913
9914 if (mode == SImode)
9915 {
9916 rtx shift_op, shift_reg;
9917
9918 if (TARGET_INT_SIMD
9919 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9920 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9921 {
9922 /* UXTA[BH] or SXTA[BH]. */
9923 if (speed_p)
9924 *cost += extra_cost->alu.extend_arith;
9925 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9926 0, speed_p)
9927 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9928 return true;
9929 }
9930
9931 shift_reg = NULL;
9932 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9933 if (shift_op != NULL)
9934 {
9935 if (shift_reg)
9936 {
9937 if (speed_p)
9938 *cost += extra_cost->alu.arith_shift_reg;
9939 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9940 }
9941 else if (speed_p)
9942 *cost += extra_cost->alu.arith_shift;
9943
9944 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9945 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9946 return true;
9947 }
9948 if (GET_CODE (XEXP (x, 0)) == MULT)
9949 {
9950 rtx mul_op = XEXP (x, 0);
9951
9952 if (TARGET_DSP_MULTIPLY
9953 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9954 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9955 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9956 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9957 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9958 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9959 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9960 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9961 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9962 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9963 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9964 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9965 == 16))))))
9966 {
9967 /* SMLA[BT][BT]. */
9968 if (speed_p)
9969 *cost += extra_cost->mult[0].extend_add;
9970 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9971 SIGN_EXTEND, 0, speed_p)
9972 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9973 SIGN_EXTEND, 0, speed_p)
9974 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9975 return true;
9976 }
9977
9978 if (speed_p)
9979 *cost += extra_cost->mult[0].add;
9980 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9981 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9982 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9983 return true;
9984 }
9985 if (CONST_INT_P (XEXP (x, 1)))
9986 {
9987 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9988 INTVAL (XEXP (x, 1)), NULL_RTX,
9989 NULL_RTX, 1, 0);
9990 *cost = COSTS_N_INSNS (insns);
9991 if (speed_p)
9992 *cost += insns * extra_cost->alu.arith;
9993 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9994 return true;
9995 }
9996 else if (speed_p)
9997 *cost += extra_cost->alu.arith;
9998
9999 return false;
10000 }
10001
10002 if (mode == DImode)
10003 {
10004 if (GET_CODE (XEXP (x, 0)) == MULT
10005 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10006 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10007 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10008 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10009 {
10010 if (speed_p)
10011 *cost += extra_cost->mult[1].extend_add;
10012 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10013 ZERO_EXTEND, 0, speed_p)
10014 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10015 ZERO_EXTEND, 0, speed_p)
10016 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10017 return true;
10018 }
10019
10020 *cost += COSTS_N_INSNS (1);
10021
10022 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10023 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10024 {
10025 if (speed_p)
10026 *cost += (extra_cost->alu.arith
10027 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10028 ? extra_cost->alu.arith
10029 : extra_cost->alu.arith_shift));
10030
10031 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10032 0, speed_p)
10033 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10034 return true;
10035 }
10036
10037 if (speed_p)
10038 *cost += 2 * extra_cost->alu.arith;
10039 return false;
10040 }
10041
10042 /* Vector mode? */
10043 *cost = LIBCALL_COST (2);
10044 return false;
10045 case IOR:
10046 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10047 {
10048 if (speed_p)
10049 *cost += extra_cost->alu.rev;
10050
10051 return true;
10052 }
10053 /* Fall through. */
10054 case AND: case XOR:
10055 if (mode == SImode)
10056 {
10057 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10058 rtx op0 = XEXP (x, 0);
10059 rtx shift_op, shift_reg;
10060
10061 if (subcode == NOT
10062 && (code == AND
10063 || (code == IOR && TARGET_THUMB2)))
10064 op0 = XEXP (op0, 0);
10065
10066 shift_reg = NULL;
10067 shift_op = shifter_op_p (op0, &shift_reg);
10068 if (shift_op != NULL)
10069 {
10070 if (shift_reg)
10071 {
10072 if (speed_p)
10073 *cost += extra_cost->alu.log_shift_reg;
10074 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10075 }
10076 else if (speed_p)
10077 *cost += extra_cost->alu.log_shift;
10078
10079 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10080 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10081 return true;
10082 }
10083
10084 if (CONST_INT_P (XEXP (x, 1)))
10085 {
10086 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10087 INTVAL (XEXP (x, 1)), NULL_RTX,
10088 NULL_RTX, 1, 0);
10089
10090 *cost = COSTS_N_INSNS (insns);
10091 if (speed_p)
10092 *cost += insns * extra_cost->alu.logical;
10093 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10094 return true;
10095 }
10096
10097 if (speed_p)
10098 *cost += extra_cost->alu.logical;
10099 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10100 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10101 return true;
10102 }
10103
10104 if (mode == DImode)
10105 {
10106 rtx op0 = XEXP (x, 0);
10107 enum rtx_code subcode = GET_CODE (op0);
10108
10109 *cost += COSTS_N_INSNS (1);
10110
10111 if (subcode == NOT
10112 && (code == AND
10113 || (code == IOR && TARGET_THUMB2)))
10114 op0 = XEXP (op0, 0);
10115
10116 if (GET_CODE (op0) == ZERO_EXTEND)
10117 {
10118 if (speed_p)
10119 *cost += 2 * extra_cost->alu.logical;
10120
10121 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10122 0, speed_p)
10123 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10124 return true;
10125 }
10126 else if (GET_CODE (op0) == SIGN_EXTEND)
10127 {
10128 if (speed_p)
10129 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10130
10131 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10132 0, speed_p)
10133 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10134 return true;
10135 }
10136
10137 if (speed_p)
10138 *cost += 2 * extra_cost->alu.logical;
10139
10140 return true;
10141 }
10142 /* Vector mode? */
10143
10144 *cost = LIBCALL_COST (2);
10145 return false;
10146
10147 case MULT:
10148 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10149 && (mode == SFmode || !TARGET_VFP_SINGLE))
10150 {
10151 rtx op0 = XEXP (x, 0);
10152
10153 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10154 op0 = XEXP (op0, 0);
10155
10156 if (speed_p)
10157 *cost += extra_cost->fp[mode != SFmode].mult;
10158
10159 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10160 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10161 return true;
10162 }
10163 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10164 {
10165 *cost = LIBCALL_COST (2);
10166 return false;
10167 }
10168
10169 if (mode == SImode)
10170 {
10171 if (TARGET_DSP_MULTIPLY
10172 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10173 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10174 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10175 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10176 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10177 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10178 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10179 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10180 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10181 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10182 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10183 && (INTVAL (XEXP (XEXP (x, 1), 1))
10184 == 16))))))
10185 {
10186 /* SMUL[TB][TB]. */
10187 if (speed_p)
10188 *cost += extra_cost->mult[0].extend;
10189 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10190 SIGN_EXTEND, 0, speed_p);
10191 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10192 SIGN_EXTEND, 1, speed_p);
10193 return true;
10194 }
10195 if (speed_p)
10196 *cost += extra_cost->mult[0].simple;
10197 return false;
10198 }
10199
10200 if (mode == DImode)
10201 {
10202 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10203 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10204 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10205 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10206 {
10207 if (speed_p)
10208 *cost += extra_cost->mult[1].extend;
10209 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10210 ZERO_EXTEND, 0, speed_p)
10211 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10212 ZERO_EXTEND, 0, speed_p));
10213 return true;
10214 }
10215
10216 *cost = LIBCALL_COST (2);
10217 return false;
10218 }
10219
10220 /* Vector mode? */
10221 *cost = LIBCALL_COST (2);
10222 return false;
10223
10224 case NEG:
10225 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10226 && (mode == SFmode || !TARGET_VFP_SINGLE))
10227 {
10228 if (GET_CODE (XEXP (x, 0)) == MULT)
10229 {
10230 /* VNMUL. */
10231 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10232 return true;
10233 }
10234
10235 if (speed_p)
10236 *cost += extra_cost->fp[mode != SFmode].neg;
10237
10238 return false;
10239 }
10240 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10241 {
10242 *cost = LIBCALL_COST (1);
10243 return false;
10244 }
10245
10246 if (mode == SImode)
10247 {
10248 if (GET_CODE (XEXP (x, 0)) == ABS)
10249 {
10250 *cost += COSTS_N_INSNS (1);
10251 /* Assume the non-flag-changing variant. */
10252 if (speed_p)
10253 *cost += (extra_cost->alu.log_shift
10254 + extra_cost->alu.arith_shift);
10255 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10256 return true;
10257 }
10258
10259 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10260 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10261 {
10262 *cost += COSTS_N_INSNS (1);
10263 /* No extra cost for MOV imm and MVN imm. */
10264 /* If the comparison op is using the flags, there's no further
10265 cost; otherwise we need to add the cost of the comparison. */
10266 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10267 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10268 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10269 {
10270 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10271 *cost += (COSTS_N_INSNS (1)
10272 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10273 0, speed_p)
10274 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10275 1, speed_p));
10276 if (speed_p)
10277 *cost += extra_cost->alu.arith;
10278 }
10279 return true;
10280 }
10281
10282 if (speed_p)
10283 *cost += extra_cost->alu.arith;
10284 return false;
10285 }
10286
10287 if (GET_MODE_CLASS (mode) == MODE_INT
10288 && GET_MODE_SIZE (mode) < 4)
10289 {
10290 /* Slightly disparage, as we might need an extend operation. */
10291 *cost += 1;
10292 if (speed_p)
10293 *cost += extra_cost->alu.arith;
10294 return false;
10295 }
10296
10297 if (mode == DImode)
10298 {
10299 *cost += COSTS_N_INSNS (1);
10300 if (speed_p)
10301 *cost += 2 * extra_cost->alu.arith;
10302 return false;
10303 }
10304
10305 /* Vector mode? */
10306 *cost = LIBCALL_COST (1);
10307 return false;
10308
10309 case NOT:
10310 if (mode == SImode)
10311 {
10312 rtx shift_op;
10313 rtx shift_reg = NULL;
10314
10315 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10316
10317 if (shift_op)
10318 {
10319 if (shift_reg != NULL)
10320 {
10321 if (speed_p)
10322 *cost += extra_cost->alu.log_shift_reg;
10323 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10324 }
10325 else if (speed_p)
10326 *cost += extra_cost->alu.log_shift;
10327 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10328 return true;
10329 }
10330
10331 if (speed_p)
10332 *cost += extra_cost->alu.logical;
10333 return false;
10334 }
10335 if (mode == DImode)
10336 {
10337 *cost += COSTS_N_INSNS (1);
10338 return false;
10339 }
10340
10341 /* Vector mode? */
10342
10343 *cost += LIBCALL_COST (1);
10344 return false;
10345
10346 case IF_THEN_ELSE:
10347 {
10348 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10349 {
10350 *cost += COSTS_N_INSNS (3);
10351 return true;
10352 }
10353 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10354 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10355
10356 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10357 /* Assume that if one arm of the if_then_else is a register,
10358 that it will be tied with the result and eliminate the
10359 conditional insn. */
10360 if (REG_P (XEXP (x, 1)))
10361 *cost += op2cost;
10362 else if (REG_P (XEXP (x, 2)))
10363 *cost += op1cost;
10364 else
10365 {
10366 if (speed_p)
10367 {
10368 if (extra_cost->alu.non_exec_costs_exec)
10369 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10370 else
10371 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10372 }
10373 else
10374 *cost += op1cost + op2cost;
10375 }
10376 }
10377 return true;
10378
10379 case COMPARE:
10380 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10381 *cost = 0;
10382 else
10383 {
10384 machine_mode op0mode;
10385 /* We'll mostly assume that the cost of a compare is the cost of the
10386 LHS. However, there are some notable exceptions. */
10387
10388 /* Floating point compares are never done as side-effects. */
10389 op0mode = GET_MODE (XEXP (x, 0));
10390 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10391 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10392 {
10393 if (speed_p)
10394 *cost += extra_cost->fp[op0mode != SFmode].compare;
10395
10396 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10397 {
10398 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10399 return true;
10400 }
10401
10402 return false;
10403 }
10404 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10405 {
10406 *cost = LIBCALL_COST (2);
10407 return false;
10408 }
10409
10410 /* DImode compares normally take two insns. */
10411 if (op0mode == DImode)
10412 {
10413 *cost += COSTS_N_INSNS (1);
10414 if (speed_p)
10415 *cost += 2 * extra_cost->alu.arith;
10416 return false;
10417 }
10418
10419 if (op0mode == SImode)
10420 {
10421 rtx shift_op;
10422 rtx shift_reg;
10423
10424 if (XEXP (x, 1) == const0_rtx
10425 && !(REG_P (XEXP (x, 0))
10426 || (GET_CODE (XEXP (x, 0)) == SUBREG
10427 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10428 {
10429 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10430
10431 /* Multiply operations that set the flags are often
10432 significantly more expensive. */
10433 if (speed_p
10434 && GET_CODE (XEXP (x, 0)) == MULT
10435 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10436 *cost += extra_cost->mult[0].flag_setting;
10437
10438 if (speed_p
10439 && GET_CODE (XEXP (x, 0)) == PLUS
10440 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10441 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10442 0), 1), mode))
10443 *cost += extra_cost->mult[0].flag_setting;
10444 return true;
10445 }
10446
10447 shift_reg = NULL;
10448 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10449 if (shift_op != NULL)
10450 {
10451 if (shift_reg != NULL)
10452 {
10453 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10454 1, speed_p);
10455 if (speed_p)
10456 *cost += extra_cost->alu.arith_shift_reg;
10457 }
10458 else if (speed_p)
10459 *cost += extra_cost->alu.arith_shift;
10460 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10461 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10462 return true;
10463 }
10464
10465 if (speed_p)
10466 *cost += extra_cost->alu.arith;
10467 if (CONST_INT_P (XEXP (x, 1))
10468 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10469 {
10470 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10471 return true;
10472 }
10473 return false;
10474 }
10475
10476 /* Vector mode? */
10477
10478 *cost = LIBCALL_COST (2);
10479 return false;
10480 }
10481 return true;
10482
10483 case EQ:
10484 case NE:
10485 case LT:
10486 case LE:
10487 case GT:
10488 case GE:
10489 case LTU:
10490 case LEU:
10491 case GEU:
10492 case GTU:
10493 case ORDERED:
10494 case UNORDERED:
10495 case UNEQ:
10496 case UNLE:
10497 case UNLT:
10498 case UNGE:
10499 case UNGT:
10500 case LTGT:
10501 if (outer_code == SET)
10502 {
10503 /* Is it a store-flag operation? */
10504 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10505 && XEXP (x, 1) == const0_rtx)
10506 {
10507 /* Thumb also needs an IT insn. */
10508 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10509 return true;
10510 }
10511 if (XEXP (x, 1) == const0_rtx)
10512 {
10513 switch (code)
10514 {
10515 case LT:
10516 /* LSR Rd, Rn, #31. */
10517 if (speed_p)
10518 *cost += extra_cost->alu.shift;
10519 break;
10520
10521 case EQ:
10522 /* RSBS T1, Rn, #0
10523 ADC Rd, Rn, T1. */
10524
10525 case NE:
10526 /* SUBS T1, Rn, #1
10527 SBC Rd, Rn, T1. */
10528 *cost += COSTS_N_INSNS (1);
10529 break;
10530
10531 case LE:
10532 /* RSBS T1, Rn, Rn, LSR #31
10533 ADC Rd, Rn, T1. */
10534 *cost += COSTS_N_INSNS (1);
10535 if (speed_p)
10536 *cost += extra_cost->alu.arith_shift;
10537 break;
10538
10539 case GT:
10540 /* RSB Rd, Rn, Rn, ASR #1
10541 LSR Rd, Rd, #31. */
10542 *cost += COSTS_N_INSNS (1);
10543 if (speed_p)
10544 *cost += (extra_cost->alu.arith_shift
10545 + extra_cost->alu.shift);
10546 break;
10547
10548 case GE:
10549 /* ASR Rd, Rn, #31
10550 ADD Rd, Rn, #1. */
10551 *cost += COSTS_N_INSNS (1);
10552 if (speed_p)
10553 *cost += extra_cost->alu.shift;
10554 break;
10555
10556 default:
10557 /* Remaining cases are either meaningless or would take
10558 three insns anyway. */
10559 *cost = COSTS_N_INSNS (3);
10560 break;
10561 }
10562 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10563 return true;
10564 }
10565 else
10566 {
10567 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10568 if (CONST_INT_P (XEXP (x, 1))
10569 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10570 {
10571 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10572 return true;
10573 }
10574
10575 return false;
10576 }
10577 }
10578 /* Not directly inside a set. If it involves the condition code
10579 register, it must be the condition for a branch, cond_exec or
10580 I_T_E operation. Since the comparison is performed elsewhere,
10581 this is just the control part, which has no additional
10582 cost. */
10583 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10584 && XEXP (x, 1) == const0_rtx)
10585 {
10586 *cost = 0;
10587 return true;
10588 }
10589 return false;
10590
10591 case ABS:
10592 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10593 && (mode == SFmode || !TARGET_VFP_SINGLE))
10594 {
10595 if (speed_p)
10596 *cost += extra_cost->fp[mode != SFmode].neg;
10597
10598 return false;
10599 }
10600 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10601 {
10602 *cost = LIBCALL_COST (1);
10603 return false;
10604 }
10605
10606 if (mode == SImode)
10607 {
10608 if (speed_p)
10609 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10610 return false;
10611 }
10612 /* Vector mode? */
10613 *cost = LIBCALL_COST (1);
10614 return false;
10615
10616 case SIGN_EXTEND:
10617 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10618 && MEM_P (XEXP (x, 0)))
10619 {
10620 if (mode == DImode)
10621 *cost += COSTS_N_INSNS (1);
10622
10623 if (!speed_p)
10624 return true;
10625
10626 if (GET_MODE (XEXP (x, 0)) == SImode)
10627 *cost += extra_cost->ldst.load;
10628 else
10629 *cost += extra_cost->ldst.load_sign_extend;
10630
10631 if (mode == DImode)
10632 *cost += extra_cost->alu.shift;
10633
10634 return true;
10635 }
10636
10637 /* Widening from less than 32 bits requires an extend operation. */
10638 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10639 {
10640 /* We have SXTB/SXTH. */
10641 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10642 if (speed_p)
10643 *cost += extra_cost->alu.extend;
10644 }
10645 else if (GET_MODE (XEXP (x, 0)) != SImode)
10646 {
10647 /* Needs two shifts. */
10648 *cost += COSTS_N_INSNS (1);
10649 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10650 if (speed_p)
10651 *cost += 2 * extra_cost->alu.shift;
10652 }
10653
10654 /* Widening beyond 32-bits requires one more insn. */
10655 if (mode == DImode)
10656 {
10657 *cost += COSTS_N_INSNS (1);
10658 if (speed_p)
10659 *cost += extra_cost->alu.shift;
10660 }
10661
10662 return true;
10663
10664 case ZERO_EXTEND:
10665 if ((arm_arch4
10666 || GET_MODE (XEXP (x, 0)) == SImode
10667 || GET_MODE (XEXP (x, 0)) == QImode)
10668 && MEM_P (XEXP (x, 0)))
10669 {
10670 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10671
10672 if (mode == DImode)
10673 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10674
10675 return true;
10676 }
10677
10678 /* Widening from less than 32-bits requires an extend operation. */
10679 if (GET_MODE (XEXP (x, 0)) == QImode)
10680 {
10681 /* UXTB can be a shorter instruction in Thumb2, but it might
10682 be slower than the AND Rd, Rn, #255 alternative. When
10683 optimizing for speed it should never be slower to use
10684 AND, and we don't really model 16-bit vs 32-bit insns
10685 here. */
10686 if (speed_p)
10687 *cost += extra_cost->alu.logical;
10688 }
10689 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10690 {
10691 /* We have UXTB/UXTH. */
10692 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10693 if (speed_p)
10694 *cost += extra_cost->alu.extend;
10695 }
10696 else if (GET_MODE (XEXP (x, 0)) != SImode)
10697 {
10698 /* Needs two shifts. It's marginally preferable to use
10699 shifts rather than two BIC instructions as the second
10700 shift may merge with a subsequent insn as a shifter
10701 op. */
10702 *cost = COSTS_N_INSNS (2);
10703 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10704 if (speed_p)
10705 *cost += 2 * extra_cost->alu.shift;
10706 }
10707
10708 /* Widening beyond 32-bits requires one more insn. */
10709 if (mode == DImode)
10710 {
10711 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10712 }
10713
10714 return true;
10715
10716 case CONST_INT:
10717 *cost = 0;
10718 /* CONST_INT has no mode, so we cannot tell for sure how many
10719 insns are really going to be needed. The best we can do is
10720 look at the value passed. If it fits in SImode, then assume
10721 that's the mode it will be used for. Otherwise assume it
10722 will be used in DImode. */
10723 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10724 mode = SImode;
10725 else
10726 mode = DImode;
10727
10728 /* Avoid blowing up in arm_gen_constant (). */
10729 if (!(outer_code == PLUS
10730 || outer_code == AND
10731 || outer_code == IOR
10732 || outer_code == XOR
10733 || outer_code == MINUS))
10734 outer_code = SET;
10735
10736 const_int_cost:
10737 if (mode == SImode)
10738 {
10739 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10740 INTVAL (x), NULL, NULL,
10741 0, 0));
10742 /* Extra costs? */
10743 }
10744 else
10745 {
10746 *cost += COSTS_N_INSNS (arm_gen_constant
10747 (outer_code, SImode, NULL,
10748 trunc_int_for_mode (INTVAL (x), SImode),
10749 NULL, NULL, 0, 0)
10750 + arm_gen_constant (outer_code, SImode, NULL,
10751 INTVAL (x) >> 32, NULL,
10752 NULL, 0, 0));
10753 /* Extra costs? */
10754 }
10755
10756 return true;
10757
10758 case CONST:
10759 case LABEL_REF:
10760 case SYMBOL_REF:
10761 if (speed_p)
10762 {
10763 if (arm_arch_thumb2 && !flag_pic)
10764 *cost += COSTS_N_INSNS (1);
10765 else
10766 *cost += extra_cost->ldst.load;
10767 }
10768 else
10769 *cost += COSTS_N_INSNS (1);
10770
10771 if (flag_pic)
10772 {
10773 *cost += COSTS_N_INSNS (1);
10774 if (speed_p)
10775 *cost += extra_cost->alu.arith;
10776 }
10777
10778 return true;
10779
10780 case CONST_FIXED:
10781 *cost = COSTS_N_INSNS (4);
10782 /* Fixme. */
10783 return true;
10784
10785 case CONST_DOUBLE:
10786 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10787 && (mode == SFmode || !TARGET_VFP_SINGLE))
10788 {
10789 if (vfp3_const_double_rtx (x))
10790 {
10791 if (speed_p)
10792 *cost += extra_cost->fp[mode == DFmode].fpconst;
10793 return true;
10794 }
10795
10796 if (speed_p)
10797 {
10798 if (mode == DFmode)
10799 *cost += extra_cost->ldst.loadd;
10800 else
10801 *cost += extra_cost->ldst.loadf;
10802 }
10803 else
10804 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10805
10806 return true;
10807 }
10808 *cost = COSTS_N_INSNS (4);
10809 return true;
10810
10811 case CONST_VECTOR:
10812 /* Fixme. */
10813 if (TARGET_NEON
10814 && TARGET_HARD_FLOAT
10815 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10816 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10817 *cost = COSTS_N_INSNS (1);
10818 else
10819 *cost = COSTS_N_INSNS (4);
10820 return true;
10821
10822 case HIGH:
10823 case LO_SUM:
10824 /* When optimizing for size, we prefer constant pool entries to
10825 MOVW/MOVT pairs, so bump the cost of these slightly. */
10826 if (!speed_p)
10827 *cost += 1;
10828 return true;
10829
10830 case CLZ:
10831 if (speed_p)
10832 *cost += extra_cost->alu.clz;
10833 return false;
10834
10835 case SMIN:
10836 if (XEXP (x, 1) == const0_rtx)
10837 {
10838 if (speed_p)
10839 *cost += extra_cost->alu.log_shift;
10840 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10841 return true;
10842 }
10843 /* Fall through. */
10844 case SMAX:
10845 case UMIN:
10846 case UMAX:
10847 *cost += COSTS_N_INSNS (1);
10848 return false;
10849
10850 case TRUNCATE:
10851 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10852 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10853 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10854 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10855 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10856 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10857 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10858 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10859 == ZERO_EXTEND))))
10860 {
10861 if (speed_p)
10862 *cost += extra_cost->mult[1].extend;
10863 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10864 ZERO_EXTEND, 0, speed_p)
10865 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10866 ZERO_EXTEND, 0, speed_p));
10867 return true;
10868 }
10869 *cost = LIBCALL_COST (1);
10870 return false;
10871
10872 case UNSPEC_VOLATILE:
10873 case UNSPEC:
10874 return arm_unspec_cost (x, outer_code, speed_p, cost);
10875
10876 case PC:
10877 /* Reading the PC is like reading any other register. Writing it
10878 is more expensive, but we take that into account elsewhere. */
10879 *cost = 0;
10880 return true;
10881
10882 case ZERO_EXTRACT:
10883 /* TODO: Simple zero_extract of bottom bits using AND. */
10884 /* Fall through. */
10885 case SIGN_EXTRACT:
10886 if (arm_arch6
10887 && mode == SImode
10888 && CONST_INT_P (XEXP (x, 1))
10889 && CONST_INT_P (XEXP (x, 2)))
10890 {
10891 if (speed_p)
10892 *cost += extra_cost->alu.bfx;
10893 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10894 return true;
10895 }
10896 /* Without UBFX/SBFX, need to resort to shift operations. */
10897 *cost += COSTS_N_INSNS (1);
10898 if (speed_p)
10899 *cost += 2 * extra_cost->alu.shift;
10900 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10901 return true;
10902
10903 case FLOAT_EXTEND:
10904 if (TARGET_HARD_FLOAT)
10905 {
10906 if (speed_p)
10907 *cost += extra_cost->fp[mode == DFmode].widen;
10908 if (!TARGET_VFP5
10909 && GET_MODE (XEXP (x, 0)) == HFmode)
10910 {
10911 /* Pre v8, widening HF->DF is a two-step process, first
10912 widening to SFmode. */
10913 *cost += COSTS_N_INSNS (1);
10914 if (speed_p)
10915 *cost += extra_cost->fp[0].widen;
10916 }
10917 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10918 return true;
10919 }
10920
10921 *cost = LIBCALL_COST (1);
10922 return false;
10923
10924 case FLOAT_TRUNCATE:
10925 if (TARGET_HARD_FLOAT)
10926 {
10927 if (speed_p)
10928 *cost += extra_cost->fp[mode == DFmode].narrow;
10929 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10930 return true;
10931 /* Vector modes? */
10932 }
10933 *cost = LIBCALL_COST (1);
10934 return false;
10935
10936 case FMA:
10937 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10938 {
10939 rtx op0 = XEXP (x, 0);
10940 rtx op1 = XEXP (x, 1);
10941 rtx op2 = XEXP (x, 2);
10942
10943
10944 /* vfms or vfnma. */
10945 if (GET_CODE (op0) == NEG)
10946 op0 = XEXP (op0, 0);
10947
10948 /* vfnms or vfnma. */
10949 if (GET_CODE (op2) == NEG)
10950 op2 = XEXP (op2, 0);
10951
10952 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10953 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10954 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10955
10956 if (speed_p)
10957 *cost += extra_cost->fp[mode == DFmode].fma;
10958
10959 return true;
10960 }
10961
10962 *cost = LIBCALL_COST (3);
10963 return false;
10964
10965 case FIX:
10966 case UNSIGNED_FIX:
10967 if (TARGET_HARD_FLOAT)
10968 {
10969 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10970 a vcvt fixed-point conversion. */
10971 if (code == FIX && mode == SImode
10972 && GET_CODE (XEXP (x, 0)) == FIX
10973 && GET_MODE (XEXP (x, 0)) == SFmode
10974 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10975 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10976 > 0)
10977 {
10978 if (speed_p)
10979 *cost += extra_cost->fp[0].toint;
10980
10981 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10982 code, 0, speed_p);
10983 return true;
10984 }
10985
10986 if (GET_MODE_CLASS (mode) == MODE_INT)
10987 {
10988 mode = GET_MODE (XEXP (x, 0));
10989 if (speed_p)
10990 *cost += extra_cost->fp[mode == DFmode].toint;
10991 /* Strip off the 'cost' of rounding towards zero. */
10992 if (GET_CODE (XEXP (x, 0)) == FIX)
10993 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10994 0, speed_p);
10995 else
10996 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10997 /* ??? Increase the cost to deal with transferring from
10998 FP -> CORE registers? */
10999 return true;
11000 }
11001 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11002 && TARGET_VFP5)
11003 {
11004 if (speed_p)
11005 *cost += extra_cost->fp[mode == DFmode].roundint;
11006 return false;
11007 }
11008 /* Vector costs? */
11009 }
11010 *cost = LIBCALL_COST (1);
11011 return false;
11012
11013 case FLOAT:
11014 case UNSIGNED_FLOAT:
11015 if (TARGET_HARD_FLOAT)
11016 {
11017 /* ??? Increase the cost to deal with transferring from CORE
11018 -> FP registers? */
11019 if (speed_p)
11020 *cost += extra_cost->fp[mode == DFmode].fromint;
11021 return false;
11022 }
11023 *cost = LIBCALL_COST (1);
11024 return false;
11025
11026 case CALL:
11027 return true;
11028
11029 case ASM_OPERANDS:
11030 {
11031 /* Just a guess. Guess number of instructions in the asm
11032 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11033 though (see PR60663). */
11034 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11035 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11036
11037 *cost = COSTS_N_INSNS (asm_length + num_operands);
11038 return true;
11039 }
11040 default:
11041 if (mode != VOIDmode)
11042 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11043 else
11044 *cost = COSTS_N_INSNS (4); /* Who knows? */
11045 return false;
11046 }
11047 }
11048
11049 #undef HANDLE_NARROW_SHIFT_ARITH
11050
11051 /* RTX costs entry point. */
11052
11053 static bool
11054 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11055 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11056 {
11057 bool result;
11058 int code = GET_CODE (x);
11059 gcc_assert (current_tune->insn_extra_cost);
11060
11061 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11062 (enum rtx_code) outer_code,
11063 current_tune->insn_extra_cost,
11064 total, speed);
11065
11066 if (dump_file && arm_verbose_cost)
11067 {
11068 print_rtl_single (dump_file, x);
11069 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11070 *total, result ? "final" : "partial");
11071 }
11072 return result;
11073 }
11074
11075 /* All address computations that can be done are free, but rtx cost returns
11076 the same for practically all of them. So we weight the different types
11077 of address here in the order (most pref first):
11078 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11079 static inline int
11080 arm_arm_address_cost (rtx x)
11081 {
11082 enum rtx_code c = GET_CODE (x);
11083
11084 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11085 return 0;
11086 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11087 return 10;
11088
11089 if (c == PLUS)
11090 {
11091 if (CONST_INT_P (XEXP (x, 1)))
11092 return 2;
11093
11094 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11095 return 3;
11096
11097 return 4;
11098 }
11099
11100 return 6;
11101 }
11102
11103 static inline int
11104 arm_thumb_address_cost (rtx x)
11105 {
11106 enum rtx_code c = GET_CODE (x);
11107
11108 if (c == REG)
11109 return 1;
11110 if (c == PLUS
11111 && REG_P (XEXP (x, 0))
11112 && CONST_INT_P (XEXP (x, 1)))
11113 return 1;
11114
11115 return 2;
11116 }
11117
11118 static int
11119 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11120 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11121 {
11122 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11123 }
11124
11125 /* Adjust cost hook for XScale. */
11126 static bool
11127 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11128 int * cost)
11129 {
11130 /* Some true dependencies can have a higher cost depending
11131 on precisely how certain input operands are used. */
11132 if (dep_type == 0
11133 && recog_memoized (insn) >= 0
11134 && recog_memoized (dep) >= 0)
11135 {
11136 int shift_opnum = get_attr_shift (insn);
11137 enum attr_type attr_type = get_attr_type (dep);
11138
11139 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11140 operand for INSN. If we have a shifted input operand and the
11141 instruction we depend on is another ALU instruction, then we may
11142 have to account for an additional stall. */
11143 if (shift_opnum != 0
11144 && (attr_type == TYPE_ALU_SHIFT_IMM
11145 || attr_type == TYPE_ALUS_SHIFT_IMM
11146 || attr_type == TYPE_LOGIC_SHIFT_IMM
11147 || attr_type == TYPE_LOGICS_SHIFT_IMM
11148 || attr_type == TYPE_ALU_SHIFT_REG
11149 || attr_type == TYPE_ALUS_SHIFT_REG
11150 || attr_type == TYPE_LOGIC_SHIFT_REG
11151 || attr_type == TYPE_LOGICS_SHIFT_REG
11152 || attr_type == TYPE_MOV_SHIFT
11153 || attr_type == TYPE_MVN_SHIFT
11154 || attr_type == TYPE_MOV_SHIFT_REG
11155 || attr_type == TYPE_MVN_SHIFT_REG))
11156 {
11157 rtx shifted_operand;
11158 int opno;
11159
11160 /* Get the shifted operand. */
11161 extract_insn (insn);
11162 shifted_operand = recog_data.operand[shift_opnum];
11163
11164 /* Iterate over all the operands in DEP. If we write an operand
11165 that overlaps with SHIFTED_OPERAND, then we have to increase the
11166 cost of this dependency. */
11167 extract_insn (dep);
11168 preprocess_constraints (dep);
11169 for (opno = 0; opno < recog_data.n_operands; opno++)
11170 {
11171 /* We can ignore strict inputs. */
11172 if (recog_data.operand_type[opno] == OP_IN)
11173 continue;
11174
11175 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11176 shifted_operand))
11177 {
11178 *cost = 2;
11179 return false;
11180 }
11181 }
11182 }
11183 }
11184 return true;
11185 }
11186
11187 /* Adjust cost hook for Cortex A9. */
11188 static bool
11189 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11190 int * cost)
11191 {
11192 switch (dep_type)
11193 {
11194 case REG_DEP_ANTI:
11195 *cost = 0;
11196 return false;
11197
11198 case REG_DEP_TRUE:
11199 case REG_DEP_OUTPUT:
11200 if (recog_memoized (insn) >= 0
11201 && recog_memoized (dep) >= 0)
11202 {
11203 if (GET_CODE (PATTERN (insn)) == SET)
11204 {
11205 if (GET_MODE_CLASS
11206 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11207 || GET_MODE_CLASS
11208 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11209 {
11210 enum attr_type attr_type_insn = get_attr_type (insn);
11211 enum attr_type attr_type_dep = get_attr_type (dep);
11212
11213 /* By default all dependencies of the form
11214 s0 = s0 <op> s1
11215 s0 = s0 <op> s2
11216 have an extra latency of 1 cycle because
11217 of the input and output dependency in this
11218 case. However, this gets modeled as a true
11219 dependency and hence all these checks. */
11220 if (REG_P (SET_DEST (PATTERN (insn)))
11221 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11222 {
11223 /* FMACS is a special case where the dependent
11224 instruction can be issued 3 cycles before
11225 the normal latency in case of an output
11226 dependency. */
11227 if ((attr_type_insn == TYPE_FMACS
11228 || attr_type_insn == TYPE_FMACD)
11229 && (attr_type_dep == TYPE_FMACS
11230 || attr_type_dep == TYPE_FMACD))
11231 {
11232 if (dep_type == REG_DEP_OUTPUT)
11233 *cost = insn_default_latency (dep) - 3;
11234 else
11235 *cost = insn_default_latency (dep);
11236 return false;
11237 }
11238 else
11239 {
11240 if (dep_type == REG_DEP_OUTPUT)
11241 *cost = insn_default_latency (dep) + 1;
11242 else
11243 *cost = insn_default_latency (dep);
11244 }
11245 return false;
11246 }
11247 }
11248 }
11249 }
11250 break;
11251
11252 default:
11253 gcc_unreachable ();
11254 }
11255
11256 return true;
11257 }
11258
11259 /* Adjust cost hook for FA726TE. */
11260 static bool
11261 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11262 int * cost)
11263 {
11264 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11265 has a penalty of 3 cycles. */
11266 if (dep_type == REG_DEP_TRUE
11267 && recog_memoized (insn) >= 0
11268 && recog_memoized (dep) >= 0
11269 && get_attr_conds (dep) == CONDS_SET)
11270 {
11271 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11272 if (get_attr_conds (insn) == CONDS_USE
11273 && get_attr_type (insn) != TYPE_BRANCH)
11274 {
11275 *cost = 3;
11276 return false;
11277 }
11278
11279 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11280 || get_attr_conds (insn) == CONDS_USE)
11281 {
11282 *cost = 0;
11283 return false;
11284 }
11285 }
11286
11287 return true;
11288 }
11289
11290 /* Implement TARGET_REGISTER_MOVE_COST.
11291
11292 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11293 such a move is typically more expensive than a single memory access. We set
11294 the cost to less than two memory accesses so that floating
11295 point to integer conversion does not go through memory. */
11296
11297 int
11298 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11299 reg_class_t from, reg_class_t to)
11300 {
11301 if (TARGET_32BIT)
11302 {
11303 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11304 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11305 return 15;
11306 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11307 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11308 return 4;
11309 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11310 return 20;
11311 else
11312 return 2;
11313 }
11314 else
11315 {
11316 if (from == HI_REGS || to == HI_REGS)
11317 return 4;
11318 else
11319 return 2;
11320 }
11321 }
11322
11323 /* Implement TARGET_MEMORY_MOVE_COST. */
11324
11325 int
11326 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11327 bool in ATTRIBUTE_UNUSED)
11328 {
11329 if (TARGET_32BIT)
11330 return 10;
11331 else
11332 {
11333 if (GET_MODE_SIZE (mode) < 4)
11334 return 8;
11335 else
11336 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11337 }
11338 }
11339
11340 /* Vectorizer cost model implementation. */
11341
11342 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11343 static int
11344 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11345 tree vectype,
11346 int misalign ATTRIBUTE_UNUSED)
11347 {
11348 unsigned elements;
11349
11350 switch (type_of_cost)
11351 {
11352 case scalar_stmt:
11353 return current_tune->vec_costs->scalar_stmt_cost;
11354
11355 case scalar_load:
11356 return current_tune->vec_costs->scalar_load_cost;
11357
11358 case scalar_store:
11359 return current_tune->vec_costs->scalar_store_cost;
11360
11361 case vector_stmt:
11362 return current_tune->vec_costs->vec_stmt_cost;
11363
11364 case vector_load:
11365 return current_tune->vec_costs->vec_align_load_cost;
11366
11367 case vector_store:
11368 return current_tune->vec_costs->vec_store_cost;
11369
11370 case vec_to_scalar:
11371 return current_tune->vec_costs->vec_to_scalar_cost;
11372
11373 case scalar_to_vec:
11374 return current_tune->vec_costs->scalar_to_vec_cost;
11375
11376 case unaligned_load:
11377 case vector_gather_load:
11378 return current_tune->vec_costs->vec_unalign_load_cost;
11379
11380 case unaligned_store:
11381 case vector_scatter_store:
11382 return current_tune->vec_costs->vec_unalign_store_cost;
11383
11384 case cond_branch_taken:
11385 return current_tune->vec_costs->cond_taken_branch_cost;
11386
11387 case cond_branch_not_taken:
11388 return current_tune->vec_costs->cond_not_taken_branch_cost;
11389
11390 case vec_perm:
11391 case vec_promote_demote:
11392 return current_tune->vec_costs->vec_stmt_cost;
11393
11394 case vec_construct:
11395 elements = TYPE_VECTOR_SUBPARTS (vectype);
11396 return elements / 2 + 1;
11397
11398 default:
11399 gcc_unreachable ();
11400 }
11401 }
11402
11403 /* Implement targetm.vectorize.add_stmt_cost. */
11404
11405 static unsigned
11406 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11407 struct _stmt_vec_info *stmt_info, int misalign,
11408 enum vect_cost_model_location where)
11409 {
11410 unsigned *cost = (unsigned *) data;
11411 unsigned retval = 0;
11412
11413 if (flag_vect_cost_model)
11414 {
11415 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11416 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11417
11418 /* Statements in an inner loop relative to the loop being
11419 vectorized are weighted more heavily. The value here is
11420 arbitrary and could potentially be improved with analysis. */
11421 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11422 count *= 50; /* FIXME. */
11423
11424 retval = (unsigned) (count * stmt_cost);
11425 cost[where] += retval;
11426 }
11427
11428 return retval;
11429 }
11430
11431 /* Return true if and only if this insn can dual-issue only as older. */
11432 static bool
11433 cortexa7_older_only (rtx_insn *insn)
11434 {
11435 if (recog_memoized (insn) < 0)
11436 return false;
11437
11438 switch (get_attr_type (insn))
11439 {
11440 case TYPE_ALU_DSP_REG:
11441 case TYPE_ALU_SREG:
11442 case TYPE_ALUS_SREG:
11443 case TYPE_LOGIC_REG:
11444 case TYPE_LOGICS_REG:
11445 case TYPE_ADC_REG:
11446 case TYPE_ADCS_REG:
11447 case TYPE_ADR:
11448 case TYPE_BFM:
11449 case TYPE_REV:
11450 case TYPE_MVN_REG:
11451 case TYPE_SHIFT_IMM:
11452 case TYPE_SHIFT_REG:
11453 case TYPE_LOAD_BYTE:
11454 case TYPE_LOAD_4:
11455 case TYPE_STORE_4:
11456 case TYPE_FFARITHS:
11457 case TYPE_FADDS:
11458 case TYPE_FFARITHD:
11459 case TYPE_FADDD:
11460 case TYPE_FMOV:
11461 case TYPE_F_CVT:
11462 case TYPE_FCMPS:
11463 case TYPE_FCMPD:
11464 case TYPE_FCONSTS:
11465 case TYPE_FCONSTD:
11466 case TYPE_FMULS:
11467 case TYPE_FMACS:
11468 case TYPE_FMULD:
11469 case TYPE_FMACD:
11470 case TYPE_FDIVS:
11471 case TYPE_FDIVD:
11472 case TYPE_F_MRC:
11473 case TYPE_F_MRRC:
11474 case TYPE_F_FLAG:
11475 case TYPE_F_LOADS:
11476 case TYPE_F_STORES:
11477 return true;
11478 default:
11479 return false;
11480 }
11481 }
11482
11483 /* Return true if and only if this insn can dual-issue as younger. */
11484 static bool
11485 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11486 {
11487 if (recog_memoized (insn) < 0)
11488 {
11489 if (verbose > 5)
11490 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11491 return false;
11492 }
11493
11494 switch (get_attr_type (insn))
11495 {
11496 case TYPE_ALU_IMM:
11497 case TYPE_ALUS_IMM:
11498 case TYPE_LOGIC_IMM:
11499 case TYPE_LOGICS_IMM:
11500 case TYPE_EXTEND:
11501 case TYPE_MVN_IMM:
11502 case TYPE_MOV_IMM:
11503 case TYPE_MOV_REG:
11504 case TYPE_MOV_SHIFT:
11505 case TYPE_MOV_SHIFT_REG:
11506 case TYPE_BRANCH:
11507 case TYPE_CALL:
11508 return true;
11509 default:
11510 return false;
11511 }
11512 }
11513
11514
11515 /* Look for an instruction that can dual issue only as an older
11516 instruction, and move it in front of any instructions that can
11517 dual-issue as younger, while preserving the relative order of all
11518 other instructions in the ready list. This is a heuristic to help
11519 dual-issue in later cycles, by postponing issue of more flexible
11520 instructions. This heuristic may affect dual issue opportunities
11521 in the current cycle. */
11522 static void
11523 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11524 int *n_readyp, int clock)
11525 {
11526 int i;
11527 int first_older_only = -1, first_younger = -1;
11528
11529 if (verbose > 5)
11530 fprintf (file,
11531 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11532 clock,
11533 *n_readyp);
11534
11535 /* Traverse the ready list from the head (the instruction to issue
11536 first), looking for the first instruction that can issue as
11537 younger and the first instruction that can dual-issue only as
11538 older. */
11539 for (i = *n_readyp - 1; i >= 0; i--)
11540 {
11541 rtx_insn *insn = ready[i];
11542 if (cortexa7_older_only (insn))
11543 {
11544 first_older_only = i;
11545 if (verbose > 5)
11546 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11547 break;
11548 }
11549 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11550 first_younger = i;
11551 }
11552
11553 /* Nothing to reorder because either no younger insn was found or an insn
11554 that can dual-issue only as older appears before any insn that
11555 can dual-issue as younger. */
11556 if (first_younger == -1)
11557 {
11558 if (verbose > 5)
11559 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11560 return;
11561 }
11562
11563 /* Nothing to reorder because no older-only insn in the ready list. */
11564 if (first_older_only == -1)
11565 {
11566 if (verbose > 5)
11567 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11568 return;
11569 }
11570
11571 /* Move first_older_only insn before first_younger. */
11572 if (verbose > 5)
11573 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11574 INSN_UID (ready[first_older_only]),
11575 INSN_UID (ready[first_younger]));
11576 rtx_insn *first_older_only_insn = ready[first_older_only];
11577 for (i = first_older_only; i < first_younger; i++)
11578 {
11579 ready[i] = ready[i+1];
11580 }
11581
11582 ready[i] = first_older_only_insn;
11583 return;
11584 }
11585
11586 /* Implement TARGET_SCHED_REORDER. */
11587 static int
11588 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11589 int clock)
11590 {
11591 switch (arm_tune)
11592 {
11593 case TARGET_CPU_cortexa7:
11594 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11595 break;
11596 default:
11597 /* Do nothing for other cores. */
11598 break;
11599 }
11600
11601 return arm_issue_rate ();
11602 }
11603
11604 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11605 It corrects the value of COST based on the relationship between
11606 INSN and DEP through the dependence LINK. It returns the new
11607 value. There is a per-core adjust_cost hook to adjust scheduler costs
11608 and the per-core hook can choose to completely override the generic
11609 adjust_cost function. Only put bits of code into arm_adjust_cost that
11610 are common across all cores. */
11611 static int
11612 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11613 unsigned int)
11614 {
11615 rtx i_pat, d_pat;
11616
11617 /* When generating Thumb-1 code, we want to place flag-setting operations
11618 close to a conditional branch which depends on them, so that we can
11619 omit the comparison. */
11620 if (TARGET_THUMB1
11621 && dep_type == 0
11622 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11623 && recog_memoized (dep) >= 0
11624 && get_attr_conds (dep) == CONDS_SET)
11625 return 0;
11626
11627 if (current_tune->sched_adjust_cost != NULL)
11628 {
11629 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11630 return cost;
11631 }
11632
11633 /* XXX Is this strictly true? */
11634 if (dep_type == REG_DEP_ANTI
11635 || dep_type == REG_DEP_OUTPUT)
11636 return 0;
11637
11638 /* Call insns don't incur a stall, even if they follow a load. */
11639 if (dep_type == 0
11640 && CALL_P (insn))
11641 return 1;
11642
11643 if ((i_pat = single_set (insn)) != NULL
11644 && MEM_P (SET_SRC (i_pat))
11645 && (d_pat = single_set (dep)) != NULL
11646 && MEM_P (SET_DEST (d_pat)))
11647 {
11648 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11649 /* This is a load after a store; there is no conflict if the load reads
11650 from a cached area. Assume that loads from the stack and from the
11651 constant pool are cached, and that others will miss. This is a
11652 hack. */
11653
11654 if ((GET_CODE (src_mem) == SYMBOL_REF
11655 && CONSTANT_POOL_ADDRESS_P (src_mem))
11656 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11657 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11658 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11659 return 1;
11660 }
11661
11662 return cost;
11663 }
11664
11665 int
11666 arm_max_conditional_execute (void)
11667 {
11668 return max_insns_skipped;
11669 }
11670
11671 static int
11672 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11673 {
11674 if (TARGET_32BIT)
11675 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11676 else
11677 return (optimize > 0) ? 2 : 0;
11678 }
11679
11680 static int
11681 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11682 {
11683 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11684 }
11685
11686 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11687 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11688 sequences of non-executed instructions in IT blocks probably take the same
11689 amount of time as executed instructions (and the IT instruction itself takes
11690 space in icache). This function was experimentally determined to give good
11691 results on a popular embedded benchmark. */
11692
11693 static int
11694 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11695 {
11696 return (TARGET_32BIT && speed_p) ? 1
11697 : arm_default_branch_cost (speed_p, predictable_p);
11698 }
11699
11700 static int
11701 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11702 {
11703 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11704 }
11705
11706 static bool fp_consts_inited = false;
11707
11708 static REAL_VALUE_TYPE value_fp0;
11709
11710 static void
11711 init_fp_table (void)
11712 {
11713 REAL_VALUE_TYPE r;
11714
11715 r = REAL_VALUE_ATOF ("0", DFmode);
11716 value_fp0 = r;
11717 fp_consts_inited = true;
11718 }
11719
11720 /* Return TRUE if rtx X is a valid immediate FP constant. */
11721 int
11722 arm_const_double_rtx (rtx x)
11723 {
11724 const REAL_VALUE_TYPE *r;
11725
11726 if (!fp_consts_inited)
11727 init_fp_table ();
11728
11729 r = CONST_DOUBLE_REAL_VALUE (x);
11730 if (REAL_VALUE_MINUS_ZERO (*r))
11731 return 0;
11732
11733 if (real_equal (r, &value_fp0))
11734 return 1;
11735
11736 return 0;
11737 }
11738
11739 /* VFPv3 has a fairly wide range of representable immediates, formed from
11740 "quarter-precision" floating-point values. These can be evaluated using this
11741 formula (with ^ for exponentiation):
11742
11743 -1^s * n * 2^-r
11744
11745 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11746 16 <= n <= 31 and 0 <= r <= 7.
11747
11748 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11749
11750 - A (most-significant) is the sign bit.
11751 - BCD are the exponent (encoded as r XOR 3).
11752 - EFGH are the mantissa (encoded as n - 16).
11753 */
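/* As a worked example of the encoding above (purely illustrative): 1.0 can
   be written as -1^0 * 16 * 2^-4, so s = 0, n = 16 and r = 4. That encodes
   as A = 0, BCD = (4 XOR 3) = 0b111 and EFGH = (16 - 16) = 0b0000, giving
   the 8-bit index 0b01110000 (0x70), which is the value returned below by
   vfp3_const_double_index for the constant 1.0. */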
11754
11755 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11756 fconst[sd] instruction, or -1 if X isn't suitable. */
11757 static int
11758 vfp3_const_double_index (rtx x)
11759 {
11760 REAL_VALUE_TYPE r, m;
11761 int sign, exponent;
11762 unsigned HOST_WIDE_INT mantissa, mant_hi;
11763 unsigned HOST_WIDE_INT mask;
11764 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11765 bool fail;
11766
11767 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11768 return -1;
11769
11770 r = *CONST_DOUBLE_REAL_VALUE (x);
11771
11772 /* We can't represent these things, so detect them first. */
11773 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11774 return -1;
11775
11776 /* Extract sign, exponent and mantissa. */
11777 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11778 r = real_value_abs (&r);
11779 exponent = REAL_EXP (&r);
11780 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11781 highest (sign) bit, with a fixed binary point at bit point_pos.
11782 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11783 bits for the mantissa, this may fail (low bits would be lost). */
11784 real_ldexp (&m, &r, point_pos - exponent);
11785 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11786 mantissa = w.elt (0);
11787 mant_hi = w.elt (1);
11788
11789 /* If there are bits set in the low part of the mantissa, we can't
11790 represent this value. */
11791 if (mantissa != 0)
11792 return -1;
11793
11794 /* Now make it so that mantissa contains the most-significant bits, and move
11795 the point_pos to indicate that the least-significant bits have been
11796 discarded. */
11797 point_pos -= HOST_BITS_PER_WIDE_INT;
11798 mantissa = mant_hi;
11799
11800 /* We can permit four significant bits of mantissa only, plus a high bit
11801 which is always 1. */
11802 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11803 if ((mantissa & mask) != 0)
11804 return -1;
11805
11806 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11807 mantissa >>= point_pos - 5;
11808
11809 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11810 floating-point immediate zero with Neon using an integer-zero load, but
11811 that case is handled elsewhere.) */
11812 if (mantissa == 0)
11813 return -1;
11814
11815 gcc_assert (mantissa >= 16 && mantissa <= 31);
11816
11817 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11818 normalized significands are in the range [1, 2). (Our mantissa is shifted
11819 left 4 places at this point relative to normalized IEEE754 values). GCC
11820 internally uses [0.5, 1) (see real.c), so the exponent returned from
11821 REAL_EXP must be altered. */
11822 exponent = 5 - exponent;
11823
11824 if (exponent < 0 || exponent > 7)
11825 return -1;
11826
11827 /* Sign, mantissa and exponent are now in the correct form to plug into the
11828 formula described in the comment above. */
11829 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11830 }
11831
11832 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11833 int
11834 vfp3_const_double_rtx (rtx x)
11835 {
11836 if (!TARGET_VFP3)
11837 return 0;
11838
11839 return vfp3_const_double_index (x) != -1;
11840 }
11841
11842 /* Recognize immediates which can be used in various Neon instructions. Legal
11843 immediates are described by the following table (for VMVN variants, the
11844 bitwise inverse of the constant shown is recognized. In either case, VMOV
11845 is output and the correct instruction to use for a given constant is chosen
11846 by the assembler). The constant shown is replicated across all elements of
11847 the destination vector.
11848
11849 insn elems variant constant (binary)
11850 ---- ----- ------- -----------------
11851 vmov i32 0 00000000 00000000 00000000 abcdefgh
11852 vmov i32 1 00000000 00000000 abcdefgh 00000000
11853 vmov i32 2 00000000 abcdefgh 00000000 00000000
11854 vmov i32 3 abcdefgh 00000000 00000000 00000000
11855 vmov i16 4 00000000 abcdefgh
11856 vmov i16 5 abcdefgh 00000000
11857 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11858 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11859 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11860 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11861 vmvn i16 10 00000000 abcdefgh
11862 vmvn i16 11 abcdefgh 00000000
11863 vmov i32 12 00000000 00000000 abcdefgh 11111111
11864 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11865 vmov i32 14 00000000 abcdefgh 11111111 11111111
11866 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11867 vmov i8 16 abcdefgh
11868 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11869 eeeeeeee ffffffff gggggggg hhhhhhhh
11870 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11871 vmov f32 19 00000000 00000000 00000000 00000000
11872
11873 For case 18, B = !b. Representable values are exactly those accepted by
11874 vfp3_const_double_index, but are output as floating-point numbers rather
11875 than indices.
11876
11877 For case 19, we will change it to vmov.i32 when assembling.
11878
11879 Variants 0-5 (inclusive) may also be used as immediates for the second
11880 operand of VORR/VBIC instructions.
11881
11882 The INVERSE argument causes the bitwise inverse of the given operand to be
11883 recognized instead (used for recognizing legal immediates for the VAND/VORN
11884 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11885 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11886 output, rather than the real insns vbic/vorr).
11887
11888 INVERSE makes no difference to the recognition of float vectors.
11889
11890 The return value is the variant of immediate as shown in the above table, or
11891 -1 if the given value doesn't match any of the listed patterns.
11892 */
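/* An illustrative example of the table above: a V2SImode constant with
   both elements equal to 0x0000ab00 splats to the little-endian byte
   pattern 00 ab 00 00 00 ab 00 00, which matches variant 1 with an element
   width of 32; *MODCONST is then set to 0x0000ab00. */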
11893 static int
11894 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11895 rtx *modconst, int *elementwidth)
11896 {
11897 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11898 matches = 1; \
11899 for (i = 0; i < idx; i += (STRIDE)) \
11900 if (!(TEST)) \
11901 matches = 0; \
11902 if (matches) \
11903 { \
11904 immtype = (CLASS); \
11905 elsize = (ELSIZE); \
11906 break; \
11907 }
11908
11909 unsigned int i, elsize = 0, idx = 0, n_elts;
11910 unsigned int innersize;
11911 unsigned char bytes[16];
11912 int immtype = -1, matches;
11913 unsigned int invmask = inverse ? 0xff : 0;
11914 bool vector = GET_CODE (op) == CONST_VECTOR;
11915
11916 if (vector)
11917 n_elts = CONST_VECTOR_NUNITS (op);
11918 else
11919 {
11920 n_elts = 1;
11921 if (mode == VOIDmode)
11922 mode = DImode;
11923 }
11924
11925 innersize = GET_MODE_UNIT_SIZE (mode);
11926
11927 /* Vectors of float constants. */
11928 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11929 {
11930 rtx el0 = CONST_VECTOR_ELT (op, 0);
11931
11932 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11933 return -1;
11934
11935 /* FP16 vectors cannot be represented. */
11936 if (GET_MODE_INNER (mode) == HFmode)
11937 return -1;
11938
11939 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11940 are distinct in this context. */
11941 if (!const_vec_duplicate_p (op))
11942 return -1;
11943
11944 if (modconst)
11945 *modconst = CONST_VECTOR_ELT (op, 0);
11946
11947 if (elementwidth)
11948 *elementwidth = 0;
11949
11950 if (el0 == CONST0_RTX (GET_MODE (el0)))
11951 return 19;
11952 else
11953 return 18;
11954 }
11955
11956 /* The tricks done in the code below apply for little-endian vector layout.
11957 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11958 FIXME: Implement logic for big-endian vectors. */
11959 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11960 return -1;
11961
11962 /* Splat vector constant out into a byte vector. */
11963 for (i = 0; i < n_elts; i++)
11964 {
11965 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11966 unsigned HOST_WIDE_INT elpart;
11967
11968 gcc_assert (CONST_INT_P (el));
11969 elpart = INTVAL (el);
11970
11971 for (unsigned int byte = 0; byte < innersize; byte++)
11972 {
11973 bytes[idx++] = (elpart & 0xff) ^ invmask;
11974 elpart >>= BITS_PER_UNIT;
11975 }
11976 }
11977
11978 /* Sanity check. */
11979 gcc_assert (idx == GET_MODE_SIZE (mode));
11980
11981 do
11982 {
11983 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11984 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11985
11986 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11987 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11988
11989 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11990 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11991
11992 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11993 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11994
11995 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11996
11997 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11998
11999 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12000 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12001
12002 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12003 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12004
12005 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12006 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12007
12008 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12009 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12010
12011 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12012
12013 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12014
12015 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12016 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12017
12018 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12019 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12020
12021 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12022 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12023
12024 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12025 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12026
12027 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12028
12029 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12030 && bytes[i] == bytes[(i + 8) % idx]);
12031 }
12032 while (0);
12033
12034 if (immtype == -1)
12035 return -1;
12036
12037 if (elementwidth)
12038 *elementwidth = elsize;
12039
12040 if (modconst)
12041 {
12042 unsigned HOST_WIDE_INT imm = 0;
12043
12044 /* Un-invert bytes of recognized vector, if necessary. */
12045 if (invmask != 0)
12046 for (i = 0; i < idx; i++)
12047 bytes[i] ^= invmask;
12048
12049 if (immtype == 17)
12050 {
12051 /* FIXME: Broken on 32-bit H_W_I hosts. */
12052 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12053
12054 for (i = 0; i < 8; i++)
12055 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12056 << (i * BITS_PER_UNIT);
12057
12058 *modconst = GEN_INT (imm);
12059 }
12060 else
12061 {
12062 unsigned HOST_WIDE_INT imm = 0;
12063
12064 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12065 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12066
12067 *modconst = GEN_INT (imm);
12068 }
12069 }
12070
12071 return immtype;
12072 #undef CHECK
12073 }
12074
12075 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12076 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12077 float elements), and a modified constant (whatever should be output for a
12078 VMOV) in *MODCONST. */
12079
12080 int
12081 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12082 rtx *modconst, int *elementwidth)
12083 {
12084 rtx tmpconst;
12085 int tmpwidth;
12086 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12087
12088 if (retval == -1)
12089 return 0;
12090
12091 if (modconst)
12092 *modconst = tmpconst;
12093
12094 if (elementwidth)
12095 *elementwidth = tmpwidth;
12096
12097 return 1;
12098 }
12099
12100 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12101 the immediate is valid, write a constant suitable for using as an operand
12102 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12103 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12104
12105 int
12106 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12107 rtx *modconst, int *elementwidth)
12108 {
12109 rtx tmpconst;
12110 int tmpwidth;
12111 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12112
12113 if (retval < 0 || retval > 5)
12114 return 0;
12115
12116 if (modconst)
12117 *modconst = tmpconst;
12118
12119 if (elementwidth)
12120 *elementwidth = tmpwidth;
12121
12122 return 1;
12123 }
12124
12125 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12126 the immediate is valid, write a constant suitable for using as an operand
12127 to VSHR/VSHL to *MODCONST and the corresponding element width to
12128 *ELEMENTWIDTH. ISLEFTSHIFT says whether the shift is a left shift or a
12129 right shift, because the two have different limitations. */
12130
12131 int
12132 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12133 rtx *modconst, int *elementwidth,
12134 bool isleftshift)
12135 {
12136 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12137 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12138 unsigned HOST_WIDE_INT last_elt = 0;
12139 unsigned HOST_WIDE_INT maxshift;
12140
12141 /* All elements of the vector must hold the same shift amount; extract it. */
12142 for (i = 0; i < n_elts; i++)
12143 {
12144 rtx el = CONST_VECTOR_ELT (op, i);
12145 unsigned HOST_WIDE_INT elpart;
12146
12147 if (CONST_INT_P (el))
12148 elpart = INTVAL (el);
12149 else if (CONST_DOUBLE_P (el))
12150 return 0;
12151 else
12152 gcc_unreachable ();
12153
12154 if (i != 0 && elpart != last_elt)
12155 return 0;
12156
12157 last_elt = elpart;
12158 }
12159
12160 /* Shift less than element size. */
12161 maxshift = innersize * 8;
12162
12163 if (isleftshift)
12164 {
12165 /* Left shift immediate value can be from 0 to <size>-1. */
12166 if (last_elt >= maxshift)
12167 return 0;
12168 }
12169 else
12170 {
12171 /* Right shift immediate value can be from 1 to <size>. */
12172 if (last_elt == 0 || last_elt > maxshift)
12173 return 0;
12174 }
12175
12176 if (elementwidth)
12177 *elementwidth = innersize * 8;
12178
12179 if (modconst)
12180 *modconst = CONST_VECTOR_ELT (op, 0);
12181
12182 return 1;
12183 }
12184
12185 /* Return a string suitable for output of Neon immediate logic operation
12186 MNEM. */
12187
12188 char *
12189 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12190 int inverse, int quad)
12191 {
12192 int width, is_valid;
12193 static char templ[40];
12194
12195 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12196
12197 gcc_assert (is_valid != 0);
12198
12199 if (quad)
12200 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12201 else
12202 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12203
12204 return templ;
12205 }
12206
12207 /* Return a string suitable for output of Neon immediate shift operation
12208 (VSHR or VSHL) MNEM. */
12209
12210 char *
12211 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12212 machine_mode mode, int quad,
12213 bool isleftshift)
12214 {
12215 int width, is_valid;
12216 static char templ[40];
12217
12218 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12219 gcc_assert (is_valid != 0);
12220
12221 if (quad)
12222 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12223 else
12224 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12225
12226 return templ;
12227 }
12228
12229 /* Output a sequence of pairwise operations to implement a reduction.
12230 NOTE: We do "too much work" here, because pairwise operations work on two
12231 registers' worth of operands in one go. Unfortunately we don't think we can
12232 exploit those extra calculations to do the full operation in fewer steps.
12233 Although all vector elements of the result but the first are ignored, we
12234 actually calculate the same result in each of the elements. An alternative
12235 such as initially loading a vector with zero to use as each of the second
12236 operands would use up an additional register and take an extra instruction,
12237 for no particular gain. */
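/* A sketch of the data flow (illustrative register names): reducing a
   V4HImode value { a, b, c, d } with vpadd.i16 takes two steps:
     vpadd.i16 dT,   dIn, dIn  -> { a+b, c+d, a+b, c+d }
     vpadd.i16 dOut, dT,  dT   -> { a+b+c+d, a+b+c+d, a+b+c+d, a+b+c+d }
   so, as noted above, every element of the result holds the full reduction
   even though only the first is used. */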
12238
12239 void
12240 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12241 rtx (*reduc) (rtx, rtx, rtx))
12242 {
12243 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12244 rtx tmpsum = op1;
12245
12246 for (i = parts / 2; i >= 1; i /= 2)
12247 {
12248 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12249 emit_insn (reduc (dest, tmpsum, tmpsum));
12250 tmpsum = dest;
12251 }
12252 }
12253
12254 /* If VALS is a vector constant that can be loaded into a register
12255 using VDUP, generate instructions to do so and return an RTX to
12256 assign to the register. Otherwise return NULL_RTX. */
12257
12258 static rtx
12259 neon_vdup_constant (rtx vals)
12260 {
12261 machine_mode mode = GET_MODE (vals);
12262 machine_mode inner_mode = GET_MODE_INNER (mode);
12263 rtx x;
12264
12265 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12266 return NULL_RTX;
12267
12268 if (!const_vec_duplicate_p (vals, &x))
12269 /* The elements are not all the same. We could handle repeating
12270 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12271 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12272 vdup.i16). */
12273 return NULL_RTX;
12274
12275 /* We can load this constant by using VDUP and a constant in a
12276 single ARM register. This will be cheaper than a vector
12277 load. */
12278
12279 x = copy_to_mode_reg (inner_mode, x);
12280 return gen_vec_duplicate (mode, x);
12281 }
12282
12283 /* Generate code to load VALS, which is a PARALLEL containing only
12284 constants (for vec_init) or CONST_VECTOR, efficiently into a
12285 register. Returns an RTX to copy into the register, or NULL_RTX
12286 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12287
12288 rtx
12289 neon_make_constant (rtx vals)
12290 {
12291 machine_mode mode = GET_MODE (vals);
12292 rtx target;
12293 rtx const_vec = NULL_RTX;
12294 int n_elts = GET_MODE_NUNITS (mode);
12295 int n_const = 0;
12296 int i;
12297
12298 if (GET_CODE (vals) == CONST_VECTOR)
12299 const_vec = vals;
12300 else if (GET_CODE (vals) == PARALLEL)
12301 {
12302 /* A CONST_VECTOR must contain only CONST_INTs and
12303 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12304 Only store valid constants in a CONST_VECTOR. */
12305 for (i = 0; i < n_elts; ++i)
12306 {
12307 rtx x = XVECEXP (vals, 0, i);
12308 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12309 n_const++;
12310 }
12311 if (n_const == n_elts)
12312 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12313 }
12314 else
12315 gcc_unreachable ();
12316
12317 if (const_vec != NULL
12318 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12319 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12320 return const_vec;
12321 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12322 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12323 pipeline cycle; creating the constant takes one or two ARM
12324 pipeline cycles. */
12325 return target;
12326 else if (const_vec != NULL_RTX)
12327 /* Load from constant pool. On Cortex-A8 this takes two cycles
12328 (for either double or quad vectors). We can not take advantage
12329 of single-cycle VLD1 because we need a PC-relative addressing
12330 mode. */
12331 return const_vec;
12332 else
12333 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12334 We can not construct an initializer. */
12335 return NULL_RTX;
12336 }
12337
12338 /* Initialize vector TARGET to VALS. */
12339
12340 void
12341 neon_expand_vector_init (rtx target, rtx vals)
12342 {
12343 machine_mode mode = GET_MODE (target);
12344 machine_mode inner_mode = GET_MODE_INNER (mode);
12345 int n_elts = GET_MODE_NUNITS (mode);
12346 int n_var = 0, one_var = -1;
12347 bool all_same = true;
12348 rtx x, mem;
12349 int i;
12350
12351 for (i = 0; i < n_elts; ++i)
12352 {
12353 x = XVECEXP (vals, 0, i);
12354 if (!CONSTANT_P (x))
12355 ++n_var, one_var = i;
12356
12357 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12358 all_same = false;
12359 }
12360
12361 if (n_var == 0)
12362 {
12363 rtx constant = neon_make_constant (vals);
12364 if (constant != NULL_RTX)
12365 {
12366 emit_move_insn (target, constant);
12367 return;
12368 }
12369 }
12370
12371 /* Splat a single non-constant element if we can. */
12372 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12373 {
12374 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12375 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12376 return;
12377 }
12378
12379 /* One field is non-constant. Load constant then overwrite varying
12380 field. This is more efficient than using the stack. */
12381 if (n_var == 1)
12382 {
12383 rtx copy = copy_rtx (vals);
12384 rtx index = GEN_INT (one_var);
12385
12386 /* Load constant part of vector, substitute neighboring value for
12387 varying element. */
12388 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12389 neon_expand_vector_init (target, copy);
12390
12391 /* Insert variable. */
12392 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12393 switch (mode)
12394 {
12395 case E_V8QImode:
12396 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12397 break;
12398 case E_V16QImode:
12399 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12400 break;
12401 case E_V4HImode:
12402 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12403 break;
12404 case E_V8HImode:
12405 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12406 break;
12407 case E_V2SImode:
12408 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12409 break;
12410 case E_V4SImode:
12411 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12412 break;
12413 case E_V2SFmode:
12414 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12415 break;
12416 case E_V4SFmode:
12417 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12418 break;
12419 case E_V2DImode:
12420 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12421 break;
12422 default:
12423 gcc_unreachable ();
12424 }
12425 return;
12426 }
12427
12428 /* Construct the vector in memory one field at a time
12429 and load the whole vector. */
12430 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12431 for (i = 0; i < n_elts; i++)
12432 emit_move_insn (adjust_address_nv (mem, inner_mode,
12433 i * GET_MODE_SIZE (inner_mode)),
12434 XVECEXP (vals, 0, i));
12435 emit_move_insn (target, mem);
12436 }
12437
12438 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive), raising
12439 an error that uses DESC to describe the operand if it doesn't. EXP indicates
12440 the source location, which includes the inlining history for intrinsics. */
12441
12442 static void
12443 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12444 const_tree exp, const char *desc)
12445 {
12446 HOST_WIDE_INT lane;
12447
12448 gcc_assert (CONST_INT_P (operand));
12449
12450 lane = INTVAL (operand);
12451
12452 if (lane < low || lane >= high)
12453 {
12454 if (exp)
12455 error ("%K%s %wd out of range %wd - %wd",
12456 exp, desc, lane, low, high - 1);
12457 else
12458 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12459 }
12460 }
12461
12462 /* Bounds-check lanes. */
12463
12464 void
12465 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12466 const_tree exp)
12467 {
12468 bounds_check (operand, low, high, exp, "lane");
12469 }
12470
12471 /* Bounds-check constants. */
12472
12473 void
12474 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12475 {
12476 bounds_check (operand, low, high, NULL_TREE, "constant");
12477 }
12478
12479 HOST_WIDE_INT
12480 neon_element_bits (machine_mode mode)
12481 {
12482 return GET_MODE_UNIT_BITSIZE (mode);
12483 }
12484
12485 \f
12486 /* Predicates for `match_operand' and `match_operator'. */
12487
12488 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12489 WB is true if full writeback address modes are allowed, and false if
12490 only the limited writeback address modes (POST_INC and PRE_DEC) are
12491 allowed. */
12492
12493 int
12494 arm_coproc_mem_operand (rtx op, bool wb)
12495 {
12496 rtx ind;
12497
12498 /* Reject eliminable registers. */
12499 if (! (reload_in_progress || reload_completed || lra_in_progress)
12500 && ( reg_mentioned_p (frame_pointer_rtx, op)
12501 || reg_mentioned_p (arg_pointer_rtx, op)
12502 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12503 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12504 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12505 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12506 return FALSE;
12507
12508 /* Constants are converted into offsets from labels. */
12509 if (!MEM_P (op))
12510 return FALSE;
12511
12512 ind = XEXP (op, 0);
12513
12514 if (reload_completed
12515 && (GET_CODE (ind) == LABEL_REF
12516 || (GET_CODE (ind) == CONST
12517 && GET_CODE (XEXP (ind, 0)) == PLUS
12518 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12519 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12520 return TRUE;
12521
12522 /* Match: (mem (reg)). */
12523 if (REG_P (ind))
12524 return arm_address_register_rtx_p (ind, 0);
12525
12526 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12527 acceptable in any case (subject to verification by
12528 arm_address_register_rtx_p). We need WB to be true to accept
12529 PRE_INC and POST_DEC. */
12530 if (GET_CODE (ind) == POST_INC
12531 || GET_CODE (ind) == PRE_DEC
12532 || (wb
12533 && (GET_CODE (ind) == PRE_INC
12534 || GET_CODE (ind) == POST_DEC)))
12535 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12536
12537 if (wb
12538 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12539 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12540 && GET_CODE (XEXP (ind, 1)) == PLUS
12541 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12542 ind = XEXP (ind, 1);
12543
12544 /* Match:
12545 (plus (reg)
12546 (const)). */
12547 if (GET_CODE (ind) == PLUS
12548 && REG_P (XEXP (ind, 0))
12549 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12550 && CONST_INT_P (XEXP (ind, 1))
12551 && INTVAL (XEXP (ind, 1)) > -1024
12552 && INTVAL (XEXP (ind, 1)) < 1024
12553 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12554 return TRUE;
12555
12556 return FALSE;
12557 }
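
/* Some addresses arm_coproc_mem_operand accepts (illustrative only; register
numbers are arbitrary):

  (mem (reg r4))                           -- plain register
  (mem (post_inc (reg r4)))                -- limited writeback, any WB
  (mem (plus (reg r4) (const_int 1020)))   -- word-aligned, within +/-1020

An offset of 1024, or one that is not a multiple of 4, is rejected, as is any
address that mentions an eliminable register such as the frame pointer.  */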
12558
12559 /* Return TRUE if OP is a memory operand from/to which we can load or store
12560 a vector. TYPE is one of the following values:
12561 0 - Vector load/store (vldr)
12562 1 - Core registers (ldm)
12563 2 - Element/structure loads (vld1)
12564 */
12565 int
12566 neon_vector_mem_operand (rtx op, int type, bool strict)
12567 {
12568 rtx ind;
12569
12570 /* Reject eliminable registers. */
12571 if (strict && ! (reload_in_progress || reload_completed)
12572 && (reg_mentioned_p (frame_pointer_rtx, op)
12573 || reg_mentioned_p (arg_pointer_rtx, op)
12574 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12575 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12576 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12577 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12578 return FALSE;
12579
12580 /* Constants are converted into offsets from labels. */
12581 if (!MEM_P (op))
12582 return FALSE;
12583
12584 ind = XEXP (op, 0);
12585
12586 if (reload_completed
12587 && (GET_CODE (ind) == LABEL_REF
12588 || (GET_CODE (ind) == CONST
12589 && GET_CODE (XEXP (ind, 0)) == PLUS
12590 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12591 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12592 return TRUE;
12593
12594 /* Match: (mem (reg)). */
12595 if (REG_P (ind))
12596 return arm_address_register_rtx_p (ind, 0);
12597
12598 /* Allow post-increment with Neon registers. */
12599 if ((type != 1 && GET_CODE (ind) == POST_INC)
12600 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12601 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12602
12603 /* Allow post-increment by register for VLDn. */
12604 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12605 && GET_CODE (XEXP (ind, 1)) == PLUS
12606 && REG_P (XEXP (XEXP (ind, 1), 1)))
12607 return true;
12608
12609 /* Match:
12610 (plus (reg)
12611 (const)). */
12612 if (type == 0
12613 && GET_CODE (ind) == PLUS
12614 && REG_P (XEXP (ind, 0))
12615 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12616 && CONST_INT_P (XEXP (ind, 1))
12617 && INTVAL (XEXP (ind, 1)) > -1024
12618 /* For quad modes, we restrict the constant offset to be slightly less
12619 than what the instruction format permits. We have no such constraint
12620 on double mode offsets. (This must match arm_legitimate_index_p.) */
12621 && (INTVAL (XEXP (ind, 1))
12622 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12623 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12624 return TRUE;
12625
12626 return FALSE;
12627 }
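
/* For example (illustrative only), with TYPE == 0 a doubleword access such as
(mem:V2SI (plus (reg r0) (const_int 1020))) is accepted, but the same offset
is rejected for a quadword mode such as V4SImode, where the constant must be
below 1016 (so at most 1012); this mirrors the limit in
arm_legitimate_index_p.  */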
12628
12629 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12630 type. */
12631 int
12632 neon_struct_mem_operand (rtx op)
12633 {
12634 rtx ind;
12635
12636 /* Reject eliminable registers. */
12637 if (! (reload_in_progress || reload_completed)
12638 && ( reg_mentioned_p (frame_pointer_rtx, op)
12639 || reg_mentioned_p (arg_pointer_rtx, op)
12640 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12641 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12642 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12643 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12644 return FALSE;
12645
12646 /* Constants are converted into offsets from labels. */
12647 if (!MEM_P (op))
12648 return FALSE;
12649
12650 ind = XEXP (op, 0);
12651
12652 if (reload_completed
12653 && (GET_CODE (ind) == LABEL_REF
12654 || (GET_CODE (ind) == CONST
12655 && GET_CODE (XEXP (ind, 0)) == PLUS
12656 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12657 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12658 return TRUE;
12659
12660 /* Match: (mem (reg)). */
12661 if (REG_P (ind))
12662 return arm_address_register_rtx_p (ind, 0);
12663
12664 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12665 if (GET_CODE (ind) == POST_INC
12666 || GET_CODE (ind) == PRE_DEC)
12667 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12668
12669 return FALSE;
12670 }
12671
12672 /* Return true if X is a register that will be eliminated later on. */
12673 int
12674 arm_eliminable_register (rtx x)
12675 {
12676 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12677 || REGNO (x) == ARG_POINTER_REGNUM
12678 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12679 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12680 }
12681
12682 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12683 coprocessor registers; otherwise return NO_REGS. */
12684
12685 enum reg_class
12686 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12687 {
12688 if (mode == HFmode)
12689 {
12690 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12691 return GENERAL_REGS;
12692 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12693 return NO_REGS;
12694 return GENERAL_REGS;
12695 }
12696
12697 /* The neon move patterns handle all legitimate vector and struct
12698 addresses. */
12699 if (TARGET_NEON
12700 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12701 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12702 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12703 || VALID_NEON_STRUCT_MODE (mode)))
12704 return NO_REGS;
12705
12706 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12707 return NO_REGS;
12708
12709 return GENERAL_REGS;
12710 }
12711
12712 /* Values which must be returned in the most-significant end of the return
12713 register. */
12714
12715 static bool
12716 arm_return_in_msb (const_tree valtype)
12717 {
12718 return (TARGET_AAPCS_BASED
12719 && BYTES_BIG_ENDIAN
12720 && (AGGREGATE_TYPE_P (valtype)
12721 || TREE_CODE (valtype) == COMPLEX_TYPE
12722 || FIXED_POINT_TYPE_P (valtype)));
12723 }
12724
12725 /* Return TRUE if X references a SYMBOL_REF. */
12726 int
12727 symbol_mentioned_p (rtx x)
12728 {
12729 const char * fmt;
12730 int i;
12731
12732 if (GET_CODE (x) == SYMBOL_REF)
12733 return 1;
12734
12735 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12736 are constant offsets, not symbols. */
12737 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12738 return 0;
12739
12740 fmt = GET_RTX_FORMAT (GET_CODE (x));
12741
12742 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12743 {
12744 if (fmt[i] == 'E')
12745 {
12746 int j;
12747
12748 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12749 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12750 return 1;
12751 }
12752 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12753 return 1;
12754 }
12755
12756 return 0;
12757 }
12758
12759 /* Return TRUE if X references a LABEL_REF. */
12760 int
12761 label_mentioned_p (rtx x)
12762 {
12763 const char * fmt;
12764 int i;
12765
12766 if (GET_CODE (x) == LABEL_REF)
12767 return 1;
12768
12769 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12770 instruction, but they are constant offsets, not symbols. */
12771 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12772 return 0;
12773
12774 fmt = GET_RTX_FORMAT (GET_CODE (x));
12775 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12776 {
12777 if (fmt[i] == 'E')
12778 {
12779 int j;
12780
12781 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12782 if (label_mentioned_p (XVECEXP (x, i, j)))
12783 return 1;
12784 }
12785 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12786 return 1;
12787 }
12788
12789 return 0;
12790 }
12791
12792 int
12793 tls_mentioned_p (rtx x)
12794 {
12795 switch (GET_CODE (x))
12796 {
12797 case CONST:
12798 return tls_mentioned_p (XEXP (x, 0));
12799
12800 case UNSPEC:
12801 if (XINT (x, 1) == UNSPEC_TLS)
12802 return 1;
12803
12804 /* Fall through. */
12805 default:
12806 return 0;
12807 }
12808 }
12809
12810 /* Must not copy any rtx that uses a pc-relative address.
12811 Also, disallow copying of load-exclusive instructions that
12812 may appear after splitting of compare-and-swap-style operations
12813 so as to prevent those loops from being transformed away from their
12814 canonical forms (see PR 69904). */
12815
12816 static bool
12817 arm_cannot_copy_insn_p (rtx_insn *insn)
12818 {
12819 /* The tls call insn cannot be copied, as it is paired with a data
12820 word. */
12821 if (recog_memoized (insn) == CODE_FOR_tlscall)
12822 return true;
12823
12824 subrtx_iterator::array_type array;
12825 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12826 {
12827 const_rtx x = *iter;
12828 if (GET_CODE (x) == UNSPEC
12829 && (XINT (x, 1) == UNSPEC_PIC_BASE
12830 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12831 return true;
12832 }
12833
12834 rtx set = single_set (insn);
12835 if (set)
12836 {
12837 rtx src = SET_SRC (set);
12838 if (GET_CODE (src) == ZERO_EXTEND)
12839 src = XEXP (src, 0);
12840
12841 /* Catch the load-exclusive and load-acquire operations. */
12842 if (GET_CODE (src) == UNSPEC_VOLATILE
12843 && (XINT (src, 1) == VUNSPEC_LL
12844 || XINT (src, 1) == VUNSPEC_LAX))
12845 return true;
12846 }
12847 return false;
12848 }
12849
12850 enum rtx_code
12851 minmax_code (rtx x)
12852 {
12853 enum rtx_code code = GET_CODE (x);
12854
12855 switch (code)
12856 {
12857 case SMAX:
12858 return GE;
12859 case SMIN:
12860 return LE;
12861 case UMIN:
12862 return LEU;
12863 case UMAX:
12864 return GEU;
12865 default:
12866 gcc_unreachable ();
12867 }
12868 }
12869
12870 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12871
12872 bool
12873 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12874 int *mask, bool *signed_sat)
12875 {
12876 /* The high bound must be a power of two minus one. */
12877 int log = exact_log2 (INTVAL (hi_bound) + 1);
12878 if (log == -1)
12879 return false;
12880
12881 /* The low bound is either zero (for usat) or one less than the
12882 negation of the high bound (for ssat). */
12883 if (INTVAL (lo_bound) == 0)
12884 {
12885 if (mask)
12886 *mask = log;
12887 if (signed_sat)
12888 *signed_sat = false;
12889
12890 return true;
12891 }
12892
12893 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12894 {
12895 if (mask)
12896 *mask = log + 1;
12897 if (signed_sat)
12898 *signed_sat = true;
12899
12900 return true;
12901 }
12902
12903 return false;
12904 }
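
/* Worked examples for arm_sat_operator_match (illustrative only):

  hi_bound = 255, lo_bound = 0     ->  *mask = 8, unsigned saturation
                                       (usat-style range 0 .. 255)
  hi_bound = 127, lo_bound = -128  ->  *mask = 8, signed saturation
                                       (ssat-style range -128 .. 127)

In the unsigned case exact_log2 (255 + 1) == 8 and the low bound is zero; in
the signed case exact_log2 (127 + 1) == 7 and -128 == -127 - 1, so the mask
becomes 7 + 1 == 8.  */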
12905
12906 /* Return 1 if memory locations are adjacent. */
12907 int
12908 adjacent_mem_locations (rtx a, rtx b)
12909 {
12910 /* We don't guarantee to preserve the order of these memory refs. */
12911 if (volatile_refs_p (a) || volatile_refs_p (b))
12912 return 0;
12913
12914 if ((REG_P (XEXP (a, 0))
12915 || (GET_CODE (XEXP (a, 0)) == PLUS
12916 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12917 && (REG_P (XEXP (b, 0))
12918 || (GET_CODE (XEXP (b, 0)) == PLUS
12919 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12920 {
12921 HOST_WIDE_INT val0 = 0, val1 = 0;
12922 rtx reg0, reg1;
12923 int val_diff;
12924
12925 if (GET_CODE (XEXP (a, 0)) == PLUS)
12926 {
12927 reg0 = XEXP (XEXP (a, 0), 0);
12928 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12929 }
12930 else
12931 reg0 = XEXP (a, 0);
12932
12933 if (GET_CODE (XEXP (b, 0)) == PLUS)
12934 {
12935 reg1 = XEXP (XEXP (b, 0), 0);
12936 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12937 }
12938 else
12939 reg1 = XEXP (b, 0);
12940
12941 /* Don't accept any offset that will require multiple
12942 instructions to handle, since this would cause the
12943 arith_adjacentmem pattern to output an overlong sequence. */
12944 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12945 return 0;
12946
12947 /* Don't allow an eliminable register: register elimination can make
12948 the offset too large. */
12949 if (arm_eliminable_register (reg0))
12950 return 0;
12951
12952 val_diff = val1 - val0;
12953
12954 if (arm_ld_sched)
12955 {
12956 /* If the target has load delay slots, then there's no benefit
12957 to using an ldm instruction unless the offset is zero and
12958 we are optimizing for size. */
12959 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12960 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12961 && (val_diff == 4 || val_diff == -4));
12962 }
12963
12964 return ((REGNO (reg0) == REGNO (reg1))
12965 && (val_diff == 4 || val_diff == -4));
12966 }
12967
12968 return 0;
12969 }
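
/* For example (illustrative only), (mem (plus (reg r4) (const_int 8))) and
(mem (plus (reg r4) (const_int 12))) are adjacent: same base register and an
offset difference of exactly 4.  On cores with load delay slots
(arm_ld_sched), that particular pair is nevertheless rejected, since there the
pair is only accepted when optimizing for size and one of the offsets is 0 or
4.  */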
12970
12971 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12972 for load operations, false for store operations. CONSECUTIVE is true
12973 if the register numbers in the operation must be consecutive in the register
12974 bank. RETURN_PC is true if the value is to be loaded into the PC.
12975 The pattern we are trying to match for load is:
12976 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12977 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12978 :
12979 :
12980 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12981 ]
12982 where
12983 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12984 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12985 3. If consecutive is TRUE, then for kth register being loaded,
12986 REGNO (R_dk) = REGNO (R_d0) + k.
12987 The pattern for store is similar. */
12988 bool
12989 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12990 bool consecutive, bool return_pc)
12991 {
12992 HOST_WIDE_INT count = XVECLEN (op, 0);
12993 rtx reg, mem, addr;
12994 unsigned regno;
12995 unsigned first_regno;
12996 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12997 rtx elt;
12998 bool addr_reg_in_reglist = false;
12999 bool update = false;
13000 int reg_increment;
13001 int offset_adj;
13002 int regs_per_val;
13003
13004 /* If not in SImode, then registers must be consecutive
13005 (e.g., VLDM instructions for DFmode). */
13006 gcc_assert ((mode == SImode) || consecutive);
13007 /* Setting return_pc for stores is illegal. */
13008 gcc_assert (!return_pc || load);
13009
13010 /* Set up the increments and the regs per val based on the mode. */
13011 reg_increment = GET_MODE_SIZE (mode);
13012 regs_per_val = reg_increment / 4;
13013 offset_adj = return_pc ? 1 : 0;
13014
13015 if (count <= 1
13016 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13017 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13018 return false;
13019
13020 /* Check if this is a write-back. */
13021 elt = XVECEXP (op, 0, offset_adj);
13022 if (GET_CODE (SET_SRC (elt)) == PLUS)
13023 {
13024 i++;
13025 base = 1;
13026 update = true;
13027
13028 /* The offset adjustment must be the number of registers being
13029 popped times the size of a single register. */
13030 if (!REG_P (SET_DEST (elt))
13031 || !REG_P (XEXP (SET_SRC (elt), 0))
13032 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13033 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13034 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13035 ((count - 1 - offset_adj) * reg_increment))
13036 return false;
13037 }
13038
13039 i = i + offset_adj;
13040 base = base + offset_adj;
13041 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13042 success depends on the type: VLDM can do just one reg,
13043 LDM must do at least two. */
13044 if ((count <= i) && (mode == SImode))
13045 return false;
13046
13047 elt = XVECEXP (op, 0, i - 1);
13048 if (GET_CODE (elt) != SET)
13049 return false;
13050
13051 if (load)
13052 {
13053 reg = SET_DEST (elt);
13054 mem = SET_SRC (elt);
13055 }
13056 else
13057 {
13058 reg = SET_SRC (elt);
13059 mem = SET_DEST (elt);
13060 }
13061
13062 if (!REG_P (reg) || !MEM_P (mem))
13063 return false;
13064
13065 regno = REGNO (reg);
13066 first_regno = regno;
13067 addr = XEXP (mem, 0);
13068 if (GET_CODE (addr) == PLUS)
13069 {
13070 if (!CONST_INT_P (XEXP (addr, 1)))
13071 return false;
13072
13073 offset = INTVAL (XEXP (addr, 1));
13074 addr = XEXP (addr, 0);
13075 }
13076
13077 if (!REG_P (addr))
13078 return false;
13079
13080 /* Don't allow SP to be loaded unless it is also the base register. It
13081 guarantees that SP is reset correctly when an LDM instruction
13082 is interrupted. Otherwise, we might end up with a corrupt stack. */
13083 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13084 return false;
13085
13086 for (; i < count; i++)
13087 {
13088 elt = XVECEXP (op, 0, i);
13089 if (GET_CODE (elt) != SET)
13090 return false;
13091
13092 if (load)
13093 {
13094 reg = SET_DEST (elt);
13095 mem = SET_SRC (elt);
13096 }
13097 else
13098 {
13099 reg = SET_SRC (elt);
13100 mem = SET_DEST (elt);
13101 }
13102
13103 if (!REG_P (reg)
13104 || GET_MODE (reg) != mode
13105 || REGNO (reg) <= regno
13106 || (consecutive
13107 && (REGNO (reg) !=
13108 (unsigned int) (first_regno + regs_per_val * (i - base))))
13109 /* Don't allow SP to be loaded unless it is also the base register. It
13110 guarantees that SP is reset correctly when an LDM instruction
13111 is interrupted. Otherwise, we might end up with a corrupt stack. */
13112 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13113 || !MEM_P (mem)
13114 || GET_MODE (mem) != mode
13115 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13116 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13117 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13118 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13119 offset + (i - base) * reg_increment))
13120 && (!REG_P (XEXP (mem, 0))
13121 || offset + (i - base) * reg_increment != 0)))
13122 return false;
13123
13124 regno = REGNO (reg);
13125 if (regno == REGNO (addr))
13126 addr_reg_in_reglist = true;
13127 }
13128
13129 if (load)
13130 {
13131 if (update && addr_reg_in_reglist)
13132 return false;
13133
13134 /* For Thumb-1, the address register is always modified, either by write-back
13135 or by an explicit load. If the pattern does not describe an update,
13136 then the address register must be in the list of loaded registers. */
13137 if (TARGET_THUMB1)
13138 return update || addr_reg_in_reglist;
13139 }
13140
13141 return true;
13142 }
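
/* As a concrete sketch (illustrative only; register numbers are arbitrary), a
two-register SImode LDM with write-back would be presented to this function
as

  (parallel
    [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
     (set (reg:SI r4) (mem:SI (reg:SI r0)))
     (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))])

i.e. the optional base-register update first, followed by loads from
ascending offsets into ascending register numbers.  */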
13143
13144 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13145 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13146 instruction. ADD_OFFSET is nonzero if the base address register needs
13147 to be modified with an add instruction before we can use it. */
13148
13149 static bool
13150 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13151 int nops, HOST_WIDE_INT add_offset)
13152 {
13153 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13154 if the offset isn't small enough. The reason 2 ldrs are faster
13155 is because these ARMs are able to do more than one cache access
13156 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13157 whilst the ARM8 has a double bandwidth cache. This means that
13158 these cores can do both an instruction fetch and a data fetch in
13159 a single cycle, so the trick of calculating the address into a
13160 scratch register (one of the result regs) and then doing a load
13161 multiple actually becomes slower (and no smaller in code size).
13162 That is the transformation
13163
13164 ldr rd1, [rbase + offset]
13165 ldr rd2, [rbase + offset + 4]
13166
13167 to
13168
13169 add rd1, rbase, offset
13170 ldmia rd1, {rd1, rd2}
13171
13172 produces worse code -- '3 cycles + any stalls on rd2' instead of
13173 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13174 access per cycle, the first sequence could never complete in less
13175 than 6 cycles, whereas the ldm sequence would only take 5 and
13176 would make better use of sequential accesses if not hitting the
13177 cache.
13178
13179 We cheat here and test 'arm_ld_sched' which we currently know to
13180 only be true for the ARM8, ARM9 and StrongARM. If this ever
13181 changes, then the test below needs to be reworked. */
13182 if (nops == 2 && arm_ld_sched && add_offset != 0)
13183 return false;
13184
13185 /* XScale has load-store double instructions, but they have stricter
13186 alignment requirements than load-store multiple, so we cannot
13187 use them.
13188
13189 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13190 the pipeline until completion.
13191
13192 NREGS CYCLES
13193 1 3
13194 2 4
13195 3 5
13196 4 6
13197
13198 An ldr instruction takes 1-3 cycles, but does not block the
13199 pipeline.
13200
13201 NREGS CYCLES
13202 1 1-3
13203 2 2-6
13204 3 3-9
13205 4 4-12
13206
13207 Best case ldr will always win. However, the more ldr instructions
13208 we issue, the less likely we are to be able to schedule them well.
13209 Using ldr instructions also increases code size.
13210
13211 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13212 for counts of 3 or 4 regs. */
13213 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13214 return false;
13215 return true;
13216 }
13217
13218 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13219 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13220 an array ORDER describing the sequence in which to access the offsets
13221 so that they are visited in ascending order. In this sequence, each
13222 offset must be larger by exactly 4 than the previous one. ORDER[0]
13223 must have been filled in with the lowest offset by the caller.
13224 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13225 we use to verify that ORDER produces an ascending order of registers.
13226 Return true if it was possible to construct such an order, false if
13227 not. */
13228
13229 static bool
13230 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13231 int *unsorted_regs)
13232 {
13233 int i;
13234 for (i = 1; i < nops; i++)
13235 {
13236 int j;
13237
13238 order[i] = order[i - 1];
13239 for (j = 0; j < nops; j++)
13240 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13241 {
13242 /* We must find exactly one offset that is higher than the
13243 previous one by 4. */
13244 if (order[i] != order[i - 1])
13245 return false;
13246 order[i] = j;
13247 }
13248 if (order[i] == order[i - 1])
13249 return false;
13250 /* The register numbers must be ascending. */
13251 if (unsorted_regs != NULL
13252 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13253 return false;
13254 }
13255 return true;
13256 }
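
/* A short worked example (illustrative only): for UNSORTED_OFFSETS
{ 8, 4, 12, 0 } the caller seeds ORDER[0] = 3 (the slot holding offset 0) and
the loop fills in ORDER = { 3, 1, 0, 2 }, i.e. offsets 0, 4, 8, 12.  Offset
sets such as { 0, 4, 4, 8 } or { 0, 4, 12, 16 } make the function return
false, because each step must find exactly one offset that is larger than the
previous one by 4.  */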
13257
13258 /* Used to determine in a peephole whether a sequence of load
13259 instructions can be changed into a load-multiple instruction.
13260 NOPS is the number of separate load instructions we are examining. The
13261 first NOPS entries in OPERANDS are the destination registers, the
13262 next NOPS entries are memory operands. If this function is
13263 successful, *BASE is set to the common base register of the memory
13264 accesses; *LOAD_OFFSET is set to the first memory location's offset
13265 from that base register.
13266 REGS is an array filled in with the destination register numbers.
13267 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13268 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13269 the sequence of registers in REGS matches the loads from ascending memory
13270 locations, and the function verifies that the register numbers are
13271 themselves ascending. If CHECK_REGS is false, the register numbers
13272 are stored in the order they are found in the operands. */
13273 static int
13274 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13275 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13276 {
13277 int unsorted_regs[MAX_LDM_STM_OPS];
13278 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13279 int order[MAX_LDM_STM_OPS];
13280 rtx base_reg_rtx = NULL;
13281 int base_reg = -1;
13282 int i, ldm_case;
13283
13284 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13285 easily extended if required. */
13286 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13287
13288 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13289
13290 /* Loop over the operands and check that the memory references are
13291 suitable (i.e. immediate offsets from the same base register). At
13292 the same time, extract the target register and the memory
13293 offsets. */
13294 for (i = 0; i < nops; i++)
13295 {
13296 rtx reg;
13297 rtx offset;
13298
13299 /* Convert a subreg of a mem into the mem itself. */
13300 if (GET_CODE (operands[nops + i]) == SUBREG)
13301 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13302
13303 gcc_assert (MEM_P (operands[nops + i]));
13304
13305 /* Don't reorder volatile memory references; it doesn't seem worth
13306 looking for the case where the order is ok anyway. */
13307 if (MEM_VOLATILE_P (operands[nops + i]))
13308 return 0;
13309
13310 offset = const0_rtx;
13311
13312 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13313 || (GET_CODE (reg) == SUBREG
13314 && REG_P (reg = SUBREG_REG (reg))))
13315 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13316 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13317 || (GET_CODE (reg) == SUBREG
13318 && REG_P (reg = SUBREG_REG (reg))))
13319 && (CONST_INT_P (offset
13320 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13321 {
13322 if (i == 0)
13323 {
13324 base_reg = REGNO (reg);
13325 base_reg_rtx = reg;
13326 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13327 return 0;
13328 }
13329 else if (base_reg != (int) REGNO (reg))
13330 /* Not addressed from the same base register. */
13331 return 0;
13332
13333 unsorted_regs[i] = (REG_P (operands[i])
13334 ? REGNO (operands[i])
13335 : REGNO (SUBREG_REG (operands[i])));
13336
13337 /* If it isn't an integer register, or if it overwrites the
13338 base register but isn't the last insn in the list, then
13339 we can't do this. */
13340 if (unsorted_regs[i] < 0
13341 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13342 || unsorted_regs[i] > 14
13343 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13344 return 0;
13345
13346 /* Don't allow SP to be loaded unless it is also the base
13347 register. It guarantees that SP is reset correctly when
13348 an LDM instruction is interrupted. Otherwise, we might
13349 end up with a corrupt stack. */
13350 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13351 return 0;
13352
13353 unsorted_offsets[i] = INTVAL (offset);
13354 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13355 order[0] = i;
13356 }
13357 else
13358 /* Not a suitable memory address. */
13359 return 0;
13360 }
13361
13362 /* All the useful information has now been extracted from the
13363 operands into unsorted_regs and unsorted_offsets; additionally,
13364 order[0] has been set to the lowest offset in the list. Sort
13365 the offsets into order, verifying that they are adjacent, and
13366 check that the register numbers are ascending. */
13367 if (!compute_offset_order (nops, unsorted_offsets, order,
13368 check_regs ? unsorted_regs : NULL))
13369 return 0;
13370
13371 if (saved_order)
13372 memcpy (saved_order, order, sizeof order);
13373
13374 if (base)
13375 {
13376 *base = base_reg;
13377
13378 for (i = 0; i < nops; i++)
13379 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13380
13381 *load_offset = unsorted_offsets[order[0]];
13382 }
13383
13384 if (TARGET_THUMB1
13385 && !peep2_reg_dead_p (nops, base_reg_rtx))
13386 return 0;
13387
13388 if (unsorted_offsets[order[0]] == 0)
13389 ldm_case = 1; /* ldmia */
13390 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13391 ldm_case = 2; /* ldmib */
13392 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13393 ldm_case = 3; /* ldmda */
13394 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13395 ldm_case = 4; /* ldmdb */
13396 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13397 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13398 ldm_case = 5;
13399 else
13400 return 0;
13401
13402 if (!multiple_operation_profitable_p (false, nops,
13403 ldm_case == 5
13404 ? unsorted_offsets[order[0]] : 0))
13405 return 0;
13406
13407 return ldm_case;
13408 }
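
/* For instance (illustrative only; register numbers are arbitrary), the
peephole sequence

  ldr r1, [r4]
  ldr r2, [r4, #4]
  ldr r3, [r4, #8]

yields ldm_case 1 (an ldmia from r4).  The same loads based at offset 4 would
give ldm_case 2 (ldmib, ARM state only), and a lowest offset that is merely an
addable constant gives ldm_case 5, which requires an add into a scratch or
result register before the ldm can be used.  */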
13409
13410 /* Used to determine in a peephole whether a sequence of store instructions can
13411 be changed into a store-multiple instruction.
13412 NOPS is the number of separate store instructions we are examining.
13413 NOPS_TOTAL is the total number of instructions recognized by the peephole
13414 pattern.
13415 The first NOPS entries in OPERANDS are the source registers, the next
13416 NOPS entries are memory operands. If this function is successful, *BASE is
13417 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13418 to the first memory location's offset from that base register. REGS is an
13419 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13420 likewise filled with the corresponding rtx's.
13421 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13422 numbers to an ascending order of stores.
13423 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13424 from ascending memory locations, and the function verifies that the register
13425 numbers are themselves ascending. If CHECK_REGS is false, the register
13426 numbers are stored in the order they are found in the operands. */
13427 static int
13428 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13429 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13430 HOST_WIDE_INT *load_offset, bool check_regs)
13431 {
13432 int unsorted_regs[MAX_LDM_STM_OPS];
13433 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13434 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13435 int order[MAX_LDM_STM_OPS];
13436 int base_reg = -1;
13437 rtx base_reg_rtx = NULL;
13438 int i, stm_case;
13439
13440 /* Write back of base register is currently only supported for Thumb 1. */
13441 int base_writeback = TARGET_THUMB1;
13442
13443 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13444 easily extended if required. */
13445 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13446
13447 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13448
13449 /* Loop over the operands and check that the memory references are
13450 suitable (i.e. immediate offsets from the same base register). At
13451 the same time, extract the source register and the memory
13452 offsets. */
13453 for (i = 0; i < nops; i++)
13454 {
13455 rtx reg;
13456 rtx offset;
13457
13458 /* Convert a subreg of a mem into the mem itself. */
13459 if (GET_CODE (operands[nops + i]) == SUBREG)
13460 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13461
13462 gcc_assert (MEM_P (operands[nops + i]));
13463
13464 /* Don't reorder volatile memory references; it doesn't seem worth
13465 looking for the case where the order is ok anyway. */
13466 if (MEM_VOLATILE_P (operands[nops + i]))
13467 return 0;
13468
13469 offset = const0_rtx;
13470
13471 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13472 || (GET_CODE (reg) == SUBREG
13473 && REG_P (reg = SUBREG_REG (reg))))
13474 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13475 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13476 || (GET_CODE (reg) == SUBREG
13477 && REG_P (reg = SUBREG_REG (reg))))
13478 && (CONST_INT_P (offset
13479 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13480 {
13481 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13482 ? operands[i] : SUBREG_REG (operands[i]));
13483 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13484
13485 if (i == 0)
13486 {
13487 base_reg = REGNO (reg);
13488 base_reg_rtx = reg;
13489 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13490 return 0;
13491 }
13492 else if (base_reg != (int) REGNO (reg))
13493 /* Not addressed from the same base register. */
13494 return 0;
13495
13496 /* If it isn't an integer register, then we can't do this. */
13497 if (unsorted_regs[i] < 0
13498 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13499 /* The effects are unpredictable if the base register is
13500 both updated and stored. */
13501 || (base_writeback && unsorted_regs[i] == base_reg)
13502 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13503 || unsorted_regs[i] > 14)
13504 return 0;
13505
13506 unsorted_offsets[i] = INTVAL (offset);
13507 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13508 order[0] = i;
13509 }
13510 else
13511 /* Not a suitable memory address. */
13512 return 0;
13513 }
13514
13515 /* All the useful information has now been extracted from the
13516 operands into unsorted_regs and unsorted_offsets; additionally,
13517 order[0] has been set to the lowest offset in the list. Sort
13518 the offsets into order, verifying that they are adjacent, and
13519 check that the register numbers are ascending. */
13520 if (!compute_offset_order (nops, unsorted_offsets, order,
13521 check_regs ? unsorted_regs : NULL))
13522 return 0;
13523
13524 if (saved_order)
13525 memcpy (saved_order, order, sizeof order);
13526
13527 if (base)
13528 {
13529 *base = base_reg;
13530
13531 for (i = 0; i < nops; i++)
13532 {
13533 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13534 if (reg_rtxs)
13535 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13536 }
13537
13538 *load_offset = unsorted_offsets[order[0]];
13539 }
13540
13541 if (TARGET_THUMB1
13542 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13543 return 0;
13544
13545 if (unsorted_offsets[order[0]] == 0)
13546 stm_case = 1; /* stmia */
13547 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13548 stm_case = 2; /* stmib */
13549 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13550 stm_case = 3; /* stmda */
13551 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13552 stm_case = 4; /* stmdb */
13553 else
13554 return 0;
13555
13556 if (!multiple_operation_profitable_p (false, nops, 0))
13557 return 0;
13558
13559 return stm_case;
13560 }
13561 \f
13562 /* Routines for use in generating RTL. */
13563
13564 /* Generate a load-multiple instruction. COUNT is the number of loads in
13565 the instruction; REGS and MEMS are arrays containing the operands.
13566 BASEREG is the base register to be used in addressing the memory operands.
13567 WBACK_OFFSET is nonzero if the instruction should update the base
13568 register. */
13569
13570 static rtx
13571 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13572 HOST_WIDE_INT wback_offset)
13573 {
13574 int i = 0, j;
13575 rtx result;
13576
13577 if (!multiple_operation_profitable_p (false, count, 0))
13578 {
13579 rtx seq;
13580
13581 start_sequence ();
13582
13583 for (i = 0; i < count; i++)
13584 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13585
13586 if (wback_offset != 0)
13587 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13588
13589 seq = get_insns ();
13590 end_sequence ();
13591
13592 return seq;
13593 }
13594
13595 result = gen_rtx_PARALLEL (VOIDmode,
13596 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13597 if (wback_offset != 0)
13598 {
13599 XVECEXP (result, 0, 0)
13600 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13601 i = 1;
13602 count++;
13603 }
13604
13605 for (j = 0; i < count; i++, j++)
13606 XVECEXP (result, 0, i)
13607 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13608
13609 return result;
13610 }
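
/* Sketch of the two possible outputs (illustrative only): for COUNT == 3,
REGS == { 4, 5, 6 } and WBACK_OFFSET == 12, the profitable case returns

  (parallel
    [(set basereg (plus basereg (const_int 12)))
     (set (reg:SI 4) MEMS[0])
     (set (reg:SI 5) MEMS[1])
     (set (reg:SI 6) MEMS[2])])

whereas the unprofitable case instead returns an ordinary insn sequence of
three separate moves followed by the base-register update.  */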
13611
13612 /* Generate a store-multiple instruction. COUNT is the number of stores in
13613 the instruction; REGS and MEMS are arrays containing the operands.
13614 BASEREG is the base register to be used in addressing the memory operands.
13615 WBACK_OFFSET is nonzero if the instruction should update the base
13616 register. */
13617
13618 static rtx
13619 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13620 HOST_WIDE_INT wback_offset)
13621 {
13622 int i = 0, j;
13623 rtx result;
13624
13625 if (GET_CODE (basereg) == PLUS)
13626 basereg = XEXP (basereg, 0);
13627
13628 if (!multiple_operation_profitable_p (false, count, 0))
13629 {
13630 rtx seq;
13631
13632 start_sequence ();
13633
13634 for (i = 0; i < count; i++)
13635 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13636
13637 if (wback_offset != 0)
13638 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13639
13640 seq = get_insns ();
13641 end_sequence ();
13642
13643 return seq;
13644 }
13645
13646 result = gen_rtx_PARALLEL (VOIDmode,
13647 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13648 if (wback_offset != 0)
13649 {
13650 XVECEXP (result, 0, 0)
13651 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13652 i = 1;
13653 count++;
13654 }
13655
13656 for (j = 0; i < count; i++, j++)
13657 XVECEXP (result, 0, i)
13658 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13659
13660 return result;
13661 }
13662
13663 /* Generate either a load-multiple or a store-multiple instruction. This
13664 function can be used in situations where we can start with a single MEM
13665 rtx and adjust its address upwards.
13666 COUNT is the number of operations in the instruction, not counting a
13667 possible update of the base register. REGS is an array containing the
13668 register operands.
13669 BASEREG is the base register to be used in addressing the memory operands,
13670 which are constructed from BASEMEM.
13671 WRITE_BACK specifies whether the generated instruction should include an
13672 update of the base register.
13673 OFFSETP is used to pass an offset to and from this function; this offset
13674 is not used when constructing the address (instead BASEMEM should have an
13675 appropriate offset in its address), it is used only for setting
13676 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13677
13678 static rtx
13679 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13680 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13681 {
13682 rtx mems[MAX_LDM_STM_OPS];
13683 HOST_WIDE_INT offset = *offsetp;
13684 int i;
13685
13686 gcc_assert (count <= MAX_LDM_STM_OPS);
13687
13688 if (GET_CODE (basereg) == PLUS)
13689 basereg = XEXP (basereg, 0);
13690
13691 for (i = 0; i < count; i++)
13692 {
13693 rtx addr = plus_constant (Pmode, basereg, i * 4);
13694 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13695 offset += 4;
13696 }
13697
13698 if (write_back)
13699 *offsetp = offset;
13700
13701 if (is_load)
13702 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13703 write_back ? 4 * count : 0);
13704 else
13705 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13706 write_back ? 4 * count : 0);
13707 }
13708
13709 rtx
13710 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13711 rtx basemem, HOST_WIDE_INT *offsetp)
13712 {
13713 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13714 offsetp);
13715 }
13716
13717 rtx
13718 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13719 rtx basemem, HOST_WIDE_INT *offsetp)
13720 {
13721 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13722 offsetp);
13723 }
13724
13725 /* Called from a peephole2 expander to turn a sequence of loads into an
13726 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13727 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13728 is true if we can reorder the registers because they are used commutatively
13729 subsequently.
13730 Returns true iff we could generate a new instruction. */
13731
13732 bool
13733 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13734 {
13735 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13736 rtx mems[MAX_LDM_STM_OPS];
13737 int i, j, base_reg;
13738 rtx base_reg_rtx;
13739 HOST_WIDE_INT offset;
13740 int write_back = FALSE;
13741 int ldm_case;
13742 rtx addr;
13743
13744 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13745 &base_reg, &offset, !sort_regs);
13746
13747 if (ldm_case == 0)
13748 return false;
13749
13750 if (sort_regs)
13751 for (i = 0; i < nops - 1; i++)
13752 for (j = i + 1; j < nops; j++)
13753 if (regs[i] > regs[j])
13754 {
13755 int t = regs[i];
13756 regs[i] = regs[j];
13757 regs[j] = t;
13758 }
13759 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13760
13761 if (TARGET_THUMB1)
13762 {
13763 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13764 gcc_assert (ldm_case == 1 || ldm_case == 5);
13765 write_back = TRUE;
13766 }
13767
13768 if (ldm_case == 5)
13769 {
13770 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13771 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13772 offset = 0;
13773 if (!TARGET_THUMB1)
13774 base_reg_rtx = newbase;
13775 }
13776
13777 for (i = 0; i < nops; i++)
13778 {
13779 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13780 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13781 SImode, addr, 0);
13782 }
13783 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13784 write_back ? offset + i * 4 : 0));
13785 return true;
13786 }
13787
13788 /* Called from a peephole2 expander to turn a sequence of stores into an
13789 STM instruction. OPERANDS are the operands found by the peephole matcher;
13790 NOPS indicates how many separate stores we are trying to combine.
13791 Returns true iff we could generate a new instruction. */
13792
13793 bool
13794 gen_stm_seq (rtx *operands, int nops)
13795 {
13796 int i;
13797 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13798 rtx mems[MAX_LDM_STM_OPS];
13799 int base_reg;
13800 rtx base_reg_rtx;
13801 HOST_WIDE_INT offset;
13802 int write_back = FALSE;
13803 int stm_case;
13804 rtx addr;
13805 bool base_reg_dies;
13806
13807 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13808 mem_order, &base_reg, &offset, true);
13809
13810 if (stm_case == 0)
13811 return false;
13812
13813 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13814
13815 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13816 if (TARGET_THUMB1)
13817 {
13818 gcc_assert (base_reg_dies);
13819 write_back = TRUE;
13820 }
13821
13822 if (stm_case == 5)
13823 {
13824 gcc_assert (base_reg_dies);
13825 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13826 offset = 0;
13827 }
13828
13829 addr = plus_constant (Pmode, base_reg_rtx, offset);
13830
13831 for (i = 0; i < nops; i++)
13832 {
13833 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13834 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13835 SImode, addr, 0);
13836 }
13837 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13838 write_back ? offset + i * 4 : 0));
13839 return true;
13840 }
13841
13842 /* Called from a peephole2 expander to turn a sequence of stores that are
13843 preceded by constant loads into an STM instruction. OPERANDS are the
13844 operands found by the peephole matcher; NOPS indicates how many
13845 separate stores we are trying to combine; there are 2 * NOPS
13846 instructions in the peephole.
13847 Returns true iff we could generate a new instruction. */
13848
13849 bool
13850 gen_const_stm_seq (rtx *operands, int nops)
13851 {
13852 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13853 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13854 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13855 rtx mems[MAX_LDM_STM_OPS];
13856 int base_reg;
13857 rtx base_reg_rtx;
13858 HOST_WIDE_INT offset;
13859 int write_back = FALSE;
13860 int stm_case;
13861 rtx addr;
13862 bool base_reg_dies;
13863 int i, j;
13864 HARD_REG_SET allocated;
13865
13866 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13867 mem_order, &base_reg, &offset, false);
13868
13869 if (stm_case == 0)
13870 return false;
13871
13872 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13873
13874 /* If the same register is used more than once, try to find a free
13875 register. */
13876 CLEAR_HARD_REG_SET (allocated);
13877 for (i = 0; i < nops; i++)
13878 {
13879 for (j = i + 1; j < nops; j++)
13880 if (regs[i] == regs[j])
13881 {
13882 rtx t = peep2_find_free_register (0, nops * 2,
13883 TARGET_THUMB1 ? "l" : "r",
13884 SImode, &allocated);
13885 if (t == NULL_RTX)
13886 return false;
13887 reg_rtxs[i] = t;
13888 regs[i] = REGNO (t);
13889 }
13890 }
13891
13892 /* Compute an ordering that maps the register numbers to an ascending
13893 sequence. */
13894 reg_order[0] = 0;
13895 for (i = 0; i < nops; i++)
13896 if (regs[i] < regs[reg_order[0]])
13897 reg_order[0] = i;
13898
13899 for (i = 1; i < nops; i++)
13900 {
13901 int this_order = reg_order[i - 1];
13902 for (j = 0; j < nops; j++)
13903 if (regs[j] > regs[reg_order[i - 1]]
13904 && (this_order == reg_order[i - 1]
13905 || regs[j] < regs[this_order]))
13906 this_order = j;
13907 reg_order[i] = this_order;
13908 }
13909
13910 /* Ensure that registers that must be live after the instruction end
13911 up with the correct value. */
13912 for (i = 0; i < nops; i++)
13913 {
13914 int this_order = reg_order[i];
13915 if ((this_order != mem_order[i]
13916 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13917 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13918 return false;
13919 }
13920
13921 /* Load the constants. */
13922 for (i = 0; i < nops; i++)
13923 {
13924 rtx op = operands[2 * nops + mem_order[i]];
13925 sorted_regs[i] = regs[reg_order[i]];
13926 emit_move_insn (reg_rtxs[reg_order[i]], op);
13927 }
13928
13929 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13930
13931 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13932 if (TARGET_THUMB1)
13933 {
13934 gcc_assert (base_reg_dies);
13935 write_back = TRUE;
13936 }
13937
13938 if (stm_case == 5)
13939 {
13940 gcc_assert (base_reg_dies);
13941 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13942 offset = 0;
13943 }
13944
13945 addr = plus_constant (Pmode, base_reg_rtx, offset);
13946
13947 for (i = 0; i < nops; i++)
13948 {
13949 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13950 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13951 SImode, addr, 0);
13952 }
13953 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13954 write_back ? offset + i * 4 : 0));
13955 return true;
13956 }
13957
13958 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13959 unaligned copies on processors which support unaligned semantics for those
13960 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13961 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13962 An interleave factor of 1 (the minimum) will perform no interleaving.
13963 Load/store multiple are used for aligned addresses where possible. */
13964
13965 static void
13966 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13967 HOST_WIDE_INT length,
13968 unsigned int interleave_factor)
13969 {
13970 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13971 int *regnos = XALLOCAVEC (int, interleave_factor);
13972 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13973 HOST_WIDE_INT i, j;
13974 HOST_WIDE_INT remaining = length, words;
13975 rtx halfword_tmp = NULL, byte_tmp = NULL;
13976 rtx dst, src;
13977 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13978 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13979 HOST_WIDE_INT srcoffset, dstoffset;
13980 HOST_WIDE_INT src_autoinc, dst_autoinc;
13981 rtx mem, addr;
13982
13983 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
13984
13985 /* Use hard registers if we have aligned source or destination so we can use
13986 load/store multiple with contiguous registers. */
13987 if (dst_aligned || src_aligned)
13988 for (i = 0; i < interleave_factor; i++)
13989 regs[i] = gen_rtx_REG (SImode, i);
13990 else
13991 for (i = 0; i < interleave_factor; i++)
13992 regs[i] = gen_reg_rtx (SImode);
13993
13994 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13995 src = copy_addr_to_reg (XEXP (srcbase, 0));
13996
13997 srcoffset = dstoffset = 0;
13998
13999 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14000 For copying the last bytes we want to subtract this offset again. */
14001 src_autoinc = dst_autoinc = 0;
14002
14003 for (i = 0; i < interleave_factor; i++)
14004 regnos[i] = i;
14005
14006 /* Copy BLOCK_SIZE_BYTES chunks. */
14007
14008 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14009 {
14010 /* Load words. */
14011 if (src_aligned && interleave_factor > 1)
14012 {
14013 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14014 TRUE, srcbase, &srcoffset));
14015 src_autoinc += UNITS_PER_WORD * interleave_factor;
14016 }
14017 else
14018 {
14019 for (j = 0; j < interleave_factor; j++)
14020 {
14021 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14022 - src_autoinc));
14023 mem = adjust_automodify_address (srcbase, SImode, addr,
14024 srcoffset + j * UNITS_PER_WORD);
14025 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14026 }
14027 srcoffset += block_size_bytes;
14028 }
14029
14030 /* Store words. */
14031 if (dst_aligned && interleave_factor > 1)
14032 {
14033 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14034 TRUE, dstbase, &dstoffset));
14035 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14036 }
14037 else
14038 {
14039 for (j = 0; j < interleave_factor; j++)
14040 {
14041 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14042 - dst_autoinc));
14043 mem = adjust_automodify_address (dstbase, SImode, addr,
14044 dstoffset + j * UNITS_PER_WORD);
14045 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14046 }
14047 dstoffset += block_size_bytes;
14048 }
14049
14050 remaining -= block_size_bytes;
14051 }
14052
14053 /* Copy any whole words left (note these aren't interleaved with any
14054 subsequent halfword/byte load/stores in the interests of simplicity). */
14055
14056 words = remaining / UNITS_PER_WORD;
14057
14058 gcc_assert (words < interleave_factor);
14059
14060 if (src_aligned && words > 1)
14061 {
14062 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14063 &srcoffset));
14064 src_autoinc += UNITS_PER_WORD * words;
14065 }
14066 else
14067 {
14068 for (j = 0; j < words; j++)
14069 {
14070 addr = plus_constant (Pmode, src,
14071 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14072 mem = adjust_automodify_address (srcbase, SImode, addr,
14073 srcoffset + j * UNITS_PER_WORD);
14074 if (src_aligned)
14075 emit_move_insn (regs[j], mem);
14076 else
14077 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14078 }
14079 srcoffset += words * UNITS_PER_WORD;
14080 }
14081
14082 if (dst_aligned && words > 1)
14083 {
14084 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14085 &dstoffset));
14086 dst_autoinc += words * UNITS_PER_WORD;
14087 }
14088 else
14089 {
14090 for (j = 0; j < words; j++)
14091 {
14092 addr = plus_constant (Pmode, dst,
14093 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14094 mem = adjust_automodify_address (dstbase, SImode, addr,
14095 dstoffset + j * UNITS_PER_WORD);
14096 if (dst_aligned)
14097 emit_move_insn (mem, regs[j]);
14098 else
14099 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14100 }
14101 dstoffset += words * UNITS_PER_WORD;
14102 }
14103
14104 remaining -= words * UNITS_PER_WORD;
14105
14106 gcc_assert (remaining < 4);
14107
14108 /* Copy a halfword if necessary. */
14109
14110 if (remaining >= 2)
14111 {
14112 halfword_tmp = gen_reg_rtx (SImode);
14113
14114 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14115 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14116 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14117
14118 /* Either write out immediately, or delay until we've loaded the last
14119 byte, depending on interleave factor. */
14120 if (interleave_factor == 1)
14121 {
14122 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14123 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14124 emit_insn (gen_unaligned_storehi (mem,
14125 gen_lowpart (HImode, halfword_tmp)));
14126 halfword_tmp = NULL;
14127 dstoffset += 2;
14128 }
14129
14130 remaining -= 2;
14131 srcoffset += 2;
14132 }
14133
14134 gcc_assert (remaining < 2);
14135
14136 /* Copy last byte. */
14137
14138 if ((remaining & 1) != 0)
14139 {
14140 byte_tmp = gen_reg_rtx (SImode);
14141
14142 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14143 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14144 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14145
14146 if (interleave_factor == 1)
14147 {
14148 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14149 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14150 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14151 byte_tmp = NULL;
14152 dstoffset++;
14153 }
14154
14155 remaining--;
14156 srcoffset++;
14157 }
14158
14159 /* Store last halfword if we haven't done so already. */
14160
14161 if (halfword_tmp)
14162 {
14163 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14164 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14165 emit_insn (gen_unaligned_storehi (mem,
14166 gen_lowpart (HImode, halfword_tmp)));
14167 dstoffset += 2;
14168 }
14169
14170 /* Likewise for last byte. */
14171
14172 if (byte_tmp)
14173 {
14174 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14175 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14176 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14177 dstoffset++;
14178 }
14179
14180 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14181 }
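
/* A worked decomposition (illustrative only): LENGTH == 11 with
INTERLEAVE_FACTOR == 2 copies one 8-byte block (two word loads followed by two
word stores), leaves no further whole word (remaining == 3), then loads a
halfword and a byte; because the interleave factor is greater than 1 their
stores are deferred to the end, finishing with remaining == 0 and
srcoffset == dstoffset == 11.  */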
14182
14183 /* From mips_adjust_block_mem:
14184
14185 Helper function for doing a loop-based block operation on memory
14186 reference MEM. Each iteration of the loop will operate on LENGTH
14187 bytes of MEM.
14188
14189 Create a new base register for use within the loop and point it to
14190 the start of MEM. Create a new memory reference that uses this
14191 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14192
14193 static void
14194 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14195 rtx *loop_mem)
14196 {
14197 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14198
14199 /* Although the new mem does not refer to a known location,
14200 it does keep up to LENGTH bytes of alignment. */
14201 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14202 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14203 }
14204
14205 /* From mips_block_move_loop:
14206
14207 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14208 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14209 the memory regions do not overlap. */
14210
14211 static void
14212 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14213 unsigned int interleave_factor,
14214 HOST_WIDE_INT bytes_per_iter)
14215 {
14216 rtx src_reg, dest_reg, final_src, test;
14217 HOST_WIDE_INT leftover;
14218
14219 leftover = length % bytes_per_iter;
14220 length -= leftover;
14221
14222 /* Create registers and memory references for use within the loop. */
14223 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14224 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14225
14226 /* Calculate the value that SRC_REG should have after the last iteration of
14227 the loop. */
14228 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14229 0, 0, OPTAB_WIDEN);
14230
14231 /* Emit the start of the loop. */
14232 rtx_code_label *label = gen_label_rtx ();
14233 emit_label (label);
14234
14235 /* Emit the loop body. */
14236 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14237 interleave_factor);
14238
14239 /* Move on to the next block. */
14240 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14241 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14242
14243 /* Emit the loop condition. */
14244 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14245 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14246
14247 /* Mop up any left-over bytes. */
14248 if (leftover)
14249 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14250 }
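/* A rough worked example: LENGTH == 37 with BYTES_PER_ITER == 16 gives
   LEFTOVER == 5, so the loop above copies 32 bytes in two iterations and the
   final call to arm_block_move_unaligned_straight mops up the last 5.  */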
14251
14252 /* Emit a block move when either the source or destination is unaligned (not
14253 aligned to a four-byte boundary). This may need further tuning depending on
14254 core type, optimize_size setting, etc. */
14255
14256 static int
14257 arm_movmemqi_unaligned (rtx *operands)
14258 {
14259 HOST_WIDE_INT length = INTVAL (operands[2]);
14260
14261 if (optimize_size)
14262 {
14263 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14264 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14265 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14266 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14267 or dst_aligned though: allow more interleaving in those cases since the
14268 resulting code can be smaller. */
14269 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14270 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14271
14272 if (length > 12)
14273 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14274 interleave_factor, bytes_per_iter);
14275 else
14276 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14277 interleave_factor);
14278 }
14279 else
14280 {
14281 /* Note that the loop created by arm_block_move_unaligned_loop may be
14282 subject to loop unrolling, which makes tuning this condition a little
14283 redundant. */
14284 if (length > 32)
14285 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14286 else
14287 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14288 }
14289
14290 return 1;
14291 }
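/* So, for instance, a 40-byte copy compiled without -Os takes the loop path
   (40 > 32) with interleave factor 4 and 16 bytes per iteration, whereas the
   same copy under -Os with neither operand word-aligned uses interleave
   factor 1 and only 4 bytes per iteration.  */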
14292
14293 int
14294 arm_gen_movmemqi (rtx *operands)
14295 {
14296 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14297 HOST_WIDE_INT srcoffset, dstoffset;
14298 rtx src, dst, srcbase, dstbase;
14299 rtx part_bytes_reg = NULL;
14300 rtx mem;
14301
14302 if (!CONST_INT_P (operands[2])
14303 || !CONST_INT_P (operands[3])
14304 || INTVAL (operands[2]) > 64)
14305 return 0;
14306
14307 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14308 return arm_movmemqi_unaligned (operands);
14309
14310 if (INTVAL (operands[3]) & 3)
14311 return 0;
14312
14313 dstbase = operands[0];
14314 srcbase = operands[1];
14315
14316 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14317 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14318
14319 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14320 out_words_to_go = INTVAL (operands[2]) / 4;
14321 last_bytes = INTVAL (operands[2]) & 3;
14322 dstoffset = srcoffset = 0;
14323
14324 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14325 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14326
14327 while (in_words_to_go >= 2)
14328 {
14329 if (in_words_to_go > 4)
14330 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14331 TRUE, srcbase, &srcoffset));
14332 else
14333 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14334 src, FALSE, srcbase,
14335 &srcoffset));
14336
14337 if (out_words_to_go)
14338 {
14339 if (out_words_to_go > 4)
14340 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14341 TRUE, dstbase, &dstoffset));
14342 else if (out_words_to_go != 1)
14343 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14344 out_words_to_go, dst,
14345 (last_bytes == 0
14346 ? FALSE : TRUE),
14347 dstbase, &dstoffset));
14348 else
14349 {
14350 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14351 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14352 if (last_bytes != 0)
14353 {
14354 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14355 dstoffset += 4;
14356 }
14357 }
14358 }
14359
14360 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14361 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14362 }
14363
14364 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14365 if (out_words_to_go)
14366 {
14367 rtx sreg;
14368
14369 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14370 sreg = copy_to_reg (mem);
14371
14372 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14373 emit_move_insn (mem, sreg);
14374 in_words_to_go--;
14375
14376 gcc_assert (!in_words_to_go); /* Sanity check */
14377 }
14378
14379 if (in_words_to_go)
14380 {
14381 gcc_assert (in_words_to_go > 0);
14382
14383 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14384 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14385 }
14386
14387 gcc_assert (!last_bytes || part_bytes_reg);
14388
14389 if (BYTES_BIG_ENDIAN && last_bytes)
14390 {
14391 rtx tmp = gen_reg_rtx (SImode);
14392
14393 /* The bytes we want are in the top end of the word. */
14394 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14395 GEN_INT (8 * (4 - last_bytes))));
14396 part_bytes_reg = tmp;
14397
14398 while (last_bytes)
14399 {
14400 mem = adjust_automodify_address (dstbase, QImode,
14401 plus_constant (Pmode, dst,
14402 last_bytes - 1),
14403 dstoffset + last_bytes - 1);
14404 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14405
14406 if (--last_bytes)
14407 {
14408 tmp = gen_reg_rtx (SImode);
14409 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14410 part_bytes_reg = tmp;
14411 }
14412 }
14413
14414 }
14415 else
14416 {
14417 if (last_bytes > 1)
14418 {
14419 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14420 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14421 last_bytes -= 2;
14422 if (last_bytes)
14423 {
14424 rtx tmp = gen_reg_rtx (SImode);
14425 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14426 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14427 part_bytes_reg = tmp;
14428 dstoffset += 2;
14429 }
14430 }
14431
14432 if (last_bytes)
14433 {
14434 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14435 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14436 }
14437 }
14438
14439 return 1;
14440 }
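/* A rough trace for a word-aligned 11-byte copy: IN_WORDS_TO_GO == 3,
   OUT_WORDS_TO_GO == 2 and LAST_BYTES == 3, so a single LDM loads three
   words, an STM with write-back stores the first two, the register holding
   the third word serves as PART_BYTES_REG, and (on a little-endian target)
   the tail is finished with one halfword store plus one byte store.  */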
14441
14442 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14443 by mode size. */
14444 inline static rtx
14445 next_consecutive_mem (rtx mem)
14446 {
14447 machine_mode mode = GET_MODE (mem);
14448 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14449 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14450
14451 return adjust_automodify_address (mem, mode, addr, offset);
14452 }
14453
14454 /* Copy using LDRD/STRD instructions whenever possible.
14455 Returns true upon success. */
14456 bool
14457 gen_movmem_ldrd_strd (rtx *operands)
14458 {
14459 unsigned HOST_WIDE_INT len;
14460 HOST_WIDE_INT align;
14461 rtx src, dst, base;
14462 rtx reg0;
14463 bool src_aligned, dst_aligned;
14464 bool src_volatile, dst_volatile;
14465
14466 gcc_assert (CONST_INT_P (operands[2]));
14467 gcc_assert (CONST_INT_P (operands[3]));
14468
14469 len = UINTVAL (operands[2]);
14470 if (len > 64)
14471 return false;
14472
14473 /* Maximum alignment we can assume for both src and dst buffers. */
14474 align = INTVAL (operands[3]);
14475
14476 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14477 return false;
14478
14479 /* Place src and dst addresses in registers
14480 and update the corresponding mem rtx. */
14481 dst = operands[0];
14482 dst_volatile = MEM_VOLATILE_P (dst);
14483 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14484 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14485 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14486
14487 src = operands[1];
14488 src_volatile = MEM_VOLATILE_P (src);
14489 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14490 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14491 src = adjust_automodify_address (src, VOIDmode, base, 0);
14492
14493 if (!unaligned_access && !(src_aligned && dst_aligned))
14494 return false;
14495
14496 if (src_volatile || dst_volatile)
14497 return false;
14498
14499 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14500 if (!(dst_aligned || src_aligned))
14501 return arm_gen_movmemqi (operands);
14502
14503 /* If either src or dst is unaligned we'll be accessing it as pairs
14504 of unaligned SImode accesses. Otherwise we can generate DImode
14505 ldrd/strd instructions. */
14506 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14507 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14508
14509 while (len >= 8)
14510 {
14511 len -= 8;
14512 reg0 = gen_reg_rtx (DImode);
14513 rtx low_reg = NULL_RTX;
14514 rtx hi_reg = NULL_RTX;
14515
14516 if (!src_aligned || !dst_aligned)
14517 {
14518 low_reg = gen_lowpart (SImode, reg0);
14519 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14520 }
14521 if (src_aligned)
14522 emit_move_insn (reg0, src);
14523 else
14524 {
14525 emit_insn (gen_unaligned_loadsi (low_reg, src));
14526 src = next_consecutive_mem (src);
14527 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14528 }
14529
14530 if (dst_aligned)
14531 emit_move_insn (dst, reg0);
14532 else
14533 {
14534 emit_insn (gen_unaligned_storesi (dst, low_reg));
14535 dst = next_consecutive_mem (dst);
14536 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14537 }
14538
14539 src = next_consecutive_mem (src);
14540 dst = next_consecutive_mem (dst);
14541 }
14542
14543 gcc_assert (len < 8);
14544 if (len >= 4)
14545 {
14546 /* More than a word but less than a double-word to copy. Copy a word. */
14547 reg0 = gen_reg_rtx (SImode);
14548 src = adjust_address (src, SImode, 0);
14549 dst = adjust_address (dst, SImode, 0);
14550 if (src_aligned)
14551 emit_move_insn (reg0, src);
14552 else
14553 emit_insn (gen_unaligned_loadsi (reg0, src));
14554
14555 if (dst_aligned)
14556 emit_move_insn (dst, reg0);
14557 else
14558 emit_insn (gen_unaligned_storesi (dst, reg0));
14559
14560 src = next_consecutive_mem (src);
14561 dst = next_consecutive_mem (dst);
14562 len -= 4;
14563 }
14564
14565 if (len == 0)
14566 return true;
14567
14568 /* Copy the remaining bytes. */
14569 if (len >= 2)
14570 {
14571 dst = adjust_address (dst, HImode, 0);
14572 src = adjust_address (src, HImode, 0);
14573 reg0 = gen_reg_rtx (SImode);
14574 if (src_aligned)
14575 emit_insn (gen_zero_extendhisi2 (reg0, src));
14576 else
14577 emit_insn (gen_unaligned_loadhiu (reg0, src));
14578
14579 if (dst_aligned)
14580 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14581 else
14582 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14583
14584 src = next_consecutive_mem (src);
14585 dst = next_consecutive_mem (dst);
14586 if (len == 2)
14587 return true;
14588 }
14589
14590 dst = adjust_address (dst, QImode, 0);
14591 src = adjust_address (src, QImode, 0);
14592 reg0 = gen_reg_rtx (QImode);
14593 emit_move_insn (reg0, src);
14594 emit_move_insn (dst, reg0);
14595 return true;
14596 }
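/* A rough trace for LEN == 14 with both operands word-aligned and neither
   volatile: one DImode move (normally an LDRD/STRD pair) copies 8 bytes, the
   word path copies 4 more, and the halfword path copies the final 2 and
   returns, so the trailing byte copy is never reached.  */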
14597
14598 /* Select a dominance comparison mode if possible for a test of the general
14599 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14600 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14601 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14602 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14603 In all cases OP will be either EQ or NE, but we don't need to know which
14604 here. If we are unable to support a dominance comparison we return
14605 CC mode. This will then fail to match for the RTL expressions that
14606 generate this call. */
14607 machine_mode
14608 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14609 {
14610 enum rtx_code cond1, cond2;
14611 int swapped = 0;
14612
14613 /* Currently we will probably get the wrong result if the individual
14614 comparisons are not simple. This also ensures that it is safe to
14615 reverse a comparison if necessary. */
14616 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14617 != CCmode)
14618 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14619 != CCmode))
14620 return CCmode;
14621
14622 /* The if_then_else variant of this tests the second condition if the
14623 first passes, but is true if the first fails. Reverse the first
14624 condition to get a true "inclusive-or" expression. */
14625 if (cond_or == DOM_CC_NX_OR_Y)
14626 cond1 = reverse_condition (cond1);
14627
14628 /* If the comparisons are not equal, and one doesn't dominate the other,
14629 then we can't do this. */
14630 if (cond1 != cond2
14631 && !comparison_dominates_p (cond1, cond2)
14632 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14633 return CCmode;
14634
14635 if (swapped)
14636 std::swap (cond1, cond2);
14637
14638 switch (cond1)
14639 {
14640 case EQ:
14641 if (cond_or == DOM_CC_X_AND_Y)
14642 return CC_DEQmode;
14643
14644 switch (cond2)
14645 {
14646 case EQ: return CC_DEQmode;
14647 case LE: return CC_DLEmode;
14648 case LEU: return CC_DLEUmode;
14649 case GE: return CC_DGEmode;
14650 case GEU: return CC_DGEUmode;
14651 default: gcc_unreachable ();
14652 }
14653
14654 case LT:
14655 if (cond_or == DOM_CC_X_AND_Y)
14656 return CC_DLTmode;
14657
14658 switch (cond2)
14659 {
14660 case LT:
14661 return CC_DLTmode;
14662 case LE:
14663 return CC_DLEmode;
14664 case NE:
14665 return CC_DNEmode;
14666 default:
14667 gcc_unreachable ();
14668 }
14669
14670 case GT:
14671 if (cond_or == DOM_CC_X_AND_Y)
14672 return CC_DGTmode;
14673
14674 switch (cond2)
14675 {
14676 case GT:
14677 return CC_DGTmode;
14678 case GE:
14679 return CC_DGEmode;
14680 case NE:
14681 return CC_DNEmode;
14682 default:
14683 gcc_unreachable ();
14684 }
14685
14686 case LTU:
14687 if (cond_or == DOM_CC_X_AND_Y)
14688 return CC_DLTUmode;
14689
14690 switch (cond2)
14691 {
14692 case LTU:
14693 return CC_DLTUmode;
14694 case LEU:
14695 return CC_DLEUmode;
14696 case NE:
14697 return CC_DNEmode;
14698 default:
14699 gcc_unreachable ();
14700 }
14701
14702 case GTU:
14703 if (cond_or == DOM_CC_X_AND_Y)
14704 return CC_DGTUmode;
14705
14706 switch (cond2)
14707 {
14708 case GTU:
14709 return CC_DGTUmode;
14710 case GEU:
14711 return CC_DGEUmode;
14712 case NE:
14713 return CC_DNEmode;
14714 default:
14715 gcc_unreachable ();
14716 }
14717
14718 /* The remaining cases only occur when both comparisons are the
14719 same. */
14720 case NE:
14721 gcc_assert (cond1 == cond2);
14722 return CC_DNEmode;
14723
14724 case LE:
14725 gcc_assert (cond1 == cond2);
14726 return CC_DLEmode;
14727
14728 case GE:
14729 gcc_assert (cond1 == cond2);
14730 return CC_DGEmode;
14731
14732 case LEU:
14733 gcc_assert (cond1 == cond2);
14734 return CC_DLEUmode;
14735
14736 case GEU:
14737 gcc_assert (cond1 == cond2);
14738 return CC_DGEUmode;
14739
14740 default:
14741 gcc_unreachable ();
14742 }
14743 }
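/* For example, assuming both operands are simple register comparisons (so
   arm_select_cc_mode yields CCmode for each), (EQ a b) || (LE c d) with
   COND_OR == DOM_CC_X_OR_Y returns CC_DLEmode, since EQ dominates LE
   (equality implies less-than-or-equal).  If neither condition dominated
   the other we would return CCmode and the combined pattern would simply
   fail to match.  */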
14744
14745 machine_mode
14746 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14747 {
14748 /* All floating point compares return CCFP if it is an equality
14749 comparison, and CCFPE otherwise. */
14750 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14751 {
14752 switch (op)
14753 {
14754 case EQ:
14755 case NE:
14756 case UNORDERED:
14757 case ORDERED:
14758 case UNLT:
14759 case UNLE:
14760 case UNGT:
14761 case UNGE:
14762 case UNEQ:
14763 case LTGT:
14764 return CCFPmode;
14765
14766 case LT:
14767 case LE:
14768 case GT:
14769 case GE:
14770 return CCFPEmode;
14771
14772 default:
14773 gcc_unreachable ();
14774 }
14775 }
14776
14777 /* A compare with a shifted operand. Because of canonicalization, the
14778 comparison will have to be swapped when we emit the assembler. */
14779 if (GET_MODE (y) == SImode
14780 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14781 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14782 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14783 || GET_CODE (x) == ROTATERT))
14784 return CC_SWPmode;
14785
14786 /* This operation is performed swapped, but since we only rely on the Z
14787 flag we don't need an additional mode. */
14788 if (GET_MODE (y) == SImode
14789 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14790 && GET_CODE (x) == NEG
14791 && (op == EQ || op == NE))
14792 return CC_Zmode;
14793
14794 /* This is a special case that is used by combine to allow a
14795 comparison of a shifted byte load to be split into a zero-extend
14796 followed by a comparison of the shifted integer (only valid for
14797 equalities and unsigned inequalities). */
14798 if (GET_MODE (x) == SImode
14799 && GET_CODE (x) == ASHIFT
14800 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14801 && GET_CODE (XEXP (x, 0)) == SUBREG
14802 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14803 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14804 && (op == EQ || op == NE
14805 || op == GEU || op == GTU || op == LTU || op == LEU)
14806 && CONST_INT_P (y))
14807 return CC_Zmode;
14808
14809 /* A construct for a conditional compare, if the false arm contains
14810 0, then both conditions must be true, otherwise either condition
14811 must be true. Not all conditions are possible, so CCmode is
14812 returned if it can't be done. */
14813 if (GET_CODE (x) == IF_THEN_ELSE
14814 && (XEXP (x, 2) == const0_rtx
14815 || XEXP (x, 2) == const1_rtx)
14816 && COMPARISON_P (XEXP (x, 0))
14817 && COMPARISON_P (XEXP (x, 1)))
14818 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14819 INTVAL (XEXP (x, 2)));
14820
14821 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14822 if (GET_CODE (x) == AND
14823 && (op == EQ || op == NE)
14824 && COMPARISON_P (XEXP (x, 0))
14825 && COMPARISON_P (XEXP (x, 1)))
14826 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14827 DOM_CC_X_AND_Y);
14828
14829 if (GET_CODE (x) == IOR
14830 && (op == EQ || op == NE)
14831 && COMPARISON_P (XEXP (x, 0))
14832 && COMPARISON_P (XEXP (x, 1)))
14833 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14834 DOM_CC_X_OR_Y);
14835
14836 /* An operation (on Thumb) where we want to test for a single bit.
14837 This is done by shifting that bit up into the top bit of a
14838 scratch register; we can then branch on the sign bit. */
14839 if (TARGET_THUMB1
14840 && GET_MODE (x) == SImode
14841 && (op == EQ || op == NE)
14842 && GET_CODE (x) == ZERO_EXTRACT
14843 && XEXP (x, 1) == const1_rtx)
14844 return CC_Nmode;
14845
14846 /* An operation that sets the condition codes as a side-effect, the
14847 V flag is not set correctly, so we can only use comparisons where
14848 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14849 instead.) */
14850 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14851 if (GET_MODE (x) == SImode
14852 && y == const0_rtx
14853 && (op == EQ || op == NE || op == LT || op == GE)
14854 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14855 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14856 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14857 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14858 || GET_CODE (x) == LSHIFTRT
14859 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14860 || GET_CODE (x) == ROTATERT
14861 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14862 return CC_NOOVmode;
14863
14864 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14865 return CC_Zmode;
14866
14867 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14868 && GET_CODE (x) == PLUS
14869 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14870 return CC_Cmode;
14871
14872 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14873 {
14874 switch (op)
14875 {
14876 case EQ:
14877 case NE:
14878 /* A DImode comparison against zero can be implemented by
14879 or'ing the two halves together. */
14880 if (y == const0_rtx)
14881 return CC_Zmode;
14882
14883 /* We can do an equality test in three Thumb instructions. */
14884 if (!TARGET_32BIT)
14885 return CC_Zmode;
14886
14887 /* FALLTHROUGH */
14888
14889 case LTU:
14890 case LEU:
14891 case GTU:
14892 case GEU:
14893 /* DImode unsigned comparisons can be implemented by cmp +
14894 cmpeq without a scratch register. Not worth doing in
14895 Thumb-2. */
14896 if (TARGET_32BIT)
14897 return CC_CZmode;
14898
14899 /* FALLTHROUGH */
14900
14901 case LT:
14902 case LE:
14903 case GT:
14904 case GE:
14905 /* DImode signed and unsigned comparisons can be implemented
14906 by cmp + sbcs with a scratch register, but that does not
14907 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14908 gcc_assert (op != EQ && op != NE);
14909 return CC_NCVmode;
14910
14911 default:
14912 gcc_unreachable ();
14913 }
14914 }
14915
14916 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14917 return GET_MODE (x);
14918
14919 return CCmode;
14920 }
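/* As a concrete case, a comparison such as (LTU (plus a b) a) -- the usual
   shape of an unsigned add-overflow test -- selects CC_Cmode above, so that
   only the carry flag is relied upon.  */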
14921
14922 /* X and Y are two things to compare using CODE. Emit the compare insn and
14923 return the rtx for register 0 in the proper mode. FP means this is a
14924 floating point compare: I don't think that it is needed on the arm. */
14925 rtx
14926 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14927 {
14928 machine_mode mode;
14929 rtx cc_reg;
14930 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14931
14932 /* We might have X as a constant, Y as a register because of the predicates
14933 used for cmpdi. If so, force X to a register here. */
14934 if (dimode_comparison && !REG_P (x))
14935 x = force_reg (DImode, x);
14936
14937 mode = SELECT_CC_MODE (code, x, y);
14938 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14939
14940 if (dimode_comparison
14941 && mode != CC_CZmode)
14942 {
14943 rtx clobber, set;
14944
14945 /* To compare two non-zero values for equality, XOR them and
14946 then compare against zero. Not used for ARM mode; there
14947 CC_CZmode is cheaper. */
14948 if (mode == CC_Zmode && y != const0_rtx)
14949 {
14950 gcc_assert (!reload_completed);
14951 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14952 y = const0_rtx;
14953 }
14954
14955 /* A scratch register is required. */
14956 if (reload_completed)
14957 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14958 else
14959 scratch = gen_rtx_SCRATCH (SImode);
14960
14961 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14962 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14963 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14964 }
14965 else
14966 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14967
14968 return cc_reg;
14969 }
14970
14971 /* Generate a sequence of insns that will generate the correct return
14972 address mask depending on the physical architecture that the program
14973 is running on. */
14974 rtx
14975 arm_gen_return_addr_mask (void)
14976 {
14977 rtx reg = gen_reg_rtx (Pmode);
14978
14979 emit_insn (gen_return_addr_mask (reg));
14980 return reg;
14981 }
14982
14983 void
14984 arm_reload_in_hi (rtx *operands)
14985 {
14986 rtx ref = operands[1];
14987 rtx base, scratch;
14988 HOST_WIDE_INT offset = 0;
14989
14990 if (GET_CODE (ref) == SUBREG)
14991 {
14992 offset = SUBREG_BYTE (ref);
14993 ref = SUBREG_REG (ref);
14994 }
14995
14996 if (REG_P (ref))
14997 {
14998 /* We have a pseudo which has been spilt onto the stack; there
14999 are two cases here: the first where there is a simple
15000 stack-slot replacement and a second where the stack-slot is
15001 out of range, or is used as a subreg. */
15002 if (reg_equiv_mem (REGNO (ref)))
15003 {
15004 ref = reg_equiv_mem (REGNO (ref));
15005 base = find_replacement (&XEXP (ref, 0));
15006 }
15007 else
15008 /* The slot is out of range, or was dressed up in a SUBREG. */
15009 base = reg_equiv_address (REGNO (ref));
15010
15011 /* PR 62554: If there is no equivalent memory location then just move
15012 the value as an SImode register move. This happens when the target
15013 architecture variant does not have an HImode register move. */
15014 if (base == NULL)
15015 {
15016 gcc_assert (REG_P (operands[0]));
15017 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15018 gen_rtx_SUBREG (SImode, ref, 0)));
15019 return;
15020 }
15021 }
15022 else
15023 base = find_replacement (&XEXP (ref, 0));
15024
15025 /* Handle the case where the address is too complex to be offset by 1. */
15026 if (GET_CODE (base) == MINUS
15027 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15028 {
15029 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15030
15031 emit_set_insn (base_plus, base);
15032 base = base_plus;
15033 }
15034 else if (GET_CODE (base) == PLUS)
15035 {
15036 /* The addend must be CONST_INT, or we would have dealt with it above. */
15037 HOST_WIDE_INT hi, lo;
15038
15039 offset += INTVAL (XEXP (base, 1));
15040 base = XEXP (base, 0);
15041
15042 /* Rework the address into a legal sequence of insns. */
15043 /* Valid range for lo is -4095 -> 4095 */
15044 lo = (offset >= 0
15045 ? (offset & 0xfff)
15046 : -((-offset) & 0xfff));
15047
15048 /* Corner case, if lo is the max offset then we would be out of range
15049 once we have added the additional 1 below, so bump the msb into the
15050 pre-loading insn(s). */
15051 if (lo == 4095)
15052 lo &= 0x7ff;
15053
15054 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15055 ^ (HOST_WIDE_INT) 0x80000000)
15056 - (HOST_WIDE_INT) 0x80000000);
15057
15058 gcc_assert (hi + lo == offset);
15059
15060 if (hi != 0)
15061 {
15062 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15063
15064 /* Get the base address; addsi3 knows how to handle constants
15065 that require more than one insn. */
15066 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15067 base = base_plus;
15068 offset = lo;
15069 }
15070 }
15071
15072 /* Operands[2] may overlap operands[0] (though it won't overlap
15073 operands[1]), that's why we asked for a DImode reg -- so we can
15074 use the bit that does not overlap. */
15075 if (REGNO (operands[2]) == REGNO (operands[0]))
15076 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15077 else
15078 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15079
15080 emit_insn (gen_zero_extendqisi2 (scratch,
15081 gen_rtx_MEM (QImode,
15082 plus_constant (Pmode, base,
15083 offset))));
15084 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15085 gen_rtx_MEM (QImode,
15086 plus_constant (Pmode, base,
15087 offset + 1))));
15088 if (!BYTES_BIG_ENDIAN)
15089 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15090 gen_rtx_IOR (SImode,
15091 gen_rtx_ASHIFT
15092 (SImode,
15093 gen_rtx_SUBREG (SImode, operands[0], 0),
15094 GEN_INT (8)),
15095 scratch));
15096 else
15097 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15098 gen_rtx_IOR (SImode,
15099 gen_rtx_ASHIFT (SImode, scratch,
15100 GEN_INT (8)),
15101 gen_rtx_SUBREG (SImode, operands[0], 0)));
15102 }
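/* On a little-endian target the sequence emitted above is roughly
   equivalent to (register names purely illustrative):

	ldrb	r_scratch, [r_base, #off]
	ldrb	r_dest, [r_base, #off + 1]
	orr	r_dest, r_scratch, r_dest, lsl #8

   i.e. two zero-extending byte loads recombined into the halfword.  */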
15103
15104 /* Handle storing a half-word to memory during reload by synthesizing it as two
15105 byte stores. Take care not to clobber the input values until after we
15106 have moved them somewhere safe. This code assumes that if the DImode
15107 scratch in operands[2] overlaps either the input value or output address
15108 in some way, then that value must die in this insn (we absolutely need
15109 two scratch registers for some corner cases). */
15110 void
15111 arm_reload_out_hi (rtx *operands)
15112 {
15113 rtx ref = operands[0];
15114 rtx outval = operands[1];
15115 rtx base, scratch;
15116 HOST_WIDE_INT offset = 0;
15117
15118 if (GET_CODE (ref) == SUBREG)
15119 {
15120 offset = SUBREG_BYTE (ref);
15121 ref = SUBREG_REG (ref);
15122 }
15123
15124 if (REG_P (ref))
15125 {
15126 /* We have a pseudo which has been spilt onto the stack; there
15127 are two cases here: the first where there is a simple
15128 stack-slot replacement and a second where the stack-slot is
15129 out of range, or is used as a subreg. */
15130 if (reg_equiv_mem (REGNO (ref)))
15131 {
15132 ref = reg_equiv_mem (REGNO (ref));
15133 base = find_replacement (&XEXP (ref, 0));
15134 }
15135 else
15136 /* The slot is out of range, or was dressed up in a SUBREG. */
15137 base = reg_equiv_address (REGNO (ref));
15138
15139 /* PR 62254: If there is no equivalent memory location then just move
15140 the value as an SImode register move. This happens when the target
15141 architecture variant does not have an HImode register move. */
15142 if (base == NULL)
15143 {
15144 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15145
15146 if (REG_P (outval))
15147 {
15148 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15149 gen_rtx_SUBREG (SImode, outval, 0)));
15150 }
15151 else /* SUBREG_P (outval) */
15152 {
15153 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15154 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15155 SUBREG_REG (outval)));
15156 else
15157 /* FIXME: Handle other cases ? */
15158 gcc_unreachable ();
15159 }
15160 return;
15161 }
15162 }
15163 else
15164 base = find_replacement (&XEXP (ref, 0));
15165
15166 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15167
15168 /* Handle the case where the address is too complex to be offset by 1. */
15169 if (GET_CODE (base) == MINUS
15170 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15171 {
15172 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15173
15174 /* Be careful not to destroy OUTVAL. */
15175 if (reg_overlap_mentioned_p (base_plus, outval))
15176 {
15177 /* Updating base_plus might destroy outval, see if we can
15178 swap the scratch and base_plus. */
15179 if (!reg_overlap_mentioned_p (scratch, outval))
15180 std::swap (scratch, base_plus);
15181 else
15182 {
15183 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15184
15185 /* Be conservative and copy OUTVAL into the scratch now,
15186 this should only be necessary if outval is a subreg
15187 of something larger than a word. */
15188 /* XXX Might this clobber base? I can't see how it can,
15189 since scratch is known to overlap with OUTVAL, and
15190 must be wider than a word. */
15191 emit_insn (gen_movhi (scratch_hi, outval));
15192 outval = scratch_hi;
15193 }
15194 }
15195
15196 emit_set_insn (base_plus, base);
15197 base = base_plus;
15198 }
15199 else if (GET_CODE (base) == PLUS)
15200 {
15201 /* The addend must be CONST_INT, or we would have dealt with it above. */
15202 HOST_WIDE_INT hi, lo;
15203
15204 offset += INTVAL (XEXP (base, 1));
15205 base = XEXP (base, 0);
15206
15207 /* Rework the address into a legal sequence of insns. */
15208 /* Valid range for lo is -4095 -> 4095 */
15209 lo = (offset >= 0
15210 ? (offset & 0xfff)
15211 : -((-offset) & 0xfff));
15212
15213 /* Corner case, if lo is the max offset then we would be out of range
15214 once we have added the additional 1 below, so bump the msb into the
15215 pre-loading insn(s). */
15216 if (lo == 4095)
15217 lo &= 0x7ff;
15218
15219 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15220 ^ (HOST_WIDE_INT) 0x80000000)
15221 - (HOST_WIDE_INT) 0x80000000);
15222
15223 gcc_assert (hi + lo == offset);
15224
15225 if (hi != 0)
15226 {
15227 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15228
15229 /* Be careful not to destroy OUTVAL. */
15230 if (reg_overlap_mentioned_p (base_plus, outval))
15231 {
15232 /* Updating base_plus might destroy outval, see if we
15233 can swap the scratch and base_plus. */
15234 if (!reg_overlap_mentioned_p (scratch, outval))
15235 std::swap (scratch, base_plus);
15236 else
15237 {
15238 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15239
15240 /* Be conservative and copy outval into scratch now,
15241 this should only be necessary if outval is a
15242 subreg of something larger than a word. */
15243 /* XXX Might this clobber base? I can't see how it
15244 can, since scratch is known to overlap with
15245 outval. */
15246 emit_insn (gen_movhi (scratch_hi, outval));
15247 outval = scratch_hi;
15248 }
15249 }
15250
15251 /* Get the base address; addsi3 knows how to handle constants
15252 that require more than one insn. */
15253 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15254 base = base_plus;
15255 offset = lo;
15256 }
15257 }
15258
15259 if (BYTES_BIG_ENDIAN)
15260 {
15261 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15262 plus_constant (Pmode, base,
15263 offset + 1)),
15264 gen_lowpart (QImode, outval)));
15265 emit_insn (gen_lshrsi3 (scratch,
15266 gen_rtx_SUBREG (SImode, outval, 0),
15267 GEN_INT (8)));
15268 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15269 offset)),
15270 gen_lowpart (QImode, scratch)));
15271 }
15272 else
15273 {
15274 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15275 offset)),
15276 gen_lowpart (QImode, outval)));
15277 emit_insn (gen_lshrsi3 (scratch,
15278 gen_rtx_SUBREG (SImode, outval, 0),
15279 GEN_INT (8)));
15280 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15281 plus_constant (Pmode, base,
15282 offset + 1)),
15283 gen_lowpart (QImode, scratch)));
15284 }
15285 }
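/* The little-endian arm of the code above is roughly equivalent to
   (register names illustrative):

	strb	r_val, [r_base, #off]
	lsr	r_scratch, r_val, #8
	strb	r_scratch, [r_base, #off + 1]

   with the big-endian arm simply swapping which byte goes where.  */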
15286
15287 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15288 (padded to the size of a word) should be passed in a register. */
15289
15290 static bool
15291 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15292 {
15293 if (TARGET_AAPCS_BASED)
15294 return must_pass_in_stack_var_size (mode, type);
15295 else
15296 return must_pass_in_stack_var_size_or_pad (mode, type);
15297 }
15298
15299
15300 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15301 byte of a stack argument has useful data. For legacy APCS ABIs we use
15302 the default. For AAPCS based ABIs small aggregate types are placed
15303 in the lowest memory address. */
15304
15305 static pad_direction
15306 arm_function_arg_padding (machine_mode mode, const_tree type)
15307 {
15308 if (!TARGET_AAPCS_BASED)
15309 return default_function_arg_padding (mode, type);
15310
15311 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15312 return PAD_DOWNWARD;
15313
15314 return PAD_UPWARD;
15315 }
15316
15317
15318 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15319 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15320 register has useful data, and return the opposite if the most
15321 significant byte does. */
15322
15323 bool
15324 arm_pad_reg_upward (machine_mode mode,
15325 tree type, int first ATTRIBUTE_UNUSED)
15326 {
15327 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15328 {
15329 /* For AAPCS, small aggregates, small fixed-point types,
15330 and small complex types are always padded upwards. */
15331 if (type)
15332 {
15333 if ((AGGREGATE_TYPE_P (type)
15334 || TREE_CODE (type) == COMPLEX_TYPE
15335 || FIXED_POINT_TYPE_P (type))
15336 && int_size_in_bytes (type) <= 4)
15337 return true;
15338 }
15339 else
15340 {
15341 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15342 && GET_MODE_SIZE (mode) <= 4)
15343 return true;
15344 }
15345 }
15346
15347 /* Otherwise, use default padding. */
15348 return !BYTES_BIG_ENDIAN;
15349 }
15350
15351 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15352 assuming that the address in the base register is word aligned. */
15353 bool
15354 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15355 {
15356 HOST_WIDE_INT max_offset;
15357
15358 /* Offset must be a multiple of 4 in Thumb mode. */
15359 if (TARGET_THUMB2 && ((offset & 3) != 0))
15360 return false;
15361
15362 if (TARGET_THUMB2)
15363 max_offset = 1020;
15364 else if (TARGET_ARM)
15365 max_offset = 255;
15366 else
15367 return false;
15368
15369 return ((offset <= max_offset) && (offset >= -max_offset));
15370 }
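/* For instance, an offset of 1004 is accepted in Thumb-2 state (a multiple
   of 4 within +/-1020) but rejected in ARM state, whose limit here is
   +/-255; conversely an offset of 6 is fine in ARM state but rejected in
   Thumb-2 state because it is not a multiple of 4.  */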
15371
15372 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15373 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15374 Assumes that the address in the base register RN is word aligned. Pattern
15375 guarantees that both memory accesses use the same base register,
15376 the offsets are constants within the range, and the gap between the offsets is 4.
15377 If reload is complete then check that registers are legal. WBACK indicates whether
15378 address is updated. LOAD indicates whether memory access is load or store. */
15379 bool
15380 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15381 bool wback, bool load)
15382 {
15383 unsigned int t, t2, n;
15384
15385 if (!reload_completed)
15386 return true;
15387
15388 if (!offset_ok_for_ldrd_strd (offset))
15389 return false;
15390
15391 t = REGNO (rt);
15392 t2 = REGNO (rt2);
15393 n = REGNO (rn);
15394
15395 if ((TARGET_THUMB2)
15396 && ((wback && (n == t || n == t2))
15397 || (t == SP_REGNUM)
15398 || (t == PC_REGNUM)
15399 || (t2 == SP_REGNUM)
15400 || (t2 == PC_REGNUM)
15401 || (!load && (n == PC_REGNUM))
15402 || (load && (t == t2))
15403 /* Triggers Cortex-M3 LDRD errata. */
15404 || (!wback && load && fix_cm3_ldrd && (n == t))))
15405 return false;
15406
15407 if ((TARGET_ARM)
15408 && ((wback && (n == t || n == t2))
15409 || (t2 == PC_REGNUM)
15410 || (t % 2 != 0) /* First destination register is not even. */
15411 || (t2 != t + 1)
15412 /* PC can be used as base register (for offset addressing only),
15413 but it is deprecated. */
15414 || (n == PC_REGNUM)))
15415 return false;
15416
15417 return true;
15418 }
15419
15420 /* Return true if a 64-bit access with alignment ALIGN and with a
15421 constant offset OFFSET from the base pointer is permitted on this
15422 architecture. */
15423 static bool
15424 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15425 {
15426 return (unaligned_access
15427 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15428 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15429 }
15430
15431 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15432 operand MEM's address contains an immediate offset from the base
15433 register and has no side effects, in which case it sets BASE,
15434 OFFSET and ALIGN accordingly. */
15435 static bool
15436 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15437 {
15438 rtx addr;
15439
15440 gcc_assert (base != NULL && offset != NULL);
15441
15442 /* TODO: Handle more general memory operand patterns, such as
15443 PRE_DEC and PRE_INC. */
15444
15445 if (side_effects_p (mem))
15446 return false;
15447
15448 /* Can't deal with subregs. */
15449 if (GET_CODE (mem) == SUBREG)
15450 return false;
15451
15452 gcc_assert (MEM_P (mem));
15453
15454 *offset = const0_rtx;
15455 *align = MEM_ALIGN (mem);
15456
15457 addr = XEXP (mem, 0);
15458
15459 /* If addr isn't valid for DImode, then we can't handle it. */
15460 if (!arm_legitimate_address_p (DImode, addr,
15461 reload_in_progress || reload_completed))
15462 return false;
15463
15464 if (REG_P (addr))
15465 {
15466 *base = addr;
15467 return true;
15468 }
15469 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15470 {
15471 *base = XEXP (addr, 0);
15472 *offset = XEXP (addr, 1);
15473 return (REG_P (*base) && CONST_INT_P (*offset));
15474 }
15475
15476 return false;
15477 }
15478
15479 /* Called from a peephole2 to replace two word-size accesses with a
15480 single LDRD/STRD instruction. Returns true iff we can generate a
15481 new instruction sequence. That is, both accesses use the same base
15482 register and the gap between constant offsets is 4. This function
15483 may reorder its operands to match ldrd/strd RTL templates.
15484 OPERANDS are the operands found by the peephole matcher;
15485 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15486 corresponding memory operands. LOAD indicates whether the access
15487 is load or store. CONST_STORE indicates a store of constant
15488 integer values held in OPERANDS[4,5] and assumes that the pattern
15489 is 4 insns long, for the purpose of checking dead registers.
15490 COMMUTE indicates that register operands may be reordered. */
15491 bool
15492 gen_operands_ldrd_strd (rtx *operands, bool load,
15493 bool const_store, bool commute)
15494 {
15495 int nops = 2;
15496 HOST_WIDE_INT offsets[2], offset, align[2];
15497 rtx base = NULL_RTX;
15498 rtx cur_base, cur_offset, tmp;
15499 int i, gap;
15500 HARD_REG_SET regset;
15501
15502 gcc_assert (!const_store || !load);
15503 /* Check that the memory references are immediate offsets from the
15504 same base register. Extract the base register, the destination
15505 registers, and the corresponding memory offsets. */
15506 for (i = 0; i < nops; i++)
15507 {
15508 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15509 &align[i]))
15510 return false;
15511
15512 if (i == 0)
15513 base = cur_base;
15514 else if (REGNO (base) != REGNO (cur_base))
15515 return false;
15516
15517 offsets[i] = INTVAL (cur_offset);
15518 if (GET_CODE (operands[i]) == SUBREG)
15519 {
15520 tmp = SUBREG_REG (operands[i]);
15521 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15522 operands[i] = tmp;
15523 }
15524 }
15525
15526 /* Make sure there is no dependency between the individual loads. */
15527 if (load && REGNO (operands[0]) == REGNO (base))
15528 return false; /* RAW */
15529
15530 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15531 return false; /* WAW */
15532
15533 /* If the same input register is used in both stores
15534 when storing different constants, try to find a free register.
15535 For example, the code
15536 mov r0, 0
15537 str r0, [r2]
15538 mov r0, 1
15539 str r0, [r2, #4]
15540 can be transformed into
15541 mov r1, 0
15542 mov r0, 1
15543 strd r1, r0, [r2]
15544 in Thumb mode assuming that r1 is free.
15545 For ARM mode do the same but only if the starting register
15546 can be made to be even. */
15547 if (const_store
15548 && REGNO (operands[0]) == REGNO (operands[1])
15549 && INTVAL (operands[4]) != INTVAL (operands[5]))
15550 {
15551 if (TARGET_THUMB2)
15552 {
15553 CLEAR_HARD_REG_SET (regset);
15554 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15555 if (tmp == NULL_RTX)
15556 return false;
15557
15558 /* Use the new register in the first load to ensure that
15559 if the original input register is not dead after peephole,
15560 then it will have the correct constant value. */
15561 operands[0] = tmp;
15562 }
15563 else if (TARGET_ARM)
15564 {
15565 int regno = REGNO (operands[0]);
15566 if (!peep2_reg_dead_p (4, operands[0]))
15567 {
15568 /* When the input register is even and is not dead after the
15569 pattern, it has to hold the second constant but we cannot
15570 form a legal STRD in ARM mode with this register as the second
15571 register. */
15572 if (regno % 2 == 0)
15573 return false;
15574
15575 /* Is regno-1 free? */
15576 SET_HARD_REG_SET (regset);
15577 CLEAR_HARD_REG_BIT(regset, regno - 1);
15578 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15579 if (tmp == NULL_RTX)
15580 return false;
15581
15582 operands[0] = tmp;
15583 }
15584 else
15585 {
15586 /* Find a DImode register. */
15587 CLEAR_HARD_REG_SET (regset);
15588 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15589 if (tmp != NULL_RTX)
15590 {
15591 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15592 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15593 }
15594 else
15595 {
15596 /* Can we use the input register to form a DI register? */
15597 SET_HARD_REG_SET (regset);
15598 CLEAR_HARD_REG_BIT(regset,
15599 regno % 2 == 0 ? regno + 1 : regno - 1);
15600 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15601 if (tmp == NULL_RTX)
15602 return false;
15603 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15604 }
15605 }
15606
15607 gcc_assert (operands[0] != NULL_RTX);
15608 gcc_assert (operands[1] != NULL_RTX);
15609 gcc_assert (REGNO (operands[0]) % 2 == 0);
15610 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15611 }
15612 }
15613
15614 /* Make sure the instructions are ordered with lower memory access first. */
15615 if (offsets[0] > offsets[1])
15616 {
15617 gap = offsets[0] - offsets[1];
15618 offset = offsets[1];
15619
15620 /* Swap the instructions such that lower memory is accessed first. */
15621 std::swap (operands[0], operands[1]);
15622 std::swap (operands[2], operands[3]);
15623 std::swap (align[0], align[1]);
15624 if (const_store)
15625 std::swap (operands[4], operands[5]);
15626 }
15627 else
15628 {
15629 gap = offsets[1] - offsets[0];
15630 offset = offsets[0];
15631 }
15632
15633 /* Make sure accesses are to consecutive memory locations. */
15634 if (gap != 4)
15635 return false;
15636
15637 if (!align_ok_ldrd_strd (align[0], offset))
15638 return false;
15639
15640 /* Make sure we generate legal instructions. */
15641 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15642 false, load))
15643 return true;
15644
15645 /* In Thumb state, where registers are almost unconstrained, there
15646 is little hope to fix it. */
15647 if (TARGET_THUMB2)
15648 return false;
15649
15650 if (load && commute)
15651 {
15652 /* Try reordering registers. */
15653 std::swap (operands[0], operands[1]);
15654 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15655 false, load))
15656 return true;
15657 }
15658
15659 if (const_store)
15660 {
15661 /* If input registers are dead after this pattern, they can be
15662 reordered or replaced by other registers that are free in the
15663 current pattern. */
15664 if (!peep2_reg_dead_p (4, operands[0])
15665 || !peep2_reg_dead_p (4, operands[1]))
15666 return false;
15667
15668 /* Try to reorder the input registers. */
15669 /* For example, the code
15670 mov r0, 0
15671 mov r1, 1
15672 str r1, [r2]
15673 str r0, [r2, #4]
15674 can be transformed into
15675 mov r1, 0
15676 mov r0, 1
15677 strd r0, [r2]
15678 */
15679 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15680 false, false))
15681 {
15682 std::swap (operands[0], operands[1]);
15683 return true;
15684 }
15685
15686 /* Try to find a free DI register. */
15687 CLEAR_HARD_REG_SET (regset);
15688 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15689 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15690 while (true)
15691 {
15692 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15693 if (tmp == NULL_RTX)
15694 return false;
15695
15696 /* DREG must be an even-numbered register in DImode.
15697 Split it into SI registers. */
15698 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15699 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15700 gcc_assert (operands[0] != NULL_RTX);
15701 gcc_assert (operands[1] != NULL_RTX);
15702 gcc_assert (REGNO (operands[0]) % 2 == 0);
15703 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15704
15705 return (operands_ok_ldrd_strd (operands[0], operands[1],
15706 base, offset,
15707 false, load));
15708 }
15709 }
15710
15711 return false;
15712 }
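/* For example, a peephole that matched stores to [rN, #4] and [rN, #0] in
   that order arrives here with OFFSETS == {4, 0}; the swap above reorders
   the operands so that the peephole can emit a single STRD covering
   [rN, #0]..[rN, #7], provided the registers then pass
   operands_ok_ldrd_strd.  */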
15713
15714
15715
15716 \f
15717 /* Print a symbolic form of X to the debug file, F. */
15718 static void
15719 arm_print_value (FILE *f, rtx x)
15720 {
15721 switch (GET_CODE (x))
15722 {
15723 case CONST_INT:
15724 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15725 return;
15726
15727 case CONST_DOUBLE:
15728 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15729 return;
15730
15731 case CONST_VECTOR:
15732 {
15733 int i;
15734
15735 fprintf (f, "<");
15736 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15737 {
15738 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15739 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15740 fputc (',', f);
15741 }
15742 fprintf (f, ">");
15743 }
15744 return;
15745
15746 case CONST_STRING:
15747 fprintf (f, "\"%s\"", XSTR (x, 0));
15748 return;
15749
15750 case SYMBOL_REF:
15751 fprintf (f, "`%s'", XSTR (x, 0));
15752 return;
15753
15754 case LABEL_REF:
15755 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15756 return;
15757
15758 case CONST:
15759 arm_print_value (f, XEXP (x, 0));
15760 return;
15761
15762 case PLUS:
15763 arm_print_value (f, XEXP (x, 0));
15764 fprintf (f, "+");
15765 arm_print_value (f, XEXP (x, 1));
15766 return;
15767
15768 case PC:
15769 fprintf (f, "pc");
15770 return;
15771
15772 default:
15773 fprintf (f, "????");
15774 return;
15775 }
15776 }
15777 \f
15778 /* Routines for manipulation of the constant pool. */
15779
15780 /* Arm instructions cannot load a large constant directly into a
15781 register; they have to come from a pc relative load. The constant
15782 must therefore be placed in the addressable range of the pc
15783 relative load. Depending on the precise pc relative load
15784 instruction the range is somewhere between 256 bytes and 4k. This
15785 means that we often have to dump a constant inside a function, and
15786 generate code to branch around it.
15787
15788 It is important to minimize this, since the branches will slow
15789 things down and make the code larger.
15790
15791 Normally we can hide the table after an existing unconditional
15792 branch so that there is no interruption of the flow, but in the
15793 worst case the code looks like this:
15794
15795 ldr rn, L1
15796 ...
15797 b L2
15798 align
15799 L1: .long value
15800 L2:
15801 ...
15802
15803 ldr rn, L3
15804 ...
15805 b L4
15806 align
15807 L3: .long value
15808 L4:
15809 ...
15810
15811 We fix this by performing a scan after scheduling, which notices
15812 which instructions need to have their operands fetched from the
15813 constant table and builds the table.
15814
15815 The algorithm starts by building a table of all the constants that
15816 need fixing up and all the natural barriers in the function (places
15817 where a constant table can be dropped without breaking the flow).
15818 For each fixup we note how far the pc-relative replacement will be
15819 able to reach and the offset of the instruction into the function.
15820
15821 Having built the table we then group the fixes together to form
15822 tables that are as large as possible (subject to addressing
15823 constraints) and emit each table of constants after the last
15824 barrier that is within range of all the instructions in the group.
15825 If a group does not contain a barrier, then we forcibly create one
15826 by inserting a jump instruction into the flow. Once the table has
15827 been inserted, the insns are then modified to reference the
15828 relevant entry in the pool.
15829
15830 Possible enhancements to the algorithm (not implemented) are:
15831
15832 1) For some processors and object formats, there may be benefit in
15833 aligning the pools to the start of cache lines; this alignment
15834 would need to be taken into account when calculating addressability
15835 of a pool. */
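/* As a rough illustration of the constraints involved: a pc-relative word
   load with a 4k range whose fixup is recorded at address 0x100 contributes
   a max_address of roughly 0x1100, and the pool entry it references must be
   emitted (after a barrier) before the addresses of the grouped instructions
   exceed their respective limits.  */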
15836
15837 /* These typedefs are located at the start of this file, so that
15838 they can be used in the prototypes there. This comment is to
15839 remind readers of that fact so that the following structures
15840 can be understood more easily.
15841
15842 typedef struct minipool_node Mnode;
15843 typedef struct minipool_fixup Mfix; */
15844
15845 struct minipool_node
15846 {
15847 /* Doubly linked chain of entries. */
15848 Mnode * next;
15849 Mnode * prev;
15850 /* The maximum offset into the code at which this entry can be placed. While
15851 pushing fixes for forward references, all entries are sorted in order
15852 of increasing max_address. */
15853 HOST_WIDE_INT max_address;
15854 /* Similarly for an entry inserted for a backwards ref. */
15855 HOST_WIDE_INT min_address;
15856 /* The number of fixes referencing this entry. This can become zero
15857 if we "unpush" an entry. In this case we ignore the entry when we
15858 come to emit the code. */
15859 int refcount;
15860 /* The offset from the start of the minipool. */
15861 HOST_WIDE_INT offset;
15862 /* The value in the table. */
15863 rtx value;
15864 /* The mode of value. */
15865 machine_mode mode;
15866 /* The size of the value. With iWMMXt enabled
15867 sizes > 4 also imply an alignment of 8-bytes. */
15868 int fix_size;
15869 };
15870
15871 struct minipool_fixup
15872 {
15873 Mfix * next;
15874 rtx_insn * insn;
15875 HOST_WIDE_INT address;
15876 rtx * loc;
15877 machine_mode mode;
15878 int fix_size;
15879 rtx value;
15880 Mnode * minipool;
15881 HOST_WIDE_INT forwards;
15882 HOST_WIDE_INT backwards;
15883 };
15884
15885 /* Fixes less than a word need padding out to a word boundary. */
15886 #define MINIPOOL_FIX_SIZE(mode) \
15887 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
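/* So a QImode or HImode fix still occupies 4 bytes in the pool, while DImode
   and DFmode fixes occupy their natural 8 bytes.  */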
15888
15889 static Mnode * minipool_vector_head;
15890 static Mnode * minipool_vector_tail;
15891 static rtx_code_label *minipool_vector_label;
15892 static int minipool_pad;
15893
15894 /* The linked list of all minipool fixes required for this function. */
15895 Mfix * minipool_fix_head;
15896 Mfix * minipool_fix_tail;
15897 /* The fix entry for the current minipool, once it has been placed. */
15898 Mfix * minipool_barrier;
15899
15900 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15901 #define JUMP_TABLES_IN_TEXT_SECTION 0
15902 #endif
15903
15904 static HOST_WIDE_INT
15905 get_jump_table_size (rtx_jump_table_data *insn)
15906 {
15907 /* ADDR_VECs only take room if read-only data goes into the text
15908 section. */
15909 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15910 {
15911 rtx body = PATTERN (insn);
15912 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15913 HOST_WIDE_INT size;
15914 HOST_WIDE_INT modesize;
15915
15916 modesize = GET_MODE_SIZE (GET_MODE (body));
15917 size = modesize * XVECLEN (body, elt);
15918 switch (modesize)
15919 {
15920 case 1:
15921 /* Round up size of TBB table to a halfword boundary. */
15922 size = (size + 1) & ~HOST_WIDE_INT_1;
15923 break;
15924 case 2:
15925 /* No padding necessary for TBH. */
15926 break;
15927 case 4:
15928 /* Add two bytes for alignment on Thumb. */
15929 if (TARGET_THUMB)
15930 size += 2;
15931 break;
15932 default:
15933 gcc_unreachable ();
15934 }
15935 return size;
15936 }
15937
15938 return 0;
15939 }
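/* For example, when jump tables live in the text section, a TBB table with
   five QImode entries gives SIZE == 5, rounded up to 6 so that the following
   instruction stays halfword aligned; a TBH table needs no such rounding.  */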
15940
15941 /* Return the maximum amount of padding that will be inserted before
15942 label LABEL. */
15943
15944 static HOST_WIDE_INT
15945 get_label_padding (rtx label)
15946 {
15947 HOST_WIDE_INT align, min_insn_size;
15948
15949 align = 1 << label_to_alignment (label);
15950 min_insn_size = TARGET_THUMB ? 2 : 4;
15951 return align > min_insn_size ? align - min_insn_size : 0;
15952 }
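/* E.g. a label aligned to an 8-byte boundary may be preceded by up to
   6 bytes of padding when compiling for Thumb (minimum insn size 2), or
   up to 4 bytes in ARM state.  */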
15953
15954 /* Move a minipool fix MP from its current location to before MAX_MP.
15955 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15956 constraints may need updating. */
15957 static Mnode *
15958 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15959 HOST_WIDE_INT max_address)
15960 {
15961 /* The code below assumes these are different. */
15962 gcc_assert (mp != max_mp);
15963
15964 if (max_mp == NULL)
15965 {
15966 if (max_address < mp->max_address)
15967 mp->max_address = max_address;
15968 }
15969 else
15970 {
15971 if (max_address > max_mp->max_address - mp->fix_size)
15972 mp->max_address = max_mp->max_address - mp->fix_size;
15973 else
15974 mp->max_address = max_address;
15975
15976 /* Unlink MP from its current position. Since max_mp is non-null,
15977 mp->prev must be non-null. */
15978 mp->prev->next = mp->next;
15979 if (mp->next != NULL)
15980 mp->next->prev = mp->prev;
15981 else
15982 minipool_vector_tail = mp->prev;
15983
15984 /* Re-insert it before MAX_MP. */
15985 mp->next = max_mp;
15986 mp->prev = max_mp->prev;
15987 max_mp->prev = mp;
15988
15989 if (mp->prev != NULL)
15990 mp->prev->next = mp;
15991 else
15992 minipool_vector_head = mp;
15993 }
15994
15995 /* Save the new entry. */
15996 max_mp = mp;
15997
15998 /* Scan over the preceding entries and adjust their addresses as
15999 required. */
16000 while (mp->prev != NULL
16001 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16002 {
16003 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16004 mp = mp->prev;
16005 }
16006
16007 return max_mp;
16008 }
16009
16010 /* Add a constant to the minipool for a forward reference. Returns the
16011 node added or NULL if the constant will not fit in this pool. */
16012 static Mnode *
16013 add_minipool_forward_ref (Mfix *fix)
16014 {
16015 /* If set, max_mp is the first pool_entry that has a lower
16016 constraint than the one we are trying to add. */
16017 Mnode * max_mp = NULL;
16018 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16019 Mnode * mp;
16020
16021 /* If the minipool starts before the end of FIX->INSN then this FIX
16022 can not be placed into the current pool. Furthermore, adding the
16023 new constant pool entry may cause the pool to start FIX_SIZE bytes
16024 earlier. */
16025 if (minipool_vector_head
16026 && (fix->address + get_attr_length (fix->insn)
16027 >= minipool_vector_head->max_address - fix->fix_size))
16028 return NULL;
16029
16030 /* Scan the pool to see if a constant with the same value has
16031 already been added. While we are doing this, also note the
16032 location where we must insert the constant if it doesn't already
16033 exist. */
16034 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16035 {
16036 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16037 && fix->mode == mp->mode
16038 && (!LABEL_P (fix->value)
16039 || (CODE_LABEL_NUMBER (fix->value)
16040 == CODE_LABEL_NUMBER (mp->value)))
16041 && rtx_equal_p (fix->value, mp->value))
16042 {
16043 /* More than one fix references this entry. */
16044 mp->refcount++;
16045 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16046 }
16047
16048 /* Note the insertion point if necessary. */
16049 if (max_mp == NULL
16050 && mp->max_address > max_address)
16051 max_mp = mp;
16052
16053 /* If we are inserting an 8-byte aligned quantity and
16054 we have not already found an insertion point, then
16055 make sure that all such 8-byte aligned quantities are
16056 placed at the start of the pool. */
16057 if (ARM_DOUBLEWORD_ALIGN
16058 && max_mp == NULL
16059 && fix->fix_size >= 8
16060 && mp->fix_size < 8)
16061 {
16062 max_mp = mp;
16063 max_address = mp->max_address;
16064 }
16065 }
16066
16067 /* The value is not currently in the minipool, so we need to create
16068 a new entry for it. If MAX_MP is NULL, the entry will be put on
16069 the end of the list since the placement is less constrained than
16070 any existing entry. Otherwise, we insert the new fix before
16071 MAX_MP and, if necessary, adjust the constraints on the other
16072 entries. */
16073 mp = XNEW (Mnode);
16074 mp->fix_size = fix->fix_size;
16075 mp->mode = fix->mode;
16076 mp->value = fix->value;
16077 mp->refcount = 1;
16078 /* Not yet required for a backwards ref. */
16079 mp->min_address = -65536;
16080
16081 if (max_mp == NULL)
16082 {
16083 mp->max_address = max_address;
16084 mp->next = NULL;
16085 mp->prev = minipool_vector_tail;
16086
16087 if (mp->prev == NULL)
16088 {
16089 minipool_vector_head = mp;
16090 minipool_vector_label = gen_label_rtx ();
16091 }
16092 else
16093 mp->prev->next = mp;
16094
16095 minipool_vector_tail = mp;
16096 }
16097 else
16098 {
16099 if (max_address > max_mp->max_address - mp->fix_size)
16100 mp->max_address = max_mp->max_address - mp->fix_size;
16101 else
16102 mp->max_address = max_address;
16103
16104 mp->next = max_mp;
16105 mp->prev = max_mp->prev;
16106 max_mp->prev = mp;
16107 if (mp->prev != NULL)
16108 mp->prev->next = mp;
16109 else
16110 minipool_vector_head = mp;
16111 }
16112
16113 /* Save the new entry. */
16114 max_mp = mp;
16115
16116 /* Scan over the preceding entries and adjust their addresses as
16117 required. */
16118 while (mp->prev != NULL
16119 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16120 {
16121 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16122 mp = mp->prev;
16123 }
16124
16125 return max_mp;
16126 }
16127
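/* Move a minipool fix MP from its current location to after MIN_MP, the
   backward-reference counterpart of move_minipool_fix_forward_ref above.
   If MIN_MP is NULL then MP doesn't need moving, but its minimum address
   constraint may still need updating.  The offsets of all pool entries
   are recomputed afterwards. */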
16128 static Mnode *
16129 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16130 HOST_WIDE_INT min_address)
16131 {
16132 HOST_WIDE_INT offset;
16133
16134 /* The code below assumes these are different. */
16135 gcc_assert (mp != min_mp);
16136
16137 if (min_mp == NULL)
16138 {
16139 if (min_address > mp->min_address)
16140 mp->min_address = min_address;
16141 }
16142 else
16143 {
16144 /* We will adjust this below if it is too loose. */
16145 mp->min_address = min_address;
16146
16147 /* Unlink MP from its current position. Since min_mp is non-null,
16148 mp->next must be non-null. */
16149 mp->next->prev = mp->prev;
16150 if (mp->prev != NULL)
16151 mp->prev->next = mp->next;
16152 else
16153 minipool_vector_head = mp->next;
16154
16155 /* Reinsert it after MIN_MP. */
16156 mp->prev = min_mp;
16157 mp->next = min_mp->next;
16158 min_mp->next = mp;
16159 if (mp->next != NULL)
16160 mp->next->prev = mp;
16161 else
16162 minipool_vector_tail = mp;
16163 }
16164
16165 min_mp = mp;
16166
16167 offset = 0;
16168 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16169 {
16170 mp->offset = offset;
16171 if (mp->refcount > 0)
16172 offset += mp->fix_size;
16173
16174 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16175 mp->next->min_address = mp->min_address + mp->fix_size;
16176 }
16177
16178 return min_mp;
16179 }
16180
16181 /* Add a constant to the minipool for a backward reference. Returns the
16182 node added or NULL if the constant will not fit in this pool.
16183
16184 Note that the insertion code for a backwards reference can be
16185 somewhat confusing because the calculated offsets for each fix do
16186 not take into account the size of the pool (which is still under
16187 construction). */
16188 static Mnode *
16189 add_minipool_backward_ref (Mfix *fix)
16190 {
16191 /* If set, min_mp is the last pool_entry that has a lower constraint
16192 than the one we are trying to add. */
16193 Mnode *min_mp = NULL;
16194 /* This can be negative, since it is only a constraint. */
16195 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16196 Mnode *mp;
16197
16198 /* If we can't reach the current pool from this insn, or if we can't
16199 insert this entry at the end of the pool without pushing other
16200 fixes out of range, then we don't try. This ensures that we
16201 can't fail later on. */
16202 if (min_address >= minipool_barrier->address
16203 || (minipool_vector_tail->min_address + fix->fix_size
16204 >= minipool_barrier->address))
16205 return NULL;
16206
16207 /* Scan the pool to see if a constant with the same value has
16208 already been added. While we are doing this, also note the
16209 location where we must insert the constant if it doesn't already
16210 exist. */
16211 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16212 {
16213 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16214 && fix->mode == mp->mode
16215 && (!LABEL_P (fix->value)
16216 || (CODE_LABEL_NUMBER (fix->value)
16217 == CODE_LABEL_NUMBER (mp->value)))
16218 && rtx_equal_p (fix->value, mp->value)
16219 /* Check that there is enough slack to move this entry to the
16220 end of the table (this is conservative). */
16221 && (mp->max_address
16222 > (minipool_barrier->address
16223 + minipool_vector_tail->offset
16224 + minipool_vector_tail->fix_size)))
16225 {
16226 mp->refcount++;
16227 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16228 }
16229
16230 if (min_mp != NULL)
16231 mp->min_address += fix->fix_size;
16232 else
16233 {
16234 /* Note the insertion point if necessary. */
16235 if (mp->min_address < min_address)
16236 {
16237 /* For now, we do not allow the insertion of 8-byte alignment
16238 requiring nodes anywhere but at the start of the pool. */
16239 if (ARM_DOUBLEWORD_ALIGN
16240 && fix->fix_size >= 8 && mp->fix_size < 8)
16241 return NULL;
16242 else
16243 min_mp = mp;
16244 }
16245 else if (mp->max_address
16246 < minipool_barrier->address + mp->offset + fix->fix_size)
16247 {
16248 /* Inserting before this entry would push the fix beyond
16249 its maximum address (which can happen if we have
16250 re-located a forwards fix); force the new fix to come
16251 after it. */
16252 if (ARM_DOUBLEWORD_ALIGN
16253 && fix->fix_size >= 8 && mp->fix_size < 8)
16254 return NULL;
16255 else
16256 {
16257 min_mp = mp;
16258 min_address = mp->min_address + fix->fix_size;
16259 }
16260 }
16261 /* Do not insert a non-8-byte aligned quantity before 8-byte
16262 aligned quantities. */
16263 else if (ARM_DOUBLEWORD_ALIGN
16264 && fix->fix_size < 8
16265 && mp->fix_size >= 8)
16266 {
16267 min_mp = mp;
16268 min_address = mp->min_address + fix->fix_size;
16269 }
16270 }
16271 }
16272
16273 /* We need to create a new entry. */
16274 mp = XNEW (Mnode);
16275 mp->fix_size = fix->fix_size;
16276 mp->mode = fix->mode;
16277 mp->value = fix->value;
16278 mp->refcount = 1;
16279 mp->max_address = minipool_barrier->address + 65536;
16280
16281 mp->min_address = min_address;
16282
16283 if (min_mp == NULL)
16284 {
16285 mp->prev = NULL;
16286 mp->next = minipool_vector_head;
16287
16288 if (mp->next == NULL)
16289 {
16290 minipool_vector_tail = mp;
16291 minipool_vector_label = gen_label_rtx ();
16292 }
16293 else
16294 mp->next->prev = mp;
16295
16296 minipool_vector_head = mp;
16297 }
16298 else
16299 {
16300 mp->next = min_mp->next;
16301 mp->prev = min_mp;
16302 min_mp->next = mp;
16303
16304 if (mp->next != NULL)
16305 mp->next->prev = mp;
16306 else
16307 minipool_vector_tail = mp;
16308 }
16309
16310 /* Save the new entry. */
16311 min_mp = mp;
16312
16313 if (mp->prev)
16314 mp = mp->prev;
16315 else
16316 mp->offset = 0;
16317
16318 /* Scan over the following entries and adjust their offsets. */
16319 while (mp->next != NULL)
16320 {
16321 if (mp->next->min_address < mp->min_address + mp->fix_size)
16322 mp->next->min_address = mp->min_address + mp->fix_size;
16323
16324 if (mp->refcount)
16325 mp->next->offset = mp->offset + mp->fix_size;
16326 else
16327 mp->next->offset = mp->offset;
16328
16329 mp = mp->next;
16330 }
16331
16332 return min_mp;
16333 }
16334
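/* Record BARRIER as the barrier for the current minipool and assign each
   pool entry that is still referenced its byte offset from the start of
   the pool. */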
16335 static void
16336 assign_minipool_offsets (Mfix *barrier)
16337 {
16338 HOST_WIDE_INT offset = 0;
16339 Mnode *mp;
16340
16341 minipool_barrier = barrier;
16342
16343 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16344 {
16345 mp->offset = offset;
16346
16347 if (mp->refcount > 0)
16348 offset += mp->fix_size;
16349 }
16350 }
16351
16352 /* Output the literal table. */
16353 static void
16354 dump_minipool (rtx_insn *scan)
16355 {
16356 Mnode * mp;
16357 Mnode * nmp;
16358 int align64 = 0;
16359
16360 if (ARM_DOUBLEWORD_ALIGN)
16361 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16362 if (mp->refcount > 0 && mp->fix_size >= 8)
16363 {
16364 align64 = 1;
16365 break;
16366 }
16367
16368 if (dump_file)
16369 fprintf (dump_file,
16370 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16371 INSN_UID (scan), (long) minipool_barrier->address, align64 ? 8 : 4);
16372
16373 scan = emit_label_after (gen_label_rtx (), scan);
16374 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16375 scan = emit_label_after (minipool_vector_label, scan);
16376
16377 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16378 {
16379 if (mp->refcount > 0)
16380 {
16381 if (dump_file)
16382 {
16383 fprintf (dump_file,
16384 ";; Offset %u, min %ld, max %ld ",
16385 (unsigned) mp->offset, (long) mp->min_address,
16386 (long) mp->max_address);
16387 arm_print_value (dump_file, mp->value);
16388 fputc ('\n', dump_file);
16389 }
16390
16391 rtx val = copy_rtx (mp->value);
16392
16393 switch (GET_MODE_SIZE (mp->mode))
16394 {
16395 #ifdef HAVE_consttable_1
16396 case 1:
16397 scan = emit_insn_after (gen_consttable_1 (val), scan);
16398 break;
16399
16400 #endif
16401 #ifdef HAVE_consttable_2
16402 case 2:
16403 scan = emit_insn_after (gen_consttable_2 (val), scan);
16404 break;
16405
16406 #endif
16407 #ifdef HAVE_consttable_4
16408 case 4:
16409 scan = emit_insn_after (gen_consttable_4 (val), scan);
16410 break;
16411
16412 #endif
16413 #ifdef HAVE_consttable_8
16414 case 8:
16415 scan = emit_insn_after (gen_consttable_8 (val), scan);
16416 break;
16417
16418 #endif
16419 #ifdef HAVE_consttable_16
16420 case 16:
16421 scan = emit_insn_after (gen_consttable_16 (val), scan);
16422 break;
16423
16424 #endif
16425 default:
16426 gcc_unreachable ();
16427 }
16428 }
16429
16430 nmp = mp->next;
16431 free (mp);
16432 }
16433
16434 minipool_vector_head = minipool_vector_tail = NULL;
16435 scan = emit_insn_after (gen_consttable_end (), scan);
16436 scan = emit_barrier_after (scan);
16437 }
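/* The emitted pool therefore looks roughly like this (illustrative
   sketch only, label name hypothetical):

	.p2align 2		@ or 3 when an 8-byte entry is present
   .LCPI:			@ minipool_vector_label
	.word	<constant>	@ one consttable_<N> entry per live node
	...			@ consttable_end marker, then a barrier  */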
16438
16439 /* Return the cost of forcibly inserting a barrier after INSN. */
16440 static int
16441 arm_barrier_cost (rtx_insn *insn)
16442 {
16443 /* Basing the location of the pool on the loop depth is preferable,
16444 but at the moment, the basic block information seems to be
16445 corrupted by this stage of the compilation. */
16446 int base_cost = 50;
16447 rtx_insn *next = next_nonnote_insn (insn);
16448
16449 if (next != NULL && LABEL_P (next))
16450 base_cost -= 20;
16451
16452 switch (GET_CODE (insn))
16453 {
16454 case CODE_LABEL:
16455 /* It will always be better to place the table before the label, rather
16456 than after it. */
16457 return 50;
16458
16459 case INSN:
16460 case CALL_INSN:
16461 return base_cost;
16462
16463 case JUMP_INSN:
16464 return base_cost - 10;
16465
16466 default:
16467 return base_cost + 10;
16468 }
16469 }
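/* So a plain insn or call costs 50 (30 when the next non-note insn is a
   label), a jump costs 10 less than that, and a label itself always costs
   the full 50, since the pool should go before the label rather than
   after it. */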
16470
16471 /* Find the best place in the insn stream in the range
16472 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16473 Create the barrier by inserting a jump and add a new fix entry for
16474 it. */
16475 static Mfix *
16476 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16477 {
16478 HOST_WIDE_INT count = 0;
16479 rtx_barrier *barrier;
16480 rtx_insn *from = fix->insn;
16481 /* The instruction after which we will insert the jump. */
16482 rtx_insn *selected = NULL;
16483 int selected_cost;
16484 /* The address at which the jump instruction will be placed. */
16485 HOST_WIDE_INT selected_address;
16486 Mfix * new_fix;
16487 HOST_WIDE_INT max_count = max_address - fix->address;
16488 rtx_code_label *label = gen_label_rtx ();
16489
16490 selected_cost = arm_barrier_cost (from);
16491 selected_address = fix->address;
16492
16493 while (from && count < max_count)
16494 {
16495 rtx_jump_table_data *tmp;
16496 int new_cost;
16497
16498 /* This code shouldn't have been called if there was a natural barrier
16499 within range. */
16500 gcc_assert (!BARRIER_P (from));
16501
16502 /* Count the length of this insn. This must stay in sync with the
16503 code that pushes minipool fixes. */
16504 if (LABEL_P (from))
16505 count += get_label_padding (from);
16506 else
16507 count += get_attr_length (from);
16508
16509 /* If there is a jump table, add its length. */
16510 if (tablejump_p (from, NULL, &tmp))
16511 {
16512 count += get_jump_table_size (tmp);
16513
16514 /* Jump tables aren't in a basic block, so base the cost on
16515 the dispatch insn. If we select this location, we will
16516 still put the pool after the table. */
16517 new_cost = arm_barrier_cost (from);
16518
16519 if (count < max_count
16520 && (!selected || new_cost <= selected_cost))
16521 {
16522 selected = tmp;
16523 selected_cost = new_cost;
16524 selected_address = fix->address + count;
16525 }
16526
16527 /* Continue after the dispatch table. */
16528 from = NEXT_INSN (tmp);
16529 continue;
16530 }
16531
16532 new_cost = arm_barrier_cost (from);
16533
16534 if (count < max_count
16535 && (!selected || new_cost <= selected_cost))
16536 {
16537 selected = from;
16538 selected_cost = new_cost;
16539 selected_address = fix->address + count;
16540 }
16541
16542 from = NEXT_INSN (from);
16543 }
16544
16545 /* Make sure that we found a place to insert the jump. */
16546 gcc_assert (selected);
16547
16548 /* Create a new JUMP_INSN that branches around a barrier. */
16549 from = emit_jump_insn_after (gen_jump (label), selected);
16550 JUMP_LABEL (from) = label;
16551 barrier = emit_barrier_after (from);
16552 emit_label_after (label, barrier);
16553
16554 /* Create a minipool barrier entry for the new barrier. */
16555 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16556 new_fix->insn = barrier;
16557 new_fix->address = selected_address;
16558 new_fix->next = fix->next;
16559 fix->next = new_fix;
16560
16561 return new_fix;
16562 }
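/* The net effect is a sequence along the lines of (illustrative):

	b	.LSKIP		@ the newly created jump
				@ barrier: the pool will be dumped here
   .LSKIP:

   giving dump_minipool a place where the constants can never be executed
   as code. */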
16563
16564 /* Record that there is a natural barrier in the insn stream at
16565 ADDRESS. */
16566 static void
16567 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16568 {
16569 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16570
16571 fix->insn = insn;
16572 fix->address = address;
16573
16574 fix->next = NULL;
16575 if (minipool_fix_head != NULL)
16576 minipool_fix_tail->next = fix;
16577 else
16578 minipool_fix_head = fix;
16579
16580 minipool_fix_tail = fix;
16581 }
16582
16583 /* Record INSN, which will need fixing up to load a value from the
16584 minipool. ADDRESS is the offset of the insn since the start of the
16585 function; LOC is a pointer to the part of the insn which requires
16586 fixing; VALUE is the constant that must be loaded, which is of type
16587 MODE. */
16588 static void
16589 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16590 machine_mode mode, rtx value)
16591 {
16592 gcc_assert (!arm_disable_literal_pool);
16593 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16594
16595 fix->insn = insn;
16596 fix->address = address;
16597 fix->loc = loc;
16598 fix->mode = mode;
16599 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16600 fix->value = value;
16601 fix->forwards = get_attr_pool_range (insn);
16602 fix->backwards = get_attr_neg_pool_range (insn);
16603 fix->minipool = NULL;
16604
16605 /* If an insn doesn't have a range defined for it, then it isn't
16606 expecting to be reworked by this code. Better to stop now than
16607 to generate duff assembly code. */
16608 gcc_assert (fix->forwards || fix->backwards);
16609
16610 /* If an entry requires 8-byte alignment then assume all constant pools
16611 require 4 bytes of padding. Trying to do this later on a per-pool
16612 basis is awkward because existing pool entries have to be modified. */
16613 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16614 minipool_pad = 4;
16615
16616 if (dump_file)
16617 {
16618 fprintf (dump_file,
16619 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16620 GET_MODE_NAME (mode),
16621 INSN_UID (insn), (unsigned long) address,
16622 -1 * (long)fix->backwards, (long)fix->forwards);
16623 arm_print_value (dump_file, fix->value);
16624 fprintf (dump_file, "\n");
16625 }
16626
16627 /* Add it to the chain of fixes. */
16628 fix->next = NULL;
16629
16630 if (minipool_fix_head != NULL)
16631 minipool_fix_tail->next = fix;
16632 else
16633 minipool_fix_head = fix;
16634
16635 minipool_fix_tail = fix;
16636 }
16637
16638 /* Return the maximum number of insns we are prepared to spend on
16639 synthesizing a 64-bit constant inline; callers compare the result of
16640 arm_const_double_inline_cost against this limit. */
16641 int
16642 arm_max_const_double_inline_cost ()
16643 {
16644 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16645 }
16646
16647 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16648 Returns the number of insns needed, or 99 if we don't know how to
16649 do it. */
16650 int
16651 arm_const_double_inline_cost (rtx val)
16652 {
16653 rtx lowpart, highpart;
16654 machine_mode mode;
16655
16656 mode = GET_MODE (val);
16657
16658 if (mode == VOIDmode)
16659 mode = DImode;
16660
16661 gcc_assert (GET_MODE_SIZE (mode) == 8);
16662
16663 lowpart = gen_lowpart (SImode, val);
16664 highpart = gen_highpart_mode (SImode, mode, val);
16665
16666 gcc_assert (CONST_INT_P (lowpart));
16667 gcc_assert (CONST_INT_P (highpart));
16668
16669 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16670 NULL_RTX, NULL_RTX, 0, 0)
16671 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16672 NULL_RTX, NULL_RTX, 0, 0));
16673 }
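/* As a worked example, the constant 0x0000000100000001 costs 1 + 1 = 2
   insns because each 32-bit half is a valid immediate, which is within
   the limit returned by arm_max_const_double_inline_cost above; a value
   whose halves each need several MOV/ORR steps can exceed that limit and
   is then better loaded from the literal pool. */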
16674
16675 /* Cost of loading a SImode constant. */
16676 static inline int
16677 arm_const_inline_cost (enum rtx_code code, rtx val)
16678 {
16679 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16680 NULL_RTX, NULL_RTX, 1, 0);
16681 }
16682
16683 /* Return true if it is worthwhile to split a 64-bit constant into two
16684 32-bit operations. This is the case if optimizing for size, or
16685 if we have load delay slots, or if one 32-bit part can be done with
16686 a single data operation. */
16687 bool
16688 arm_const_double_by_parts (rtx val)
16689 {
16690 machine_mode mode = GET_MODE (val);
16691 rtx part;
16692
16693 if (optimize_size || arm_ld_sched)
16694 return true;
16695
16696 if (mode == VOIDmode)
16697 mode = DImode;
16698
16699 part = gen_highpart_mode (SImode, mode, val);
16700
16701 gcc_assert (CONST_INT_P (part));
16702
16703 if (const_ok_for_arm (INTVAL (part))
16704 || const_ok_for_arm (~INTVAL (part)))
16705 return true;
16706
16707 part = gen_lowpart (SImode, val);
16708
16709 gcc_assert (CONST_INT_P (part));
16710
16711 if (const_ok_for_arm (INTVAL (part))
16712 || const_ok_for_arm (~INTVAL (part)))
16713 return true;
16714
16715 return false;
16716 }
16717
16718 /* Return true if it is possible to inline both the high and low parts
16719 of a 64-bit constant into 32-bit data processing instructions. */
16720 bool
16721 arm_const_double_by_immediates (rtx val)
16722 {
16723 machine_mode mode = GET_MODE (val);
16724 rtx part;
16725
16726 if (mode == VOIDmode)
16727 mode = DImode;
16728
16729 part = gen_highpart_mode (SImode, mode, val);
16730
16731 gcc_assert (CONST_INT_P (part));
16732
16733 if (!const_ok_for_arm (INTVAL (part)))
16734 return false;
16735
16736 part = gen_lowpart (SImode, val);
16737
16738 gcc_assert (CONST_INT_P (part));
16739
16740 if (!const_ok_for_arm (INTVAL (part)))
16741 return false;
16742
16743 return true;
16744 }
16745
16746 /* Scan INSN and note any of its operands that need fixing.
16747 If DO_PUSHES is false we do not actually push any of the fixups
16748 needed. */
16749 static void
16750 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16751 {
16752 int opno;
16753
16754 extract_constrain_insn (insn);
16755
16756 if (recog_data.n_alternatives == 0)
16757 return;
16758
16759 /* Fill in recog_op_alt with information about the constraints of
16760 this insn. */
16761 preprocess_constraints (insn);
16762
16763 const operand_alternative *op_alt = which_op_alt ();
16764 for (opno = 0; opno < recog_data.n_operands; opno++)
16765 {
16766 /* Things we need to fix can only occur in inputs. */
16767 if (recog_data.operand_type[opno] != OP_IN)
16768 continue;
16769
16770 /* If this alternative is a memory reference, then any mention
16771 of constants in this alternative is really to fool reload
16772 into allowing us to accept one there. We need to fix them up
16773 now so that we output the right code. */
16774 if (op_alt[opno].memory_ok)
16775 {
16776 rtx op = recog_data.operand[opno];
16777
16778 if (CONSTANT_P (op))
16779 {
16780 if (do_pushes)
16781 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16782 recog_data.operand_mode[opno], op);
16783 }
16784 else if (MEM_P (op)
16785 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16786 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16787 {
16788 if (do_pushes)
16789 {
16790 rtx cop = avoid_constant_pool_reference (op);
16791
16792 /* Casting the address of something to a mode narrower
16793 than a word can cause avoid_constant_pool_reference()
16794 to return the pool reference itself. That's no good to
16795 us here. Let's just hope that we can use the
16796 constant pool value directly. */
16797 if (op == cop)
16798 cop = get_pool_constant (XEXP (op, 0));
16799
16800 push_minipool_fix (insn, address,
16801 recog_data.operand_loc[opno],
16802 recog_data.operand_mode[opno], cop);
16803 }
16804
16805 }
16806 }
16807 }
16808
16809 return;
16810 }
16811
16812 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16813 and unions in the context of ARMv8-M Security Extensions. It is used as a
16814 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16815 functions. The PADDING_BITS_TO_CLEAR pointer can point to either one
16816 or four masks, depending on whether it is being computed for a
16817 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16818 respectively. The tree for the type of the argument, or of a field within
16819 an argument, is passed in ARG_TYPE. The current register this argument or
16820 field starts in is kept in the pointer REGNO and updated accordingly, the
16821 bit this argument or field starts at is passed in STARTING_BIT, and the
16822 last used bit is kept in LAST_USED_BIT, which is also updated accordingly. */
16823
16824 static unsigned HOST_WIDE_INT
16825 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16826 uint32_t * padding_bits_to_clear,
16827 unsigned starting_bit, int * last_used_bit)
16828
16829 {
16830 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16831
16832 if (TREE_CODE (arg_type) == RECORD_TYPE)
16833 {
16834 unsigned current_bit = starting_bit;
16835 tree field;
16836 long int offset, size;
16837
16838
16839 field = TYPE_FIELDS (arg_type);
16840 while (field)
16841 {
16842 /* The offset within a structure is always an offset from
16843 the start of that structure. Make sure we take that into
16844 account in the calculation of the register-based offset used here. */
16845 offset = starting_bit;
16846 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16847 offset %= 32;
16848
16849 /* This is the actual size of the field, for bitfields this is the
16850 bitfield width and not the container size. */
16851 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16852
16853 if (*last_used_bit != offset)
16854 {
16855 if (offset < *last_used_bit)
16856 {
16857 /* This field's offset is before the 'last_used_bit', that
16858 means this field goes on the next register. So we need to
16859 pad the rest of the current register and increase the
16860 register number. */
16861 uint32_t mask;
16862 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16863 mask++;
16864
16865 padding_bits_to_clear[*regno] |= mask;
16866 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16867 (*regno)++;
16868 }
16869 else
16870 {
16871 /* Otherwise we pad the bits between the last field's end and
16872 the start of the new field. */
16873 uint32_t mask;
16874
16875 mask = ((uint32_t)-1) >> (32 - offset);
16876 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16877 padding_bits_to_clear[*regno] |= mask;
16878 }
16879 current_bit = offset;
16880 }
16881
16882 /* Calculate further padding bits for inner structs/unions too. */
16883 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16884 {
16885 *last_used_bit = current_bit;
16886 not_to_clear_reg_mask
16887 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16888 padding_bits_to_clear, offset,
16889 last_used_bit);
16890 }
16891 else
16892 {
16893 /* Update 'current_bit' with this field's size. If the
16894 'current_bit' lies in a subsequent register, update 'regno' and
16895 reset 'current_bit' to point to the current bit in that new
16896 register. */
16897 current_bit += size;
16898 while (current_bit >= 32)
16899 {
16900 current_bit -= 32;
16901 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16902 (*regno)++;
16903 }
16904 *last_used_bit = current_bit;
16905 }
16906
16907 field = TREE_CHAIN (field);
16908 }
16909 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16910 }
16911 else if (TREE_CODE (arg_type) == UNION_TYPE)
16912 {
16913 tree field, field_t;
16914 int i, regno_t, field_size;
16915 int max_reg = -1;
16916 int max_bit = -1;
16917 uint32_t mask;
16918 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16919 = {-1, -1, -1, -1};
16920
16921 /* To compute the padding bits in a union we only consider bits as
16922 padding bits if, for every field in the union, they are either a
16923 padding bit or fall outside that field's size. */
16924 field = TYPE_FIELDS (arg_type);
16925 while (field)
16926 {
16927 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16928 = {0U, 0U, 0U, 0U};
16929 int last_used_bit_t = *last_used_bit;
16930 regno_t = *regno;
16931 field_t = TREE_TYPE (field);
16932
16933 /* If the field's type is either a record or a union make sure to
16934 compute their padding bits too. */
16935 if (RECORD_OR_UNION_TYPE_P (field_t))
16936 not_to_clear_reg_mask
16937 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16938 &padding_bits_to_clear_t[0],
16939 starting_bit, &last_used_bit_t);
16940 else
16941 {
16942 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16943 regno_t = (field_size / 32) + *regno;
16944 last_used_bit_t = (starting_bit + field_size) % 32;
16945 }
16946
16947 for (i = *regno; i < regno_t; i++)
16948 {
16949 /* For all but the last register used by this field only keep the
16950 padding bits that were padding bits in this field. */
16951 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16952 }
16953
16954 /* For the last register, keep all padding bits that were padding
16955 bits in this field and any padding bits that are still valid
16956 as padding bits but fall outside of this field's size. */
16957 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16958 padding_bits_to_clear_res[regno_t]
16959 &= padding_bits_to_clear_t[regno_t] | mask;
16960
16961 /* Update the maximum size of the fields in terms of registers used
16962 ('max_reg') and the 'last_used_bit' in said register. */
16963 if (max_reg < regno_t)
16964 {
16965 max_reg = regno_t;
16966 max_bit = last_used_bit_t;
16967 }
16968 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16969 max_bit = last_used_bit_t;
16970
16971 field = TREE_CHAIN (field);
16972 }
16973
16974 /* Update the current padding_bits_to_clear using the intersection of the
16975 padding bits of all the fields. */
16976 for (i = *regno; i < max_reg; i++)
16977 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16978
16979 /* Do not keep trailing padding bits; we do not know yet whether this
16980 is the end of the argument. */
16981 mask = ((uint32_t) 1 << max_bit) - 1;
16982 padding_bits_to_clear[max_reg]
16983 |= padding_bits_to_clear_res[max_reg] & mask;
16984
16985 *regno = max_reg;
16986 *last_used_bit = max_bit;
16987 }
16988 else
16989 /* This function should only be used for structs and unions. */
16990 gcc_unreachable ();
16991
16992 return not_to_clear_reg_mask;
16993 }
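/* Worked example (illustrative): for  struct { char a; short b; }
   passed in r0, field A occupies bits 0-7 and field B starts at bit 16,
   so when B is processed offset = 16 and *last_used_bit = 8, giving
   mask = (0xffffffff >> 16) - ((1 << 8) - 1) = 0xff00; bits 8-15 of r0
   are thus recorded in padding_bits_to_clear[0] as padding that must be
   cleared. */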
16994
16995 /* In the context of ARMv8-M Security Extensions, this function is used for both
16996 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16997 registers are used when returning or passing arguments, which is then
16998 returned as a mask. It will also compute a mask to indicate padding/unused
16999 bits for each of these registers, and passes this through the
17000 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17001 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17002 the starting register used to pass this argument or return value is passed
17003 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17004 for struct and union types. */
17005
17006 static unsigned HOST_WIDE_INT
17007 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17008 uint32_t * padding_bits_to_clear)
17009
17010 {
17011 int last_used_bit = 0;
17012 unsigned HOST_WIDE_INT not_to_clear_mask;
17013
17014 if (RECORD_OR_UNION_TYPE_P (arg_type))
17015 {
17016 not_to_clear_mask
17017 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17018 padding_bits_to_clear, 0,
17019 &last_used_bit);
17020
17021
17022 /* If the 'last_used_bit' is not zero, that means we are still using a
17023 part of the last 'regno'. In such cases we must clear the trailing
17024 bits. Otherwise we are not using regno at all and should mark it to
17025 be cleared. */
17026 if (last_used_bit != 0)
17027 padding_bits_to_clear[regno]
17028 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17029 else
17030 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17031 }
17032 else
17033 {
17034 not_to_clear_mask = 0;
17035 /* We are not dealing with structs or unions, so these arguments may be
17036 passed in floating point registers too. In some cases a BLKmode is
17037 used when returning or passing arguments in multiple VFP registers. */
17038 if (GET_MODE (arg_rtx) == BLKmode)
17039 {
17040 int i, arg_regs;
17041 rtx reg;
17042
17043 /* This should really only occur when dealing with the hard-float
17044 ABI. */
17045 gcc_assert (TARGET_HARD_FLOAT_ABI);
17046
17047 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17048 {
17049 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17050 gcc_assert (REG_P (reg));
17051
17052 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17053
17054 /* If we are dealing with DF mode, make sure we don't
17055 clear either of the registers it addresses. */
17056 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17057 if (arg_regs > 1)
17058 {
17059 unsigned HOST_WIDE_INT mask;
17060 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17061 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17062 not_to_clear_mask |= mask;
17063 }
17064 }
17065 }
17066 else
17067 {
17068 /* Otherwise we can rely on the MODE to determine how many registers
17069 are being used by this argument. */
17070 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17071 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17072 if (arg_regs > 1)
17073 {
17074 unsigned HOST_WIDE_INT
17075 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17076 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17077 not_to_clear_mask |= mask;
17078 }
17079 }
17080 }
17081
17082 return not_to_clear_mask;
17083 }
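/* For instance, assuming the usual core register numbering (r0 is
   register 0), an int argument in r0 produces a mask of 0x1 and a long
   long in r0/r1 produces 0x3, so those registers are excluded from the
   clearing performed before the call. */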
17084
17085 /* Clear registers holding secrets before doing a cmse_nonsecure_call or
17086 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
17087 which registers are to be fully cleared, using the value in register
17088 CLEARING_REG if more efficient. The PADDING_BITS_LEN-entry array
17089 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
17090 core registers, with SCRATCH_REG used as a scratch register for that clearing.
17091
17092 NOTE: one of the three following assertions must hold:
17093 - SCRATCH_REG is a low register
17094 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17095 in TO_CLEAR_BITMAP)
17096 - CLEARING_REG is a low register. */
17097
17098 static void
17099 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17100 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17101 {
17102 bool saved_clearing = false;
17103 rtx saved_clearing_reg = NULL_RTX;
17104 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17105
17106 gcc_assert (arm_arch_cmse);
17107
17108 if (!bitmap_empty_p (to_clear_bitmap))
17109 {
17110 minregno = bitmap_first_set_bit (to_clear_bitmap);
17111 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17112 }
17113 clearing_regno = REGNO (clearing_reg);
17114
17115 /* Clear padding bits. */
17116 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17117 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17118 {
17119 uint64_t mask;
17120 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17121
17122 if (padding_bits_to_clear[i] == 0)
17123 continue;
17124
17125 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17126 CLEARING_REG as scratch. */
17127 if (TARGET_THUMB1
17128 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17129 {
17130 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17131 such that we can use clearing_reg to clear the unused bits in the
17132 arguments. */
17133 if ((clearing_regno > maxregno
17134 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17135 && !saved_clearing)
17136 {
17137 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17138 emit_move_insn (scratch_reg, clearing_reg);
17139 saved_clearing = true;
17140 saved_clearing_reg = scratch_reg;
17141 }
17142 scratch_reg = clearing_reg;
17143 }
17144
17145 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17146 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17147 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17148
17149 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17150 mask = (~padding_bits_to_clear[i]) >> 16;
17151 rtx16 = gen_int_mode (16, SImode);
17152 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17153 if (mask)
17154 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17155
17156 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17157 }
17158 if (saved_clearing)
17159 emit_move_insn (clearing_reg, saved_clearing_reg);
17160
17161
17162 /* Clear full registers. */
17163
17164 /* If not marked for clearing, clearing_reg already does not contain
17165 any secret. */
17166 if (clearing_regno <= maxregno
17167 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17168 {
17169 emit_move_insn (clearing_reg, const0_rtx);
17170 emit_use (clearing_reg);
17171 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17172 }
17173
17174 for (regno = minregno; regno <= maxregno; regno++)
17175 {
17176 if (!bitmap_bit_p (to_clear_bitmap, regno))
17177 continue;
17178
17179 if (IS_VFP_REGNUM (regno))
17180 {
17181 /* If regno is an even vfp register and its successor is also to
17182 be cleared, use vmov. */
17183 if (TARGET_VFP_DOUBLE
17184 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17185 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17186 {
17187 emit_move_insn (gen_rtx_REG (DFmode, regno),
17188 CONST1_RTX (DFmode));
17189 emit_use (gen_rtx_REG (DFmode, regno));
17190 regno++;
17191 }
17192 else
17193 {
17194 emit_move_insn (gen_rtx_REG (SFmode, regno),
17195 CONST1_RTX (SFmode));
17196 emit_use (gen_rtx_REG (SFmode, regno));
17197 }
17198 }
17199 else
17200 {
17201 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17202 emit_use (gen_rtx_REG (SImode, regno));
17203 }
17204 }
17205 }
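/* Example of the padding path above: if padding_bits_to_clear[i] is
   0xff00 for r0, the scratch register is loaded with ~0xff00 = 0xffff00ff
   (low half by a move, high half by a 16-bit insert) and r0 is then ANDed
   with it, wiping only the padding bits while preserving the argument
   bits. */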
17206
17207 /* Clears caller-saved registers not used to pass arguments before a
17208 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17209 registers is done in the __gnu_cmse_nonsecure_call libcall.
17210 See libgcc/config/arm/cmse_nonsecure_call.S. */
17211
17212 static void
17213 cmse_nonsecure_call_clear_caller_saved (void)
17214 {
17215 basic_block bb;
17216
17217 FOR_EACH_BB_FN (bb, cfun)
17218 {
17219 rtx_insn *insn;
17220
17221 FOR_BB_INSNS (bb, insn)
17222 {
17223 unsigned address_regnum, regno, maxregno =
17224 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17225 auto_sbitmap to_clear_bitmap (maxregno + 1);
17226 rtx_insn *seq;
17227 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17228 rtx address;
17229 CUMULATIVE_ARGS args_so_far_v;
17230 cumulative_args_t args_so_far;
17231 tree arg_type, fntype;
17232 bool first_param = true;
17233 function_args_iterator args_iter;
17234 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17235
17236 if (!NONDEBUG_INSN_P (insn))
17237 continue;
17238
17239 if (!CALL_P (insn))
17240 continue;
17241
17242 pat = PATTERN (insn);
17243 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17244 call = XVECEXP (pat, 0, 0);
17245
17246 /* Get the real call RTX if the insn sets a value, ie. returns. */
17247 if (GET_CODE (call) == SET)
17248 call = SET_SRC (call);
17249
17250 /* Check if it is a cmse_nonsecure_call. */
17251 unspec = XEXP (call, 0);
17252 if (GET_CODE (unspec) != UNSPEC
17253 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17254 continue;
17255
17256 /* Determine the caller-saved registers we need to clear. */
17257 bitmap_clear (to_clear_bitmap);
17258 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17259
17260 /* Only look at the caller-saved floating point registers in case of
17261 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17262 lazy stores and loads, which clear both caller- and callee-saved
17263 registers. */
17264 if (TARGET_HARD_FLOAT_ABI)
17265 {
17266 auto_sbitmap float_bitmap (maxregno + 1);
17267
17268 bitmap_clear (float_bitmap);
17269 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17270 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17271 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17272 }
17273
17274 /* Make sure the register used to hold the function address is not
17275 cleared. */
17276 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17277 gcc_assert (MEM_P (address));
17278 gcc_assert (REG_P (XEXP (address, 0)));
17279 address_regnum = REGNO (XEXP (address, 0));
17280 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17281 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17282
17283 /* Set basic block of call insn so that df rescan is performed on
17284 insns inserted here. */
17285 set_block_for_insn (insn, bb);
17286 df_set_flags (DF_DEFER_INSN_RESCAN);
17287 start_sequence ();
17288
17289 /* Make sure the scheduler doesn't schedule other insns beyond
17290 here. */
17291 emit_insn (gen_blockage ());
17292
17293 /* Walk through all arguments and clear registers appropriately. */
17295 fntype = TREE_TYPE (MEM_EXPR (address));
17296 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17297 NULL_TREE);
17298 args_so_far = pack_cumulative_args (&args_so_far_v);
17299 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17300 {
17301 rtx arg_rtx;
17302 uint64_t to_clear_args_mask;
17303 machine_mode arg_mode = TYPE_MODE (arg_type);
17304
17305 if (VOID_TYPE_P (arg_type))
17306 continue;
17307
17308 if (!first_param)
17309 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17310 true);
17311
17312 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17313 true);
17314 gcc_assert (REG_P (arg_rtx));
17315 to_clear_args_mask
17316 = compute_not_to_clear_mask (arg_type, arg_rtx,
17317 REGNO (arg_rtx),
17318 &padding_bits_to_clear[0]);
17319 if (to_clear_args_mask)
17320 {
17321 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17322 {
17323 if (to_clear_args_mask & (1ULL << regno))
17324 bitmap_clear_bit (to_clear_bitmap, regno);
17325 }
17326 }
17327
17328 first_param = false;
17329 }
17330
17331 /* We use right shift and left shift to clear the LSB of the address
17332 we jump to instead of using bic, to avoid having to use an extra
17333 register on Thumb-1. */
17334 clearing_reg = XEXP (address, 0);
17335 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17336 emit_insn (gen_rtx_SET (clearing_reg, shift));
17337 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17338 emit_insn (gen_rtx_SET (clearing_reg, shift));
17339
17340 /* Clear caller-saved registers that could leak secrets before doing a
17341 non-secure call. */
17342 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17343 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17344 NUM_ARG_REGS, ip_reg, clearing_reg);
17345
17346 seq = get_insns ();
17347 end_sequence ();
17348 emit_insn_before (seq, insn);
17349 }
17350 }
17351 }
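/* Illustrative summary of the sequence inserted before such a call: the
   register holding the target address has its LSB cleared with a logical
   right/left shift pair, the padding bits of partially used argument
   registers are masked using ip as scratch, and every remaining
   caller-saved register that must be cleared is overwritten by
   cmse_clear_registers, all ahead of the actual call insn. */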
17352
17353 /* Rewrite move insn into subtract of 0 if the condition codes will
17354 be useful in the next conditional jump insn. */
17355
17356 static void
17357 thumb1_reorg (void)
17358 {
17359 basic_block bb;
17360
17361 FOR_EACH_BB_FN (bb, cfun)
17362 {
17363 rtx dest, src;
17364 rtx cmp, op0, op1, set = NULL;
17365 rtx_insn *prev, *insn = BB_END (bb);
17366 bool insn_clobbered = false;
17367
17368 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17369 insn = PREV_INSN (insn);
17370
17371 /* Find the last cbranchsi4_insn in basic block BB. */
17372 if (insn == BB_HEAD (bb)
17373 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17374 continue;
17375
17376 /* Get the register with which we are comparing. */
17377 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17378 op0 = XEXP (cmp, 0);
17379 op1 = XEXP (cmp, 1);
17380
17381 /* Check that comparison is against ZERO. */
17382 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17383 continue;
17384
17385 /* Find the first flag setting insn before INSN in basic block BB. */
17386 gcc_assert (insn != BB_HEAD (bb));
17387 for (prev = PREV_INSN (insn);
17388 (!insn_clobbered
17389 && prev != BB_HEAD (bb)
17390 && (NOTE_P (prev)
17391 || DEBUG_INSN_P (prev)
17392 || ((set = single_set (prev)) != NULL
17393 && get_attr_conds (prev) == CONDS_NOCOND)));
17394 prev = PREV_INSN (prev))
17395 {
17396 if (reg_set_p (op0, prev))
17397 insn_clobbered = true;
17398 }
17399
17400 /* Skip if op0 is clobbered by insn other than prev. */
17401 if (insn_clobbered)
17402 continue;
17403
17404 if (!set)
17405 continue;
17406
17407 dest = SET_DEST (set);
17408 src = SET_SRC (set);
17409 if (!low_register_operand (dest, SImode)
17410 || !low_register_operand (src, SImode))
17411 continue;
17412
17413 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17414 in INSN. Both src and dest of the move insn are checked. */
17415 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17416 {
17417 dest = copy_rtx (dest);
17418 src = copy_rtx (src);
17419 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17420 PATTERN (prev) = gen_rtx_SET (dest, src);
17421 INSN_CODE (prev) = -1;
17422 /* Set test register in INSN to dest. */
17423 XEXP (cmp, 0) = copy_rtx (dest);
17424 INSN_CODE (insn) = -1;
17425 }
17426 }
17427 }
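/* For example (illustrative), given

	movs	r2, r3
	...
	cmp	r3, #0
	bne	.L1

   the move is rewritten as "subs r2, r3, #0", which sets the condition
   codes itself, and the branch is retargeted to test r2, so the explicit
   compare against zero can typically be dropped when the
   compare-and-branch pattern is output. */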
17428
17429 /* Convert instructions to their cc-clobbering variant if possible, since
17430 that allows us to use smaller encodings. */
17431
17432 static void
17433 thumb2_reorg (void)
17434 {
17435 basic_block bb;
17436 regset_head live;
17437
17438 INIT_REG_SET (&live);
17439
17440 /* We are freeing block_for_insn in the toplev to keep compatibility
17441 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17442 compute_bb_for_insn ();
17443 df_analyze ();
17444
17445 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17446
17447 FOR_EACH_BB_FN (bb, cfun)
17448 {
17449 if ((current_tune->disparage_flag_setting_t16_encodings
17450 == tune_params::DISPARAGE_FLAGS_ALL)
17451 && optimize_bb_for_speed_p (bb))
17452 continue;
17453
17454 rtx_insn *insn;
17455 Convert_Action action = SKIP;
17456 Convert_Action action_for_partial_flag_setting
17457 = ((current_tune->disparage_flag_setting_t16_encodings
17458 != tune_params::DISPARAGE_FLAGS_NEITHER)
17459 && optimize_bb_for_speed_p (bb))
17460 ? SKIP : CONV;
17461
17462 COPY_REG_SET (&live, DF_LR_OUT (bb));
17463 df_simulate_initialize_backwards (bb, &live);
17464 FOR_BB_INSNS_REVERSE (bb, insn)
17465 {
17466 if (NONJUMP_INSN_P (insn)
17467 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17468 && GET_CODE (PATTERN (insn)) == SET)
17469 {
17470 action = SKIP;
17471 rtx pat = PATTERN (insn);
17472 rtx dst = XEXP (pat, 0);
17473 rtx src = XEXP (pat, 1);
17474 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17475
17476 if (UNARY_P (src) || BINARY_P (src))
17477 op0 = XEXP (src, 0);
17478
17479 if (BINARY_P (src))
17480 op1 = XEXP (src, 1);
17481
17482 if (low_register_operand (dst, SImode))
17483 {
17484 switch (GET_CODE (src))
17485 {
17486 case PLUS:
17487 /* Adding two registers and storing the result
17488 in the first source is already a 16-bit
17489 operation. */
17490 if (rtx_equal_p (dst, op0)
17491 && register_operand (op1, SImode))
17492 break;
17493
17494 if (low_register_operand (op0, SImode))
17495 {
17496 /* ADDS <Rd>,<Rn>,<Rm> */
17497 if (low_register_operand (op1, SImode))
17498 action = CONV;
17499 /* ADDS <Rdn>,#<imm8> */
17500 /* SUBS <Rdn>,#<imm8> */
17501 else if (rtx_equal_p (dst, op0)
17502 && CONST_INT_P (op1)
17503 && IN_RANGE (INTVAL (op1), -255, 255))
17504 action = CONV;
17505 /* ADDS <Rd>,<Rn>,#<imm3> */
17506 /* SUBS <Rd>,<Rn>,#<imm3> */
17507 else if (CONST_INT_P (op1)
17508 && IN_RANGE (INTVAL (op1), -7, 7))
17509 action = CONV;
17510 }
17511 /* ADCS <Rd>, <Rn> */
17512 else if (GET_CODE (XEXP (src, 0)) == PLUS
17513 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17514 && low_register_operand (XEXP (XEXP (src, 0), 1),
17515 SImode)
17516 && COMPARISON_P (op1)
17517 && cc_register (XEXP (op1, 0), VOIDmode)
17518 && maybe_get_arm_condition_code (op1) == ARM_CS
17519 && XEXP (op1, 1) == const0_rtx)
17520 action = CONV;
17521 break;
17522
17523 case MINUS:
17524 /* RSBS <Rd>,<Rn>,#0
17525 Not handled here: see NEG below. */
17526 /* SUBS <Rd>,<Rn>,#<imm3>
17527 SUBS <Rdn>,#<imm8>
17528 Not handled here: see PLUS above. */
17529 /* SUBS <Rd>,<Rn>,<Rm> */
17530 if (low_register_operand (op0, SImode)
17531 && low_register_operand (op1, SImode))
17532 action = CONV;
17533 break;
17534
17535 case MULT:
17536 /* MULS <Rdm>,<Rn>,<Rdm>
17537 As an exception to the rule, this is only used
17538 when optimizing for size since MULS is slow on all
17539 known implementations. We do not even want to use
17540 MULS in cold code, if optimizing for speed, so we
17541 test the global flag here. */
17542 if (!optimize_size)
17543 break;
17544 /* Fall through. */
17545 case AND:
17546 case IOR:
17547 case XOR:
17548 /* ANDS <Rdn>,<Rm> */
17549 if (rtx_equal_p (dst, op0)
17550 && low_register_operand (op1, SImode))
17551 action = action_for_partial_flag_setting;
17552 else if (rtx_equal_p (dst, op1)
17553 && low_register_operand (op0, SImode))
17554 action = action_for_partial_flag_setting == SKIP
17555 ? SKIP : SWAP_CONV;
17556 break;
17557
17558 case ASHIFTRT:
17559 case ASHIFT:
17560 case LSHIFTRT:
17561 /* ASRS <Rdn>,<Rm> */
17562 /* LSRS <Rdn>,<Rm> */
17563 /* LSLS <Rdn>,<Rm> */
17564 if (rtx_equal_p (dst, op0)
17565 && low_register_operand (op1, SImode))
17566 action = action_for_partial_flag_setting;
17567 /* ASRS <Rd>,<Rm>,#<imm5> */
17568 /* LSRS <Rd>,<Rm>,#<imm5> */
17569 /* LSLS <Rd>,<Rm>,#<imm5> */
17570 else if (low_register_operand (op0, SImode)
17571 && CONST_INT_P (op1)
17572 && IN_RANGE (INTVAL (op1), 0, 31))
17573 action = action_for_partial_flag_setting;
17574 break;
17575
17576 case ROTATERT:
17577 /* RORS <Rdn>,<Rm> */
17578 if (rtx_equal_p (dst, op0)
17579 && low_register_operand (op1, SImode))
17580 action = action_for_partial_flag_setting;
17581 break;
17582
17583 case NOT:
17584 /* MVNS <Rd>,<Rm> */
17585 if (low_register_operand (op0, SImode))
17586 action = action_for_partial_flag_setting;
17587 break;
17588
17589 case NEG:
17590 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17591 if (low_register_operand (op0, SImode))
17592 action = CONV;
17593 break;
17594
17595 case CONST_INT:
17596 /* MOVS <Rd>,#<imm8> */
17597 if (CONST_INT_P (src)
17598 && IN_RANGE (INTVAL (src), 0, 255))
17599 action = action_for_partial_flag_setting;
17600 break;
17601
17602 case REG:
17603 /* MOVS and MOV<c> with registers have different
17604 encodings, so are not relevant here. */
17605 break;
17606
17607 default:
17608 break;
17609 }
17610 }
17611
17612 if (action != SKIP)
17613 {
17614 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17615 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17616 rtvec vec;
17617
17618 if (action == SWAP_CONV)
17619 {
17620 src = copy_rtx (src);
17621 XEXP (src, 0) = op1;
17622 XEXP (src, 1) = op0;
17623 pat = gen_rtx_SET (dst, src);
17624 vec = gen_rtvec (2, pat, clobber);
17625 }
17626 else /* action == CONV */
17627 vec = gen_rtvec (2, pat, clobber);
17628
17629 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17630 INSN_CODE (insn) = -1;
17631 }
17632 }
17633
17634 if (NONDEBUG_INSN_P (insn))
17635 df_simulate_one_insn_backwards (bb, insn, &live);
17636 }
17637 }
17638
17639 CLEAR_REG_SET (&live);
17640 }
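/* For example, "add r2, r0, r1" in a block where the condition codes are
   dead becomes "adds r2, r0, r1", which has a 16-bit encoding; SWAP_CONV
   additionally commutes the operands of AND/IOR/XOR/MULS so that the
   destination matches the first source, as the 16-bit flag-setting forms
   require. */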
17641
17642 /* GCC puts the pool in the wrong place for ARM, since we can only
17643 load addresses a limited distance around the pc. We do some
17644 special munging to move the constant pool values to the correct
17645 point in the code. */
17646 static void
17647 arm_reorg (void)
17648 {
17649 rtx_insn *insn;
17650 HOST_WIDE_INT address = 0;
17651 Mfix * fix;
17652
17653 if (use_cmse)
17654 cmse_nonsecure_call_clear_caller_saved ();
17655 if (TARGET_THUMB1)
17656 thumb1_reorg ();
17657 else if (TARGET_THUMB2)
17658 thumb2_reorg ();
17659
17660 /* Ensure all insns that must be split have been split at this point.
17661 Otherwise, the pool placement code below may compute incorrect
17662 insn lengths. Note that when optimizing, all insns have already
17663 been split at this point. */
17664 if (!optimize)
17665 split_all_insns_noflow ();
17666
17667 /* When the literal pool has been disabled there should be no need to
17668 create one, so make sure we never even try. */
17669 if (arm_disable_literal_pool)
17670 return;
17671
17672 minipool_fix_head = minipool_fix_tail = NULL;
17673
17674 /* The first insn must always be a note, or the code below won't
17675 scan it properly. */
17676 insn = get_insns ();
17677 gcc_assert (NOTE_P (insn));
17678 minipool_pad = 0;
17679
17680 /* Scan all the insns and record the operands that will need fixing. */
17681 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17682 {
17683 if (BARRIER_P (insn))
17684 push_minipool_barrier (insn, address);
17685 else if (INSN_P (insn))
17686 {
17687 rtx_jump_table_data *table;
17688
17689 note_invalid_constants (insn, address, true);
17690 address += get_attr_length (insn);
17691
17692 /* If the insn is a vector jump, add the size of the table
17693 and skip the table. */
17694 if (tablejump_p (insn, NULL, &table))
17695 {
17696 address += get_jump_table_size (table);
17697 insn = table;
17698 }
17699 }
17700 else if (LABEL_P (insn))
17701 /* Add the worst-case padding due to alignment. We don't add
17702 the _current_ padding because the minipool insertions
17703 themselves might change it. */
17704 address += get_label_padding (insn);
17705 }
17706
17707 fix = minipool_fix_head;
17708
17709 /* Now scan the fixups and perform the required changes. */
17710 while (fix)
17711 {
17712 Mfix * ftmp;
17713 Mfix * fdel;
17714 Mfix * last_added_fix;
17715 Mfix * last_barrier = NULL;
17716 Mfix * this_fix;
17717
17718 /* Skip any further barriers before the next fix. */
17719 while (fix && BARRIER_P (fix->insn))
17720 fix = fix->next;
17721
17722 /* No more fixes. */
17723 if (fix == NULL)
17724 break;
17725
17726 last_added_fix = NULL;
17727
17728 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17729 {
17730 if (BARRIER_P (ftmp->insn))
17731 {
17732 if (ftmp->address >= minipool_vector_head->max_address)
17733 break;
17734
17735 last_barrier = ftmp;
17736 }
17737 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17738 break;
17739
17740 last_added_fix = ftmp; /* Keep track of the last fix added. */
17741 }
17742
17743 /* If we found a barrier, drop back to that; any fixes that we
17744 could have reached but come after the barrier will now go in
17745 the next mini-pool. */
17746 if (last_barrier != NULL)
17747 {
17748 /* Reduce the refcount for those fixes that won't go into this
17749 pool after all. */
17750 for (fdel = last_barrier->next;
17751 fdel && fdel != ftmp;
17752 fdel = fdel->next)
17753 {
17754 fdel->minipool->refcount--;
17755 fdel->minipool = NULL;
17756 }
17757
17758 ftmp = last_barrier;
17759 }
17760 else
17761 {
17762 /* ftmp is the first fix that we can't fit into this pool and
17763 there are no natural barriers that we could use. Insert a
17764 new barrier in the code somewhere between the previous
17765 fix and this one, and arrange to jump around it. */
17766 HOST_WIDE_INT max_address;
17767
17768 /* The last item on the list of fixes must be a barrier, so
17769 we can never run off the end of the list of fixes without
17770 last_barrier being set. */
17771 gcc_assert (ftmp);
17772
17773 max_address = minipool_vector_head->max_address;
17774 /* Check that there isn't another fix that is in range that
17775 we couldn't fit into this pool because the pool was
17776 already too large: we need to put the pool before such an
17777 instruction. The pool itself may come just after the
17778 fix because create_fix_barrier also allows space for a
17779 jump instruction. */
17780 if (ftmp->address < max_address)
17781 max_address = ftmp->address + 1;
17782
17783 last_barrier = create_fix_barrier (last_added_fix, max_address);
17784 }
17785
17786 assign_minipool_offsets (last_barrier);
17787
17788 while (ftmp)
17789 {
17790 if (!BARRIER_P (ftmp->insn)
17791 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17792 == NULL))
17793 break;
17794
17795 ftmp = ftmp->next;
17796 }
17797
17798 /* Scan over the fixes we have identified for this pool, fixing them
17799 up and adding the constants to the pool itself. */
17800 for (this_fix = fix; this_fix && ftmp != this_fix;
17801 this_fix = this_fix->next)
17802 if (!BARRIER_P (this_fix->insn))
17803 {
17804 rtx addr
17805 = plus_constant (Pmode,
17806 gen_rtx_LABEL_REF (VOIDmode,
17807 minipool_vector_label),
17808 this_fix->minipool->offset);
17809 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17810 }
17811
17812 dump_minipool (last_barrier->insn);
17813 fix = ftmp;
17814 }
17815
17816 /* From now on we must synthesize any constants that we can't handle
17817 directly. This can happen if the RTL gets split during final
17818 instruction generation. */
17819 cfun->machine->after_arm_reorg = 1;
17820
17821 /* Free the minipool memory. */
17822 obstack_free (&minipool_obstack, minipool_startobj);
17823 }
17824 \f
17825 /* Routines to output assembly language. */
17826
17827 /* Return string representation of passed in real value. */
17828 static const char *
17829 fp_const_from_val (REAL_VALUE_TYPE *r)
17830 {
17831 if (!fp_consts_inited)
17832 init_fp_table ();
17833
17834 gcc_assert (real_equal (r, &value_fp0));
17835 return "0";
17836 }
17837
17838 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17839 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17840 insn is in the list, and UPDATE is true iff the list contains an
17841 explicit update of the base register. */
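/* For example, a conditional pop of r4, r5 and the PC with an SP update is
   printed as "pop<cond> {r4, r5, pc}", while a return from an interrupt
   handler uses an LDM form with "^" appended so that SPSR is restored along
   with the PC.  */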
17842 void
17843 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17844 bool update)
17845 {
17846 int i;
17847 char pattern[100];
17848 int offset;
17849 const char *conditional;
17850 int num_saves = XVECLEN (operands[0], 0);
17851 unsigned int regno;
17852 unsigned int regno_base = REGNO (operands[1]);
17853 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17854
17855 offset = 0;
17856 offset += update ? 1 : 0;
17857 offset += return_pc ? 1 : 0;
17858
17859 /* Is the base register in the list? */
17860 for (i = offset; i < num_saves; i++)
17861 {
17862 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17863 /* If SP is in the list, then the base register must be SP. */
17864 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17865 /* If base register is in the list, there must be no explicit update. */
17866 if (regno == regno_base)
17867 gcc_assert (!update);
17868 }
17869
17870 conditional = reverse ? "%?%D0" : "%?%d0";
17871 /* Can't use POP if returning from an interrupt. */
17872 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17873 sprintf (pattern, "pop%s\t{", conditional);
17874 else
17875 {
17876 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17877 It's just a convention; their semantics are identical. */
17878 if (regno_base == SP_REGNUM)
17879 sprintf (pattern, "ldmfd%s\t", conditional);
17880 else if (update)
17881 sprintf (pattern, "ldmia%s\t", conditional);
17882 else
17883 sprintf (pattern, "ldm%s\t", conditional);
17884
17885 strcat (pattern, reg_names[regno_base]);
17886 if (update)
17887 strcat (pattern, "!, {");
17888 else
17889 strcat (pattern, ", {");
17890 }
17891
17892 /* Output the first destination register. */
17893 strcat (pattern,
17894 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17895
17896 /* Output the rest of the destination registers. */
17897 for (i = offset + 1; i < num_saves; i++)
17898 {
17899 strcat (pattern, ", ");
17900 strcat (pattern,
17901 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17902 }
17903
17904 strcat (pattern, "}");
17905
17906 if (interrupt_p && return_pc)
17907 strcat (pattern, "^");
17908
17909 output_asm_insn (pattern, &cond);
17910 }
17911
17912
17913 /* Output the assembly for a store multiple. */
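/* For a push to the stack this expands to, e.g., "vpush%?.64 {d8, d9, d10}";
   for any other base register the "vstmdb%?.64 <Rn>!, {...}" form is used.  */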
17914
17915 const char *
17916 vfp_output_vstmd (rtx * operands)
17917 {
17918 char pattern[100];
17919 int p;
17920 int base;
17921 int i;
17922 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17923 ? XEXP (operands[0], 0)
17924 : XEXP (XEXP (operands[0], 0), 0);
17925 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17926
17927 if (push_p)
17928 strcpy (pattern, "vpush%?.64\t{%P1");
17929 else
17930 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17931
17932 p = strlen (pattern);
17933
17934 gcc_assert (REG_P (operands[1]));
17935
17936 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17937 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17938 {
17939 p += sprintf (&pattern[p], ", d%d", base + i);
17940 }
17941 strcpy (&pattern[p], "}");
17942
17943 output_asm_insn (pattern, operands);
17944 return "";
17945 }
17946
17947
17948 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17949 number of bytes pushed. */
17950
17951 static int
17952 vfp_emit_fstmd (int base_reg, int count)
17953 {
17954 rtx par;
17955 rtx dwarf;
17956 rtx tmp, reg;
17957 int i;
17958
17959 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17960 register pairs are stored by a store multiple insn. We avoid this
17961 by pushing an extra pair. */
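/* For example, with !arm_arch6 a request to push the pair {d8, d9} is widened
   to {d8, d9, d10}; if the pair sits at the very top of the register file the
   block is extended downwards instead so that the extra register is still
   valid.  */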
17962 if (count == 2 && !arm_arch6)
17963 {
17964 if (base_reg == LAST_VFP_REGNUM - 3)
17965 base_reg -= 2;
17966 count++;
17967 }
17968
17969 /* FSTMD may not store more than 16 doubleword registers at once. Split
17970 larger stores into multiple parts (up to a maximum of two, in
17971 practice). */
17972 if (count > 16)
17973 {
17974 int saved;
17975 /* NOTE: base_reg is an internal register number, so each D register
17976 counts as 2. */
17977 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17978 saved += vfp_emit_fstmd (base_reg, 16);
17979 return saved;
17980 }
17981
17982 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17983 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17984
17985 reg = gen_rtx_REG (DFmode, base_reg);
17986 base_reg += 2;
17987
17988 XVECEXP (par, 0, 0)
17989 = gen_rtx_SET (gen_frame_mem
17990 (BLKmode,
17991 gen_rtx_PRE_MODIFY (Pmode,
17992 stack_pointer_rtx,
17993 plus_constant
17994 (Pmode, stack_pointer_rtx,
17995 - (count * 8)))
17996 ),
17997 gen_rtx_UNSPEC (BLKmode,
17998 gen_rtvec (1, reg),
17999 UNSPEC_PUSH_MULT));
18000
18001 tmp = gen_rtx_SET (stack_pointer_rtx,
18002 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18003 RTX_FRAME_RELATED_P (tmp) = 1;
18004 XVECEXP (dwarf, 0, 0) = tmp;
18005
18006 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18007 RTX_FRAME_RELATED_P (tmp) = 1;
18008 XVECEXP (dwarf, 0, 1) = tmp;
18009
18010 for (i = 1; i < count; i++)
18011 {
18012 reg = gen_rtx_REG (DFmode, base_reg);
18013 base_reg += 2;
18014 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18015
18016 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18017 plus_constant (Pmode,
18018 stack_pointer_rtx,
18019 i * 8)),
18020 reg);
18021 RTX_FRAME_RELATED_P (tmp) = 1;
18022 XVECEXP (dwarf, 0, i + 1) = tmp;
18023 }
18024
18025 par = emit_insn (par);
18026 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18027 RTX_FRAME_RELATED_P (par) = 1;
18028
18029 return count * 8;
18030 }
18031
18032 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
18033 has the cmse_nonsecure_call attribute; return false otherwise. */
18034
18035 bool
18036 detect_cmse_nonsecure_call (tree addr)
18037 {
18038 if (!addr)
18039 return FALSE;
18040
18041 tree fntype = TREE_TYPE (addr);
18042 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18043 TYPE_ATTRIBUTES (fntype)))
18044 return TRUE;
18045 return FALSE;
18046 }
18047
18048
18049 /* Emit a call instruction with pattern PAT. ADDR is the address of
18050 the call target. */
18051
18052 void
18053 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18054 {
18055 rtx insn;
18056
18057 insn = emit_call_insn (pat);
18058
18059 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18060 If the call might use such an entry, add a use of the PIC register
18061 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18062 if (TARGET_VXWORKS_RTP
18063 && flag_pic
18064 && !sibcall
18065 && GET_CODE (addr) == SYMBOL_REF
18066 && (SYMBOL_REF_DECL (addr)
18067 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18068 : !SYMBOL_REF_LOCAL_P (addr)))
18069 {
18070 require_pic_register ();
18071 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18072 }
18073
18074 if (TARGET_AAPCS_BASED)
18075 {
18076 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18077 linker. We need to add an IP clobber to allow setting
18078 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18079 is not needed since it's a fixed register. */
18080 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18081 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18082 }
18083 }
18084
18085 /* Output a 'call' insn. */
18086 const char *
18087 output_call (rtx *operands)
18088 {
18089 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18090
18091 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
18092 if (REGNO (operands[0]) == LR_REGNUM)
18093 {
18094 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18095 output_asm_insn ("mov%?\t%0, %|lr", operands);
18096 }
18097
18098 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18099
18100 if (TARGET_INTERWORK || arm_arch4t)
18101 output_asm_insn ("bx%?\t%0", operands);
18102 else
18103 output_asm_insn ("mov%?\t%|pc, %0", operands);
18104
18105 return "";
18106 }
18107
18108 /* Output a move from ARM registers to ARM registers of a long double.
18109 OPERANDS[0] is the destination.
18110 OPERANDS[1] is the source. */
18111 const char *
18112 output_mov_long_double_arm_from_arm (rtx *operands)
18113 {
18114 /* We have to be careful here because the two might overlap. */
18115 int dest_start = REGNO (operands[0]);
18116 int src_start = REGNO (operands[1]);
18117 rtx ops[2];
18118 int i;
18119
18120 if (dest_start < src_start)
18121 {
18122 for (i = 0; i < 3; i++)
18123 {
18124 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18125 ops[1] = gen_rtx_REG (SImode, src_start + i);
18126 output_asm_insn ("mov%?\t%0, %1", ops);
18127 }
18128 }
18129 else
18130 {
18131 for (i = 2; i >= 0; i--)
18132 {
18133 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18134 ops[1] = gen_rtx_REG (SImode, src_start + i);
18135 output_asm_insn ("mov%?\t%0, %1", ops);
18136 }
18137 }
18138
18139 return "";
18140 }
18141
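/* Emit a pair of instructions to load the 32-bit value SRC into DEST: a set
   of the low 16 bits followed, when the high half is nonzero, by a
   ZERO_EXTRACT set of the upper 16 bits (a movw/movt style pair), or a
   HIGH/LO_SUM pair for non-constant sources.  */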
18142 void
18143 arm_emit_movpair (rtx dest, rtx src)
18144 {
18145 /* If the src is an immediate, simplify it. */
18146 if (CONST_INT_P (src))
18147 {
18148 HOST_WIDE_INT val = INTVAL (src);
18149 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18150 if ((val >> 16) & 0x0000ffff)
18151 {
18152 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18153 GEN_INT (16)),
18154 GEN_INT ((val >> 16) & 0x0000ffff));
18155 rtx_insn *insn = get_last_insn ();
18156 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18157 }
18158 return;
18159 }
18160 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18161 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18162 rtx_insn *insn = get_last_insn ();
18163 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18164 }
18165
18166 /* Output a move between double words. It must be REG<-MEM
18167 or MEM<-REG. */
18168 const char *
18169 output_move_double (rtx *operands, bool emit, int *count)
18170 {
18171 enum rtx_code code0 = GET_CODE (operands[0]);
18172 enum rtx_code code1 = GET_CODE (operands[1]);
18173 rtx otherops[3];
18174 if (count)
18175 *count = 1;
18176
18177 /* The only case when this might happen is when
18178 you are looking at the length of a DImode instruction
18179 that has an invalid constant in it. */
18180 if (code0 == REG && code1 != MEM)
18181 {
18182 gcc_assert (!emit);
18183 *count = 2;
18184 return "";
18185 }
18186
18187 if (code0 == REG)
18188 {
18189 unsigned int reg0 = REGNO (operands[0]);
18190
18191 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18192
18193 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18194
18195 switch (GET_CODE (XEXP (operands[1], 0)))
18196 {
18197 case REG:
18198
18199 if (emit)
18200 {
18201 if (TARGET_LDRD
18202 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18203 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18204 else
18205 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18206 }
18207 break;
18208
18209 case PRE_INC:
18210 gcc_assert (TARGET_LDRD);
18211 if (emit)
18212 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18213 break;
18214
18215 case PRE_DEC:
18216 if (emit)
18217 {
18218 if (TARGET_LDRD)
18219 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18220 else
18221 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18222 }
18223 break;
18224
18225 case POST_INC:
18226 if (emit)
18227 {
18228 if (TARGET_LDRD)
18229 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18230 else
18231 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18232 }
18233 break;
18234
18235 case POST_DEC:
18236 gcc_assert (TARGET_LDRD);
18237 if (emit)
18238 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18239 break;
18240
18241 case PRE_MODIFY:
18242 case POST_MODIFY:
18243 /* Autoincrement addressing modes should never have overlapping
18244 base and destination registers, and overlapping index registers
18245 are already prohibited, so this doesn't need to worry about
18246 fix_cm3_ldrd. */
18247 otherops[0] = operands[0];
18248 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18249 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18250
18251 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18252 {
18253 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18254 {
18255 /* Registers overlap so split out the increment. */
18256 if (emit)
18257 {
18258 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18259 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18260 }
18261 if (count)
18262 *count = 2;
18263 }
18264 else
18265 {
18266 /* Use a single insn if we can.
18267 FIXME: IWMMXT allows offsets larger than ldrd can
18268 handle, fix these up with a pair of ldr. */
18269 if (TARGET_THUMB2
18270 || !CONST_INT_P (otherops[2])
18271 || (INTVAL (otherops[2]) > -256
18272 && INTVAL (otherops[2]) < 256))
18273 {
18274 if (emit)
18275 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18276 }
18277 else
18278 {
18279 if (emit)
18280 {
18281 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18282 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18283 }
18284 if (count)
18285 *count = 2;
18286
18287 }
18288 }
18289 }
18290 else
18291 {
18292 /* Use a single insn if we can.
18293 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18294 fix these up with a pair of ldr. */
18295 if (TARGET_THUMB2
18296 || !CONST_INT_P (otherops[2])
18297 || (INTVAL (otherops[2]) > -256
18298 && INTVAL (otherops[2]) < 256))
18299 {
18300 if (emit)
18301 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18302 }
18303 else
18304 {
18305 if (emit)
18306 {
18307 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18308 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18309 }
18310 if (count)
18311 *count = 2;
18312 }
18313 }
18314 break;
18315
18316 case LABEL_REF:
18317 case CONST:
18318 /* We might be able to use ldrd %0, %1 here. However the range is
18319 different to ldr/adr, and it is broken on some ARMv7-M
18320 implementations. */
18321 /* Use the second register of the pair to avoid problematic
18322 overlap. */
18323 otherops[1] = operands[1];
18324 if (emit)
18325 output_asm_insn ("adr%?\t%0, %1", otherops);
18326 operands[1] = otherops[0];
18327 if (emit)
18328 {
18329 if (TARGET_LDRD)
18330 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18331 else
18332 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18333 }
18334
18335 if (count)
18336 *count = 2;
18337 break;
18338
18339 /* ??? This needs checking for thumb2. */
18340 default:
18341 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18342 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18343 {
18344 otherops[0] = operands[0];
18345 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18346 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18347
18348 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18349 {
18350 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18351 {
18352 switch ((int) INTVAL (otherops[2]))
18353 {
18354 case -8:
18355 if (emit)
18356 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18357 return "";
18358 case -4:
18359 if (TARGET_THUMB2)
18360 break;
18361 if (emit)
18362 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18363 return "";
18364 case 4:
18365 if (TARGET_THUMB2)
18366 break;
18367 if (emit)
18368 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18369 return "";
18370 }
18371 }
18372 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18373 operands[1] = otherops[0];
18374 if (TARGET_LDRD
18375 && (REG_P (otherops[2])
18376 || TARGET_THUMB2
18377 || (CONST_INT_P (otherops[2])
18378 && INTVAL (otherops[2]) > -256
18379 && INTVAL (otherops[2]) < 256)))
18380 {
18381 if (reg_overlap_mentioned_p (operands[0],
18382 otherops[2]))
18383 {
18384 /* Swap base and index registers over to
18385 avoid a conflict. */
18386 std::swap (otherops[1], otherops[2]);
18387 }
18388 /* If both registers conflict, it will usually
18389 have been fixed by a splitter. */
18390 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18391 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18392 {
18393 if (emit)
18394 {
18395 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18396 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18397 }
18398 if (count)
18399 *count = 2;
18400 }
18401 else
18402 {
18403 otherops[0] = operands[0];
18404 if (emit)
18405 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18406 }
18407 return "";
18408 }
18409
18410 if (CONST_INT_P (otherops[2]))
18411 {
18412 if (emit)
18413 {
18414 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18415 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18416 else
18417 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18418 }
18419 }
18420 else
18421 {
18422 if (emit)
18423 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18424 }
18425 }
18426 else
18427 {
18428 if (emit)
18429 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18430 }
18431
18432 if (count)
18433 *count = 2;
18434
18435 if (TARGET_LDRD)
18436 return "ldrd%?\t%0, [%1]";
18437
18438 return "ldmia%?\t%1, %M0";
18439 }
18440 else
18441 {
18442 otherops[1] = adjust_address (operands[1], SImode, 4);
18443 /* Take care of overlapping base/data reg. */
18444 if (reg_mentioned_p (operands[0], operands[1]))
18445 {
18446 if (emit)
18447 {
18448 output_asm_insn ("ldr%?\t%0, %1", otherops);
18449 output_asm_insn ("ldr%?\t%0, %1", operands);
18450 }
18451 if (count)
18452 *count = 2;
18453
18454 }
18455 else
18456 {
18457 if (emit)
18458 {
18459 output_asm_insn ("ldr%?\t%0, %1", operands);
18460 output_asm_insn ("ldr%?\t%0, %1", otherops);
18461 }
18462 if (count)
18463 *count = 2;
18464 }
18465 }
18466 }
18467 }
18468 else
18469 {
18470 /* Constraints should ensure this. */
18471 gcc_assert (code0 == MEM && code1 == REG);
18472 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18473 || (TARGET_ARM && TARGET_LDRD));
18474
18475 switch (GET_CODE (XEXP (operands[0], 0)))
18476 {
18477 case REG:
18478 if (emit)
18479 {
18480 if (TARGET_LDRD)
18481 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18482 else
18483 output_asm_insn ("stm%?\t%m0, %M1", operands);
18484 }
18485 break;
18486
18487 case PRE_INC:
18488 gcc_assert (TARGET_LDRD);
18489 if (emit)
18490 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18491 break;
18492
18493 case PRE_DEC:
18494 if (emit)
18495 {
18496 if (TARGET_LDRD)
18497 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18498 else
18499 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18500 }
18501 break;
18502
18503 case POST_INC:
18504 if (emit)
18505 {
18506 if (TARGET_LDRD)
18507 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18508 else
18509 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18510 }
18511 break;
18512
18513 case POST_DEC:
18514 gcc_assert (TARGET_LDRD);
18515 if (emit)
18516 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18517 break;
18518
18519 case PRE_MODIFY:
18520 case POST_MODIFY:
18521 otherops[0] = operands[1];
18522 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18523 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18524
18525 /* IWMMXT allows offsets larger than ldrd can handle, so
18526 fix these up with a pair of ldr. */
18527 if (!TARGET_THUMB2
18528 && CONST_INT_P (otherops[2])
18529 && (INTVAL(otherops[2]) <= -256
18530 || INTVAL(otherops[2]) >= 256))
18531 {
18532 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18533 {
18534 if (emit)
18535 {
18536 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18537 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18538 }
18539 if (count)
18540 *count = 2;
18541 }
18542 else
18543 {
18544 if (emit)
18545 {
18546 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18547 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18548 }
18549 if (count)
18550 *count = 2;
18551 }
18552 }
18553 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18554 {
18555 if (emit)
18556 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18557 }
18558 else
18559 {
18560 if (emit)
18561 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18562 }
18563 break;
18564
18565 case PLUS:
18566 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18567 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18568 {
18569 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18570 {
18571 case -8:
18572 if (emit)
18573 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18574 return "";
18575
18576 case -4:
18577 if (TARGET_THUMB2)
18578 break;
18579 if (emit)
18580 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18581 return "";
18582
18583 case 4:
18584 if (TARGET_THUMB2)
18585 break;
18586 if (emit)
18587 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18588 return "";
18589 }
18590 }
18591 if (TARGET_LDRD
18592 && (REG_P (otherops[2])
18593 || TARGET_THUMB2
18594 || (CONST_INT_P (otherops[2])
18595 && INTVAL (otherops[2]) > -256
18596 && INTVAL (otherops[2]) < 256)))
18597 {
18598 otherops[0] = operands[1];
18599 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18600 if (emit)
18601 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18602 return "";
18603 }
18604 /* Fall through */
18605
18606 default:
18607 otherops[0] = adjust_address (operands[0], SImode, 4);
18608 otherops[1] = operands[1];
18609 if (emit)
18610 {
18611 output_asm_insn ("str%?\t%1, %0", operands);
18612 output_asm_insn ("str%?\t%H1, %0", otherops);
18613 }
18614 if (count)
18615 *count = 2;
18616 }
18617 }
18618
18619 return "";
18620 }
18621
18622 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18623 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18624
18625 const char *
18626 output_move_quad (rtx *operands)
18627 {
18628 if (REG_P (operands[0]))
18629 {
18630 /* Load, or reg->reg move. */
18631
18632 if (MEM_P (operands[1]))
18633 {
18634 switch (GET_CODE (XEXP (operands[1], 0)))
18635 {
18636 case REG:
18637 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18638 break;
18639
18640 case LABEL_REF:
18641 case CONST:
18642 output_asm_insn ("adr%?\t%0, %1", operands);
18643 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18644 break;
18645
18646 default:
18647 gcc_unreachable ();
18648 }
18649 }
18650 else
18651 {
18652 rtx ops[2];
18653 int dest, src, i;
18654
18655 gcc_assert (REG_P (operands[1]));
18656
18657 dest = REGNO (operands[0]);
18658 src = REGNO (operands[1]);
18659
18660 /* This seems pretty dumb, but hopefully GCC won't try to do it
18661 very often. */
18662 if (dest < src)
18663 for (i = 0; i < 4; i++)
18664 {
18665 ops[0] = gen_rtx_REG (SImode, dest + i);
18666 ops[1] = gen_rtx_REG (SImode, src + i);
18667 output_asm_insn ("mov%?\t%0, %1", ops);
18668 }
18669 else
18670 for (i = 3; i >= 0; i--)
18671 {
18672 ops[0] = gen_rtx_REG (SImode, dest + i);
18673 ops[1] = gen_rtx_REG (SImode, src + i);
18674 output_asm_insn ("mov%?\t%0, %1", ops);
18675 }
18676 }
18677 }
18678 else
18679 {
18680 gcc_assert (MEM_P (operands[0]));
18681 gcc_assert (REG_P (operands[1]));
18682 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18683
18684 switch (GET_CODE (XEXP (operands[0], 0)))
18685 {
18686 case REG:
18687 output_asm_insn ("stm%?\t%m0, %M1", operands);
18688 break;
18689
18690 default:
18691 gcc_unreachable ();
18692 }
18693 }
18694
18695 return "";
18696 }
18697
18698 /* Output a VFP load or store instruction. */
18699
18700 const char *
18701 output_move_vfp (rtx *operands)
18702 {
18703 rtx reg, mem, addr, ops[2];
18704 int load = REG_P (operands[0]);
18705 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18706 int sp = (!TARGET_VFP_FP16INST
18707 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18708 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18709 const char *templ;
18710 char buff[50];
18711 machine_mode mode;
18712
18713 reg = operands[!load];
18714 mem = operands[load];
18715
18716 mode = GET_MODE (reg);
18717
18718 gcc_assert (REG_P (reg));
18719 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18720 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18721 || mode == SFmode
18722 || mode == DFmode
18723 || mode == HImode
18724 || mode == SImode
18725 || mode == DImode
18726 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18727 gcc_assert (MEM_P (mem));
18728
18729 addr = XEXP (mem, 0);
18730
18731 switch (GET_CODE (addr))
18732 {
18733 case PRE_DEC:
18734 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18735 ops[0] = XEXP (addr, 0);
18736 ops[1] = reg;
18737 break;
18738
18739 case POST_INC:
18740 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18741 ops[0] = XEXP (addr, 0);
18742 ops[1] = reg;
18743 break;
18744
18745 default:
18746 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18747 ops[0] = reg;
18748 ops[1] = mem;
18749 break;
18750 }
18751
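  /* E.g. a double-precision load from a plain address expands to
     "vldr%?.64 <Dd>, <mem>", while the post-increment form becomes
     "vldmia%?.64 <Rn>!, {<Dd>}".  */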
18752 sprintf (buff, templ,
18753 load ? "ld" : "st",
18754 dp ? "64" : sp ? "32" : "16",
18755 dp ? "P" : "",
18756 integer_p ? "\t%@ int" : "");
18757 output_asm_insn (buff, ops);
18758
18759 return "";
18760 }
18761
18762 /* Output a Neon double-word or quad-word load or store, or a load
18763 or store for larger structure modes.
18764
18765 WARNING: The ordering of elements is weird in big-endian mode,
18766 because the EABI requires that vectors stored in memory appear
18767 as though they were stored by a VSTM instruction.
18768 GCC RTL defines element ordering based on in-memory order.
18769 This can be different from the architectural ordering of elements
18770 within a NEON register. The intrinsics defined in arm_neon.h use the
18771 NEON register element ordering, not the GCC RTL element ordering.
18772
18773 For example, the in-memory ordering of a big-endian quadword
18774 vector with 16-bit elements when stored from register pair {d0,d1}
18775 will be (lowest address first, d0[N] is NEON register element N):
18776
18777 [d0[3], d0[2], d0[1], d0[0], d1[3], d1[2], d1[1], d1[0]]
18778
18779 When necessary, quadword registers (dN, dN+1) are moved to ARM
18780 registers from rN in the order:
18781
18782 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18783
18784 So that STM/LDM can be used on vectors in ARM registers, and the
18785 same memory layout will result as if VSTM/VLDM were used.
18786
18787 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18788 possible, which allows use of appropriate alignment tags.
18789 Note that the choice of "64" is independent of the actual vector
18790 element size; this size simply ensures that the behavior is
18791 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18792
18793 Due to limitations of those instructions, use of VST1.64/VLD1.64
18794 is not possible if:
18795 - the address contains PRE_DEC, or
18796 - the mode refers to more than 4 double-word registers
18797
18798 In those cases, it would be possible to replace VSTM/VLDM by a
18799 sequence of instructions; this is not currently implemented since
18800 this is not certain to actually improve performance. */
18801
18802 const char *
18803 output_move_neon (rtx *operands)
18804 {
18805 rtx reg, mem, addr, ops[2];
18806 int regno, nregs, load = REG_P (operands[0]);
18807 const char *templ;
18808 char buff[50];
18809 machine_mode mode;
18810
18811 reg = operands[!load];
18812 mem = operands[load];
18813
18814 mode = GET_MODE (reg);
18815
18816 gcc_assert (REG_P (reg));
18817 regno = REGNO (reg);
18818 nregs = REG_NREGS (reg) / 2;
18819 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18820 || NEON_REGNO_OK_FOR_QUAD (regno));
18821 gcc_assert (VALID_NEON_DREG_MODE (mode)
18822 || VALID_NEON_QREG_MODE (mode)
18823 || VALID_NEON_STRUCT_MODE (mode));
18824 gcc_assert (MEM_P (mem));
18825
18826 addr = XEXP (mem, 0);
18827
18828 /* Strip off const from addresses like (const (plus (...))). */
18829 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18830 addr = XEXP (addr, 0);
18831
18832 switch (GET_CODE (addr))
18833 {
18834 case POST_INC:
18835 /* We have to use vldm / vstm for too-large modes. */
18836 if (nregs > 4)
18837 {
18838 templ = "v%smia%%?\t%%0!, %%h1";
18839 ops[0] = XEXP (addr, 0);
18840 }
18841 else
18842 {
18843 templ = "v%s1.64\t%%h1, %%A0";
18844 ops[0] = mem;
18845 }
18846 ops[1] = reg;
18847 break;
18848
18849 case PRE_DEC:
18850 /* We have to use vldm / vstm in this case, since there is no
18851 pre-decrement form of the vld1 / vst1 instructions. */
18852 templ = "v%smdb%%?\t%%0!, %%h1";
18853 ops[0] = XEXP (addr, 0);
18854 ops[1] = reg;
18855 break;
18856
18857 case POST_MODIFY:
18858 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18859 gcc_unreachable ();
18860
18861 case REG:
18862 /* We have to use vldm / vstm for too-large modes. */
18863 if (nregs > 1)
18864 {
18865 if (nregs > 4)
18866 templ = "v%smia%%?\t%%m0, %%h1";
18867 else
18868 templ = "v%s1.64\t%%h1, %%A0";
18869
18870 ops[0] = mem;
18871 ops[1] = reg;
18872 break;
18873 }
18874 /* Fall through. */
18875 case LABEL_REF:
18876 case PLUS:
18877 {
18878 int i;
18879 int overlap = -1;
18880 for (i = 0; i < nregs; i++)
18881 {
18882 /* We're only using DImode here because it's a convenient size. */
18883 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18884 ops[1] = adjust_address (mem, DImode, 8 * i);
18885 if (reg_overlap_mentioned_p (ops[0], mem))
18886 {
18887 gcc_assert (overlap == -1);
18888 overlap = i;
18889 }
18890 else
18891 {
18892 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18893 output_asm_insn (buff, ops);
18894 }
18895 }
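      /* The transfer whose register overlaps the address is deferred until
	 last so that, on a load, the address register is not clobbered before
	 the remaining words have been moved.  */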
18896 if (overlap != -1)
18897 {
18898 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18899 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18900 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18901 output_asm_insn (buff, ops);
18902 }
18903
18904 return "";
18905 }
18906
18907 default:
18908 gcc_unreachable ();
18909 }
18910
18911 sprintf (buff, templ, load ? "ld" : "st");
18912 output_asm_insn (buff, ops);
18913
18914 return "";
18915 }
18916
18917 /* Compute and return the length of neon_mov<mode>, where <mode> is
18918 one of VSTRUCT modes: EI, OI, CI or XI. */
18919 int
18920 arm_attr_length_move_neon (rtx_insn *insn)
18921 {
18922 rtx reg, mem, addr;
18923 int load;
18924 machine_mode mode;
18925
18926 extract_insn_cached (insn);
18927
18928 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18929 {
18930 mode = GET_MODE (recog_data.operand[0]);
18931 switch (mode)
18932 {
18933 case E_EImode:
18934 case E_OImode:
18935 return 8;
18936 case E_CImode:
18937 return 12;
18938 case E_XImode:
18939 return 16;
18940 default:
18941 gcc_unreachable ();
18942 }
18943 }
18944
18945 load = REG_P (recog_data.operand[0]);
18946 reg = recog_data.operand[!load];
18947 mem = recog_data.operand[load];
18948
18949 gcc_assert (MEM_P (mem));
18950
18951 addr = XEXP (mem, 0);
18952
18953 /* Strip off const from addresses like (const (plus (...))). */
18954 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18955 addr = XEXP (addr, 0);
18956
18957 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18958 {
18959 int insns = REG_NREGS (reg) / 2;
18960 return insns * 4;
18961 }
18962 else
18963 return 4;
18964 }
18965
18966 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18967 return zero. */
18968
18969 int
18970 arm_address_offset_is_imm (rtx_insn *insn)
18971 {
18972 rtx mem, addr;
18973
18974 extract_insn_cached (insn);
18975
18976 if (REG_P (recog_data.operand[0]))
18977 return 0;
18978
18979 mem = recog_data.operand[0];
18980
18981 gcc_assert (MEM_P (mem));
18982
18983 addr = XEXP (mem, 0);
18984
18985 if (REG_P (addr)
18986 || (GET_CODE (addr) == PLUS
18987 && REG_P (XEXP (addr, 0))
18988 && CONST_INT_P (XEXP (addr, 1))))
18989 return 1;
18990 else
18991 return 0;
18992 }
18993
18994 /* Output an ADD r, s, #n where n may be too big for one instruction.
18995 If n is zero and the destination register is the same as the source, output nothing. */
18996 const char *
18997 output_add_immediate (rtx *operands)
18998 {
18999 HOST_WIDE_INT n = INTVAL (operands[2]);
19000
19001 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19002 {
19003 if (n < 0)
19004 output_multi_immediate (operands,
19005 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19006 -n);
19007 else
19008 output_multi_immediate (operands,
19009 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19010 n);
19011 }
19012
19013 return "";
19014 }
19015
19016 /* Output a multiple immediate operation.
19017 OPERANDS is the vector of operands referred to in the output patterns.
19018 INSTR1 is the output pattern to use for the first constant.
19019 INSTR2 is the output pattern to use for subsequent constants.
19020 IMMED_OP is the index of the constant slot in OPERANDS.
19021 N is the constant value. */
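/* For example, if N is 0xfff000 the loop below first emits INSTR1 with an
   immediate of 0xff000 and then INSTR2 with an immediate of 0xf00000; each
   chunk is an 8-bit value shifted by an even amount and therefore encodable
   as an ARM immediate.  */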
19022 static const char *
19023 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19024 int immed_op, HOST_WIDE_INT n)
19025 {
19026 #if HOST_BITS_PER_WIDE_INT > 32
19027 n &= 0xffffffff;
19028 #endif
19029
19030 if (n == 0)
19031 {
19032 /* Quick and easy output. */
19033 operands[immed_op] = const0_rtx;
19034 output_asm_insn (instr1, operands);
19035 }
19036 else
19037 {
19038 int i;
19039 const char * instr = instr1;
19040
19041 /* Note that n is never zero here (which would give no output). */
19042 for (i = 0; i < 32; i += 2)
19043 {
19044 if (n & (3 << i))
19045 {
19046 operands[immed_op] = GEN_INT (n & (255 << i));
19047 output_asm_insn (instr, operands);
19048 instr = instr2;
19049 i += 6;
19050 }
19051 }
19052 }
19053
19054 return "";
19055 }
19056
19057 /* Return the name of a shifter operation. */
19058 static const char *
19059 arm_shift_nmem(enum rtx_code code)
19060 {
19061 switch (code)
19062 {
19063 case ASHIFT:
19064 return ARM_LSL_NAME;
19065
19066 case ASHIFTRT:
19067 return "asr";
19068
19069 case LSHIFTRT:
19070 return "lsr";
19071
19072 case ROTATERT:
19073 return "ror";
19074
19075 default:
19076 abort();
19077 }
19078 }
19079
19080 /* Return the appropriate ARM instruction for the operation code.
19081 The returned result should not be overwritten. OP is the rtx of the
19082 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19083 was shifted. */
19084 const char *
19085 arithmetic_instr (rtx op, int shift_first_arg)
19086 {
19087 switch (GET_CODE (op))
19088 {
19089 case PLUS:
19090 return "add";
19091
19092 case MINUS:
19093 return shift_first_arg ? "rsb" : "sub";
19094
19095 case IOR:
19096 return "orr";
19097
19098 case XOR:
19099 return "eor";
19100
19101 case AND:
19102 return "and";
19103
19104 case ASHIFT:
19105 case ASHIFTRT:
19106 case LSHIFTRT:
19107 case ROTATERT:
19108 return arm_shift_nmem(GET_CODE(op));
19109
19110 default:
19111 gcc_unreachable ();
19112 }
19113 }
19114
19115 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19116 for the operation code. The returned result should not be overwritten.
19117 OP is the rtx code of the shift.
19118 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19119 constant shift amount otherwise. */
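/* For example, (ashift x 3) yields "lsl" with *AMOUNTP set to 3, and
   (mult x 8) is folded to the same "lsl" by 3.  */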
19120 static const char *
19121 shift_op (rtx op, HOST_WIDE_INT *amountp)
19122 {
19123 const char * mnem;
19124 enum rtx_code code = GET_CODE (op);
19125
19126 switch (code)
19127 {
19128 case ROTATE:
19129 if (!CONST_INT_P (XEXP (op, 1)))
19130 {
19131 output_operand_lossage ("invalid shift operand");
19132 return NULL;
19133 }
19134
19135 code = ROTATERT;
19136 *amountp = 32 - INTVAL (XEXP (op, 1));
19137 mnem = "ror";
19138 break;
19139
19140 case ASHIFT:
19141 case ASHIFTRT:
19142 case LSHIFTRT:
19143 case ROTATERT:
19144 mnem = arm_shift_nmem(code);
19145 if (CONST_INT_P (XEXP (op, 1)))
19146 {
19147 *amountp = INTVAL (XEXP (op, 1));
19148 }
19149 else if (REG_P (XEXP (op, 1)))
19150 {
19151 *amountp = -1;
19152 return mnem;
19153 }
19154 else
19155 {
19156 output_operand_lossage ("invalid shift operand");
19157 return NULL;
19158 }
19159 break;
19160
19161 case MULT:
19162 /* We never have to worry about the amount being other than a
19163 power of 2, since this case can never be reloaded from a reg. */
19164 if (!CONST_INT_P (XEXP (op, 1)))
19165 {
19166 output_operand_lossage ("invalid shift operand");
19167 return NULL;
19168 }
19169
19170 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19171
19172 /* Amount must be a power of two. */
19173 if (*amountp & (*amountp - 1))
19174 {
19175 output_operand_lossage ("invalid shift operand");
19176 return NULL;
19177 }
19178
19179 *amountp = exact_log2 (*amountp);
19180 gcc_assert (IN_RANGE (*amountp, 0, 31));
19181 return ARM_LSL_NAME;
19182
19183 default:
19184 output_operand_lossage ("invalid shift operand");
19185 return NULL;
19186 }
19187
19188 /* This is not 100% correct, but follows from the desire to merge
19189 multiplication by a power of 2 with the recognizer for a
19190 shift. >=32 is not a valid shift for "lsl", so we must try and
19191 output a shift that produces the correct arithmetical result.
19192 Using lsr #32 is identical except for the fact that the carry bit
19193 is not set correctly if we set the flags; but we never use the
19194 carry bit from such an operation, so we can ignore that. */
19195 if (code == ROTATERT)
19196 /* Rotate is just modulo 32. */
19197 *amountp &= 31;
19198 else if (*amountp != (*amountp & 31))
19199 {
19200 if (code == ASHIFT)
19201 mnem = "lsr";
19202 *amountp = 32;
19203 }
19204
19205 /* Shifts of 0 are no-ops. */
19206 if (*amountp == 0)
19207 return NULL;
19208
19209 return mnem;
19210 }
19211
19212 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19213 because /bin/as is horribly restrictive. The judgement about
19214 whether or not each character is 'printable' (and can be output as
19215 is) or not (and must be printed with an octal escape) must be made
19216 with reference to the *host* character set -- the situation is
19217 similar to that discussed in the comments above pp_c_char in
19218 c-pretty-print.c. */
19219
19220 #define MAX_ASCII_LEN 51
19221
19222 void
19223 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19224 {
19225 int i;
19226 int len_so_far = 0;
19227
19228 fputs ("\t.ascii\t\"", stream);
19229
19230 for (i = 0; i < len; i++)
19231 {
19232 int c = p[i];
19233
19234 if (len_so_far >= MAX_ASCII_LEN)
19235 {
19236 fputs ("\"\n\t.ascii\t\"", stream);
19237 len_so_far = 0;
19238 }
19239
19240 if (ISPRINT (c))
19241 {
19242 if (c == '\\' || c == '\"')
19243 {
19244 putc ('\\', stream);
19245 len_so_far++;
19246 }
19247 putc (c, stream);
19248 len_so_far++;
19249 }
19250 else
19251 {
19252 fprintf (stream, "\\%03o", c);
19253 len_so_far += 4;
19254 }
19255 }
19256
19257 fputs ("\"\n", stream);
19258 }
19259 \f
19260 /* Whether a register is callee saved or not. This is necessary because high
19261 registers are marked as caller saved when optimizing for size on Thumb-1
19262 targets, despite actually being callee saved, in order to avoid using them. */
19263 #define callee_saved_reg_p(reg) \
19264 (!call_used_regs[reg] \
19265 || (TARGET_THUMB1 && optimize_size \
19266 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19267
19268 /* Compute the register save mask for registers 0 through 12
19269 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19270
19271 static unsigned long
19272 arm_compute_save_reg0_reg12_mask (void)
19273 {
19274 unsigned long func_type = arm_current_func_type ();
19275 unsigned long save_reg_mask = 0;
19276 unsigned int reg;
19277
19278 if (IS_INTERRUPT (func_type))
19279 {
19280 unsigned int max_reg;
19281 /* Interrupt functions must not corrupt any registers,
19282 even call clobbered ones. If this is a leaf function
19283 we can just examine the registers used by the RTL, but
19284 otherwise we have to assume that whatever function is
19285 called might clobber anything, and so we have to save
19286 all the call-clobbered registers as well. */
19287 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19288 /* FIQ handlers have registers r8 - r12 banked, so
19289 we only need to check r0 - r7. Normal ISRs only
19290 bank r14 and r15, so we must check up to r12.
19291 r13 is the stack pointer, which is always preserved,
19292 so we do not need to consider it here. */
19293 max_reg = 7;
19294 else
19295 max_reg = 12;
19296
19297 for (reg = 0; reg <= max_reg; reg++)
19298 if (df_regs_ever_live_p (reg)
19299 || (! crtl->is_leaf && call_used_regs[reg]))
19300 save_reg_mask |= (1 << reg);
19301
19302 /* Also save the pic base register if necessary. */
19303 if (flag_pic
19304 && !TARGET_SINGLE_PIC_BASE
19305 && arm_pic_register != INVALID_REGNUM
19306 && crtl->uses_pic_offset_table)
19307 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19308 }
19309 else if (IS_VOLATILE(func_type))
19310 {
19311 /* For noreturn functions we historically omitted register saves
19312 altogether. However this really messes up debugging. As a
19313 compromise, save just the frame pointers. Combined with the link
19314 register saved elsewhere this should be sufficient to get
19315 a backtrace. */
19316 if (frame_pointer_needed)
19317 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19318 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19319 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19320 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19321 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19322 }
19323 else
19324 {
19325 /* In the normal case we only need to save those registers
19326 which are call saved and which are used by this function. */
19327 for (reg = 0; reg <= 11; reg++)
19328 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19329 save_reg_mask |= (1 << reg);
19330
19331 /* Handle the frame pointer as a special case. */
19332 if (frame_pointer_needed)
19333 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19334
19335 /* If we aren't loading the PIC register,
19336 don't stack it even though it may be live. */
19337 if (flag_pic
19338 && !TARGET_SINGLE_PIC_BASE
19339 && arm_pic_register != INVALID_REGNUM
19340 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19341 || crtl->uses_pic_offset_table))
19342 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19343
19344 /* The prologue will copy SP into R0, so save it. */
19345 if (IS_STACKALIGN (func_type))
19346 save_reg_mask |= 1;
19347 }
19348
19349 /* Save registers so the exception handler can modify them. */
19350 if (crtl->calls_eh_return)
19351 {
19352 unsigned int i;
19353
19354 for (i = 0; ; i++)
19355 {
19356 reg = EH_RETURN_DATA_REGNO (i);
19357 if (reg == INVALID_REGNUM)
19358 break;
19359 save_reg_mask |= 1 << reg;
19360 }
19361 }
19362
19363 return save_reg_mask;
19364 }
19365
19366 /* Return true if r3 is live at the start of the function. */
19367
19368 static bool
19369 arm_r3_live_at_start_p (void)
19370 {
19371 /* Just look at cfg info, which is still close enough to correct at this
19372 point. This gives false positives for broken functions that might use
19373 uninitialized data that happens to be allocated in r3, but who cares? */
19374 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19375 }
19376
19377 /* Compute the number of bytes used to store the static chain register on the
19378 stack, above the stack frame. We need to know this accurately to get the
19379 alignment of the rest of the stack frame correct. */
19380
19381 static int
19382 arm_compute_static_chain_stack_bytes (void)
19383 {
19384 /* Once the value is updated from the init value of -1, do not
19385 re-compute. */
19386 if (cfun->machine->static_chain_stack_bytes != -1)
19387 return cfun->machine->static_chain_stack_bytes;
19388
19389 /* See the defining assertion in arm_expand_prologue. */
19390 if (IS_NESTED (arm_current_func_type ())
19391 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19392 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19393 || flag_stack_clash_protection)
19394 && !df_regs_ever_live_p (LR_REGNUM)))
19395 && arm_r3_live_at_start_p ()
19396 && crtl->args.pretend_args_size == 0)
19397 return 4;
19398
19399 return 0;
19400 }
19401
19402 /* Compute a bit mask of which core registers need to be
19403 saved on the stack for the current function.
19404 This is used by arm_compute_frame_layout, which may add extra registers. */
19405
19406 static unsigned long
19407 arm_compute_save_core_reg_mask (void)
19408 {
19409 unsigned int save_reg_mask = 0;
19410 unsigned long func_type = arm_current_func_type ();
19411 unsigned int reg;
19412
19413 if (IS_NAKED (func_type))
19414 /* This should never really happen. */
19415 return 0;
19416
19417 /* If we are creating a stack frame, then we must save the frame pointer,
19418 IP (which will hold the old stack pointer), LR and the PC. */
19419 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19420 save_reg_mask |=
19421 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19422 | (1 << IP_REGNUM)
19423 | (1 << LR_REGNUM)
19424 | (1 << PC_REGNUM);
19425
19426 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19427
19428 /* Decide if we need to save the link register.
19429 Interrupt routines have their own banked link register,
19430 so they never need to save it.
19431 Otherwise if we do not use the link register we do not need to save
19432 it. If we are pushing other registers onto the stack however, we
19433 can save an instruction in the epilogue by pushing the link register
19434 now and then popping it back into the PC. This incurs extra memory
19435 accesses though, so we only do it when optimizing for size, and only
19436 if we know that we will not need a fancy return sequence. */
19437 if (df_regs_ever_live_p (LR_REGNUM)
19438 || (save_reg_mask
19439 && optimize_size
19440 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19441 && !crtl->tail_call_emit
19442 && !crtl->calls_eh_return))
19443 save_reg_mask |= 1 << LR_REGNUM;
19444
19445 if (cfun->machine->lr_save_eliminated)
19446 save_reg_mask &= ~ (1 << LR_REGNUM);
19447
19448 if (TARGET_REALLY_IWMMXT
19449 && ((bit_count (save_reg_mask)
19450 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19451 arm_compute_static_chain_stack_bytes())
19452 ) % 2) != 0)
19453 {
19454 /* The total number of registers that are going to be pushed
19455 onto the stack is odd. We need to ensure that the stack
19456 is 64-bit aligned before we start to save iWMMXt registers,
19457 and also before we start to create locals. (A local variable
19458 might be a double or long long which we will load/store using
19459 an iWMMXt instruction). Therefore we need to push another
19460 ARM register, so that the stack will be 64-bit aligned. We
19461 try to avoid using the arg registers (r0 - r3) as they might be
19462 used to pass values in a tail call. */
19463 for (reg = 4; reg <= 12; reg++)
19464 if ((save_reg_mask & (1 << reg)) == 0)
19465 break;
19466
19467 if (reg <= 12)
19468 save_reg_mask |= (1 << reg);
19469 else
19470 {
19471 cfun->machine->sibcall_blocked = 1;
19472 save_reg_mask |= (1 << 3);
19473 }
19474 }
19475
19476 /* We may need to push an additional register for use initializing the
19477 PIC base register. */
19478 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19479 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19480 {
19481 reg = thumb_find_work_register (1 << 4);
19482 if (!call_used_regs[reg])
19483 save_reg_mask |= (1 << reg);
19484 }
19485
19486 return save_reg_mask;
19487 }
19488
19489 /* Compute a bit mask of which core registers need to be
19490 saved on the stack for the current function. */
19491 static unsigned long
19492 thumb1_compute_save_core_reg_mask (void)
19493 {
19494 unsigned long mask;
19495 unsigned reg;
19496
19497 mask = 0;
19498 for (reg = 0; reg < 12; reg ++)
19499 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19500 mask |= 1 << reg;
19501
19502 /* Handle the frame pointer as a special case. */
19503 if (frame_pointer_needed)
19504 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19505
19506 if (flag_pic
19507 && !TARGET_SINGLE_PIC_BASE
19508 && arm_pic_register != INVALID_REGNUM
19509 && crtl->uses_pic_offset_table)
19510 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19511
19512 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19513 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19514 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19515
19516 /* LR will also be pushed if any lo regs are pushed. */
19517 if (mask & 0xff || thumb_force_lr_save ())
19518 mask |= (1 << LR_REGNUM);
19519
19520 /* Make sure we have a low work register if we need one.
19521 We will need one if we are going to push a high register,
19522 but we are not currently intending to push a low register. */
19523 if ((mask & 0xff) == 0
19524 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19525 {
19526 /* Use thumb_find_work_register to choose which register
19527 we will use. If the register is live then we will
19528 have to push it. Use LAST_LO_REGNUM as our fallback
19529 choice for the register to select. */
19530 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19531 /* Make sure the register returned by thumb_find_work_register is
19532 not part of the return value. */
19533 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19534 reg = LAST_LO_REGNUM;
19535
19536 if (callee_saved_reg_p (reg))
19537 mask |= 1 << reg;
19538 }
19539
19540 /* The 504 below is 8 bytes less than 512 because there are two possible
19541 alignment words. We can't tell here if they will be present or not, so we
19542 have to play it safe and assume that they are. */
19543 if ((CALLER_INTERWORKING_SLOT_SIZE +
19544 ROUND_UP_WORD (get_frame_size ()) +
19545 crtl->outgoing_args_size) >= 504)
19546 {
19547 /* This is the same as the code in thumb1_expand_prologue() which
19548 determines which register to use for stack decrement. */
19549 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19550 if (mask & (1 << reg))
19551 break;
19552
19553 if (reg > LAST_LO_REGNUM)
19554 {
19555 /* Make sure we have a register available for stack decrement. */
19556 mask |= 1 << LAST_LO_REGNUM;
19557 }
19558 }
19559
19560 return mask;
19561 }
19562
19563
19564 /* Return the number of bytes required to save VFP registers. */
19565 static int
19566 arm_get_vfp_saved_size (void)
19567 {
19568 unsigned int regno;
19569 int count;
19570 int saved;
19571
19572 saved = 0;
19573 /* Space for saved VFP registers. */
19574 if (TARGET_HARD_FLOAT)
19575 {
19576 count = 0;
19577 for (regno = FIRST_VFP_REGNUM;
19578 regno < LAST_VFP_REGNUM;
19579 regno += 2)
19580 {
19581 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19582 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19583 {
19584 if (count > 0)
19585 {
19586 /* Work around the ARM10 VFPr1 bug. */
19587 if (count == 2 && !arm_arch6)
19588 count++;
19589 saved += count * 8;
19590 }
19591 count = 0;
19592 }
19593 else
19594 count++;
19595 }
19596 if (count > 0)
19597 {
19598 if (count == 2 && !arm_arch6)
19599 count++;
19600 saved += count * 8;
19601 }
19602 }
19603 return saved;
19604 }
19605
19606
19607 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19608 everything bar the final return instruction. If simple_return is true,
19609 then do not output the epilogue, because it has already been emitted in RTL.
19610
19611 Note: do not forget to update length attribute of corresponding insn pattern
19612 when changing assembly output (e.g. the length attribute of
19613 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19614 register clearing sequences). */
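/* For example, a leaf function with no saved registers returns with a single
   "bx lr" (or "mov pc, lr" where BX is unavailable), while a function that
   pushed {r4, lr} typically returns with "pop {r4, pc}".  */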
19615 const char *
19616 output_return_instruction (rtx operand, bool really_return, bool reverse,
19617 bool simple_return)
19618 {
19619 char conditional[10];
19620 char instr[100];
19621 unsigned reg;
19622 unsigned long live_regs_mask;
19623 unsigned long func_type;
19624 arm_stack_offsets *offsets;
19625
19626 func_type = arm_current_func_type ();
19627
19628 if (IS_NAKED (func_type))
19629 return "";
19630
19631 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19632 {
19633 /* If this function was declared non-returning, and we have
19634 found a tail call, then we have to trust that the called
19635 function won't return. */
19636 if (really_return)
19637 {
19638 rtx ops[2];
19639
19640 /* Otherwise, trap an attempted return by aborting. */
19641 ops[0] = operand;
19642 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19643 : "abort");
19644 assemble_external_libcall (ops[1]);
19645 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19646 }
19647
19648 return "";
19649 }
19650
19651 gcc_assert (!cfun->calls_alloca || really_return);
19652
19653 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19654
19655 cfun->machine->return_used_this_function = 1;
19656
19657 offsets = arm_get_frame_offsets ();
19658 live_regs_mask = offsets->saved_regs_mask;
19659
19660 if (!simple_return && live_regs_mask)
19661 {
19662 const char * return_reg;
19663
19664 /* If we do not have any special requirements for function exit
19665 (e.g. interworking) then we can load the return address
19666 directly into the PC. Otherwise we must load it into LR. */
19667 if (really_return
19668 && !IS_CMSE_ENTRY (func_type)
19669 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19670 return_reg = reg_names[PC_REGNUM];
19671 else
19672 return_reg = reg_names[LR_REGNUM];
19673
19674 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19675 {
19676 /* There are three possible reasons for the IP register
19677 being saved: 1) a stack frame was created, in which case
19678 IP contains the old stack pointer, or 2) an ISR routine
19679 corrupted it, or 3) it was saved to align the stack on
19680 iWMMXt. In case 1, restore IP into SP; otherwise just
19681 restore IP. */
19682 if (frame_pointer_needed)
19683 {
19684 live_regs_mask &= ~ (1 << IP_REGNUM);
19685 live_regs_mask |= (1 << SP_REGNUM);
19686 }
19687 else
19688 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19689 }
19690
19691 /* On some ARM architectures it is faster to use LDR rather than
19692 LDM to load a single register. On other architectures, the
19693 cost is the same. In 26 bit mode, or for exception handlers,
19694 we have to use LDM to load the PC so that the CPSR is also
19695 restored. */
19696 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19697 if (live_regs_mask == (1U << reg))
19698 break;
19699
19700 if (reg <= LAST_ARM_REGNUM
19701 && (reg != LR_REGNUM
19702 || ! really_return
19703 || ! IS_INTERRUPT (func_type)))
19704 {
19705 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19706 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19707 }
19708 else
19709 {
19710 char *p;
19711 int first = 1;
19712
19713 /* Generate the load multiple instruction to restore the
19714 registers. Note we can get here, even if
19715 frame_pointer_needed is true, but only if sp already
19716 points to the base of the saved core registers. */
19717 if (live_regs_mask & (1 << SP_REGNUM))
19718 {
19719 unsigned HOST_WIDE_INT stack_adjust;
19720
19721 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19722 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19723
19724 if (stack_adjust && arm_arch5t && TARGET_ARM)
19725 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19726 else
19727 {
19728 /* If we can't use ldmib (SA110 bug),
19729 then try to pop r3 instead. */
19730 if (stack_adjust)
19731 live_regs_mask |= 1 << 3;
19732
19733 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19734 }
19735 }
19736 /* For interrupt returns we have to use an LDM rather than
19737 a POP so that we can use the exception return variant. */
19738 else if (IS_INTERRUPT (func_type))
19739 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19740 else
19741 sprintf (instr, "pop%s\t{", conditional);
19742
19743 p = instr + strlen (instr);
19744
19745 for (reg = 0; reg <= SP_REGNUM; reg++)
19746 if (live_regs_mask & (1 << reg))
19747 {
19748 int l = strlen (reg_names[reg]);
19749
19750 if (first)
19751 first = 0;
19752 else
19753 {
19754 memcpy (p, ", ", 2);
19755 p += 2;
19756 }
19757
19758 memcpy (p, "%|", 2);
19759 memcpy (p + 2, reg_names[reg], l);
19760 p += l + 2;
19761 }
19762
19763 if (live_regs_mask & (1 << LR_REGNUM))
19764 {
19765 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19766 /* If returning from an interrupt, restore the CPSR. */
19767 if (IS_INTERRUPT (func_type))
19768 strcat (p, "^");
19769 }
19770 else
19771 strcpy (p, "}");
19772 }
19773
19774 output_asm_insn (instr, & operand);
19775
19776 /* See if we need to generate an extra instruction to
19777 perform the actual function return. */
19778 if (really_return
19779 && func_type != ARM_FT_INTERWORKED
19780 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19781 {
19782 /* The return has already been handled
19783 by loading the LR into the PC. */
19784 return "";
19785 }
19786 }
19787
19788 if (really_return)
19789 {
19790 switch ((int) ARM_FUNC_TYPE (func_type))
19791 {
19792 case ARM_FT_ISR:
19793 case ARM_FT_FIQ:
19794 /* ??? This is wrong for unified assembly syntax. */
19795 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19796 break;
19797
19798 case ARM_FT_INTERWORKED:
19799 gcc_assert (arm_arch5t || arm_arch4t);
19800 sprintf (instr, "bx%s\t%%|lr", conditional);
19801 break;
19802
19803 case ARM_FT_EXCEPTION:
19804 /* ??? This is wrong for unified assembly syntax. */
19805 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19806 break;
19807
19808 default:
19809 if (IS_CMSE_ENTRY (func_type))
19810 {
19811 /* Check if we have to clear the 'GE bits', which are only used if
19812 parallel add and subtraction instructions are available. */
19813 if (TARGET_INT_SIMD)
19814 snprintf (instr, sizeof (instr),
19815 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19816 else
19817 snprintf (instr, sizeof (instr),
19818 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19819
19820 output_asm_insn (instr, & operand);
19821 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19822 {
19823 /* Clear the cumulative exception-status bits (0-4,7) and the
19824 condition code bits (28-31) of the FPSCR. We need to
19825 remember to clear the first scratch register used (IP) and
19826 save and restore the second (r4). */
19827 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19828 output_asm_insn (instr, & operand);
19829 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19830 output_asm_insn (instr, & operand);
19831 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19832 output_asm_insn (instr, & operand);
19833 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19834 output_asm_insn (instr, & operand);
19835 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19836 output_asm_insn (instr, & operand);
19837 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19838 output_asm_insn (instr, & operand);
19839 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19840 output_asm_insn (instr, & operand);
19841 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19842 output_asm_insn (instr, & operand);
19843 }
19844 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19845 }
19846 /* Use bx if it's available. */
19847 else if (arm_arch5t || arm_arch4t)
19848 sprintf (instr, "bx%s\t%%|lr", conditional);
19849 else
19850 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19851 break;
19852 }
19853
19854 output_asm_insn (instr, & operand);
19855 }
19856
19857 return "";
19858 }
19859
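/* As an illustration only (the exact sequence depends on the saved-register
   mask, the function type and the target architecture), a plain ARM-state
   function that saved {r4, r5, lr} would typically return with a single
   instruction such as

	pop	{r4, r5, pc}

   whereas an interworking or CMSE entry return reloads LR instead and
   finishes with "bx lr" or "bxns lr" respectively.  */
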
19860 /* Output in FILE asm statements needed to declare the NAME of the function
19861 defined by its DECL node. */
19862
19863 void
19864 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19865 {
19866 size_t cmse_name_len;
19867 char *cmse_name = 0;
19868 char cmse_prefix[] = "__acle_se_";
19869
19870 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19871 extra function label for each function with the 'cmse_nonsecure_entry'
19872 attribute. This extra function label should be prepended with
19873 '__acle_se_', telling the linker that it needs to create secure gateway
19874 veneers for this function. */
19875 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19876 DECL_ATTRIBUTES (decl)))
19877 {
19878 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19879 cmse_name = XALLOCAVEC (char, cmse_name_len);
19880 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19881 targetm.asm_out.globalize_label (file, cmse_name);
19882
19883 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19884 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19885 }
19886
19887 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19888 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19889 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19890 ASM_OUTPUT_LABEL (file, name);
19891
19892 if (cmse_name)
19893 ASM_OUTPUT_LABEL (file, cmse_name);
19894
19895 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19896 }
19897
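/* For a hypothetical entry function "foo" compiled with -mcmse and marked
   with the cmse_nonsecure_entry attribute, the routine above emits roughly:

	.global	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
   foo:
   __acle_se_foo:

   so that the linker can create a secure gateway veneer for it.  */
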
19898 /* Write the function name into the code section, directly preceding
19899 the function prologue.
19900
19901 Code will be output similar to this:
19902 t0
19903 .ascii "arm_poke_function_name", 0
19904 .align
19905 t1
19906 .word 0xff000000 + (t1 - t0)
19907 arm_poke_function_name
19908 mov ip, sp
19909 stmfd sp!, {fp, ip, lr, pc}
19910 sub fp, ip, #4
19911
19912 When performing a stack backtrace, code can inspect the value
19913 of 'pc' stored at 'fp' + 0. If the trace function then looks
19914 at location pc - 12 and the top 8 bits are set, then we know
19915 that there is a function name embedded immediately preceding this
19916 location, whose padded length is ((pc[-3]) & 0x00ffffff).
19917
19918 We assume that pc is declared as a pointer to an unsigned long.
19919
19920 It is of no benefit to output the function name if we are assembling
19921 a leaf function. These function types will not contain a stack
19922 backtrace structure, therefore it is not possible to determine the
19923 function name. */
19924 void
19925 arm_poke_function_name (FILE *stream, const char *name)
19926 {
19927 unsigned long alignlength;
19928 unsigned long length;
19929 rtx x;
19930
19931 length = strlen (name) + 1;
19932 alignlength = ROUND_UP_WORD (length);
19933
19934 ASM_OUTPUT_ASCII (stream, name, length);
19935 ASM_OUTPUT_ALIGN (stream, 2);
19936 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19937 assemble_aligned_integer (UNITS_PER_WORD, x);
19938 }
19939
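/* A backtrace routine could recover the embedded name along these lines
   (an illustrative sketch, not part of GCC), where "pc" is the saved
   program counter fetched from the frame, declared as a pointer to an
   unsigned long as described above:

	unsigned long marker = pc[-3];
	if ((marker & 0xff000000) == 0xff000000)
	  {
	    unsigned long padded_len = marker & 0x00ffffff;
	    const char *name = (const char *) &pc[-3] - padded_len;
	  }

   padded_len being the word-aligned length stored by the routine above.  */
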
19940 /* Place some comments into the assembler stream
19941 describing the current function. */
19942 static void
19943 arm_output_function_prologue (FILE *f)
19944 {
19945 unsigned long func_type;
19946
19947 /* Sanity check. */
19948 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19949
19950 func_type = arm_current_func_type ();
19951
19952 switch ((int) ARM_FUNC_TYPE (func_type))
19953 {
19954 default:
19955 case ARM_FT_NORMAL:
19956 break;
19957 case ARM_FT_INTERWORKED:
19958 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19959 break;
19960 case ARM_FT_ISR:
19961 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19962 break;
19963 case ARM_FT_FIQ:
19964 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19965 break;
19966 case ARM_FT_EXCEPTION:
19967 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19968 break;
19969 }
19970
19971 if (IS_NAKED (func_type))
19972 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19973
19974 if (IS_VOLATILE (func_type))
19975 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19976
19977 if (IS_NESTED (func_type))
19978 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19979 if (IS_STACKALIGN (func_type))
19980 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19981 if (IS_CMSE_ENTRY (func_type))
19982 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19983
19984 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
19985 (HOST_WIDE_INT) crtl->args.size,
19986 crtl->args.pretend_args_size,
19987 (HOST_WIDE_INT) get_frame_size ());
19988
19989 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19990 frame_pointer_needed,
19991 cfun->machine->uses_anonymous_args);
19992
19993 if (cfun->machine->lr_save_eliminated)
19994 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19995
19996 if (crtl->calls_eh_return)
19997 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19998
19999 }
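
/* The annotations above show up in the assembly output as comments of the
   form (values are illustrative only):

	@ Function supports interworking.
	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0  */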
20000
20001 static void
20002 arm_output_function_epilogue (FILE *)
20003 {
20004 arm_stack_offsets *offsets;
20005
20006 if (TARGET_THUMB1)
20007 {
20008 int regno;
20009
20010 /* Emit any call-via-reg trampolines that are needed for v4t support
20011 of call_reg and call_value_reg type insns. */
20012 for (regno = 0; regno < LR_REGNUM; regno++)
20013 {
20014 rtx label = cfun->machine->call_via[regno];
20015
20016 if (label != NULL)
20017 {
20018 switch_to_section (function_section (current_function_decl));
20019 targetm.asm_out.internal_label (asm_out_file, "L",
20020 CODE_LABEL_NUMBER (label));
20021 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20022 }
20023 }
20024
20025 /* ??? Probably not safe to set this here, since it assumes that a
20026 function will be emitted as assembly immediately after we generate
20027 RTL for it. This does not happen for inline functions. */
20028 cfun->machine->return_used_this_function = 0;
20029 }
20030 else /* TARGET_32BIT */
20031 {
20032 /* We need to take into account any stack-frame rounding. */
20033 offsets = arm_get_frame_offsets ();
20034
20035 gcc_assert (!use_return_insn (FALSE, NULL)
20036 || (cfun->machine->return_used_this_function != 0)
20037 || offsets->saved_regs == offsets->outgoing_args
20038 || frame_pointer_needed);
20039 }
20040 }
20041
20042 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20043 STR and STRD. If an even number of registers is being pushed, one
20044 STRD pattern is created for each register pair. If an odd number of
20045 registers is pushed, emit an initial STR followed by
20046 as many STRD instructions as are needed. This works best when the
20047 stack is initially 64-bit aligned (the normal case), since it
20048 ensures that each STRD is also 64-bit aligned. */
20049 static void
20050 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20051 {
20052 int num_regs = 0;
20053 int i;
20054 int regno;
20055 rtx par = NULL_RTX;
20056 rtx dwarf = NULL_RTX;
20057 rtx tmp;
20058 bool first = true;
20059
20060 num_regs = bit_count (saved_regs_mask);
20061
20062 /* Must be at least one register to save, and can't save SP or PC. */
20063 gcc_assert (num_regs > 0 && num_regs <= 14);
20064 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20065 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20066
20067 /* Create sequence for DWARF info. All the frame-related data for
20068 debugging is held in this wrapper. */
20069 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20070
20071 /* Describe the stack adjustment. */
20072 tmp = gen_rtx_SET (stack_pointer_rtx,
20073 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20074 RTX_FRAME_RELATED_P (tmp) = 1;
20075 XVECEXP (dwarf, 0, 0) = tmp;
20076
20077 /* Find the first register. */
20078 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20079 ;
20080
20081 i = 0;
20082
20083 /* If there's an odd number of registers to push, start off by
20084 pushing a single register. This ensures that subsequent strd
20085 operations are dword aligned (assuming that SP was originally
20086 64-bit aligned). */
20087 if ((num_regs & 1) != 0)
20088 {
20089 rtx reg, mem, insn;
20090
20091 reg = gen_rtx_REG (SImode, regno);
20092 if (num_regs == 1)
20093 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20094 stack_pointer_rtx));
20095 else
20096 mem = gen_frame_mem (Pmode,
20097 gen_rtx_PRE_MODIFY
20098 (Pmode, stack_pointer_rtx,
20099 plus_constant (Pmode, stack_pointer_rtx,
20100 -4 * num_regs)));
20101
20102 tmp = gen_rtx_SET (mem, reg);
20103 RTX_FRAME_RELATED_P (tmp) = 1;
20104 insn = emit_insn (tmp);
20105 RTX_FRAME_RELATED_P (insn) = 1;
20106 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20107 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20108 RTX_FRAME_RELATED_P (tmp) = 1;
20109 i++;
20110 regno++;
20111 XVECEXP (dwarf, 0, i) = tmp;
20112 first = false;
20113 }
20114
20115 while (i < num_regs)
20116 if (saved_regs_mask & (1 << regno))
20117 {
20118 rtx reg1, reg2, mem1, mem2;
20119 rtx tmp0, tmp1, tmp2;
20120 int regno2;
20121
20122 /* Find the register to pair with this one. */
20123 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20124 regno2++)
20125 ;
20126
20127 reg1 = gen_rtx_REG (SImode, regno);
20128 reg2 = gen_rtx_REG (SImode, regno2);
20129
20130 if (first)
20131 {
20132 rtx insn;
20133
20134 first = false;
20135 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20136 stack_pointer_rtx,
20137 -4 * num_regs));
20138 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20139 stack_pointer_rtx,
20140 -4 * (num_regs - 1)));
20141 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20142 plus_constant (Pmode, stack_pointer_rtx,
20143 -4 * (num_regs)));
20144 tmp1 = gen_rtx_SET (mem1, reg1);
20145 tmp2 = gen_rtx_SET (mem2, reg2);
20146 RTX_FRAME_RELATED_P (tmp0) = 1;
20147 RTX_FRAME_RELATED_P (tmp1) = 1;
20148 RTX_FRAME_RELATED_P (tmp2) = 1;
20149 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20150 XVECEXP (par, 0, 0) = tmp0;
20151 XVECEXP (par, 0, 1) = tmp1;
20152 XVECEXP (par, 0, 2) = tmp2;
20153 insn = emit_insn (par);
20154 RTX_FRAME_RELATED_P (insn) = 1;
20155 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20156 }
20157 else
20158 {
20159 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20160 stack_pointer_rtx,
20161 4 * i));
20162 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20163 stack_pointer_rtx,
20164 4 * (i + 1)));
20165 tmp1 = gen_rtx_SET (mem1, reg1);
20166 tmp2 = gen_rtx_SET (mem2, reg2);
20167 RTX_FRAME_RELATED_P (tmp1) = 1;
20168 RTX_FRAME_RELATED_P (tmp2) = 1;
20169 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20170 XVECEXP (par, 0, 0) = tmp1;
20171 XVECEXP (par, 0, 1) = tmp2;
20172 emit_insn (par);
20173 }
20174
20175 /* Create unwind information. This is an approximation. */
20176 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20177 plus_constant (Pmode,
20178 stack_pointer_rtx,
20179 4 * i)),
20180 reg1);
20181 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20182 plus_constant (Pmode,
20183 stack_pointer_rtx,
20184 4 * (i + 1))),
20185 reg2);
20186
20187 RTX_FRAME_RELATED_P (tmp1) = 1;
20188 RTX_FRAME_RELATED_P (tmp2) = 1;
20189 XVECEXP (dwarf, 0, i + 1) = tmp1;
20190 XVECEXP (dwarf, 0, i + 2) = tmp2;
20191 i += 2;
20192 regno = regno2 + 1;
20193 }
20194 else
20195 regno++;
20196
20197 return;
20198 }
20199
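/* For example (registers chosen arbitrarily), pushing {r4, r5, r6} with the
   routine above yields roughly

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. one STR with writeback that allocates the whole area, followed by a
   64-bit-aligned STRD.  */
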
20200 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20201 whenever possible, otherwise it emits single-word stores. The first store
20202 also allocates stack space for all saved registers, using writeback with
20203 pre-indexed addressing. All other stores use offset addressing. If no STRD
20204 can be emitted, this function emits a sequence of single-word stores,
20205 and not an STM as before, because single-word stores provide more
20206 scheduling freedom and can be turned into an STM by peephole optimizations. */
20207 static void
20208 arm_emit_strd_push (unsigned long saved_regs_mask)
20209 {
20210 int num_regs = 0;
20211 int i, j, dwarf_index = 0;
20212 int offset = 0;
20213 rtx dwarf = NULL_RTX;
20214 rtx insn = NULL_RTX;
20215 rtx tmp, mem;
20216
20217 /* TODO: More efficient code can be emitted by changing the
20218 layout, e.g., first push all pairs that can use STRD to keep the
20219 stack aligned, and then push all other registers. */
20220 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20221 if (saved_regs_mask & (1 << i))
20222 num_regs++;
20223
20224 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20225 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20226 gcc_assert (num_regs > 0);
20227
20228 /* Create sequence for DWARF info. */
20229 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20230
20231 /* For dwarf info, we generate an explicit stack update. */
20232 tmp = gen_rtx_SET (stack_pointer_rtx,
20233 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20234 RTX_FRAME_RELATED_P (tmp) = 1;
20235 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20236
20237 /* Save registers. */
20238 offset = - 4 * num_regs;
20239 j = 0;
20240 while (j <= LAST_ARM_REGNUM)
20241 if (saved_regs_mask & (1 << j))
20242 {
20243 if ((j % 2 == 0)
20244 && (saved_regs_mask & (1 << (j + 1))))
20245 {
20246 /* The current register and the next register form a register pair
20247 for which STRD can be generated. */
20248 if (offset < 0)
20249 {
20250 /* Allocate stack space for all saved registers. */
20251 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20252 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20253 mem = gen_frame_mem (DImode, tmp);
20254 offset = 0;
20255 }
20256 else if (offset > 0)
20257 mem = gen_frame_mem (DImode,
20258 plus_constant (Pmode,
20259 stack_pointer_rtx,
20260 offset));
20261 else
20262 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20263
20264 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20265 RTX_FRAME_RELATED_P (tmp) = 1;
20266 tmp = emit_insn (tmp);
20267
20268 /* Record the first store insn. */
20269 if (dwarf_index == 1)
20270 insn = tmp;
20271
20272 /* Generate dwarf info. */
20273 mem = gen_frame_mem (SImode,
20274 plus_constant (Pmode,
20275 stack_pointer_rtx,
20276 offset));
20277 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20278 RTX_FRAME_RELATED_P (tmp) = 1;
20279 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20280
20281 mem = gen_frame_mem (SImode,
20282 plus_constant (Pmode,
20283 stack_pointer_rtx,
20284 offset + 4));
20285 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20286 RTX_FRAME_RELATED_P (tmp) = 1;
20287 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20288
20289 offset += 8;
20290 j += 2;
20291 }
20292 else
20293 {
20294 /* Emit a single word store. */
20295 if (offset < 0)
20296 {
20297 /* Allocate stack space for all saved registers. */
20298 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20299 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20300 mem = gen_frame_mem (SImode, tmp);
20301 offset = 0;
20302 }
20303 else if (offset > 0)
20304 mem = gen_frame_mem (SImode,
20305 plus_constant (Pmode,
20306 stack_pointer_rtx,
20307 offset));
20308 else
20309 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20310
20311 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20312 RTX_FRAME_RELATED_P (tmp) = 1;
20313 tmp = emit_insn (tmp);
20314
20315 /* Record the first store insn. */
20316 if (dwarf_index == 1)
20317 insn = tmp;
20318
20319 /* Generate dwarf info. */
20320 mem = gen_frame_mem (SImode,
20321 plus_constant (Pmode,
20322 stack_pointer_rtx,
20323 offset));
20324 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20325 RTX_FRAME_RELATED_P (tmp) = 1;
20326 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20327
20328 offset += 4;
20329 j += 1;
20330 }
20331 }
20332 else
20333 j++;
20334
20335 /* Attach dwarf info to the first insn we generate. */
20336 gcc_assert (insn != NULL_RTX);
20337 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20338 RTX_FRAME_RELATED_P (insn) = 1;
20339 }
20340
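/* For example (registers chosen arbitrarily), pushing {r4, r5, r7} with the
   routine above yields roughly

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   the first store allocating the space for all three registers.  */
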
20341 /* Generate and emit an insn that we will recognize as a push_multi.
20342 Unfortunately, since this insn does not reflect very well the actual
20343 semantics of the operation, we need to annotate the insn for the benefit
20344 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20345 MASK for registers that should be annotated for DWARF2 frame unwind
20346 information. */
20347 static rtx
20348 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20349 {
20350 int num_regs = 0;
20351 int num_dwarf_regs = 0;
20352 int i, j;
20353 rtx par;
20354 rtx dwarf;
20355 int dwarf_par_index;
20356 rtx tmp, reg;
20357
20358 /* We don't record the PC in the dwarf frame information. */
20359 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20360
20361 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20362 {
20363 if (mask & (1 << i))
20364 num_regs++;
20365 if (dwarf_regs_mask & (1 << i))
20366 num_dwarf_regs++;
20367 }
20368
20369 gcc_assert (num_regs && num_regs <= 16);
20370 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20371
20372 /* For the body of the insn we are going to generate an UNSPEC in
20373 parallel with several USEs. This allows the insn to be recognized
20374 by the push_multi pattern in the arm.md file.
20375
20376 The body of the insn looks something like this:
20377
20378 (parallel [
20379 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20380 (const_int:SI <num>)))
20381 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20382 (use (reg:SI XX))
20383 (use (reg:SI YY))
20384 ...
20385 ])
20386
20387 For the frame note however, we try to be more explicit and actually
20388 show each register being stored into the stack frame, plus a (single)
20389 decrement of the stack pointer. We do it this way in order to be
20390 friendly to the stack unwinding code, which only wants to see a single
20391 stack decrement per instruction. The RTL we generate for the note looks
20392 something like this:
20393
20394 (sequence [
20395 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20396 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20397 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20398 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20399 ...
20400 ])
20401
20402 FIXME: In an ideal world the PRE_MODIFY would not exist and
20403 instead we'd have a parallel expression detailing all
20404 the stores to the various memory addresses so that debug
20405 information is more up-to-date. Remember however while writing
20406 this to take care of the constraints with the push instruction.
20407
20408 Note also that this has to be taken care of for the VFP registers.
20409
20410 For more see PR43399. */
20411
20412 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20413 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20414 dwarf_par_index = 1;
20415
20416 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20417 {
20418 if (mask & (1 << i))
20419 {
20420 reg = gen_rtx_REG (SImode, i);
20421
20422 XVECEXP (par, 0, 0)
20423 = gen_rtx_SET (gen_frame_mem
20424 (BLKmode,
20425 gen_rtx_PRE_MODIFY (Pmode,
20426 stack_pointer_rtx,
20427 plus_constant
20428 (Pmode, stack_pointer_rtx,
20429 -4 * num_regs))
20430 ),
20431 gen_rtx_UNSPEC (BLKmode,
20432 gen_rtvec (1, reg),
20433 UNSPEC_PUSH_MULT));
20434
20435 if (dwarf_regs_mask & (1 << i))
20436 {
20437 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20438 reg);
20439 RTX_FRAME_RELATED_P (tmp) = 1;
20440 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20441 }
20442
20443 break;
20444 }
20445 }
20446
20447 for (j = 1, i++; j < num_regs; i++)
20448 {
20449 if (mask & (1 << i))
20450 {
20451 reg = gen_rtx_REG (SImode, i);
20452
20453 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20454
20455 if (dwarf_regs_mask & (1 << i))
20456 {
20457 tmp
20458 = gen_rtx_SET (gen_frame_mem
20459 (SImode,
20460 plus_constant (Pmode, stack_pointer_rtx,
20461 4 * j)),
20462 reg);
20463 RTX_FRAME_RELATED_P (tmp) = 1;
20464 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20465 }
20466
20467 j++;
20468 }
20469 }
20470
20471 par = emit_insn (par);
20472
20473 tmp = gen_rtx_SET (stack_pointer_rtx,
20474 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20475 RTX_FRAME_RELATED_P (tmp) = 1;
20476 XVECEXP (dwarf, 0, 0) = tmp;
20477
20478 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20479
20480 return par;
20481 }
20482
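/* The push_multi insn built above is ultimately printed as a single
   store-multiple; for MASK = {r4, r5, lr} this is roughly

	push	{r4, r5, lr}

   (stmfd sp!, {r4, r5, lr} in divided ARM syntax), while the attached
   REG_FRAME_RELATED_EXPR note describes the individual stores for the
   unwinder.  */
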
20483 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20484 SIZE is the offset to be adjusted.
20485 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20486 static void
20487 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20488 {
20489 rtx dwarf;
20490
20491 RTX_FRAME_RELATED_P (insn) = 1;
20492 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20493 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20494 }
20495
20496 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20497 SAVED_REGS_MASK shows which registers need to be restored.
20498
20499 Unfortunately, since this insn does not reflect very well the actual
20500 semantics of the operation, we need to annotate the insn for the benefit
20501 of DWARF2 frame unwind information. */
20502 static void
20503 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20504 {
20505 int num_regs = 0;
20506 int i, j;
20507 rtx par;
20508 rtx dwarf = NULL_RTX;
20509 rtx tmp, reg;
20510 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20511 int offset_adj;
20512 int emit_update;
20513
20514 offset_adj = return_in_pc ? 1 : 0;
20515 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20516 if (saved_regs_mask & (1 << i))
20517 num_regs++;
20518
20519 gcc_assert (num_regs && num_regs <= 16);
20520
20521 /* If SP is in the reglist, then we don't emit an SP update insn. */
20522 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20523
20524 /* The parallel needs to hold num_regs SETs
20525 and one SET for the stack update. */
20526 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20527
20528 if (return_in_pc)
20529 XVECEXP (par, 0, 0) = ret_rtx;
20530
20531 if (emit_update)
20532 {
20533 /* Increment the stack pointer, based on there being
20534 num_regs 4-byte registers to restore. */
20535 tmp = gen_rtx_SET (stack_pointer_rtx,
20536 plus_constant (Pmode,
20537 stack_pointer_rtx,
20538 4 * num_regs));
20539 RTX_FRAME_RELATED_P (tmp) = 1;
20540 XVECEXP (par, 0, offset_adj) = tmp;
20541 }
20542
20543 /* Now restore every reg, which may include PC. */
20544 for (j = 0, i = 0; j < num_regs; i++)
20545 if (saved_regs_mask & (1 << i))
20546 {
20547 reg = gen_rtx_REG (SImode, i);
20548 if ((num_regs == 1) && emit_update && !return_in_pc)
20549 {
20550 /* Emit single load with writeback. */
20551 tmp = gen_frame_mem (SImode,
20552 gen_rtx_POST_INC (Pmode,
20553 stack_pointer_rtx));
20554 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20555 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20556 return;
20557 }
20558
20559 tmp = gen_rtx_SET (reg,
20560 gen_frame_mem
20561 (SImode,
20562 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20563 RTX_FRAME_RELATED_P (tmp) = 1;
20564 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20565
20566 /* We need to maintain a sequence for DWARF info too. As dwarf info
20567 should not have PC, skip PC. */
20568 if (i != PC_REGNUM)
20569 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20570
20571 j++;
20572 }
20573
20574 if (return_in_pc)
20575 par = emit_jump_insn (par);
20576 else
20577 par = emit_insn (par);
20578
20579 REG_NOTES (par) = dwarf;
20580 if (!return_in_pc)
20581 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20582 stack_pointer_rtx, stack_pointer_rtx);
20583 }
20584
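/* For SAVED_REGS_MASK = {r4, r5, pc} the pop_multi built above is typically
   printed as

	pop	{r4, r5, pc}

   with one REG_CFA_RESTORE note per restored core register (PC excluded)
   and, when the return is not through PC, an additional REG_CFA_ADJUST_CFA
   note for the stack-pointer adjustment.  */
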
20585 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20586 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20587
20588 Unfortunately, since this insn does not reflect very well the actual
20589 semantics of the operation, we need to annotate the insn for the benefit
20590 of DWARF2 frame unwind information. */
20591 static void
20592 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20593 {
20594 int i, j;
20595 rtx par;
20596 rtx dwarf = NULL_RTX;
20597 rtx tmp, reg;
20598
20599 gcc_assert (num_regs && num_regs <= 32);
20600
20601 /* Workaround ARM10 VFPr1 bug. */
20602 if (num_regs == 2 && !arm_arch6)
20603 {
20604 if (first_reg == 15)
20605 first_reg--;
20606
20607 num_regs++;
20608 }
20609
20610 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20611 there could be up to 32 D-registers to restore.
20612 If there are more than 16 D-registers, make two recursive calls,
20613 each of which emits one pop_multi instruction. */
20614 if (num_regs > 16)
20615 {
20616 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20617 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20618 return;
20619 }
20620
20621 /* The parallel needs to hold num_regs SETs
20622 and one SET for the stack update. */
20623 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20624
20625 /* Increment the stack pointer, based on there being
20626 num_regs 8-byte registers to restore. */
20627 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20628 RTX_FRAME_RELATED_P (tmp) = 1;
20629 XVECEXP (par, 0, 0) = tmp;
20630
20631 /* Now show every reg that will be restored, using a SET for each. */
20632 for (j = 0, i=first_reg; j < num_regs; i += 2)
20633 {
20634 reg = gen_rtx_REG (DFmode, i);
20635
20636 tmp = gen_rtx_SET (reg,
20637 gen_frame_mem
20638 (DFmode,
20639 plus_constant (Pmode, base_reg, 8 * j)));
20640 RTX_FRAME_RELATED_P (tmp) = 1;
20641 XVECEXP (par, 0, j + 1) = tmp;
20642
20643 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20644
20645 j++;
20646 }
20647
20648 par = emit_insn (par);
20649 REG_NOTES (par) = dwarf;
20650
20651 /* Make sure the CFA isn't left based on IP_REGNUM, to allow unwinding from FP. */
20652 if (REGNO (base_reg) == IP_REGNUM)
20653 {
20654 RTX_FRAME_RELATED_P (par) = 1;
20655 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20656 }
20657 else
20658 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20659 base_reg, base_reg);
20660 }
20661
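/* Restoring d8-d11 relative to the stack pointer with the routine above is
   typically printed as something like

	vldm	sp!, {d8-d11}

   with a REG_CFA_RESTORE note attached for each D register.  */
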
20662 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20663 even number of registers is being popped, multiple LDRD patterns are created
20664 for all register pairs. If an odd number of registers is popped, the last
20665 register is loaded using an LDR pattern. */
20666 static void
20667 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20668 {
20669 int num_regs = 0;
20670 int i, j;
20671 rtx par = NULL_RTX;
20672 rtx dwarf = NULL_RTX;
20673 rtx tmp, reg, tmp1;
20674 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20675
20676 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20677 if (saved_regs_mask & (1 << i))
20678 num_regs++;
20679
20680 gcc_assert (num_regs && num_regs <= 16);
20681
20682 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20683 to be popped. So, if num_regs is even, now it will become odd,
20684 and we can generate pop with PC. If num_regs is odd, it will be
20685 even now, and ldr with return can be generated for PC. */
20686 if (return_in_pc)
20687 num_regs--;
20688
20689 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20690
20691 /* Var j iterates over all the registers to gather all the registers in
20692 saved_regs_mask. Var i gives the index of each saved register in the stack frame.
20693 A PARALLEL RTX of register-pair is created here, so that pattern for
20694 LDRD can be matched. As PC is always last register to be popped, and
20695 we have already decremented num_regs if PC, we don't have to worry
20696 about PC in this loop. */
20697 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20698 if (saved_regs_mask & (1 << j))
20699 {
20700 /* Create RTX for memory load. */
20701 reg = gen_rtx_REG (SImode, j);
20702 tmp = gen_rtx_SET (reg,
20703 gen_frame_mem (SImode,
20704 plus_constant (Pmode,
20705 stack_pointer_rtx, 4 * i)));
20706 RTX_FRAME_RELATED_P (tmp) = 1;
20707
20708 if (i % 2 == 0)
20709 {
20710 /* When saved-register index (i) is even, the RTX to be emitted is
20711 yet to be created. Hence create it first. The LDRD pattern we
20712 are generating is :
20713 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20714 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20715 where target registers need not be consecutive. */
20716 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20717 dwarf = NULL_RTX;
20718 }
20719
20720 /* The i-th register is added to the PARALLEL RTX. If i is even, reg_i is
20721 added as the 0th element; if i is odd, reg_i is added as the 1st element
20722 of the LDRD pattern shown above. */
20723 XVECEXP (par, 0, (i % 2)) = tmp;
20724 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20725
20726 if ((i % 2) == 1)
20727 {
20728 /* When saved-register index (i) is odd, RTXs for both the registers
20729 to be loaded are generated in above given LDRD pattern, and the
20730 pattern can be emitted now. */
20731 par = emit_insn (par);
20732 REG_NOTES (par) = dwarf;
20733 RTX_FRAME_RELATED_P (par) = 1;
20734 }
20735
20736 i++;
20737 }
20738
20739 /* If the number of registers popped is odd and return_in_pc is false, or
20740 the number of registers is even and return_in_pc is true, the last register
20741 is popped using LDR. It can be PC as well. Hence, adjust the stack first and
20742 then use LDR with post-increment. */
20743
20744 /* Increment the stack pointer, based on there being
20745 num_regs 4-byte registers to restore. */
20746 tmp = gen_rtx_SET (stack_pointer_rtx,
20747 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20748 RTX_FRAME_RELATED_P (tmp) = 1;
20749 tmp = emit_insn (tmp);
20750 if (!return_in_pc)
20751 {
20752 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20753 stack_pointer_rtx, stack_pointer_rtx);
20754 }
20755
20756 dwarf = NULL_RTX;
20757
20758 if (((num_regs % 2) == 1 && !return_in_pc)
20759 || ((num_regs % 2) == 0 && return_in_pc))
20760 {
20761 /* Scan for the single register to be popped. Skip until the saved
20762 register is found. */
20763 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20764
20765 /* Gen LDR with post increment here. */
20766 tmp1 = gen_rtx_MEM (SImode,
20767 gen_rtx_POST_INC (SImode,
20768 stack_pointer_rtx));
20769 set_mem_alias_set (tmp1, get_frame_alias_set ());
20770
20771 reg = gen_rtx_REG (SImode, j);
20772 tmp = gen_rtx_SET (reg, tmp1);
20773 RTX_FRAME_RELATED_P (tmp) = 1;
20774 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20775
20776 if (return_in_pc)
20777 {
20778 /* If return_in_pc, j must be PC_REGNUM. */
20779 gcc_assert (j == PC_REGNUM);
20780 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20781 XVECEXP (par, 0, 0) = ret_rtx;
20782 XVECEXP (par, 0, 1) = tmp;
20783 par = emit_jump_insn (par);
20784 }
20785 else
20786 {
20787 par = emit_insn (tmp);
20788 REG_NOTES (par) = dwarf;
20789 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20790 stack_pointer_rtx, stack_pointer_rtx);
20791 }
20792
20793 }
20794 else if ((num_regs % 2) == 1 && return_in_pc)
20795 {
20796 /* There are 2 registers to be popped. So, generate the pattern
20797 pop_multiple_with_stack_update_and_return to pop in PC. */
20798 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20799 }
20800
20801 return;
20802 }
20803
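/* For SAVED_REGS_MASK = {r4, r5, r6, pc} the routine above emits a sequence
   along the lines of

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}

   the trailing pop being produced by arm_emit_multi_reg_pop.  */
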
20804 /* LDRD in ARM mode needs consecutive registers as operands. This function
20805 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20806 offset addressing and then generates one separate stack update. This provides
20807 more scheduling freedom, compared to writeback on every load. However,
20808 if the function returns using load into PC directly
20809 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20810 before the last load. TODO: Add a peephole optimization to recognize
20811 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20812 peephole optimization to merge the load at stack-offset zero
20813 with the stack update instruction using load with writeback
20814 in post-index addressing mode. */
20815 static void
20816 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20817 {
20818 int j = 0;
20819 int offset = 0;
20820 rtx par = NULL_RTX;
20821 rtx dwarf = NULL_RTX;
20822 rtx tmp, mem;
20823
20824 /* Restore saved registers. */
20825 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20826 j = 0;
20827 while (j <= LAST_ARM_REGNUM)
20828 if (saved_regs_mask & (1 << j))
20829 {
20830 if ((j % 2) == 0
20831 && (saved_regs_mask & (1 << (j + 1)))
20832 && (j + 1) != PC_REGNUM)
20833 {
20834 /* Current register and next register form register pair for which
20835 LDRD can be generated. PC is always the last register popped, and
20836 we handle it separately. */
20837 if (offset > 0)
20838 mem = gen_frame_mem (DImode,
20839 plus_constant (Pmode,
20840 stack_pointer_rtx,
20841 offset));
20842 else
20843 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20844
20845 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20846 tmp = emit_insn (tmp);
20847 RTX_FRAME_RELATED_P (tmp) = 1;
20848
20849 /* Generate dwarf info. */
20850
20851 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20852 gen_rtx_REG (SImode, j),
20853 NULL_RTX);
20854 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20855 gen_rtx_REG (SImode, j + 1),
20856 dwarf);
20857
20858 REG_NOTES (tmp) = dwarf;
20859
20860 offset += 8;
20861 j += 2;
20862 }
20863 else if (j != PC_REGNUM)
20864 {
20865 /* Emit a single word load. */
20866 if (offset > 0)
20867 mem = gen_frame_mem (SImode,
20868 plus_constant (Pmode,
20869 stack_pointer_rtx,
20870 offset));
20871 else
20872 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20873
20874 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20875 tmp = emit_insn (tmp);
20876 RTX_FRAME_RELATED_P (tmp) = 1;
20877
20878 /* Generate dwarf info. */
20879 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20880 gen_rtx_REG (SImode, j),
20881 NULL_RTX);
20882
20883 offset += 4;
20884 j += 1;
20885 }
20886 else /* j == PC_REGNUM */
20887 j++;
20888 }
20889 else
20890 j++;
20891
20892 /* Update the stack. */
20893 if (offset > 0)
20894 {
20895 tmp = gen_rtx_SET (stack_pointer_rtx,
20896 plus_constant (Pmode,
20897 stack_pointer_rtx,
20898 offset));
20899 tmp = emit_insn (tmp);
20900 arm_add_cfa_adjust_cfa_note (tmp, offset,
20901 stack_pointer_rtx, stack_pointer_rtx);
20902 offset = 0;
20903 }
20904
20905 if (saved_regs_mask & (1 << PC_REGNUM))
20906 {
20907 /* Only PC is to be popped. */
20908 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20909 XVECEXP (par, 0, 0) = ret_rtx;
20910 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20911 gen_frame_mem (SImode,
20912 gen_rtx_POST_INC (SImode,
20913 stack_pointer_rtx)));
20914 RTX_FRAME_RELATED_P (tmp) = 1;
20915 XVECEXP (par, 0, 1) = tmp;
20916 par = emit_jump_insn (par);
20917
20918 /* Generate dwarf info. */
20919 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20920 gen_rtx_REG (SImode, PC_REGNUM),
20921 NULL_RTX);
20922 REG_NOTES (par) = dwarf;
20923 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20924 stack_pointer_rtx, stack_pointer_rtx);
20925 }
20926 }
20927
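/* For SAVED_REGS_MASK = {r4, r5, r6, pc} the ARM-mode routine above emits a
   sequence along the lines of

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   with the single stack update placed before the final load into PC.  */
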
20928 /* Calculate the size of the return value that is passed in registers. */
20929 static unsigned
20930 arm_size_return_regs (void)
20931 {
20932 machine_mode mode;
20933
20934 if (crtl->return_rtx != 0)
20935 mode = GET_MODE (crtl->return_rtx);
20936 else
20937 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20938
20939 return GET_MODE_SIZE (mode);
20940 }
20941
20942 /* Return true if the current function needs to save/restore LR. */
20943 static bool
20944 thumb_force_lr_save (void)
20945 {
20946 return !cfun->machine->lr_save_eliminated
20947 && (!crtl->is_leaf
20948 || thumb_far_jump_used_p ()
20949 || df_regs_ever_live_p (LR_REGNUM));
20950 }
20951
20952 /* Return true if CALL is an indirect tail call. In that case we do
20953 not know whether r3 will be available, so we must conservatively
20954 assume that it could be used. */
20955 static bool
20956 is_indirect_tailcall_p (rtx call)
20957 {
20958 rtx pat = PATTERN (call);
20959
20960 /* Indirect tail call. */
20961 pat = XVECEXP (pat, 0, 0);
20962 if (GET_CODE (pat) == SET)
20963 pat = SET_SRC (pat);
20964
20965 pat = XEXP (XEXP (pat, 0), 0);
20966 return REG_P (pat);
20967 }
20968
20969 /* Return true if r3 is used by any of the tail call insns in the
20970 current function. */
20971 static bool
20972 any_sibcall_could_use_r3 (void)
20973 {
20974 edge_iterator ei;
20975 edge e;
20976
20977 if (!crtl->tail_call_emit)
20978 return false;
20979 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20980 if (e->flags & EDGE_SIBCALL)
20981 {
20982 rtx_insn *call = BB_END (e->src);
20983 if (!CALL_P (call))
20984 call = prev_nonnote_nondebug_insn (call);
20985 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20986 if (find_regno_fusage (call, USE, 3)
20987 || is_indirect_tailcall_p (call))
20988 return true;
20989 }
20990 return false;
20991 }
20992
20993
20994 /* Compute the distance from register FROM to register TO.
20995 These can be the arg pointer (26), the soft frame pointer (25),
20996 the stack pointer (13) or the hard frame pointer (11).
20997 In Thumb mode r7 is used as the hard frame pointer, if needed.
20998 Typical stack layout looks like this:
20999
21000 old stack pointer -> | |
21001 ----
21002 | | \
21003 | | saved arguments for
21004 | | vararg functions
21005 | | /
21006 --
21007 hard FP & arg pointer -> | | \
21008 | | stack
21009 | | frame
21010 | | /
21011 --
21012 | | \
21013 | | call saved
21014 | | registers
21015 soft frame pointer -> | | /
21016 --
21017 | | \
21018 | | local
21019 | | variables
21020 locals base pointer -> | | /
21021 --
21022 | | \
21023 | | outgoing
21024 | | arguments
21025 current stack pointer -> | | /
21026 --
21027
21028 For a given function some or all of these stack components
21029 may not be needed, giving rise to the possibility of
21030 eliminating some of the registers.
21031
21032 The values returned by this function must reflect the behavior
21033 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21034
21035 The sign of the number returned reflects the direction of stack
21036 growth, so the values are positive for all eliminations except
21037 from the soft frame pointer to the hard frame pointer.
21038
21039 SFP may point just inside the local variables block to ensure correct
21040 alignment. */
21041
21042
21043 /* Return cached stack offsets. */
21044
21045 static arm_stack_offsets *
21046 arm_get_frame_offsets (void)
21047 {
21048 struct arm_stack_offsets *offsets;
21049
21050 offsets = &cfun->machine->stack_offsets;
21051
21052 return offsets;
21053 }
21054
21055
21056 /* Calculate stack offsets. These are used to calculate register elimination
21057 offsets and in prologue/epilogue code. Also calculates which registers
21058 should be saved. */
21059
21060 static void
21061 arm_compute_frame_layout (void)
21062 {
21063 struct arm_stack_offsets *offsets;
21064 unsigned long func_type;
21065 int saved;
21066 int core_saved;
21067 HOST_WIDE_INT frame_size;
21068 int i;
21069
21070 offsets = &cfun->machine->stack_offsets;
21071
21072 /* Initially this is the size of the local variables. It will be translated
21073 into an offset once we have determined the size of preceding data. */
21074 frame_size = ROUND_UP_WORD (get_frame_size ());
21075
21076 /* Space for variadic functions. */
21077 offsets->saved_args = crtl->args.pretend_args_size;
21078
21079 /* In Thumb mode this is incorrect, but never used. */
21080 offsets->frame
21081 = (offsets->saved_args
21082 + arm_compute_static_chain_stack_bytes ()
21083 + (frame_pointer_needed ? 4 : 0));
21084
21085 if (TARGET_32BIT)
21086 {
21087 unsigned int regno;
21088
21089 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21090 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21091 saved = core_saved;
21092
21093 /* We know that SP will be doubleword aligned on entry, and we must
21094 preserve that condition at any subroutine call. We also require the
21095 soft frame pointer to be doubleword aligned. */
21096
21097 if (TARGET_REALLY_IWMMXT)
21098 {
21099 /* Check for the call-saved iWMMXt registers. */
21100 for (regno = FIRST_IWMMXT_REGNUM;
21101 regno <= LAST_IWMMXT_REGNUM;
21102 regno++)
21103 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21104 saved += 8;
21105 }
21106
21107 func_type = arm_current_func_type ();
21108 /* Space for saved VFP registers. */
21109 if (! IS_VOLATILE (func_type)
21110 && TARGET_HARD_FLOAT)
21111 saved += arm_get_vfp_saved_size ();
21112 }
21113 else /* TARGET_THUMB1 */
21114 {
21115 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21116 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21117 saved = core_saved;
21118 if (TARGET_BACKTRACE)
21119 saved += 16;
21120 }
21121
21122 /* Saved registers include the stack frame. */
21123 offsets->saved_regs
21124 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21125 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21126
21127 /* A leaf function does not need any stack alignment if it has nothing
21128 on the stack. */
21129 if (crtl->is_leaf && frame_size == 0
21130 /* However if it calls alloca(), we have a dynamically allocated
21131 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21132 && ! cfun->calls_alloca)
21133 {
21134 offsets->outgoing_args = offsets->soft_frame;
21135 offsets->locals_base = offsets->soft_frame;
21136 return;
21137 }
21138
21139 /* Ensure SFP has the correct alignment. */
21140 if (ARM_DOUBLEWORD_ALIGN
21141 && (offsets->soft_frame & 7))
21142 {
21143 offsets->soft_frame += 4;
21144 /* Try to align stack by pushing an extra reg. Don't bother doing this
21145 when there is a stack frame as the alignment will be rolled into
21146 the normal stack adjustment. */
21147 if (frame_size + crtl->outgoing_args_size == 0)
21148 {
21149 int reg = -1;
21150
21151 /* Register r3 is caller-saved. Normally it does not need to be
21152 saved on entry by the prologue. However if we choose to save
21153 it for padding then we may confuse the compiler into thinking
21154 a prologue sequence is required when in fact it is not. This
21155 will occur when shrink-wrapping if r3 is used as a scratch
21156 register and there are no other callee-saved writes.
21157
21158 This situation can be avoided when other callee-saved registers
21159 are available and r3 is not mandatory if we choose a callee-saved
21160 register for padding. */
21161 bool prefer_callee_reg_p = false;
21162
21163 /* If it is safe to use r3, then do so. This sometimes
21164 generates better code on Thumb-2 by avoiding the need to
21165 use 32-bit push/pop instructions. */
21166 if (! any_sibcall_could_use_r3 ()
21167 && arm_size_return_regs () <= 12
21168 && (offsets->saved_regs_mask & (1 << 3)) == 0
21169 && (TARGET_THUMB2
21170 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21171 {
21172 reg = 3;
21173 if (!TARGET_THUMB2)
21174 prefer_callee_reg_p = true;
21175 }
21176 if (reg == -1
21177 || prefer_callee_reg_p)
21178 {
21179 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21180 {
21181 /* Avoid fixed registers; they may be changed at
21182 arbitrary times so it's unsafe to restore them
21183 during the epilogue. */
21184 if (!fixed_regs[i]
21185 && (offsets->saved_regs_mask & (1 << i)) == 0)
21186 {
21187 reg = i;
21188 break;
21189 }
21190 }
21191 }
21192
21193 if (reg != -1)
21194 {
21195 offsets->saved_regs += 4;
21196 offsets->saved_regs_mask |= (1 << reg);
21197 }
21198 }
21199 }
21200
21201 offsets->locals_base = offsets->soft_frame + frame_size;
21202 offsets->outgoing_args = (offsets->locals_base
21203 + crtl->outgoing_args_size);
21204
21205 if (ARM_DOUBLEWORD_ALIGN)
21206 {
21207 /* Ensure SP remains doubleword aligned. */
21208 if (offsets->outgoing_args & 7)
21209 offsets->outgoing_args += 4;
21210 gcc_assert (!(offsets->outgoing_args & 7));
21211 }
21212 }
21213
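/* As a worked example (assuming double-word stack alignment, no static
   chain, no frame pointer and an empty caller-interworking slot), a
   function with 8 bytes of locals, no outgoing arguments and saved core
   registers {r4-r7, lr} ends up with roughly

	saved_args    = 0
	frame         = 0
	saved_regs    = 20	(five 4-byte core registers)
	soft_frame    = 24	(20 rounded up to 8-byte alignment)
	locals_base   = 32
	outgoing_args = 32

   all values in bytes.  */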
21214
21215 /* Calculate the relative offsets for the different stack pointers. Positive
21216 offsets are in the direction of stack growth. */
21217
21218 HOST_WIDE_INT
21219 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21220 {
21221 arm_stack_offsets *offsets;
21222
21223 offsets = arm_get_frame_offsets ();
21224
21225 /* OK, now we have enough information to compute the distances.
21226 There must be an entry in these switch tables for each pair
21227 of registers in ELIMINABLE_REGS, even if some of the entries
21228 seem to be redundant or useless. */
21229 switch (from)
21230 {
21231 case ARG_POINTER_REGNUM:
21232 switch (to)
21233 {
21234 case THUMB_HARD_FRAME_POINTER_REGNUM:
21235 return 0;
21236
21237 case FRAME_POINTER_REGNUM:
21238 /* This is the reverse of the soft frame pointer
21239 to hard frame pointer elimination below. */
21240 return offsets->soft_frame - offsets->saved_args;
21241
21242 case ARM_HARD_FRAME_POINTER_REGNUM:
21243 /* This is only non-zero in the case where the static chain register
21244 is stored above the frame. */
21245 return offsets->frame - offsets->saved_args - 4;
21246
21247 case STACK_POINTER_REGNUM:
21248 /* If nothing has been pushed on the stack at all
21249 then this will return -4. This *is* correct! */
21250 return offsets->outgoing_args - (offsets->saved_args + 4);
21251
21252 default:
21253 gcc_unreachable ();
21254 }
21255 gcc_unreachable ();
21256
21257 case FRAME_POINTER_REGNUM:
21258 switch (to)
21259 {
21260 case THUMB_HARD_FRAME_POINTER_REGNUM:
21261 return 0;
21262
21263 case ARM_HARD_FRAME_POINTER_REGNUM:
21264 /* The hard frame pointer points to the top entry in the
21265 stack frame. The soft frame pointer to the bottom entry
21266 in the stack frame. If there is no stack frame at all,
21267 then they are identical. */
21268
21269 return offsets->frame - offsets->soft_frame;
21270
21271 case STACK_POINTER_REGNUM:
21272 return offsets->outgoing_args - offsets->soft_frame;
21273
21274 default:
21275 gcc_unreachable ();
21276 }
21277 gcc_unreachable ();
21278
21279 default:
21280 /* You cannot eliminate from the stack pointer.
21281 In theory you could eliminate from the hard frame
21282 pointer to the stack pointer, but this will never
21283 happen, since if a stack frame is not needed the
21284 hard frame pointer will never be used. */
21285 gcc_unreachable ();
21286 }
21287 }
21288
21289 /* Given FROM and TO register numbers, say whether this elimination is
21290 allowed. Frame pointer elimination is automatically handled.
21291
21292 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21293 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21294 pointer, we must eliminate FRAME_POINTER_REGNUM into
21295 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21296 ARG_POINTER_REGNUM. */
21297
21298 bool
21299 arm_can_eliminate (const int from, const int to)
21300 {
21301 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21302 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21303 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21304 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21305 true);
21306 }
21307
21308 /* Emit RTL to save coprocessor registers on function entry. Returns the
21309 number of bytes pushed. */
21310
21311 static int
21312 arm_save_coproc_regs(void)
21313 {
21314 int saved_size = 0;
21315 unsigned reg;
21316 unsigned start_reg;
21317 rtx insn;
21318
21319 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21320 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21321 {
21322 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21323 insn = gen_rtx_MEM (V2SImode, insn);
21324 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21325 RTX_FRAME_RELATED_P (insn) = 1;
21326 saved_size += 8;
21327 }
21328
21329 if (TARGET_HARD_FLOAT)
21330 {
21331 start_reg = FIRST_VFP_REGNUM;
21332
21333 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21334 {
21335 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21336 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21337 {
21338 if (start_reg != reg)
21339 saved_size += vfp_emit_fstmd (start_reg,
21340 (reg - start_reg) / 2);
21341 start_reg = reg + 2;
21342 }
21343 }
21344 if (start_reg != reg)
21345 saved_size += vfp_emit_fstmd (start_reg,
21346 (reg - start_reg) / 2);
21347 }
21348 return saved_size;
21349 }
21350
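/* On a hard-float target where d8-d11 are live across the function, the VFP
   loop above typically emits a single block store such as

	vpush	{d8-d11}

   (via vfp_emit_fstmd), accounting for 32 bytes of saved coprocessor
   state.  */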
21351
21352 /* Set the Thumb frame pointer from the stack pointer. */
21353
21354 static void
21355 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21356 {
21357 HOST_WIDE_INT amount;
21358 rtx insn, dwarf;
21359
21360 amount = offsets->outgoing_args - offsets->locals_base;
21361 if (amount < 1024)
21362 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21363 stack_pointer_rtx, GEN_INT (amount)));
21364 else
21365 {
21366 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21367 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21368 expects the first two operands to be the same. */
21369 if (TARGET_THUMB2)
21370 {
21371 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21372 stack_pointer_rtx,
21373 hard_frame_pointer_rtx));
21374 }
21375 else
21376 {
21377 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21378 hard_frame_pointer_rtx,
21379 stack_pointer_rtx));
21380 }
21381 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21382 plus_constant (Pmode, stack_pointer_rtx, amount));
21383 RTX_FRAME_RELATED_P (dwarf) = 1;
21384 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21385 }
21386
21387 RTX_FRAME_RELATED_P (insn) = 1;
21388 }
21389
21390 struct scratch_reg {
21391 rtx reg;
21392 bool saved;
21393 };
21394
21395 /* Return a short-lived scratch register for use as a 2nd scratch register on
21396 function entry after the registers are saved in the prologue. This register
21397 must be released by means of release_scratch_register_on_entry. IP is not
21398 considered since it is always used as the 1st scratch register if available.
21399
21400 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21401 mask of live registers. */
21402
21403 static void
21404 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21405 unsigned long live_regs)
21406 {
21407 int regno = -1;
21408
21409 sr->saved = false;
21410
21411 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21412 regno = LR_REGNUM;
21413 else
21414 {
21415 unsigned int i;
21416
21417 for (i = 4; i < 11; i++)
21418 if (regno1 != i && (live_regs & (1 << i)) != 0)
21419 {
21420 regno = i;
21421 break;
21422 }
21423
21424 if (regno < 0)
21425 {
21426 /* If IP is used as the 1st scratch register for a nested function,
21427 then either r3 wasn't available or is used to preserve IP. */
21428 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21429 regno1 = 3;
21430 regno = (regno1 == 3 ? 2 : 3);
21431 sr->saved
21432 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21433 regno);
21434 }
21435 }
21436
21437 sr->reg = gen_rtx_REG (SImode, regno);
21438 if (sr->saved)
21439 {
21440 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21441 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21442 rtx x = gen_rtx_SET (stack_pointer_rtx,
21443 plus_constant (Pmode, stack_pointer_rtx, -4));
21444 RTX_FRAME_RELATED_P (insn) = 1;
21445 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21446 }
21447 }
21448
21449 /* Release a scratch register obtained from the preceding function. */
21450
21451 static void
21452 release_scratch_register_on_entry (struct scratch_reg *sr)
21453 {
21454 if (sr->saved)
21455 {
21456 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21457 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21458 rtx x = gen_rtx_SET (stack_pointer_rtx,
21459 plus_constant (Pmode, stack_pointer_rtx, 4));
21460 RTX_FRAME_RELATED_P (insn) = 1;
21461 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21462 }
21463 }
21464
21465 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21466
21467 #if PROBE_INTERVAL > 4096
21468 #error Cannot use indexed addressing mode for stack probing
21469 #endif
21470
21471 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21472 inclusive. These are offsets from the current stack pointer. REGNO1
21473 is the index number of the 1st scratch register and LIVE_REGS is the
21474 mask of live registers. */
21475
21476 static void
21477 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21478 unsigned int regno1, unsigned long live_regs)
21479 {
21480 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21481
21482 /* See if we have a constant small number of probes to generate. If so,
21483 that's the easy case. */
21484 if (size <= PROBE_INTERVAL)
21485 {
21486 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21487 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21488 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21489 }
21490
21491 /* The run-time loop is made up of 10 insns in the generic case while the
21492 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals.  */
21493 else if (size <= 5 * PROBE_INTERVAL)
21494 {
21495 HOST_WIDE_INT i, rem;
21496
21497 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21498 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21499 emit_stack_probe (reg1);
21500
21501 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21502 it exceeds SIZE. If only two probes are needed, this will not
21503 generate any code. Then probe at FIRST + SIZE. */
21504 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21505 {
21506 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21507 emit_stack_probe (reg1);
21508 }
21509
21510 rem = size - (i - PROBE_INTERVAL);
21511 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21512 {
21513 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21514 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21515 }
21516 else
21517 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21518 }
21519
21520 /* Otherwise, do the same as above, but in a loop. Note that we must be
21521 extra careful with variables wrapping around because we might be at
21522 the very top (or the very bottom) of the address space and we have
21523 to be able to handle this case properly; in particular, we use an
21524 equality test for the loop condition. */
21525 else
21526 {
21527 HOST_WIDE_INT rounded_size;
21528 struct scratch_reg sr;
21529
21530 get_scratch_register_on_entry (&sr, regno1, live_regs);
21531
21532 emit_move_insn (reg1, GEN_INT (first));
21533
21534
21535 /* Step 1: round SIZE to the previous multiple of the interval. */
21536
21537 rounded_size = size & -PROBE_INTERVAL;
21538 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21539
21540
21541 /* Step 2: compute initial and final value of the loop counter. */
21542
21543 /* TEST_ADDR = SP + FIRST. */
21544 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21545
21546 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21547 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21548
21549
21550 /* Step 3: the loop
21551
21552 do
21553 {
21554 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21555 probe at TEST_ADDR
21556 }
21557 while (TEST_ADDR != LAST_ADDR)
21558
21559 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21560 until it is equal to ROUNDED_SIZE. */
21561
21562 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21563
21564
21565 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21566 that SIZE is equal to ROUNDED_SIZE. */
21567
21568 if (size != rounded_size)
21569 {
21570 HOST_WIDE_INT rem = size - rounded_size;
21571
21572 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21573 {
21574 emit_set_insn (sr.reg,
21575 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21576 emit_stack_probe (plus_constant (Pmode, sr.reg,
21577 PROBE_INTERVAL - rem));
21578 }
21579 else
21580 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21581 }
21582
21583 release_scratch_register_on_entry (&sr);
21584 }
21585
21586 /* Make sure nothing is scheduled before we are done. */
21587 emit_insn (gen_blockage ());
21588 }
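/* Worked example (illustrative, assuming PROBE_INTERVAL is 4096 and ARM
   state): for FIRST == 4096 and SIZE == 8192 the second case above emits,
   in effect,

	mov	reg1, #8192		@ FIRST + PROBE_INTERVAL
	sub	reg1, sp, reg1
	<probe at reg1>			@ SP - 8192
	sub	reg1, reg1, #4096
	<probe at reg1>			@ SP - 12288 == SP - (FIRST + SIZE)

   where each probe is a store emitted by emit_stack_probe.  Larger sizes
   fall through to the loop case, which uses the scratch register helpers
   above.  */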
21589
21590 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21591 absolute addresses. */
21592
21593 const char *
21594 output_probe_stack_range (rtx reg1, rtx reg2)
21595 {
21596 static int labelno = 0;
21597 char loop_lab[32];
21598 rtx xops[2];
21599
21600 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21601
21602 /* Loop. */
21603 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21604
21605 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21606 xops[0] = reg1;
21607 xops[1] = GEN_INT (PROBE_INTERVAL);
21608 output_asm_insn ("sub\t%0, %0, %1", xops);
21609
21610 /* Probe at TEST_ADDR. */
21611 output_asm_insn ("str\tr0, [%0, #0]", xops);
21612
21613 /* Test if TEST_ADDR == LAST_ADDR. */
21614 xops[1] = reg2;
21615 output_asm_insn ("cmp\t%0, %1", xops);
21616
21617 /* Branch. */
21618 fputs ("\tbne\t", asm_out_file);
21619 assemble_name_raw (asm_out_file, loop_lab);
21620 fputc ('\n', asm_out_file);
21621
21622 return "";
21623 }
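/* Illustrative output of the routine above (assuming PROBE_INTERVAL is 4096,
   with r4 and r5 standing in for REG1 and REG2), typically something like:

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0

   i.e. a descending probe loop that stops once TEST_ADDR equals
   LAST_ADDR.  */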
21624
21625 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21626 function. */
21627 void
21628 arm_expand_prologue (void)
21629 {
21630 rtx amount;
21631 rtx insn;
21632 rtx ip_rtx;
21633 unsigned long live_regs_mask;
21634 unsigned long func_type;
21635 int fp_offset = 0;
21636 int saved_pretend_args = 0;
21637 int saved_regs = 0;
21638 unsigned HOST_WIDE_INT args_to_push;
21639 HOST_WIDE_INT size;
21640 arm_stack_offsets *offsets;
21641 bool clobber_ip;
21642
21643 func_type = arm_current_func_type ();
21644
21645 /* Naked functions don't have prologues. */
21646 if (IS_NAKED (func_type))
21647 {
21648 if (flag_stack_usage_info)
21649 current_function_static_stack_size = 0;
21650 return;
21651 }
21652
21653 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21654 args_to_push = crtl->args.pretend_args_size;
21655
21656 /* Compute which registers we will have to save onto the stack.  */
21657 offsets = arm_get_frame_offsets ();
21658 live_regs_mask = offsets->saved_regs_mask;
21659
21660 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21661
21662 if (IS_STACKALIGN (func_type))
21663 {
21664 rtx r0, r1;
21665
21666 /* Handle a word-aligned stack pointer. We generate the following:
21667
21668 mov r0, sp
21669 bic r1, r0, #7
21670 mov sp, r1
21671 <save and restore r0 in normal prologue/epilogue>
21672 mov sp, r0
21673 bx lr
21674
21675 The unwinder doesn't need to know about the stack realignment.
21676 Just tell it we saved SP in r0. */
21677 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21678
21679 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21680 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21681
21682 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21683 RTX_FRAME_RELATED_P (insn) = 1;
21684 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21685
21686 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21687
21688 /* ??? The CFA changes here, which may cause GDB to conclude that it
21689 has entered a different function. That said, the unwind info is
21690 correct, individually, before and after this instruction because
21691 we've described the save of SP, which will override the default
21692 handling of SP as restoring from the CFA. */
21693 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21694 }
21695
21696 /* Let's compute the static_chain_stack_bytes required and store it. Right
21697 now the value must be -1 as stored by arm_init_machine_status (). */
21698 cfun->machine->static_chain_stack_bytes
21699 = arm_compute_static_chain_stack_bytes ();
21700
21701 /* The static chain register is the same as the IP register. If it is
21702 clobbered when creating the frame, we need to save and restore it. */
21703 clobber_ip = IS_NESTED (func_type)
21704 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21705 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21706 || flag_stack_clash_protection)
21707 && !df_regs_ever_live_p (LR_REGNUM)
21708 && arm_r3_live_at_start_p ()));
21709
21710 /* Find somewhere to store IP whilst the frame is being created.
21711 We try the following places in order:
21712
21713 1. The last argument register r3 if it is available.
21714 2. A slot on the stack above the frame if there are no
21715 arguments to push onto the stack.
21716 3. Register r3 again, after pushing the argument registers
21717 onto the stack, if this is a varargs function.
21718 4. The last slot on the stack created for the arguments to
21719 push, if this isn't a varargs function.
21720
21721 Note - we only need to tell the dwarf2 backend about the SP
21722 adjustment in the second variant; the static chain register
21723 doesn't need to be unwound, as it doesn't contain a value
21724 inherited from the caller. */
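  /* For example (illustrative), in case 2 above the save amounts to a single
     pre-decrement store
	 str	ip, [sp, #-4]!
     with the unwind info recording only the 4-byte SP adjustment, as emitted
     by the args_to_push == 0 branch below.  */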
21725 if (clobber_ip)
21726 {
21727 if (!arm_r3_live_at_start_p ())
21728 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21729 else if (args_to_push == 0)
21730 {
21731 rtx addr, dwarf;
21732
21733 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21734 saved_regs += 4;
21735
21736 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21737 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21738 fp_offset = 4;
21739
21740 /* Just tell the dwarf backend that we adjusted SP. */
21741 dwarf = gen_rtx_SET (stack_pointer_rtx,
21742 plus_constant (Pmode, stack_pointer_rtx,
21743 -fp_offset));
21744 RTX_FRAME_RELATED_P (insn) = 1;
21745 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21746 }
21747 else
21748 {
21749 /* Store the args on the stack. */
21750 if (cfun->machine->uses_anonymous_args)
21751 {
21752 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21753 (0xf0 >> (args_to_push / 4)) & 0xf);
21754 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21755 saved_pretend_args = 1;
21756 }
21757 else
21758 {
21759 rtx addr, dwarf;
21760
21761 if (args_to_push == 4)
21762 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21763 else
21764 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21765 plus_constant (Pmode,
21766 stack_pointer_rtx,
21767 -args_to_push));
21768
21769 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21770
21771 /* Just tell the dwarf backend that we adjusted SP. */
21772 dwarf = gen_rtx_SET (stack_pointer_rtx,
21773 plus_constant (Pmode, stack_pointer_rtx,
21774 -args_to_push));
21775 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21776 }
21777
21778 RTX_FRAME_RELATED_P (insn) = 1;
21779 fp_offset = args_to_push;
21780 args_to_push = 0;
21781 }
21782 }
21783
21784 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21785 {
21786 if (IS_INTERRUPT (func_type))
21787 {
21788 /* Interrupt functions must not corrupt any registers.
21789 Creating a frame pointer however, corrupts the IP
21790 register, so we must push it first. */
21791 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21792
21793 /* Do not set RTX_FRAME_RELATED_P on this insn.
21794 The dwarf stack unwinding code only wants to see one
21795 stack decrement per function, and this is not it. If
21796 this instruction is labeled as being part of the frame
21797 creation sequence then dwarf2out_frame_debug_expr will
21798 die when it encounters the assignment of IP to FP
21799 later on, since the use of SP here establishes SP as
21800 the CFA register and not IP.
21801
21802 Anyway this instruction is not really part of the stack
21803 frame creation although it is part of the prologue. */
21804 }
21805
21806 insn = emit_set_insn (ip_rtx,
21807 plus_constant (Pmode, stack_pointer_rtx,
21808 fp_offset));
21809 RTX_FRAME_RELATED_P (insn) = 1;
21810 }
21811
21812 if (args_to_push)
21813 {
21814 /* Push the argument registers, or reserve space for them. */
21815 if (cfun->machine->uses_anonymous_args)
21816 insn = emit_multi_reg_push
21817 ((0xf0 >> (args_to_push / 4)) & 0xf,
21818 (0xf0 >> (args_to_push / 4)) & 0xf);
21819 else
21820 insn = emit_insn
21821 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21822 GEN_INT (- args_to_push)));
21823 RTX_FRAME_RELATED_P (insn) = 1;
21824 }
21825
21826 /* If this is an interrupt service routine, and the link register
21827 is going to be pushed, and we're not generating the extra
21828 push of IP (needed when a frame pointer is required and the APCS
21829 frame layout is used), then subtracting four from LR now means that
21830 the function return can be done with a single instruction. */
21831 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21832 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21833 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21834 && TARGET_ARM)
21835 {
21836 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21837
21838 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21839 }
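  /* Illustrative effect of the adjustment above: the hardware return address
     for an IRQ/FIQ is LR - 4, so with LR pre-adjusted the value pushed below
     already points at the resume address and the epilogue can return with a
     single instruction such as
	 ldmfd	sp!, {..., pc}^
     instead of correcting LR separately (a sketch only; the actual epilogue
     is generated elsewhere).  */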
21840
21841 if (live_regs_mask)
21842 {
21843 unsigned long dwarf_regs_mask = live_regs_mask;
21844
21845 saved_regs += bit_count (live_regs_mask) * 4;
21846 if (optimize_size && !frame_pointer_needed
21847 && saved_regs == offsets->saved_regs - offsets->saved_args)
21848 {
21849 /* If no coprocessor registers are being pushed and we don't have
21850 to worry about a frame pointer then push extra registers to
21851 create the stack frame. This is done in a way that does not
21852 alter the frame layout, so is independent of the epilogue. */
21853 int n;
21854 int frame;
21855 n = 0;
21856 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21857 n++;
21858 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21859 if (frame && n * 4 >= frame)
21860 {
21861 n = frame / 4;
21862 live_regs_mask |= (1 << n) - 1;
21863 saved_regs += frame;
21864 }
21865 }
21866
21867 if (TARGET_LDRD
21868 && current_tune->prefer_ldrd_strd
21869 && !optimize_function_for_size_p (cfun))
21870 {
21871 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21872 if (TARGET_THUMB2)
21873 thumb2_emit_strd_push (live_regs_mask);
21874 else if (TARGET_ARM
21875 && !TARGET_APCS_FRAME
21876 && !IS_INTERRUPT (func_type))
21877 arm_emit_strd_push (live_regs_mask);
21878 else
21879 {
21880 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21881 RTX_FRAME_RELATED_P (insn) = 1;
21882 }
21883 }
21884 else
21885 {
21886 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21887 RTX_FRAME_RELATED_P (insn) = 1;
21888 }
21889 }
21890
21891 if (! IS_VOLATILE (func_type))
21892 saved_regs += arm_save_coproc_regs ();
21893
21894 if (frame_pointer_needed && TARGET_ARM)
21895 {
21896 /* Create the new frame pointer. */
21897 if (TARGET_APCS_FRAME)
21898 {
21899 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21900 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21901 RTX_FRAME_RELATED_P (insn) = 1;
21902 }
21903 else
21904 {
21905 insn = GEN_INT (saved_regs - (4 + fp_offset));
21906 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21907 stack_pointer_rtx, insn));
21908 RTX_FRAME_RELATED_P (insn) = 1;
21909 }
21910 }
21911
21912 size = offsets->outgoing_args - offsets->saved_args;
21913 if (flag_stack_usage_info)
21914 current_function_static_stack_size = size;
21915
21916 /* If this isn't an interrupt service routine and we have a frame, then do
21917 stack checking. We use IP as the first scratch register, except for the
21918 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21919 if (!IS_INTERRUPT (func_type)
21920 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21921 || flag_stack_clash_protection))
21922 {
21923 unsigned int regno;
21924
21925 if (!IS_NESTED (func_type) || clobber_ip)
21926 regno = IP_REGNUM;
21927 else if (df_regs_ever_live_p (LR_REGNUM))
21928 regno = LR_REGNUM;
21929 else
21930 regno = 3;
21931
21932 if (crtl->is_leaf && !cfun->calls_alloca)
21933 {
21934 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21935 arm_emit_probe_stack_range (get_stack_check_protect (),
21936 size - get_stack_check_protect (),
21937 regno, live_regs_mask);
21938 }
21939 else if (size > 0)
21940 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21941 regno, live_regs_mask);
21942 }
21943
21944 /* Recover the static chain register. */
21945 if (clobber_ip)
21946 {
21947 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21948 insn = gen_rtx_REG (SImode, 3);
21949 else
21950 {
21951 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21952 insn = gen_frame_mem (SImode, insn);
21953 }
21954 emit_set_insn (ip_rtx, insn);
21955 emit_insn (gen_force_register_use (ip_rtx));
21956 }
21957
21958 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21959 {
21960 /* This add can produce multiple insns for a large constant, so we
21961 need to get tricky. */
21962 rtx_insn *last = get_last_insn ();
21963
21964 amount = GEN_INT (offsets->saved_args + saved_regs
21965 - offsets->outgoing_args);
21966
21967 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21968 amount));
21969 do
21970 {
21971 last = last ? NEXT_INSN (last) : get_insns ();
21972 RTX_FRAME_RELATED_P (last) = 1;
21973 }
21974 while (last != insn);
21975
21976 /* If the frame pointer is needed, emit a special barrier that
21977 will prevent the scheduler from moving stores to the frame
21978 before the stack adjustment. */
21979 if (frame_pointer_needed)
21980 emit_insn (gen_stack_tie (stack_pointer_rtx,
21981 hard_frame_pointer_rtx));
21982 }
21983
21984
21985 if (frame_pointer_needed && TARGET_THUMB2)
21986 thumb_set_frame_pointer (offsets);
21987
21988 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21989 {
21990 unsigned long mask;
21991
21992 mask = live_regs_mask;
21993 mask &= THUMB2_WORK_REGS;
21994 if (!IS_NESTED (func_type))
21995 mask |= (1 << IP_REGNUM);
21996 arm_load_pic_register (mask);
21997 }
21998
21999 /* If we are profiling, make sure no instructions are scheduled before
22000 the call to mcount. Similarly if the user has requested no
22001 scheduling in the prolog. Similarly if we want non-call exceptions
22002 using the EABI unwinder, to prevent faulting instructions from being
22003 swapped with a stack adjustment. */
22004 if (crtl->profile || !TARGET_SCHED_PROLOG
22005 || (arm_except_unwind_info (&global_options) == UI_TARGET
22006 && cfun->can_throw_non_call_exceptions))
22007 emit_insn (gen_blockage ());
22008
22009 /* If the link register is being kept alive, with the return address in it,
22010 then make sure that it does not get reused by the ce2 pass. */
22011 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22012 cfun->machine->lr_save_eliminated = 1;
22013 }
22014 \f
22015 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22016 static void
22017 arm_print_condition (FILE *stream)
22018 {
22019 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22020 {
22021 /* Branch conversion is not implemented for Thumb-2. */
22022 if (TARGET_THUMB)
22023 {
22024 output_operand_lossage ("predicated Thumb instruction");
22025 return;
22026 }
22027 if (current_insn_predicate != NULL)
22028 {
22029 output_operand_lossage
22030 ("predicated instruction in conditional sequence");
22031 return;
22032 }
22033
22034 fputs (arm_condition_codes[arm_current_cc], stream);
22035 }
22036 else if (current_insn_predicate)
22037 {
22038 enum arm_cond_code code;
22039
22040 if (TARGET_THUMB1)
22041 {
22042 output_operand_lossage ("predicated Thumb instruction");
22043 return;
22044 }
22045
22046 code = get_arm_condition_code (current_insn_predicate);
22047 fputs (arm_condition_codes[code], stream);
22048 }
22049 }
22050
22051
22052 /* Globally reserved letters: acln
22053 Punctuation letters currently used: @_|?().!#
22054 Lower case letters currently used: bcdefhimpqtvwxyz
22055 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22056 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22057
22058 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22059
22060 If CODE is 'd', then the X is a condition operand and the instruction
22061 should only be executed if the condition is true.
22062 If CODE is 'D', then the X is a condition operand and the instruction
22063 should only be executed if the condition is false: however, if the mode
22064 of the comparison is CCFPEmode, then always execute the instruction -- we
22065 do this because in these circumstances !GE does not necessarily imply LT;
22066 in these cases the instruction pattern will take care to make sure that
22067 an instruction containing %d will follow, thereby undoing the effects of
22068 doing this instruction unconditionally.
22069 If CODE is 'N' then X is a floating point operand that must be negated
22070 before output.
22071 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22072 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
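/* Example (hypothetical template, for illustration only): in an output
   template such as "add%.\t%0, %1, %2" the '.' code prints the 's' flag
   plus the current condition, "%?" alone prints just the condition for a
   predicated instruction, and "%B2" prints the bitwise inverse of a
   CONST_INT operand 2.  */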
22073 static void
22074 arm_print_operand (FILE *stream, rtx x, int code)
22075 {
22076 switch (code)
22077 {
22078 case '@':
22079 fputs (ASM_COMMENT_START, stream);
22080 return;
22081
22082 case '_':
22083 fputs (user_label_prefix, stream);
22084 return;
22085
22086 case '|':
22087 fputs (REGISTER_PREFIX, stream);
22088 return;
22089
22090 case '?':
22091 arm_print_condition (stream);
22092 return;
22093
22094 case '.':
22095 /* The current condition code for a condition code setting instruction.
22096 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22097 fputc ('s', stream);
22098 arm_print_condition (stream);
22099 return;
22100
22101 case '!':
22102 /* If the instruction is conditionally executed then print
22103 the current condition code, otherwise print 's'. */
22104 gcc_assert (TARGET_THUMB2);
22105 if (current_insn_predicate)
22106 arm_print_condition (stream);
22107 else
22108 fputc ('s', stream);
22109 break;
22110
22111 /* %# is a "break" sequence. It doesn't output anything, but is used to
22112 separate e.g. operand numbers from following text, if that text consists
22113 of further digits which we don't want to be part of the operand
22114 number. */
22115 case '#':
22116 return;
22117
22118 case 'N':
22119 {
22120 REAL_VALUE_TYPE r;
22121 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22122 fprintf (stream, "%s", fp_const_from_val (&r));
22123 }
22124 return;
22125
22126 /* An integer or symbol address without a preceding # sign. */
22127 case 'c':
22128 switch (GET_CODE (x))
22129 {
22130 case CONST_INT:
22131 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22132 break;
22133
22134 case SYMBOL_REF:
22135 output_addr_const (stream, x);
22136 break;
22137
22138 case CONST:
22139 if (GET_CODE (XEXP (x, 0)) == PLUS
22140 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22141 {
22142 output_addr_const (stream, x);
22143 break;
22144 }
22145 /* Fall through. */
22146
22147 default:
22148 output_operand_lossage ("Unsupported operand for code '%c'", code);
22149 }
22150 return;
22151
22152 /* An integer that we want to print in HEX. */
22153 case 'x':
22154 switch (GET_CODE (x))
22155 {
22156 case CONST_INT:
22157 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22158 break;
22159
22160 default:
22161 output_operand_lossage ("Unsupported operand for code '%c'", code);
22162 }
22163 return;
22164
22165 case 'B':
22166 if (CONST_INT_P (x))
22167 {
22168 HOST_WIDE_INT val;
22169 val = ARM_SIGN_EXTEND (~INTVAL (x));
22170 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22171 }
22172 else
22173 {
22174 putc ('~', stream);
22175 output_addr_const (stream, x);
22176 }
22177 return;
22178
22179 case 'b':
22180 /* Print the log2 of a CONST_INT. */
22181 {
22182 HOST_WIDE_INT val;
22183
22184 if (!CONST_INT_P (x)
22185 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22186 output_operand_lossage ("Unsupported operand for code '%c'", code);
22187 else
22188 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22189 }
22190 return;
22191
22192 case 'L':
22193 /* The low 16 bits of an immediate constant. */
22194 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
22195 return;
22196
22197 case 'i':
22198 fprintf (stream, "%s", arithmetic_instr (x, 1));
22199 return;
22200
22201 case 'I':
22202 fprintf (stream, "%s", arithmetic_instr (x, 0));
22203 return;
22204
22205 case 'S':
22206 {
22207 HOST_WIDE_INT val;
22208 const char *shift;
22209
22210 shift = shift_op (x, &val);
22211
22212 if (shift)
22213 {
22214 fprintf (stream, ", %s ", shift);
22215 if (val == -1)
22216 arm_print_operand (stream, XEXP (x, 1), 0);
22217 else
22218 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22219 }
22220 }
22221 return;
22222
22223 /* An explanation of the 'Q', 'R' and 'H' register operands:
22224
22225 In a pair of registers containing a DI or DF value the 'Q'
22226 operand returns the register number of the register containing
22227 the least significant part of the value. The 'R' operand returns
22228 the register number of the register containing the most
22229 significant part of the value.
22230
22231 The 'H' operand returns the higher of the two register numbers.
22232 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
22233 same as the 'Q' operand, since the most significant part of the
22234 value is held in the lower number register. The reverse is true
22235 on systems where WORDS_BIG_ENDIAN is false.
22236
22237 The purpose of these operands is to distinguish between cases
22238 where the endian-ness of the values is important (for example
22239 when they are added together), and cases where the endian-ness
22240 is irrelevant, but the order of register operations is important.
22241 For example when loading a value from memory into a register
22242 pair, the endian-ness does not matter. Provided that the value
22243 from the lower memory address is put into the lower numbered
22244 register, and the value from the higher address is put into the
22245 higher numbered register, the load will work regardless of whether
22246 the value being loaded is big-wordian or little-wordian. The
22247 order of the two register loads can matter however, if the address
22248 of the memory location is actually held in one of the registers
22249 being overwritten by the load.
22250
22251 The 'Q' and 'R' constraints are also available for 64-bit
22252 constants. */
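    /* Concrete example (little-endian, WORDS_BIG_ENDIAN false): for a
       DImode value held in {r2, r3}, '%Q' prints r2 (the least significant
       half), '%R' prints r3 (the most significant half) and '%H' prints r3,
       the higher-numbered register of the pair.  */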
22253 case 'Q':
22254 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22255 {
22256 rtx part = gen_lowpart (SImode, x);
22257 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22258 return;
22259 }
22260
22261 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22262 {
22263 output_operand_lossage ("invalid operand for code '%c'", code);
22264 return;
22265 }
22266
22267 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22268 return;
22269
22270 case 'R':
22271 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22272 {
22273 machine_mode mode = GET_MODE (x);
22274 rtx part;
22275
22276 if (mode == VOIDmode)
22277 mode = DImode;
22278 part = gen_highpart_mode (SImode, mode, x);
22279 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22280 return;
22281 }
22282
22283 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22284 {
22285 output_operand_lossage ("invalid operand for code '%c'", code);
22286 return;
22287 }
22288
22289 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22290 return;
22291
22292 case 'H':
22293 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22294 {
22295 output_operand_lossage ("invalid operand for code '%c'", code);
22296 return;
22297 }
22298
22299 asm_fprintf (stream, "%r", REGNO (x) + 1);
22300 return;
22301
22302 case 'J':
22303 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22304 {
22305 output_operand_lossage ("invalid operand for code '%c'", code);
22306 return;
22307 }
22308
22309 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22310 return;
22311
22312 case 'K':
22313 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22314 {
22315 output_operand_lossage ("invalid operand for code '%c'", code);
22316 return;
22317 }
22318
22319 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22320 return;
22321
22322 case 'm':
22323 asm_fprintf (stream, "%r",
22324 REG_P (XEXP (x, 0))
22325 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22326 return;
22327
22328 case 'M':
22329 asm_fprintf (stream, "{%r-%r}",
22330 REGNO (x),
22331 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22332 return;
22333
22334 /* Like 'M', but writing doubleword vector registers, for use by Neon
22335 insns. */
22336 case 'h':
22337 {
22338 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22339 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22340 if (numregs == 1)
22341 asm_fprintf (stream, "{d%d}", regno);
22342 else
22343 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22344 }
22345 return;
22346
22347 case 'd':
22348 /* CONST_TRUE_RTX means always -- that's the default. */
22349 if (x == const_true_rtx)
22350 return;
22351
22352 if (!COMPARISON_P (x))
22353 {
22354 output_operand_lossage ("invalid operand for code '%c'", code);
22355 return;
22356 }
22357
22358 fputs (arm_condition_codes[get_arm_condition_code (x)],
22359 stream);
22360 return;
22361
22362 case 'D':
22363 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22364 want to do that. */
22365 if (x == const_true_rtx)
22366 {
22367 output_operand_lossage ("instruction never executed");
22368 return;
22369 }
22370 if (!COMPARISON_P (x))
22371 {
22372 output_operand_lossage ("invalid operand for code '%c'", code);
22373 return;
22374 }
22375
22376 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22377 (get_arm_condition_code (x))],
22378 stream);
22379 return;
22380
22381 case 's':
22382 case 'V':
22383 case 'W':
22384 case 'X':
22385 case 'Y':
22386 case 'Z':
22387 /* Former Maverick support, removed after GCC-4.7. */
22388 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22389 return;
22390
22391 case 'U':
22392 if (!REG_P (x)
22393 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22394 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22395 /* Bad value for wCG register number. */
22396 {
22397 output_operand_lossage ("invalid operand for code '%c'", code);
22398 return;
22399 }
22400
22401 else
22402 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22403 return;
22404
22405 /* Print an iWMMXt control register name. */
22406 case 'w':
22407 if (!CONST_INT_P (x)
22408 || INTVAL (x) < 0
22409 || INTVAL (x) >= 16)
22410 /* Bad value for wC register number. */
22411 {
22412 output_operand_lossage ("invalid operand for code '%c'", code);
22413 return;
22414 }
22415
22416 else
22417 {
22418 static const char * wc_reg_names [16] =
22419 {
22420 "wCID", "wCon", "wCSSF", "wCASF",
22421 "wC4", "wC5", "wC6", "wC7",
22422 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22423 "wC12", "wC13", "wC14", "wC15"
22424 };
22425
22426 fputs (wc_reg_names [INTVAL (x)], stream);
22427 }
22428 return;
22429
22430 /* Print the high single-precision register of a VFP double-precision
22431 register. */
22432 case 'p':
22433 {
22434 machine_mode mode = GET_MODE (x);
22435 int regno;
22436
22437 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22438 {
22439 output_operand_lossage ("invalid operand for code '%c'", code);
22440 return;
22441 }
22442
22443 regno = REGNO (x);
22444 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22445 {
22446 output_operand_lossage ("invalid operand for code '%c'", code);
22447 return;
22448 }
22449
22450 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22451 }
22452 return;
22453
22454 /* Print a VFP/Neon double precision or quad precision register name. */
22455 case 'P':
22456 case 'q':
22457 {
22458 machine_mode mode = GET_MODE (x);
22459 int is_quad = (code == 'q');
22460 int regno;
22461
22462 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22463 {
22464 output_operand_lossage ("invalid operand for code '%c'", code);
22465 return;
22466 }
22467
22468 if (!REG_P (x)
22469 || !IS_VFP_REGNUM (REGNO (x)))
22470 {
22471 output_operand_lossage ("invalid operand for code '%c'", code);
22472 return;
22473 }
22474
22475 regno = REGNO (x);
22476 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22477 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22478 {
22479 output_operand_lossage ("invalid operand for code '%c'", code);
22480 return;
22481 }
22482
22483 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22484 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22485 }
22486 return;
22487
22488 /* These two codes print the low/high doubleword register of a Neon quad
22489 register, respectively. For pair-structure types, can also print
22490 low/high quadword registers. */
22491 case 'e':
22492 case 'f':
22493 {
22494 machine_mode mode = GET_MODE (x);
22495 int regno;
22496
22497 if ((GET_MODE_SIZE (mode) != 16
22498 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22499 {
22500 output_operand_lossage ("invalid operand for code '%c'", code);
22501 return;
22502 }
22503
22504 regno = REGNO (x);
22505 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22506 {
22507 output_operand_lossage ("invalid operand for code '%c'", code);
22508 return;
22509 }
22510
22511 if (GET_MODE_SIZE (mode) == 16)
22512 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22513 + (code == 'f' ? 1 : 0));
22514 else
22515 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22516 + (code == 'f' ? 1 : 0));
22517 }
22518 return;
22519
22520 /* Print a VFPv3 floating-point constant, represented as an integer
22521 index. */
22522 case 'G':
22523 {
22524 int index = vfp3_const_double_index (x);
22525 gcc_assert (index != -1);
22526 fprintf (stream, "%d", index);
22527 }
22528 return;
22529
22530 /* Print bits representing opcode features for Neon.
22531
22532 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22533 and polynomials as unsigned.
22534
22535 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22536
22537 Bit 2 is 1 for rounding functions, 0 otherwise. */
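    /* For example (illustrative): a bits value of 5 (binary 101) encodes a
       signed integer operation with rounding, so for that operand '%T'
       prints 's', '%F' prints 'i', '%t' prints 's' and '%O' prints 'r'.  */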
22538
22539 /* Identify the type as 's', 'u', 'p' or 'f'. */
22540 case 'T':
22541 {
22542 HOST_WIDE_INT bits = INTVAL (x);
22543 fputc ("uspf"[bits & 3], stream);
22544 }
22545 return;
22546
22547 /* Likewise, but signed and unsigned integers are both 'i'. */
22548 case 'F':
22549 {
22550 HOST_WIDE_INT bits = INTVAL (x);
22551 fputc ("iipf"[bits & 3], stream);
22552 }
22553 return;
22554
22555 /* As for 'T', but emit 'u' instead of 'p'. */
22556 case 't':
22557 {
22558 HOST_WIDE_INT bits = INTVAL (x);
22559 fputc ("usuf"[bits & 3], stream);
22560 }
22561 return;
22562
22563 /* Bit 2: rounding (vs none). */
22564 case 'O':
22565 {
22566 HOST_WIDE_INT bits = INTVAL (x);
22567 fputs ((bits & 4) != 0 ? "r" : "", stream);
22568 }
22569 return;
22570
22571 /* Memory operand for vld1/vst1 instruction. */
22572 case 'A':
22573 {
22574 rtx addr;
22575 bool postinc = FALSE;
22576 rtx postinc_reg = NULL;
22577 unsigned align, memsize, align_bits;
22578
22579 gcc_assert (MEM_P (x));
22580 addr = XEXP (x, 0);
22581 if (GET_CODE (addr) == POST_INC)
22582 {
22583 postinc = 1;
22584 addr = XEXP (addr, 0);
22585 }
22586 if (GET_CODE (addr) == POST_MODIFY)
22587 {
22588 postinc_reg = XEXP (XEXP (addr, 1), 1);
22589 addr = XEXP (addr, 0);
22590 }
22591 asm_fprintf (stream, "[%r", REGNO (addr));
22592
22593 /* We know the alignment of this access, so we can emit a hint in the
22594 instruction (for some alignments) as an aid to the memory subsystem
22595 of the target. */
22596 align = MEM_ALIGN (x) >> 3;
22597 memsize = MEM_SIZE (x);
22598
22599 /* Only certain alignment specifiers are supported by the hardware. */
22600 if (memsize == 32 && (align % 32) == 0)
22601 align_bits = 256;
22602 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22603 align_bits = 128;
22604 else if (memsize >= 8 && (align % 8) == 0)
22605 align_bits = 64;
22606 else
22607 align_bits = 0;
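	/* For example (illustrative): a 16-byte access known to be 16-byte
	   aligned yields align_bits == 128 and is printed as "[rN:128]",
	   passing the alignment hint on to the hardware.  */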
22608
22609 if (align_bits != 0)
22610 asm_fprintf (stream, ":%d", align_bits);
22611
22612 asm_fprintf (stream, "]");
22613
22614 if (postinc)
22615 fputs ("!", stream);
22616 if (postinc_reg)
22617 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22618 }
22619 return;
22620
22621 case 'C':
22622 {
22623 rtx addr;
22624
22625 gcc_assert (MEM_P (x));
22626 addr = XEXP (x, 0);
22627 gcc_assert (REG_P (addr));
22628 asm_fprintf (stream, "[%r]", REGNO (addr));
22629 }
22630 return;
22631
22632 /* Translate an S register number into a D register number and element index. */
22633 case 'y':
22634 {
22635 machine_mode mode = GET_MODE (x);
22636 int regno;
22637
22638 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22639 {
22640 output_operand_lossage ("invalid operand for code '%c'", code);
22641 return;
22642 }
22643
22644 regno = REGNO (x);
22645 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22646 {
22647 output_operand_lossage ("invalid operand for code '%c'", code);
22648 return;
22649 }
22650
22651 regno = regno - FIRST_VFP_REGNUM;
22652 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22653 }
22654 return;
22655
22656 case 'v':
22657 gcc_assert (CONST_DOUBLE_P (x));
22658 int result;
22659 result = vfp3_const_double_for_fract_bits (x);
22660 if (result == 0)
22661 result = vfp3_const_double_for_bits (x);
22662 fprintf (stream, "#%d", result);
22663 return;
22664
22665 /* Register specifier for vld1.16/vst1.16. Translate the S register
22666 number into a D register number and element index. */
22667 case 'z':
22668 {
22669 machine_mode mode = GET_MODE (x);
22670 int regno;
22671
22672 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22673 {
22674 output_operand_lossage ("invalid operand for code '%c'", code);
22675 return;
22676 }
22677
22678 regno = REGNO (x);
22679 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22680 {
22681 output_operand_lossage ("invalid operand for code '%c'", code);
22682 return;
22683 }
22684
22685 regno = regno - FIRST_VFP_REGNUM;
22686 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
22687 }
22688 return;
22689
22690 default:
22691 if (x == 0)
22692 {
22693 output_operand_lossage ("missing operand");
22694 return;
22695 }
22696
22697 switch (GET_CODE (x))
22698 {
22699 case REG:
22700 asm_fprintf (stream, "%r", REGNO (x));
22701 break;
22702
22703 case MEM:
22704 output_address (GET_MODE (x), XEXP (x, 0));
22705 break;
22706
22707 case CONST_DOUBLE:
22708 {
22709 char fpstr[20];
22710 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22711 sizeof (fpstr), 0, 1);
22712 fprintf (stream, "#%s", fpstr);
22713 }
22714 break;
22715
22716 default:
22717 gcc_assert (GET_CODE (x) != NEG);
22718 fputc ('#', stream);
22719 if (GET_CODE (x) == HIGH)
22720 {
22721 fputs (":lower16:", stream);
22722 x = XEXP (x, 0);
22723 }
22724
22725 output_addr_const (stream, x);
22726 break;
22727 }
22728 }
22729 }
22730 \f
22731 /* Target hook for printing a memory address. */
22732 static void
22733 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22734 {
22735 if (TARGET_32BIT)
22736 {
22737 int is_minus = GET_CODE (x) == MINUS;
22738
22739 if (REG_P (x))
22740 asm_fprintf (stream, "[%r]", REGNO (x));
22741 else if (GET_CODE (x) == PLUS || is_minus)
22742 {
22743 rtx base = XEXP (x, 0);
22744 rtx index = XEXP (x, 1);
22745 HOST_WIDE_INT offset = 0;
22746 if (!REG_P (base)
22747 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22748 {
22749 /* Ensure that BASE is a register. */
22750 /* (one of them must be). */
22751 /* Also ensure that SP is not used as an index register. */
22752 std::swap (base, index);
22753 }
22754 switch (GET_CODE (index))
22755 {
22756 case CONST_INT:
22757 offset = INTVAL (index);
22758 if (is_minus)
22759 offset = -offset;
22760 asm_fprintf (stream, "[%r, #%wd]",
22761 REGNO (base), offset);
22762 break;
22763
22764 case REG:
22765 asm_fprintf (stream, "[%r, %s%r]",
22766 REGNO (base), is_minus ? "-" : "",
22767 REGNO (index));
22768 break;
22769
22770 case MULT:
22771 case ASHIFTRT:
22772 case LSHIFTRT:
22773 case ASHIFT:
22774 case ROTATERT:
22775 {
22776 asm_fprintf (stream, "[%r, %s%r",
22777 REGNO (base), is_minus ? "-" : "",
22778 REGNO (XEXP (index, 0)));
22779 arm_print_operand (stream, index, 'S');
22780 fputs ("]", stream);
22781 break;
22782 }
22783
22784 default:
22785 gcc_unreachable ();
22786 }
22787 }
22788 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22789 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22790 {
22791 gcc_assert (REG_P (XEXP (x, 0)));
22792
22793 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22794 asm_fprintf (stream, "[%r, #%s%d]!",
22795 REGNO (XEXP (x, 0)),
22796 GET_CODE (x) == PRE_DEC ? "-" : "",
22797 GET_MODE_SIZE (mode));
22798 else
22799 asm_fprintf (stream, "[%r], #%s%d",
22800 REGNO (XEXP (x, 0)),
22801 GET_CODE (x) == POST_DEC ? "-" : "",
22802 GET_MODE_SIZE (mode));
22803 }
22804 else if (GET_CODE (x) == PRE_MODIFY)
22805 {
22806 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22807 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22808 asm_fprintf (stream, "#%wd]!",
22809 INTVAL (XEXP (XEXP (x, 1), 1)));
22810 else
22811 asm_fprintf (stream, "%r]!",
22812 REGNO (XEXP (XEXP (x, 1), 1)));
22813 }
22814 else if (GET_CODE (x) == POST_MODIFY)
22815 {
22816 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22817 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22818 asm_fprintf (stream, "#%wd",
22819 INTVAL (XEXP (XEXP (x, 1), 1)));
22820 else
22821 asm_fprintf (stream, "%r",
22822 REGNO (XEXP (XEXP (x, 1), 1)));
22823 }
22824 else output_addr_const (stream, x);
22825 }
22826 else
22827 {
22828 if (REG_P (x))
22829 asm_fprintf (stream, "[%r]", REGNO (x));
22830 else if (GET_CODE (x) == POST_INC)
22831 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22832 else if (GET_CODE (x) == PLUS)
22833 {
22834 gcc_assert (REG_P (XEXP (x, 0)));
22835 if (CONST_INT_P (XEXP (x, 1)))
22836 asm_fprintf (stream, "[%r, #%wd]",
22837 REGNO (XEXP (x, 0)),
22838 INTVAL (XEXP (x, 1)));
22839 else
22840 asm_fprintf (stream, "[%r, %r]",
22841 REGNO (XEXP (x, 0)),
22842 REGNO (XEXP (x, 1)));
22843 }
22844 else
22845 output_addr_const (stream, x);
22846 }
22847 }
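/* Illustrative outputs of the routine above for TARGET_32BIT (r3 is just an
   example register):
     (reg r3)				-> [r3]
     (plus (reg r3) (const_int 8))	-> [r3, #8]
     (post_inc (reg r3)), SImode	-> [r3], #4
     (pre_dec (reg r3)), SImode		-> [r3, #-4]!  */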
22848 \f
22849 /* Target hook for indicating whether a punctuation character for
22850 TARGET_PRINT_OPERAND is valid. */
22851 static bool
22852 arm_print_operand_punct_valid_p (unsigned char code)
22853 {
22854 return (code == '@' || code == '|' || code == '.'
22855 || code == '(' || code == ')' || code == '#'
22856 || (TARGET_32BIT && (code == '?'))
22857 || (TARGET_THUMB2 && (code == '!'))
22858 || (TARGET_THUMB && (code == '_')));
22859 }
22860 \f
22861 /* Target hook for assembling integer objects. The ARM version needs to
22862 handle word-sized values specially. */
22863 static bool
22864 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22865 {
22866 machine_mode mode;
22867
22868 if (size == UNITS_PER_WORD && aligned_p)
22869 {
22870 fputs ("\t.word\t", asm_out_file);
22871 output_addr_const (asm_out_file, x);
22872
22873 /* Mark symbols as position independent. We only do this in the
22874 .text segment, not in the .data segment. */
22875 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22876 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22877 {
22878 /* See legitimize_pic_address for an explanation of the
22879 TARGET_VXWORKS_RTP check. */
22880 /* References to weak symbols cannot be resolved locally:
22881 they may be overridden by a non-weak definition at link
22882 time. */
22883 if (!arm_pic_data_is_text_relative
22884 || (GET_CODE (x) == SYMBOL_REF
22885 && (!SYMBOL_REF_LOCAL_P (x)
22886 || (SYMBOL_REF_DECL (x)
22887 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22888 fputs ("(GOT)", asm_out_file);
22889 else
22890 fputs ("(GOTOFF)", asm_out_file);
22891 }
22892 fputc ('\n', asm_out_file);
22893 return true;
22894 }
22895
22896 mode = GET_MODE (x);
22897
22898 if (arm_vector_mode_supported_p (mode))
22899 {
22900 int i, units;
22901
22902 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22903
22904 units = CONST_VECTOR_NUNITS (x);
22905 size = GET_MODE_UNIT_SIZE (mode);
22906
22907 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22908 for (i = 0; i < units; i++)
22909 {
22910 rtx elt = CONST_VECTOR_ELT (x, i);
22911 assemble_integer
22912 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22913 }
22914 else
22915 for (i = 0; i < units; i++)
22916 {
22917 rtx elt = CONST_VECTOR_ELT (x, i);
22918 assemble_real
22919 (*CONST_DOUBLE_REAL_VALUE (elt),
22920 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22921 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22922 }
22923
22924 return true;
22925 }
22926
22927 return default_assemble_integer (x, size, aligned_p);
22928 }
22929
22930 static void
22931 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22932 {
22933 section *s;
22934
22935 if (!TARGET_AAPCS_BASED)
22936 {
22937 (is_ctor ?
22938 default_named_section_asm_out_constructor
22939 : default_named_section_asm_out_destructor) (symbol, priority);
22940 return;
22941 }
22942
22943 /* Put these in the .init_array section, using a special relocation. */
22944 if (priority != DEFAULT_INIT_PRIORITY)
22945 {
22946 char buf[18];
22947 sprintf (buf, "%s.%.5u",
22948 is_ctor ? ".init_array" : ".fini_array",
22949 priority);
22950 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22951 }
22952 else if (is_ctor)
22953 s = ctors_section;
22954 else
22955 s = dtors_section;
22956
22957 switch_to_section (s);
22958 assemble_align (POINTER_SIZE);
22959 fputs ("\t.word\t", asm_out_file);
22960 output_addr_const (asm_out_file, symbol);
22961 fputs ("(target1)\n", asm_out_file);
22962 }
22963
22964 /* Add a function to the list of static constructors. */
22965
22966 static void
22967 arm_elf_asm_constructor (rtx symbol, int priority)
22968 {
22969 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22970 }
22971
22972 /* Add a function to the list of static destructors. */
22973
22974 static void
22975 arm_elf_asm_destructor (rtx symbol, int priority)
22976 {
22977 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22978 }
22979 \f
22980 /* A finite state machine takes care of noticing whether or not instructions
22981 can be conditionally executed, and thus decrease execution time and code
22982 size by deleting branch instructions. The fsm is controlled by
22983 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22984
22985 /* The state of the fsm controlling condition codes are:
22986 0: normal, do nothing special
22987 1: make ASM_OUTPUT_OPCODE not output this instruction
22988 2: make ASM_OUTPUT_OPCODE not output this instruction
22989 3: make instructions conditional
22990 4: make instructions conditional
22991
22992 State transitions (state->state by whom under condition):
22993 0 -> 1 final_prescan_insn if the `target' is a label
22994 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22995 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22996 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22997 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22998 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22999 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23000 (the target insn is arm_target_insn).
23001
23002 If the jump clobbers the conditions then we use states 2 and 4.
23003
23004 A similar thing can be done with conditional return insns.
23005
23006 XXX In case the `target' is an unconditional branch, this conditionalising
23007 of the instructions always reduces code size, but not always execution
23008 time. But then, I want to reduce the code size to somewhere near what
23009 /bin/cc produces. */
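/* Illustrative example (ARM state): a branch-over sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this machinery as

	cmp	r0, #0
	addne	r1, r1, #1

   eliminating the branch, provided the skipped insns can all be
   conditionalised and fit within max_insns_skipped.  */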
23010
23011 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23012 instructions. When a COND_EXEC instruction is seen the subsequent
23013 instructions are scanned so that multiple conditional instructions can be
23014 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23015 specify the length and true/false mask for the IT block. These will be
23016 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
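/* For example (illustrative, Thumb-2): a COND_EXEC insn under EQ followed by
   one under NE can be emitted as a single IT block

	ite	eq
	moveq	r0, #1
	movne	r0, #0

   rather than as two separate conditional sequences.  */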
23017
23018 /* Returns the index of the ARM condition code string in
23019 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23020 COMPARISON should be an rtx like `(eq (...) (...))'. */
23021
23022 enum arm_cond_code
23023 maybe_get_arm_condition_code (rtx comparison)
23024 {
23025 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23026 enum arm_cond_code code;
23027 enum rtx_code comp_code = GET_CODE (comparison);
23028
23029 if (GET_MODE_CLASS (mode) != MODE_CC)
23030 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23031 XEXP (comparison, 1));
23032
23033 switch (mode)
23034 {
23035 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23036 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23037 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23038 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23039 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23040 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23041 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23042 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23043 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23044 case E_CC_DLTUmode: code = ARM_CC;
23045
23046 dominance:
23047 if (comp_code == EQ)
23048 return ARM_INVERSE_CONDITION_CODE (code);
23049 if (comp_code == NE)
23050 return code;
23051 return ARM_NV;
23052
23053 case E_CC_NOOVmode:
23054 switch (comp_code)
23055 {
23056 case NE: return ARM_NE;
23057 case EQ: return ARM_EQ;
23058 case GE: return ARM_PL;
23059 case LT: return ARM_MI;
23060 default: return ARM_NV;
23061 }
23062
23063 case E_CC_Zmode:
23064 switch (comp_code)
23065 {
23066 case NE: return ARM_NE;
23067 case EQ: return ARM_EQ;
23068 default: return ARM_NV;
23069 }
23070
23071 case E_CC_Nmode:
23072 switch (comp_code)
23073 {
23074 case NE: return ARM_MI;
23075 case EQ: return ARM_PL;
23076 default: return ARM_NV;
23077 }
23078
23079 case E_CCFPEmode:
23080 case E_CCFPmode:
23081 /* We can handle all cases except UNEQ and LTGT. */
23082 switch (comp_code)
23083 {
23084 case GE: return ARM_GE;
23085 case GT: return ARM_GT;
23086 case LE: return ARM_LS;
23087 case LT: return ARM_MI;
23088 case NE: return ARM_NE;
23089 case EQ: return ARM_EQ;
23090 case ORDERED: return ARM_VC;
23091 case UNORDERED: return ARM_VS;
23092 case UNLT: return ARM_LT;
23093 case UNLE: return ARM_LE;
23094 case UNGT: return ARM_HI;
23095 case UNGE: return ARM_PL;
23096 /* UNEQ and LTGT do not have a representation. */
23097 case UNEQ: /* Fall through. */
23098 case LTGT: /* Fall through. */
23099 default: return ARM_NV;
23100 }
23101
23102 case E_CC_SWPmode:
23103 switch (comp_code)
23104 {
23105 case NE: return ARM_NE;
23106 case EQ: return ARM_EQ;
23107 case GE: return ARM_LE;
23108 case GT: return ARM_LT;
23109 case LE: return ARM_GE;
23110 case LT: return ARM_GT;
23111 case GEU: return ARM_LS;
23112 case GTU: return ARM_CC;
23113 case LEU: return ARM_CS;
23114 case LTU: return ARM_HI;
23115 default: return ARM_NV;
23116 }
23117
23118 case E_CC_Cmode:
23119 switch (comp_code)
23120 {
23121 case LTU: return ARM_CS;
23122 case GEU: return ARM_CC;
23123 case NE: return ARM_CS;
23124 case EQ: return ARM_CC;
23125 default: return ARM_NV;
23126 }
23127
23128 case E_CC_CZmode:
23129 switch (comp_code)
23130 {
23131 case NE: return ARM_NE;
23132 case EQ: return ARM_EQ;
23133 case GEU: return ARM_CS;
23134 case GTU: return ARM_HI;
23135 case LEU: return ARM_LS;
23136 case LTU: return ARM_CC;
23137 default: return ARM_NV;
23138 }
23139
23140 case E_CC_NCVmode:
23141 switch (comp_code)
23142 {
23143 case GE: return ARM_GE;
23144 case LT: return ARM_LT;
23145 case GEU: return ARM_CS;
23146 case LTU: return ARM_CC;
23147 default: return ARM_NV;
23148 }
23149
23150 case E_CC_Vmode:
23151 switch (comp_code)
23152 {
23153 case NE: return ARM_VS;
23154 case EQ: return ARM_VC;
23155 default: return ARM_NV;
23156 }
23157
23158 case E_CCmode:
23159 switch (comp_code)
23160 {
23161 case NE: return ARM_NE;
23162 case EQ: return ARM_EQ;
23163 case GE: return ARM_GE;
23164 case GT: return ARM_GT;
23165 case LE: return ARM_LE;
23166 case LT: return ARM_LT;
23167 case GEU: return ARM_CS;
23168 case GTU: return ARM_HI;
23169 case LEU: return ARM_LS;
23170 case LTU: return ARM_CC;
23171 default: return ARM_NV;
23172 }
23173
23174 default: gcc_unreachable ();
23175 }
23176 }
23177
23178 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23179 static enum arm_cond_code
23180 get_arm_condition_code (rtx comparison)
23181 {
23182 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23183 gcc_assert (code != ARM_NV);
23184 return code;
23185 }
23186
23187 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23188 code registers when not targeting Thumb1. The VFP condition register
23189 only exists when generating hard-float code. */
23190 static bool
23191 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23192 {
23193 if (!TARGET_32BIT)
23194 return false;
23195
23196 *p1 = CC_REGNUM;
23197 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23198 return true;
23199 }
23200
23201 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23202 instructions. */
23203 void
23204 thumb2_final_prescan_insn (rtx_insn *insn)
23205 {
23206 rtx_insn *first_insn = insn;
23207 rtx body = PATTERN (insn);
23208 rtx predicate;
23209 enum arm_cond_code code;
23210 int n;
23211 int mask;
23212 int max;
23213
23214 /* max_insns_skipped in the tune was already taken into account in the
23215 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23216 just emit the IT blocks as best we can. It does not make sense to split
23217 the IT blocks. */
23218 max = MAX_INSN_PER_IT_BLOCK;
23219
23220 /* Remove the previous insn from the count of insns to be output. */
23221 if (arm_condexec_count)
23222 arm_condexec_count--;
23223
23224 /* Nothing to do if we are already inside a conditional block. */
23225 if (arm_condexec_count)
23226 return;
23227
23228 if (GET_CODE (body) != COND_EXEC)
23229 return;
23230
23231 /* Conditional jumps are implemented directly. */
23232 if (JUMP_P (insn))
23233 return;
23234
23235 predicate = COND_EXEC_TEST (body);
23236 arm_current_cc = get_arm_condition_code (predicate);
23237
23238 n = get_attr_ce_count (insn);
23239 arm_condexec_count = 1;
23240 arm_condexec_mask = (1 << n) - 1;
23241 arm_condexec_masklen = n;
23242 /* See if subsequent instructions can be combined into the same block. */
23243 for (;;)
23244 {
23245 insn = next_nonnote_insn (insn);
23246
23247 /* Jumping into the middle of an IT block is illegal, so a label or
23248 barrier terminates the block. */
23249 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23250 break;
23251
23252 body = PATTERN (insn);
23253 /* USE and CLOBBER aren't really insns, so just skip them. */
23254 if (GET_CODE (body) == USE
23255 || GET_CODE (body) == CLOBBER)
23256 continue;
23257
23258 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23259 if (GET_CODE (body) != COND_EXEC)
23260 break;
23261 /* Maximum number of conditionally executed instructions in a block. */
23262 n = get_attr_ce_count (insn);
23263 if (arm_condexec_masklen + n > max)
23264 break;
23265
23266 predicate = COND_EXEC_TEST (body);
23267 code = get_arm_condition_code (predicate);
23268 mask = (1 << n) - 1;
23269 if (arm_current_cc == code)
23270 arm_condexec_mask |= (mask << arm_condexec_masklen);
23271 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
23272 break;
23273
23274 arm_condexec_count++;
23275 arm_condexec_masklen += n;
23276
23277 /* A jump must be the last instruction in a conditional block. */
23278 if (JUMP_P (insn))
23279 break;
23280 }
23281 /* Restore recog_data (getting the attributes of other insns can
23282 destroy this array, but final.c assumes that it remains intact
23283 across this call). */
23284 extract_constrain_insn_cached (first_insn);
23285 }
23286
23287 void
23288 arm_final_prescan_insn (rtx_insn *insn)
23289 {
23290 /* BODY will hold the body of INSN. */
23291 rtx body = PATTERN (insn);
23292
23293 /* This will be 1 if trying to repeat the trick, and things need to be
23294 reversed if it appears to fail. */
23295 int reverse = 0;
23296
23297 /* If we start with a return insn, we only succeed if we find another one. */
23298 int seeking_return = 0;
23299 enum rtx_code return_code = UNKNOWN;
23300
23301 /* START_INSN will hold the insn from where we start looking. This is the
23302 first insn after the following code_label if REVERSE is true. */
23303 rtx_insn *start_insn = insn;
23304
23305 /* If in state 4, check if the target branch is reached, in order to
23306 change back to state 0. */
23307 if (arm_ccfsm_state == 4)
23308 {
23309 if (insn == arm_target_insn)
23310 {
23311 arm_target_insn = NULL;
23312 arm_ccfsm_state = 0;
23313 }
23314 return;
23315 }
23316
23317 /* If in state 3, it is possible to repeat the trick, if this insn is an
23318 unconditional branch to a label, and immediately following this branch
23319 is the previous target label which is only used once, and the label this
23320 branch jumps to is not too far off. */
23321 if (arm_ccfsm_state == 3)
23322 {
23323 if (simplejump_p (insn))
23324 {
23325 start_insn = next_nonnote_insn (start_insn);
23326 if (BARRIER_P (start_insn))
23327 {
23328 /* XXX Isn't this always a barrier? */
23329 start_insn = next_nonnote_insn (start_insn);
23330 }
23331 if (LABEL_P (start_insn)
23332 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23333 && LABEL_NUSES (start_insn) == 1)
23334 reverse = TRUE;
23335 else
23336 return;
23337 }
23338 else if (ANY_RETURN_P (body))
23339 {
23340 start_insn = next_nonnote_insn (start_insn);
23341 if (BARRIER_P (start_insn))
23342 start_insn = next_nonnote_insn (start_insn);
23343 if (LABEL_P (start_insn)
23344 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23345 && LABEL_NUSES (start_insn) == 1)
23346 {
23347 reverse = TRUE;
23348 seeking_return = 1;
23349 return_code = GET_CODE (body);
23350 }
23351 else
23352 return;
23353 }
23354 else
23355 return;
23356 }
23357
23358 gcc_assert (!arm_ccfsm_state || reverse);
23359 if (!JUMP_P (insn))
23360 return;
23361
23362 /* This jump might be paralleled with a clobber of the condition codes;
23363 the jump should always come first.  */
23364 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23365 body = XVECEXP (body, 0, 0);
23366
23367 if (reverse
23368 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23369 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23370 {
23371 int insns_skipped;
23372 int fail = FALSE, succeed = FALSE;
23373 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23374 int then_not_else = TRUE;
23375 rtx_insn *this_insn = start_insn;
23376 rtx label = 0;
23377
23378 /* Register the insn jumped to. */
23379 if (reverse)
23380 {
23381 if (!seeking_return)
23382 label = XEXP (SET_SRC (body), 0);
23383 }
23384 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23385 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23386 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23387 {
23388 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23389 then_not_else = FALSE;
23390 }
23391 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23392 {
23393 seeking_return = 1;
23394 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23395 }
23396 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23397 {
23398 seeking_return = 1;
23399 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23400 then_not_else = FALSE;
23401 }
23402 else
23403 gcc_unreachable ();
23404
23405 /* See how many insns this branch skips, and what kind of insns. If all
23406 insns are okay, and the label or unconditional branch to the same
23407 label is not too far away, succeed. */
23408 for (insns_skipped = 0;
23409 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23410 {
23411 rtx scanbody;
23412
23413 this_insn = next_nonnote_insn (this_insn);
23414 if (!this_insn)
23415 break;
23416
23417 switch (GET_CODE (this_insn))
23418 {
23419 case CODE_LABEL:
23420 /* Succeed if it is the target label, otherwise fail since
23421 control falls in from somewhere else. */
23422 if (this_insn == label)
23423 {
23424 arm_ccfsm_state = 1;
23425 succeed = TRUE;
23426 }
23427 else
23428 fail = TRUE;
23429 break;
23430
23431 case BARRIER:
23432 /* Succeed if the following insn is the target label.
23433 Otherwise fail.
23434 If return insns are used then the last insn in a function
23435 will be a barrier. */
23436 this_insn = next_nonnote_insn (this_insn);
23437 if (this_insn && this_insn == label)
23438 {
23439 arm_ccfsm_state = 1;
23440 succeed = TRUE;
23441 }
23442 else
23443 fail = TRUE;
23444 break;
23445
23446 case CALL_INSN:
23447 /* The AAPCS says that conditional calls should not be
23448 used since they make interworking inefficient (the
23449 linker can't transform BL<cond> into BLX). That's
23450 only a problem if the machine has BLX. */
23451 if (arm_arch5t)
23452 {
23453 fail = TRUE;
23454 break;
23455 }
23456
23457 /* Succeed if the following insn is the target label, or
23458 if the following two insns are a barrier and the
23459 target label. */
23460 this_insn = next_nonnote_insn (this_insn);
23461 if (this_insn && BARRIER_P (this_insn))
23462 this_insn = next_nonnote_insn (this_insn);
23463
23464 if (this_insn && this_insn == label
23465 && insns_skipped < max_insns_skipped)
23466 {
23467 arm_ccfsm_state = 1;
23468 succeed = TRUE;
23469 }
23470 else
23471 fail = TRUE;
23472 break;
23473
23474 case JUMP_INSN:
23475 /* If this is an unconditional branch to the same label, succeed.
23476 If it is to another label, do nothing. If it is conditional,
23477 fail. */
23478 /* XXX Probably, the tests for SET and the PC are
23479 unnecessary. */
23480
23481 scanbody = PATTERN (this_insn);
23482 if (GET_CODE (scanbody) == SET
23483 && GET_CODE (SET_DEST (scanbody)) == PC)
23484 {
23485 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23486 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23487 {
23488 arm_ccfsm_state = 2;
23489 succeed = TRUE;
23490 }
23491 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23492 fail = TRUE;
23493 }
23494 /* Fail if a conditional return is undesirable (e.g. on a
23495 StrongARM), but still allow this if optimizing for size. */
23496 else if (GET_CODE (scanbody) == return_code
23497 && !use_return_insn (TRUE, NULL)
23498 && !optimize_size)
23499 fail = TRUE;
23500 else if (GET_CODE (scanbody) == return_code)
23501 {
23502 arm_ccfsm_state = 2;
23503 succeed = TRUE;
23504 }
23505 else if (GET_CODE (scanbody) == PARALLEL)
23506 {
23507 switch (get_attr_conds (this_insn))
23508 {
23509 case CONDS_NOCOND:
23510 break;
23511 default:
23512 fail = TRUE;
23513 break;
23514 }
23515 }
23516 else
23517 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23518
23519 break;
23520
23521 case INSN:
23522 /* Instructions using or affecting the condition codes make it
23523 fail. */
23524 scanbody = PATTERN (this_insn);
23525 if (!(GET_CODE (scanbody) == SET
23526 || GET_CODE (scanbody) == PARALLEL)
23527 || get_attr_conds (this_insn) != CONDS_NOCOND)
23528 fail = TRUE;
23529 break;
23530
23531 default:
23532 break;
23533 }
23534 }
23535 if (succeed)
23536 {
23537 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23538 arm_target_label = CODE_LABEL_NUMBER (label);
23539 else
23540 {
23541 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23542
23543 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23544 {
23545 this_insn = next_nonnote_insn (this_insn);
23546 gcc_assert (!this_insn
23547 || (!BARRIER_P (this_insn)
23548 && !LABEL_P (this_insn)));
23549 }
23550 if (!this_insn)
23551 {
23552 /* Oh dear!  We ran off the end; give up.  */
23553 extract_constrain_insn_cached (insn);
23554 arm_ccfsm_state = 0;
23555 arm_target_insn = NULL;
23556 return;
23557 }
23558 arm_target_insn = this_insn;
23559 }
23560
23561 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23562 what it was. */
23563 if (!reverse)
23564 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23565
23566 if (reverse || then_not_else)
23567 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23568 }
23569
23570 /* Restore recog_data (getting the attributes of other insns can
23571 destroy this array, but final.c assumes that it remains intact
23572 across this call). */
23573 extract_constrain_insn_cached (insn);
23574 }
23575 }
23576
23577 /* Output IT instructions. */
23578 void
23579 thumb2_asm_output_opcode (FILE * stream)
23580 {
23581 char buff[5];
23582 int n;
23583
23584 if (arm_condexec_mask)
23585 {
23586 for (n = 0; n < arm_condexec_masklen; n++)
23587 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23588 buff[n] = 0;
23589 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23590 arm_condition_codes[arm_current_cc]);
23591 arm_condexec_mask = 0;
23592 }
23593 }
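
/* Example (added for illustration): with arm_current_cc == ARM_EQ,
   arm_condexec_mask == 0x3 and arm_condexec_masklen == 3, the loop above
   builds buff == "tte" and the function emits "itte  eq" ahead of the first
   instruction of the block; arm_condexec_mask is then cleared so the IT
   prefix is printed only once per block.  */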
23594
23595 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23596 UNITS_PER_WORD bytes wide. */
23597 static unsigned int
23598 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23599 {
23600 if (TARGET_32BIT
23601 && regno > PC_REGNUM
23602 && regno != FRAME_POINTER_REGNUM
23603 && regno != ARG_POINTER_REGNUM
23604 && !IS_VFP_REGNUM (regno))
23605 return 1;
23606
23607 return ARM_NUM_REGS (mode);
23608 }
23609
23610 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23611 static bool
23612 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23613 {
23614 if (GET_MODE_CLASS (mode) == MODE_CC)
23615 return (regno == CC_REGNUM
23616 || (TARGET_HARD_FLOAT
23617 && regno == VFPCC_REGNUM));
23618
23619 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23620 return false;
23621
23622 if (TARGET_THUMB1)
23623 /* For the Thumb we only allow values bigger than SImode in
23624 registers 0 - 6, so that there is always a second low
23625 register available to hold the upper part of the value.
23626 We probably ought to ensure that the register is the
23627 start of an even numbered register pair. */
23628 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23629
23630 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23631 {
23632 if (mode == SFmode || mode == SImode)
23633 return VFP_REGNO_OK_FOR_SINGLE (regno);
23634
23635 if (mode == DFmode)
23636 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23637
23638 if (mode == HFmode)
23639 return VFP_REGNO_OK_FOR_SINGLE (regno);
23640
23641 /* VFP registers can hold HImode values. */
23642 if (mode == HImode)
23643 return VFP_REGNO_OK_FOR_SINGLE (regno);
23644
23645 if (TARGET_NEON)
23646 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23647 || (VALID_NEON_QREG_MODE (mode)
23648 && NEON_REGNO_OK_FOR_QUAD (regno))
23649 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23650 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23651 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23652 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23653 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23654
23655 return false;
23656 }
23657
23658 if (TARGET_REALLY_IWMMXT)
23659 {
23660 if (IS_IWMMXT_GR_REGNUM (regno))
23661 return mode == SImode;
23662
23663 if (IS_IWMMXT_REGNUM (regno))
23664 return VALID_IWMMXT_REG_MODE (mode);
23665 }
23666
23667 /* We allow almost any value to be stored in the general registers.
23668 Restrict doubleword quantities to even register pairs in ARM state
23669 so that we can use ldrd. Do not allow very large Neon structure
23670 opaque modes in general registers; they would use too many. */
23671 if (regno <= LAST_ARM_REGNUM)
23672 {
23673 if (ARM_NUM_REGS (mode) > 4)
23674 return false;
23675
23676 if (TARGET_THUMB2)
23677 return true;
23678
23679 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23680 }
23681
23682 if (regno == FRAME_POINTER_REGNUM
23683 || regno == ARG_POINTER_REGNUM)
23684 /* We only allow integers in the fake hard registers. */
23685 return GET_MODE_CLASS (mode) == MODE_INT;
23686
23687 return false;
23688 }
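
/* Illustrative examples (added comment): for the core registers, in ARM
   state with TARGET_LDRD a DImode value is accepted starting at the even
   register r0 but rejected starting at the odd register r1, so that
   LDRD/STRD can be used; in Thumb-1, any value wider than a word must start
   in r0-r6 so that a second low register is free for the upper part.  */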
23689
23690 /* Implement TARGET_MODES_TIEABLE_P. */
23691
23692 static bool
23693 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23694 {
23695 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23696 return true;
23697
23698 /* We specifically want to allow elements of "structure" modes to
23699 be tieable to the structure. This more general condition allows
23700 other rarer situations too. */
23701 if (TARGET_NEON
23702 && (VALID_NEON_DREG_MODE (mode1)
23703 || VALID_NEON_QREG_MODE (mode1)
23704 || VALID_NEON_STRUCT_MODE (mode1))
23705 && (VALID_NEON_DREG_MODE (mode2)
23706 || VALID_NEON_QREG_MODE (mode2)
23707 || VALID_NEON_STRUCT_MODE (mode2)))
23708 return true;
23709
23710 return false;
23711 }
23712
23713 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23714 not used in ARM mode. */
23715
23716 enum reg_class
23717 arm_regno_class (int regno)
23718 {
23719 if (regno == PC_REGNUM)
23720 return NO_REGS;
23721
23722 if (TARGET_THUMB1)
23723 {
23724 if (regno == STACK_POINTER_REGNUM)
23725 return STACK_REG;
23726 if (regno == CC_REGNUM)
23727 return CC_REG;
23728 if (regno < 8)
23729 return LO_REGS;
23730 return HI_REGS;
23731 }
23732
23733 if (TARGET_THUMB2 && regno < 8)
23734 return LO_REGS;
23735
23736 if ( regno <= LAST_ARM_REGNUM
23737 || regno == FRAME_POINTER_REGNUM
23738 || regno == ARG_POINTER_REGNUM)
23739 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23740
23741 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23742 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23743
23744 if (IS_VFP_REGNUM (regno))
23745 {
23746 if (regno <= D7_VFP_REGNUM)
23747 return VFP_D0_D7_REGS;
23748 else if (regno <= LAST_LO_VFP_REGNUM)
23749 return VFP_LO_REGS;
23750 else
23751 return VFP_HI_REGS;
23752 }
23753
23754 if (IS_IWMMXT_REGNUM (regno))
23755 return IWMMXT_REGS;
23756
23757 if (IS_IWMMXT_GR_REGNUM (regno))
23758 return IWMMXT_GR_REGS;
23759
23760 return NO_REGS;
23761 }
23762
23763 /* Handle a special case when computing the offset
23764 of an argument from the frame pointer. */
23765 int
23766 arm_debugger_arg_offset (int value, rtx addr)
23767 {
23768 rtx_insn *insn;
23769
23770 /* We are only interested if dbxout_parms() failed to compute the offset. */
23771 if (value != 0)
23772 return 0;
23773
23774 /* We can only cope with the case where the address is held in a register. */
23775 if (!REG_P (addr))
23776 return 0;
23777
23778 /* If we are using the frame pointer to point at the argument, then
23779 an offset of 0 is correct. */
23780 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23781 return 0;
23782
23783 /* If we are using the stack pointer to point at the
23784 argument, then an offset of 0 is correct. */
23785 /* ??? Check this is consistent with thumb2 frame layout. */
23786 if ((TARGET_THUMB || !frame_pointer_needed)
23787 && REGNO (addr) == SP_REGNUM)
23788 return 0;
23789
23790 /* Oh dear. The argument is pointed to by a register rather
23791 than being held in a register, or being stored at a known
23792 offset from the frame pointer. Since GDB only understands
23793 those two kinds of argument we must translate the address
23794 held in the register into an offset from the frame pointer.
23795 We do this by searching through the insns for the function
23796 looking to see where this register gets its value. If the
23797 register is initialized from the frame pointer plus an offset
23798 then we are in luck and we can continue, otherwise we give up.
23799
23800 This code is exercised by producing debugging information
23801 for a function with arguments like this:
23802
23803 double func (double a, double b, int c, double d) {return d;}
23804
23805 Without this code the stab for parameter 'd' will be set to
23806 an offset of 0 from the frame pointer, rather than 8. */
23807
23808 /* The if() statement says:
23809
23810 If the insn is a normal instruction
23811 and if the insn is setting the value in a register
23812 and if the register being set is the register holding the address of the argument
23813 and if the address is computed by an addition
23814 that involves adding to a register
23815 which is the frame pointer
23816 a constant integer
23817
23818 then... */
23819
23820 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23821 {
23822 if ( NONJUMP_INSN_P (insn)
23823 && GET_CODE (PATTERN (insn)) == SET
23824 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23825 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23826 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23827 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23828 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23829 )
23830 {
23831 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23832
23833 break;
23834 }
23835 }
23836
23837 if (value == 0)
23838 {
23839 debug_rtx (addr);
23840 warning (0, "unable to compute real location of stacked parameter");
23841 value = 8; /* XXX magic hack */
23842 }
23843
23844 return value;
23845 }
23846 \f
23847 /* Implement TARGET_PROMOTED_TYPE. */
23848
23849 static tree
23850 arm_promoted_type (const_tree t)
23851 {
23852 if (SCALAR_FLOAT_TYPE_P (t)
23853 && TYPE_PRECISION (t) == 16
23854 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23855 return float_type_node;
23856 return NULL_TREE;
23857 }
23858
23859 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23860 This simply adds HFmode as a supported mode; even though we don't
23861 implement arithmetic on this type directly, it's supported by
23862 optabs conversions, much the way the double-word arithmetic is
23863 special-cased in the default hook. */
23864
23865 static bool
23866 arm_scalar_mode_supported_p (scalar_mode mode)
23867 {
23868 if (mode == HFmode)
23869 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23870 else if (ALL_FIXED_POINT_MODE_P (mode))
23871 return true;
23872 else
23873 return default_scalar_mode_supported_p (mode);
23874 }
23875
23876 /* Set the value of FLT_EVAL_METHOD.
23877 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23878
23879 0: evaluate all operations and constants, whose semantic type has at
23880 most the range and precision of type float, to the range and
23881 precision of float; evaluate all other operations and constants to
23882 the range and precision of the semantic type;
23883
23884 N, where _FloatN is a supported interchange floating type
23885 evaluate all operations and constants, whose semantic type has at
23886 most the range and precision of _FloatN type, to the range and
23887 precision of the _FloatN type; evaluate all other operations and
23888 constants to the range and precision of the semantic type;
23889
23890 If we have the ARMv8.2-A extensions then we support _Float16 in native
23891 precision, so we should set this to 16. Otherwise, we support the type,
23892 but want to evaluate expressions in float precision, so set this to
23893 0. */
23894
23895 static enum flt_eval_method
23896 arm_excess_precision (enum excess_precision_type type)
23897 {
23898 switch (type)
23899 {
23900 case EXCESS_PRECISION_TYPE_FAST:
23901 case EXCESS_PRECISION_TYPE_STANDARD:
23902 /* We can calculate either in 16-bit range and precision or
23903 32-bit range and precision. Make that decision based on whether
23904 we have native support for the ARMv8.2-A 16-bit floating-point
23905 instructions or not. */
23906 return (TARGET_VFP_FP16INST
23907 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23908 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23909 case EXCESS_PRECISION_TYPE_IMPLICIT:
23910 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23911 default:
23912 gcc_unreachable ();
23913 }
23914 return FLT_EVAL_METHOD_UNPREDICTABLE;
23915 }
23916
23917
23918 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23919 _Float16 if we are using anything other than ieee format for 16-bit
23920 floating point. Otherwise, punt to the default implementation. */
23921 static opt_scalar_float_mode
23922 arm_floatn_mode (int n, bool extended)
23923 {
23924 if (!extended && n == 16)
23925 {
23926 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23927 return HFmode;
23928 return opt_scalar_float_mode ();
23929 }
23930
23931 return default_floatn_mode (n, extended);
23932 }
23933
23934
23935 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23936 not to early-clobber SRC registers in the process.
23937
23938 We assume that the operands described by SRC and DEST represent a
23939 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23940 number of components into which the copy has been decomposed. */
23941 void
23942 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23943 {
23944 unsigned int i;
23945
23946 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23947 || REGNO (operands[0]) < REGNO (operands[1]))
23948 {
23949 for (i = 0; i < count; i++)
23950 {
23951 operands[2 * i] = dest[i];
23952 operands[2 * i + 1] = src[i];
23953 }
23954 }
23955 else
23956 {
23957 for (i = 0; i < count; i++)
23958 {
23959 operands[2 * i] = dest[count - i - 1];
23960 operands[2 * i + 1] = src[count - i - 1];
23961 }
23962 }
23963 }
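
/* Worked example (added comment): copying d1/d2 into d2/d3 overlaps and the
   destination has the higher register number, so the second branch above
   lists the component moves in reverse order (d3 <- d2 first, then
   d2 <- d1); the caller then emits them in that order and no source is
   clobbered before it has been read.  */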
23964
23965 /* Split operands into moves from op[1] + op[2] into op[0]. */
23966
23967 void
23968 neon_split_vcombine (rtx operands[3])
23969 {
23970 unsigned int dest = REGNO (operands[0]);
23971 unsigned int src1 = REGNO (operands[1]);
23972 unsigned int src2 = REGNO (operands[2]);
23973 machine_mode halfmode = GET_MODE (operands[1]);
23974 unsigned int halfregs = REG_NREGS (operands[1]);
23975 rtx destlo, desthi;
23976
23977 if (src1 == dest && src2 == dest + halfregs)
23978 {
23979 /* No-op move. Can't split to nothing; emit something. */
23980 emit_note (NOTE_INSN_DELETED);
23981 return;
23982 }
23983
23984 /* Preserve register attributes for variable tracking. */
23985 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23986 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23987 GET_MODE_SIZE (halfmode));
23988
23989 /* Special case of reversed high/low parts. Use VSWP. */
23990 if (src2 == dest && src1 == dest + halfregs)
23991 {
23992 rtx x = gen_rtx_SET (destlo, operands[1]);
23993 rtx y = gen_rtx_SET (desthi, operands[2]);
23994 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23995 return;
23996 }
23997
23998 if (!reg_overlap_mentioned_p (operands[2], destlo))
23999 {
24000 /* Try to avoid unnecessary moves if part of the result
24001 is in the right place already. */
24002 if (src1 != dest)
24003 emit_move_insn (destlo, operands[1]);
24004 if (src2 != dest + halfregs)
24005 emit_move_insn (desthi, operands[2]);
24006 }
24007 else
24008 {
24009 if (src2 != dest + halfregs)
24010 emit_move_insn (desthi, operands[2]);
24011 if (src1 != dest)
24012 emit_move_insn (destlo, operands[1]);
24013 }
24014 }
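
/* Added note for illustration: three cases are handled above.  If the two
   sources are already the two halves of the destination in the right order,
   only a deleted-insn note is emitted; if they are the two halves in swapped
   order, a single parallel of both sets is emitted (intended to match a VSWP
   pattern, per the comment above); otherwise ordinary moves are emitted,
   ordered so that writing the low half cannot clobber the second source
   before it is read.  */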
24015 \f
24016 /* Return the number (counting from 0) of
24017 the least significant set bit in MASK. */
24018
24019 inline static int
24020 number_of_first_bit_set (unsigned mask)
24021 {
24022 return ctz_hwi (mask);
24023 }
24024
24025 /* Like emit_multi_reg_push, but allowing for a different set of
24026 registers to be described as saved. MASK is the set of registers
24027 to be saved; REAL_REGS is the set of registers to be described as
24028 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24029
24030 static rtx_insn *
24031 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24032 {
24033 unsigned long regno;
24034 rtx par[10], tmp, reg;
24035 rtx_insn *insn;
24036 int i, j;
24037
24038 /* Build the parallel of the registers actually being stored. */
24039 for (i = 0; mask; ++i, mask &= mask - 1)
24040 {
24041 regno = ctz_hwi (mask);
24042 reg = gen_rtx_REG (SImode, regno);
24043
24044 if (i == 0)
24045 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24046 else
24047 tmp = gen_rtx_USE (VOIDmode, reg);
24048
24049 par[i] = tmp;
24050 }
24051
24052 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24053 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24054 tmp = gen_frame_mem (BLKmode, tmp);
24055 tmp = gen_rtx_SET (tmp, par[0]);
24056 par[0] = tmp;
24057
24058 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24059 insn = emit_insn (tmp);
24060
24061 /* Always build the stack adjustment note for unwind info. */
24062 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24063 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24064 par[0] = tmp;
24065
24066 /* Build the parallel of the registers recorded as saved for unwind. */
24067 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24068 {
24069 regno = ctz_hwi (real_regs);
24070 reg = gen_rtx_REG (SImode, regno);
24071
24072 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24073 tmp = gen_frame_mem (SImode, tmp);
24074 tmp = gen_rtx_SET (tmp, reg);
24075 RTX_FRAME_RELATED_P (tmp) = 1;
24076 par[j + 1] = tmp;
24077 }
24078
24079 if (j == 0)
24080 tmp = par[0];
24081 else
24082 {
24083 RTX_FRAME_RELATED_P (par[0]) = 1;
24084 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24085 }
24086
24087 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24088
24089 return insn;
24090 }
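
/* Example (added comment; register numbers assume the usual core register
   numbering with LR == r14): thumb1_emit_multi_reg_push (0x4070, 0x4070)
   pushes r4, r5, r6 and lr.  The emitted insn is a parallel whose first
   element stores the block at the pre-modified stack pointer (sp := sp - 16),
   while the attached REG_FRAME_RELATED_EXPR note describes the same effect
   as four individual SImode saves at sp+0 ... sp+12 plus the 16-byte stack
   adjustment, which is the form the unwind machinery expects.  */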
24091
24092 /* Emit code to push or pop registers to or from the stack. F is the
24093 assembly file. MASK is the registers to pop. */
24094 static void
24095 thumb_pop (FILE *f, unsigned long mask)
24096 {
24097 int regno;
24098 int lo_mask = mask & 0xFF;
24099
24100 gcc_assert (mask);
24101
24102 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24103 {
24104 /* Special case. Do not generate a POP PC statement here, do it in
24105 thumb_exit(). */
24106 thumb_exit (f, -1);
24107 return;
24108 }
24109
24110 fprintf (f, "\tpop\t{");
24111
24112 /* Look at the low registers first. */
24113 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24114 {
24115 if (lo_mask & 1)
24116 {
24117 asm_fprintf (f, "%r", regno);
24118
24119 if ((lo_mask & ~1) != 0)
24120 fprintf (f, ", ");
24121 }
24122 }
24123
24124 if (mask & (1 << PC_REGNUM))
24125 {
24126 /* Catch popping the PC. */
24127 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24128 || IS_CMSE_ENTRY (arm_current_func_type ()))
24129 {
24130 /* The PC is never popped directly; instead
24131 it is popped into r3 and then BX is used. */
24132 fprintf (f, "}\n");
24133
24134 thumb_exit (f, -1);
24135
24136 return;
24137 }
24138 else
24139 {
24140 if (mask & 0xFF)
24141 fprintf (f, ", ");
24142
24143 asm_fprintf (f, "%r", PC_REGNUM);
24144 }
24145 }
24146
24147 fprintf (f, "}\n");
24148 }
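
/* Examples (added comment): thumb_pop (f, 0x0011) emits "pop {r0, r4}";
   thumb_pop (f, 0x8010) emits "pop {r4, pc}" when popping the PC directly is
   allowed, and otherwise emits "pop {r4}" and defers the actual return to
   thumb_exit.  */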
24149
24150 /* Generate code to return from a thumb function.
24151 If 'reg_containing_return_addr' is -1, then the return address is
24152 actually on the stack, at the stack pointer.
24153
24154 Note: do not forget to update length attribute of corresponding insn pattern
24155 when changing assembly output (e.g. length attribute of epilogue_insns when
24156 updating Armv8-M Baseline Security Extensions register clearing
24157 sequences). */
24158 static void
24159 thumb_exit (FILE *f, int reg_containing_return_addr)
24160 {
24161 unsigned regs_available_for_popping;
24162 unsigned regs_to_pop;
24163 int pops_needed;
24164 unsigned available;
24165 unsigned required;
24166 machine_mode mode;
24167 int size;
24168 int restore_a4 = FALSE;
24169
24170 /* Compute the registers we need to pop. */
24171 regs_to_pop = 0;
24172 pops_needed = 0;
24173
24174 if (reg_containing_return_addr == -1)
24175 {
24176 regs_to_pop |= 1 << LR_REGNUM;
24177 ++pops_needed;
24178 }
24179
24180 if (TARGET_BACKTRACE)
24181 {
24182 /* Restore the (ARM) frame pointer and stack pointer. */
24183 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24184 pops_needed += 2;
24185 }
24186
24187 /* If there is nothing to pop then just emit the BX instruction and
24188 return. */
24189 if (pops_needed == 0)
24190 {
24191 if (crtl->calls_eh_return)
24192 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24193
24194 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24195 {
24196 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24197 reg_containing_return_addr);
24198 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24199 }
24200 else
24201 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24202 return;
24203 }
24204 /* Otherwise if we are not supporting interworking and we have not created
24205 a backtrace structure and the function was not entered in ARM mode then
24206 just pop the return address straight into the PC. */
24207 else if (!TARGET_INTERWORK
24208 && !TARGET_BACKTRACE
24209 && !is_called_in_ARM_mode (current_function_decl)
24210 && !crtl->calls_eh_return
24211 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24212 {
24213 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24214 return;
24215 }
24216
24217 /* Find out how many of the (return) argument registers we can corrupt. */
24218 regs_available_for_popping = 0;
24219
24220 /* If returning via __builtin_eh_return, the bottom three registers
24221 all contain information needed for the return. */
24222 if (crtl->calls_eh_return)
24223 size = 12;
24224 else
24225 {
24226 /* See if we can deduce the registers used from the function's
24227 return value. This is more reliable than examining
24228 df_regs_ever_live_p () because that will be set if the register is
24229 ever used in the function, not just if the register is used
24230 to hold a return value. */
24231
24232 if (crtl->return_rtx != 0)
24233 mode = GET_MODE (crtl->return_rtx);
24234 else
24235 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24236
24237 size = GET_MODE_SIZE (mode);
24238
24239 if (size == 0)
24240 {
24241 /* In a void function we can use any argument register.
24242 In a function that returns a structure on the stack
24243 we can use the second and third argument registers. */
24244 if (mode == VOIDmode)
24245 regs_available_for_popping =
24246 (1 << ARG_REGISTER (1))
24247 | (1 << ARG_REGISTER (2))
24248 | (1 << ARG_REGISTER (3));
24249 else
24250 regs_available_for_popping =
24251 (1 << ARG_REGISTER (2))
24252 | (1 << ARG_REGISTER (3));
24253 }
24254 else if (size <= 4)
24255 regs_available_for_popping =
24256 (1 << ARG_REGISTER (2))
24257 | (1 << ARG_REGISTER (3));
24258 else if (size <= 8)
24259 regs_available_for_popping =
24260 (1 << ARG_REGISTER (3));
24261 }
24262
24263 /* Match registers to be popped with registers into which we pop them. */
24264 for (available = regs_available_for_popping,
24265 required = regs_to_pop;
24266 required != 0 && available != 0;
24267 available &= ~(available & - available),
24268 required &= ~(required & - required))
24269 -- pops_needed;
24270
24271 /* If we have any popping registers left over, remove them. */
24272 if (available > 0)
24273 regs_available_for_popping &= ~available;
24274
24275 /* Otherwise if we need another popping register we can use
24276 the fourth argument register. */
24277 else if (pops_needed)
24278 {
24279 /* If we have not found any free argument registers and
24280 reg a4 contains the return address, we must move it. */
24281 if (regs_available_for_popping == 0
24282 && reg_containing_return_addr == LAST_ARG_REGNUM)
24283 {
24284 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24285 reg_containing_return_addr = LR_REGNUM;
24286 }
24287 else if (size > 12)
24288 {
24289 /* Register a4 is being used to hold part of the return value,
24290 but we have dire need of a free, low register. */
24291 restore_a4 = TRUE;
24292
24293 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24294 }
24295
24296 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24297 {
24298 /* The fourth argument register is available. */
24299 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24300
24301 --pops_needed;
24302 }
24303 }
24304
24305 /* Pop as many registers as we can. */
24306 thumb_pop (f, regs_available_for_popping);
24307
24308 /* Process the registers we popped. */
24309 if (reg_containing_return_addr == -1)
24310 {
24311 /* The return address was popped into the lowest numbered register. */
24312 regs_to_pop &= ~(1 << LR_REGNUM);
24313
24314 reg_containing_return_addr =
24315 number_of_first_bit_set (regs_available_for_popping);
24316
24317 /* Remove this register from the mask of available registers, so that
24318 the return address will not be corrupted by further pops. */
24319 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24320 }
24321
24322 /* If we popped other registers then handle them here. */
24323 if (regs_available_for_popping)
24324 {
24325 int frame_pointer;
24326
24327 /* Work out which register currently contains the frame pointer. */
24328 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24329
24330 /* Move it into the correct place. */
24331 asm_fprintf (f, "\tmov\t%r, %r\n",
24332 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24333
24334 /* (Temporarily) remove it from the mask of popped registers. */
24335 regs_available_for_popping &= ~(1 << frame_pointer);
24336 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24337
24338 if (regs_available_for_popping)
24339 {
24340 int stack_pointer;
24341
24342 /* We popped the stack pointer as well;
24343 find the register that contains it. */
24344 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24345
24346 /* Move it into the stack register. */
24347 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24348
24349 /* At this point we have popped all necessary registers, so
24350 do not worry about restoring regs_available_for_popping
24351 to its correct value:
24352
24353 assert (pops_needed == 0)
24354 assert (regs_available_for_popping == (1 << frame_pointer))
24355 assert (regs_to_pop == (1 << STACK_POINTER)) */
24356 }
24357 else
24358 {
24359 /* Since we have just moved the popped value into the frame
24360 pointer, the popping register is available for reuse, and
24361 we know that we still have the stack pointer left to pop. */
24362 regs_available_for_popping |= (1 << frame_pointer);
24363 }
24364 }
24365
24366 /* If we still have registers left on the stack, but we no longer have
24367 any registers into which we can pop them, then we must move the return
24368 address into the link register and make available the register that
24369 contained it. */
24370 if (regs_available_for_popping == 0 && pops_needed > 0)
24371 {
24372 regs_available_for_popping |= 1 << reg_containing_return_addr;
24373
24374 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24375 reg_containing_return_addr);
24376
24377 reg_containing_return_addr = LR_REGNUM;
24378 }
24379
24380 /* If we have registers left on the stack then pop some more.
24381 We know that at most we will want to pop FP and SP. */
24382 if (pops_needed > 0)
24383 {
24384 int popped_into;
24385 int move_to;
24386
24387 thumb_pop (f, regs_available_for_popping);
24388
24389 /* We have popped either FP or SP.
24390 Move whichever one it is into the correct register. */
24391 popped_into = number_of_first_bit_set (regs_available_for_popping);
24392 move_to = number_of_first_bit_set (regs_to_pop);
24393
24394 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24395 --pops_needed;
24396 }
24397
24398 /* If we still have not popped everything then we must have only
24399 had one register available to us and we are now popping the SP. */
24400 if (pops_needed > 0)
24401 {
24402 int popped_into;
24403
24404 thumb_pop (f, regs_available_for_popping);
24405
24406 popped_into = number_of_first_bit_set (regs_available_for_popping);
24407
24408 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24409 /*
24410 assert (regs_to_pop == (1 << STACK_POINTER))
24411 assert (pops_needed == 1)
24412 */
24413 }
24414
24415 /* If necessary restore the a4 register. */
24416 if (restore_a4)
24417 {
24418 if (reg_containing_return_addr != LR_REGNUM)
24419 {
24420 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24421 reg_containing_return_addr = LR_REGNUM;
24422 }
24423
24424 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24425 }
24426
24427 if (crtl->calls_eh_return)
24428 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24429
24430 /* Return to caller. */
24431 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24432 {
24433 /* This is for the cases where LR is not being used to contain the return
24434 address. It may therefore contain information that we might not want
24435 to leak, hence it must be cleared. The value in R0 will never be a
24436 secret at this point, so it is safe to use it, see the clearing code
24437 in 'cmse_nonsecure_entry_clear_before_return'. */
24438 if (reg_containing_return_addr != LR_REGNUM)
24439 asm_fprintf (f, "\tmov\tlr, r0\n");
24440
24441 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24442 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24443 }
24444 else
24445 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24446 }
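
/* Illustrative example (added comment): when the return address is on the
   stack (reg_containing_return_addr == -1) and there is no interworking,
   backtrace structure, EH return or CMSE entry to worry about, the early
   path above emits just "pop {pc}".  The longer paths shuffle the saved
   values through whichever argument registers may be corrupted and finally
   return with "bx" (or "bxns" for CMSE entry functions).  */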
24447 \f
24448 /* Scan INSN just before assembler is output for it.
24449 For Thumb-1, we track the status of the condition codes; this
24450 information is used in the cbranchsi4_insn pattern. */
24451 void
24452 thumb1_final_prescan_insn (rtx_insn *insn)
24453 {
24454 if (flag_print_asm_name)
24455 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24456 INSN_ADDRESSES (INSN_UID (insn)));
24457 /* Don't overwrite the previous setter when we get to a cbranch. */
24458 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24459 {
24460 enum attr_conds conds;
24461
24462 if (cfun->machine->thumb1_cc_insn)
24463 {
24464 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24465 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24466 CC_STATUS_INIT;
24467 }
24468 conds = get_attr_conds (insn);
24469 if (conds == CONDS_SET)
24470 {
24471 rtx set = single_set (insn);
24472 cfun->machine->thumb1_cc_insn = insn;
24473 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24474 cfun->machine->thumb1_cc_op1 = const0_rtx;
24475 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24476 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24477 {
24478 rtx src1 = XEXP (SET_SRC (set), 1);
24479 if (src1 == const0_rtx)
24480 cfun->machine->thumb1_cc_mode = CCmode;
24481 }
24482 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24483 {
24484 /* Record the src register operand instead of dest because
24485 cprop_hardreg pass propagates src. */
24486 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24487 }
24488 }
24489 else if (conds != CONDS_NOCOND)
24490 cfun->machine->thumb1_cc_insn = NULL_RTX;
24491 }
24492
24493 /* Check if an unexpected far jump is used. */
24494 if (cfun->machine->lr_save_eliminated
24495 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24496 internal_error("Unexpected thumb1 far jump");
24497 }
24498
24499 int
24500 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24501 {
24502 unsigned HOST_WIDE_INT mask = 0xff;
24503 int i;
24504
24505 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24506 if (val == 0) /* XXX */
24507 return 0;
24508
24509 for (i = 0; i < 25; i++)
24510 if ((val & (mask << i)) == val)
24511 return 1;
24512
24513 return 0;
24514 }
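
/* Examples (added comment): 0xff0 (0xff << 4) and 0x4000000 (a single bit)
   are shiftable constants, while 0x101 is not, because bits 0 and 8 cannot
   be covered by one contiguous 8-bit window.  */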
24515
24516 /* Returns nonzero if the current function contains,
24517 or might contain a far jump. */
24518 static int
24519 thumb_far_jump_used_p (void)
24520 {
24521 rtx_insn *insn;
24522 bool far_jump = false;
24523 unsigned int func_size = 0;
24524
24525 /* If we have already decided that far jumps may be used,
24526 do not bother checking again, and always return true even if
24527 it turns out that they are not being used. Once we have made
24528 the decision that far jumps are present (and that hence the link
24529 register will be pushed onto the stack) we cannot go back on it. */
24530 if (cfun->machine->far_jump_used)
24531 return 1;
24532
24533 /* If this function is not being called from the prologue/epilogue
24534 generation code then it must be being called from the
24535 INITIAL_ELIMINATION_OFFSET macro. */
24536 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24537 {
24538 /* In this case we know that we are being asked about the elimination
24539 of the arg pointer register. If that register is not being used,
24540 then there are no arguments on the stack, and we do not have to
24541 worry that a far jump might force the prologue to push the link
24542 register, changing the stack offsets. In this case we can just
24543 return false, since the presence of far jumps in the function will
24544 not affect stack offsets.
24545
24546 If the arg pointer is live (or if it was live, but has now been
24547 eliminated and so set to dead) then we do have to test to see if
24548 the function might contain a far jump. This test can lead to some
24549 false negatives, since before reload is completed the length of
24550 branch instructions is not known, so gcc defaults to returning their
24551 longest length, which in turn sets the far jump attribute to true.
24552
24553 A false negative will not result in bad code being generated, but it
24554 will result in a needless push and pop of the link register. We
24555 hope that this does not occur too often.
24556
24557 If we need doubleword stack alignment this could affect the other
24558 elimination offsets so we can't risk getting it wrong. */
24559 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24560 cfun->machine->arg_pointer_live = 1;
24561 else if (!cfun->machine->arg_pointer_live)
24562 return 0;
24563 }
24564
24565 /* We should not change far_jump_used during or after reload, as there is
24566 no chance to change stack frame layout. */
24567 if (reload_in_progress || reload_completed)
24568 return 0;
24569
24570 /* Check to see if the function contains a branch
24571 insn with the far jump attribute set. */
24572 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24573 {
24574 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24575 {
24576 far_jump = true;
24577 }
24578 func_size += get_attr_length (insn);
24579 }
24580
24581 /* The far_jump attribute will always be true for thumb1 before the
24582 shorten_branch pass, so checking the far_jump attribute before
24583 shorten_branch isn't very useful.
24584
24585 The following heuristic tries to estimate more accurately whether a far
24586 jump may finally be used. The heuristic is very conservative, as there
24587 is no chance to roll back the decision not to use a far jump.
24588
24589 Thumb1 long branch offsets range from -2048 to 2046. In the worst case
24590 each 2-byte insn is associated with a 4-byte constant pool entry, so
24591 using function size 2048/3 as the threshold is conservative enough. */
24592 if (far_jump)
24593 {
24594 if ((func_size * 3) >= 2048)
24595 {
24596 /* Record the fact that we have decided that
24597 the function does use far jumps. */
24598 cfun->machine->far_jump_used = 1;
24599 return 1;
24600 }
24601 }
24602
24603 return 0;
24604 }
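
/* Added note: with the heuristic above, once the estimated function size
   reaches about 683 bytes (2048/3) and at least one branch still carries the
   far_jump attribute, far_jump_used is latched to 1 and stays set, so the
   prologue will save LR even if branch shortening later proves every branch
   to be short.  */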
24605
24606 /* Return nonzero if FUNC must be entered in ARM mode. */
24607 static bool
24608 is_called_in_ARM_mode (tree func)
24609 {
24610 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24611
24612 /* Ignore the problem about functions whose address is taken. */
24613 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24614 return true;
24615
24616 #ifdef ARM_PE
24617 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24618 #else
24619 return false;
24620 #endif
24621 }
24622
24623 /* Given the stack offsets and register mask in OFFSETS, decide how
24624 many additional registers to push instead of subtracting a constant
24625 from SP. For epilogues the principle is the same except we use pop.
24626 FOR_PROLOGUE indicates which we're generating. */
24627 static int
24628 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24629 {
24630 HOST_WIDE_INT amount;
24631 unsigned long live_regs_mask = offsets->saved_regs_mask;
24632 /* Extract a mask of the ones we can give to the Thumb's push/pop
24633 instruction. */
24634 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24635 /* Then count how many other high registers will need to be pushed. */
24636 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24637 int n_free, reg_base, size;
24638
24639 if (!for_prologue && frame_pointer_needed)
24640 amount = offsets->locals_base - offsets->saved_regs;
24641 else
24642 amount = offsets->outgoing_args - offsets->saved_regs;
24643
24644 /* If the stack frame size is 512 exactly, we can save one load
24645 instruction, which should make this a win even when optimizing
24646 for speed. */
24647 if (!optimize_size && amount != 512)
24648 return 0;
24649
24650 /* Can't do this if there are high registers to push. */
24651 if (high_regs_pushed != 0)
24652 return 0;
24653
24654 /* Shouldn't do it in the prologue if no registers would normally
24655 be pushed at all. In the epilogue, also allow it if we'll have
24656 a pop insn for the PC. */
24657 if (l_mask == 0
24658 && (for_prologue
24659 || TARGET_BACKTRACE
24660 || (live_regs_mask & 1 << LR_REGNUM) == 0
24661 || TARGET_INTERWORK
24662 || crtl->args.pretend_args_size != 0))
24663 return 0;
24664
24665 /* Don't do this if thumb_expand_prologue wants to emit instructions
24666 between the push and the stack frame allocation. */
24667 if (for_prologue
24668 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24669 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24670 return 0;
24671
24672 reg_base = 0;
24673 n_free = 0;
24674 if (!for_prologue)
24675 {
24676 size = arm_size_return_regs ();
24677 reg_base = ARM_NUM_INTS (size);
24678 live_regs_mask >>= reg_base;
24679 }
24680
24681 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24682 && (for_prologue || call_used_regs[reg_base + n_free]))
24683 {
24684 live_regs_mask >>= 1;
24685 n_free++;
24686 }
24687
24688 if (n_free == 0)
24689 return 0;
24690 gcc_assert (amount / 4 * 4 == amount);
24691
24692 if (amount >= 512 && (amount - n_free * 4) < 512)
24693 return (amount - 508) / 4;
24694 if (amount <= n_free * 4)
24695 return amount / 4;
24696 return 0;
24697 }
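
/* Worked example (added comment): when optimizing for speed only a frame of
   exactly 512 bytes qualifies.  With one suitable free low register the
   function returns (512 - 508) / 4 == 1, so one extra register is pushed (or
   popped) and the remaining 508-byte adjustment fits the single Thumb-1
   "sub sp, #imm" / "add sp, #imm" encoding, whose immediate is limited to
   508.  */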
24698
24699 /* The bits which aren't usefully expanded as rtl. */
24700 const char *
24701 thumb1_unexpanded_epilogue (void)
24702 {
24703 arm_stack_offsets *offsets;
24704 int regno;
24705 unsigned long live_regs_mask = 0;
24706 int high_regs_pushed = 0;
24707 int extra_pop;
24708 int had_to_push_lr;
24709 int size;
24710
24711 if (cfun->machine->return_used_this_function != 0)
24712 return "";
24713
24714 if (IS_NAKED (arm_current_func_type ()))
24715 return "";
24716
24717 offsets = arm_get_frame_offsets ();
24718 live_regs_mask = offsets->saved_regs_mask;
24719 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24720
24721 /* See if we can deduce the registers used from the function's return value.
24722 This is more reliable than examining df_regs_ever_live_p () because that
24723 will be set if the register is ever used in the function, not just if
24724 the register is used to hold a return value. */
24725 size = arm_size_return_regs ();
24726
24727 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24728 if (extra_pop > 0)
24729 {
24730 unsigned long extra_mask = (1 << extra_pop) - 1;
24731 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24732 }
24733
24734 /* The prologue may have pushed some high registers to use as
24735 work registers, e.g. the testsuite file:
24736 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24737 compiles to produce:
24738 push {r4, r5, r6, r7, lr}
24739 mov r7, r9
24740 mov r6, r8
24741 push {r6, r7}
24742 as part of the prologue. We have to undo that pushing here. */
24743
24744 if (high_regs_pushed)
24745 {
24746 unsigned long mask = live_regs_mask & 0xff;
24747 int next_hi_reg;
24748
24749 /* The available low registers depend on the size of the value we are
24750 returning. */
24751 if (size <= 12)
24752 mask |= 1 << 3;
24753 if (size <= 8)
24754 mask |= 1 << 2;
24755
24756 if (mask == 0)
24757 /* Oh dear! We have no low registers into which we can pop
24758 high registers! */
24759 internal_error
24760 ("no low registers available for popping high registers");
24761
24762 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24763 if (live_regs_mask & (1 << next_hi_reg))
24764 break;
24765
24766 while (high_regs_pushed)
24767 {
24768 /* Find lo register(s) into which the high register(s) can
24769 be popped. */
24770 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24771 {
24772 if (mask & (1 << regno))
24773 high_regs_pushed--;
24774 if (high_regs_pushed == 0)
24775 break;
24776 }
24777
24778 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24779
24780 /* Pop the values into the low register(s). */
24781 thumb_pop (asm_out_file, mask);
24782
24783 /* Move the value(s) into the high registers. */
24784 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24785 {
24786 if (mask & (1 << regno))
24787 {
24788 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24789 regno);
24790
24791 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24792 if (live_regs_mask & (1 << next_hi_reg))
24793 break;
24794 }
24795 }
24796 }
24797 live_regs_mask &= ~0x0f00;
24798 }
24799
24800 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24801 live_regs_mask &= 0xff;
24802
24803 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24804 {
24805 /* Pop the return address into the PC. */
24806 if (had_to_push_lr)
24807 live_regs_mask |= 1 << PC_REGNUM;
24808
24809 /* Either no argument registers were pushed or a backtrace
24810 structure was created which includes an adjusted stack
24811 pointer, so just pop everything. */
24812 if (live_regs_mask)
24813 thumb_pop (asm_out_file, live_regs_mask);
24814
24815 /* We have either just popped the return address into the
24816 PC or it was kept in LR for the entire function.
24817 Note that thumb_pop has already called thumb_exit if the
24818 PC was in the list. */
24819 if (!had_to_push_lr)
24820 thumb_exit (asm_out_file, LR_REGNUM);
24821 }
24822 else
24823 {
24824 /* Pop everything but the return address. */
24825 if (live_regs_mask)
24826 thumb_pop (asm_out_file, live_regs_mask);
24827
24828 if (had_to_push_lr)
24829 {
24830 if (size > 12)
24831 {
24832 /* We have no free low regs, so save one. */
24833 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24834 LAST_ARG_REGNUM);
24835 }
24836
24837 /* Get the return address into a temporary register. */
24838 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24839
24840 if (size > 12)
24841 {
24842 /* Move the return address to lr. */
24843 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24844 LAST_ARG_REGNUM);
24845 /* Restore the low register. */
24846 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24847 IP_REGNUM);
24848 regno = LR_REGNUM;
24849 }
24850 else
24851 regno = LAST_ARG_REGNUM;
24852 }
24853 else
24854 regno = LR_REGNUM;
24855
24856 /* Remove the argument registers that were pushed onto the stack. */
24857 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24858 SP_REGNUM, SP_REGNUM,
24859 crtl->args.pretend_args_size);
24860
24861 thumb_exit (asm_out_file, regno);
24862 }
24863
24864 return "";
24865 }
24866
24867 /* Functions to save and restore machine-specific function data. */
24868 static struct machine_function *
24869 arm_init_machine_status (void)
24870 {
24871 struct machine_function *machine;
24872 machine = ggc_cleared_alloc<machine_function> ();
24873
24874 #if ARM_FT_UNKNOWN != 0
24875 machine->func_type = ARM_FT_UNKNOWN;
24876 #endif
24877 machine->static_chain_stack_bytes = -1;
24878 return machine;
24879 }
24880
24881 /* Return an RTX indicating where the return address to the
24882 calling function can be found. */
24883 rtx
24884 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24885 {
24886 if (count != 0)
24887 return NULL_RTX;
24888
24889 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24890 }
24891
24892 /* Do anything needed before RTL is emitted for each function. */
24893 void
24894 arm_init_expanders (void)
24895 {
24896 /* Arrange to initialize and mark the machine per-function status. */
24897 init_machine_status = arm_init_machine_status;
24898
24899 /* This is to stop the combine pass optimizing away the alignment
24900 adjustment of va_arg. */
24901 /* ??? It is claimed that this should not be necessary. */
24902 if (cfun)
24903 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24904 }
24905
24906 /* Check that FUNC is called with a different mode. */
24907
24908 bool
24909 arm_change_mode_p (tree func)
24910 {
24911 if (TREE_CODE (func) != FUNCTION_DECL)
24912 return false;
24913
24914 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24915
24916 if (!callee_tree)
24917 callee_tree = target_option_default_node;
24918
24919 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24920 int flags = callee_opts->x_target_flags;
24921
24922 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24923 }
24924
24925 /* Like arm_compute_initial_elimination_offset. Simpler because there
24926 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24927 to point at the base of the local variables after static stack
24928 space for a function has been allocated. */
24929
24930 HOST_WIDE_INT
24931 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24932 {
24933 arm_stack_offsets *offsets;
24934
24935 offsets = arm_get_frame_offsets ();
24936
24937 switch (from)
24938 {
24939 case ARG_POINTER_REGNUM:
24940 switch (to)
24941 {
24942 case STACK_POINTER_REGNUM:
24943 return offsets->outgoing_args - offsets->saved_args;
24944
24945 case FRAME_POINTER_REGNUM:
24946 return offsets->soft_frame - offsets->saved_args;
24947
24948 case ARM_HARD_FRAME_POINTER_REGNUM:
24949 return offsets->saved_regs - offsets->saved_args;
24950
24951 case THUMB_HARD_FRAME_POINTER_REGNUM:
24952 return offsets->locals_base - offsets->saved_args;
24953
24954 default:
24955 gcc_unreachable ();
24956 }
24957 break;
24958
24959 case FRAME_POINTER_REGNUM:
24960 switch (to)
24961 {
24962 case STACK_POINTER_REGNUM:
24963 return offsets->outgoing_args - offsets->soft_frame;
24964
24965 case ARM_HARD_FRAME_POINTER_REGNUM:
24966 return offsets->saved_regs - offsets->soft_frame;
24967
24968 case THUMB_HARD_FRAME_POINTER_REGNUM:
24969 return offsets->locals_base - offsets->soft_frame;
24970
24971 default:
24972 gcc_unreachable ();
24973 }
24974 break;
24975
24976 default:
24977 gcc_unreachable ();
24978 }
24979 }
24980
24981 /* Generate the function's prologue. */
24982
24983 void
24984 thumb1_expand_prologue (void)
24985 {
24986 rtx_insn *insn;
24987
24988 HOST_WIDE_INT amount;
24989 HOST_WIDE_INT size;
24990 arm_stack_offsets *offsets;
24991 unsigned long func_type;
24992 int regno;
24993 unsigned long live_regs_mask;
24994 unsigned long l_mask;
24995 unsigned high_regs_pushed = 0;
24996 bool lr_needs_saving;
24997
24998 func_type = arm_current_func_type ();
24999
25000 /* Naked functions don't have prologues. */
25001 if (IS_NAKED (func_type))
25002 {
25003 if (flag_stack_usage_info)
25004 current_function_static_stack_size = 0;
25005 return;
25006 }
25007
25008 if (IS_INTERRUPT (func_type))
25009 {
25010 error ("interrupt Service Routines cannot be coded in Thumb mode");
25011 return;
25012 }
25013
25014 if (is_called_in_ARM_mode (current_function_decl))
25015 emit_insn (gen_prologue_thumb1_interwork ());
25016
25017 offsets = arm_get_frame_offsets ();
25018 live_regs_mask = offsets->saved_regs_mask;
25019 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25020
25021 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25022 l_mask = live_regs_mask & 0x40ff;
25023 /* Then count how many other high registers will need to be pushed. */
25024 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25025
25026 if (crtl->args.pretend_args_size)
25027 {
25028 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25029
25030 if (cfun->machine->uses_anonymous_args)
25031 {
25032 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25033 unsigned long mask;
25034
25035 mask = 1ul << (LAST_ARG_REGNUM + 1);
25036 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25037
25038 insn = thumb1_emit_multi_reg_push (mask, 0);
25039 }
25040 else
25041 {
25042 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25043 stack_pointer_rtx, x));
25044 }
25045 RTX_FRAME_RELATED_P (insn) = 1;
25046 }
25047
25048 if (TARGET_BACKTRACE)
25049 {
25050 HOST_WIDE_INT offset = 0;
25051 unsigned work_register;
25052 rtx work_reg, x, arm_hfp_rtx;
25053
25054 /* We have been asked to create a stack backtrace structure.
25055 The code looks like this:
25056
25057 0 .align 2
25058 0 func:
25059 0 sub SP, #16 Reserve space for 4 registers.
25060 2 push {R7} Push low registers.
25061 4 add R7, SP, #20 Get the stack pointer before the push.
25062 6 str R7, [SP, #8] Store the stack pointer
25063 (before reserving the space).
25064 8 mov R7, PC Get hold of the start of this code + 12.
25065 10 str R7, [SP, #16] Store it.
25066 12 mov R7, FP Get hold of the current frame pointer.
25067 14 str R7, [SP, #4] Store it.
25068 16 mov R7, LR Get hold of the current return address.
25069 18 str R7, [SP, #12] Store it.
25070 20 add R7, SP, #16 Point at the start of the
25071 backtrace structure.
25072 22 mov FP, R7 Put this value into the frame pointer. */
25073
25074 work_register = thumb_find_work_register (live_regs_mask);
25075 work_reg = gen_rtx_REG (SImode, work_register);
25076 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25077
25078 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25079 stack_pointer_rtx, GEN_INT (-16)));
25080 RTX_FRAME_RELATED_P (insn) = 1;
25081
25082 if (l_mask)
25083 {
25084 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25085 RTX_FRAME_RELATED_P (insn) = 1;
25086 lr_needs_saving = false;
25087
25088 offset = bit_count (l_mask) * UNITS_PER_WORD;
25089 }
25090
25091 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25092 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25093
25094 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25095 x = gen_frame_mem (SImode, x);
25096 emit_move_insn (x, work_reg);
25097
25098 /* Make sure that the instruction fetching the PC is in the right place
25099 to calculate "start of backtrace creation code + 12". */
25100 /* ??? The stores using the common WORK_REG ought to be enough to
25101 prevent the scheduler from doing anything weird. Failing that
25102 we could always move all of the following into an UNSPEC_VOLATILE. */
25103 if (l_mask)
25104 {
25105 x = gen_rtx_REG (SImode, PC_REGNUM);
25106 emit_move_insn (work_reg, x);
25107
25108 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25109 x = gen_frame_mem (SImode, x);
25110 emit_move_insn (x, work_reg);
25111
25112 emit_move_insn (work_reg, arm_hfp_rtx);
25113
25114 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25115 x = gen_frame_mem (SImode, x);
25116 emit_move_insn (x, work_reg);
25117 }
25118 else
25119 {
25120 emit_move_insn (work_reg, arm_hfp_rtx);
25121
25122 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25123 x = gen_frame_mem (SImode, x);
25124 emit_move_insn (x, work_reg);
25125
25126 x = gen_rtx_REG (SImode, PC_REGNUM);
25127 emit_move_insn (work_reg, x);
25128
25129 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25130 x = gen_frame_mem (SImode, x);
25131 emit_move_insn (x, work_reg);
25132 }
25133
25134 x = gen_rtx_REG (SImode, LR_REGNUM);
25135 emit_move_insn (work_reg, x);
25136
25137 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25138 x = gen_frame_mem (SImode, x);
25139 emit_move_insn (x, work_reg);
25140
25141 x = GEN_INT (offset + 12);
25142 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25143
25144 emit_move_insn (arm_hfp_rtx, work_reg);
25145 }
25146 /* Optimization: If we are not pushing any low registers but we are going
25147 to push some high registers then delay our first push. This will just
25148 be a push of LR and we can combine it with the push of the first high
25149 register. */
25150 else if ((l_mask & 0xff) != 0
25151 || (high_regs_pushed == 0 && lr_needs_saving))
25152 {
25153 unsigned long mask = l_mask;
25154 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
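/* thumb1_extra_regs_pushed says how many additional low registers may be
   pushed purely to allocate stack space; (1 << N) - 1 adds r0..r(N-1) to
   the push, and the explicit stack adjustment later shrinks to match.  */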
25155 insn = thumb1_emit_multi_reg_push (mask, mask);
25156 RTX_FRAME_RELATED_P (insn) = 1;
25157 lr_needs_saving = false;
25158 }
25159
25160 if (high_regs_pushed)
25161 {
25162 unsigned pushable_regs;
25163 unsigned next_hi_reg;
25164 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25165 : crtl->args.info.nregs;
25166 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
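/* (1 << arg_regs_num) - 1 is a mask of the core registers that carry
   incoming arguments, i.e. r0 up to but not including r<arg_regs_num>.  */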
25167
25168 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25169 if (live_regs_mask & (1 << next_hi_reg))
25170 break;
25171
25172 /* Here we need to mask out registers used for passing arguments, even
25173 if they could otherwise be pushed: using them to stash the high
25174 registers could clobber argument values that are still live. */
25175 pushable_regs = l_mask & (~arg_regs_mask);
25176 if (lr_needs_saving)
25177 pushable_regs &= ~(1 << LR_REGNUM);
25178
25179 if (pushable_regs == 0)
25180 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25181
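/* Each iteration of the loop below copies as many high registers as will
   fit into the available low registers in PUSHABLE_REGS and pushes them
   with a single PUSH; REAL_REGS_MASK records which high registers the
   pushed values actually belong to, so the frame-related notes describe
   the right registers.  */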
25182 while (high_regs_pushed > 0)
25183 {
25184 unsigned long real_regs_mask = 0;
25185 unsigned long push_mask = 0;
25186
25187 for (regno = LR_REGNUM; regno >= 0; regno --)
25188 {
25189 if (pushable_regs & (1 << regno))
25190 {
25191 emit_move_insn (gen_rtx_REG (SImode, regno),
25192 gen_rtx_REG (SImode, next_hi_reg));
25193
25194 high_regs_pushed --;
25195 real_regs_mask |= (1 << next_hi_reg);
25196 push_mask |= (1 << regno);
25197
25198 if (high_regs_pushed)
25199 {
25200 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25201 next_hi_reg --)
25202 if (live_regs_mask & (1 << next_hi_reg))
25203 break;
25204 }
25205 else
25206 break;
25207 }
25208 }
25209
25210 /* If we had to find a work register and we have not yet
25211 saved the LR then add it to the list of regs to push. */
25212 if (lr_needs_saving)
25213 {
25214 push_mask |= 1 << LR_REGNUM;
25215 real_regs_mask |= 1 << LR_REGNUM;
25216 lr_needs_saving = false;
25217 }
25218
25219 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25220 RTX_FRAME_RELATED_P (insn) = 1;
25221 }
25222 }
25223
25224 /* Load the pic register before setting the frame pointer,
25225 so we can use r7 as a temporary work register. */
25226 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25227 arm_load_pic_register (live_regs_mask);
25228
25229 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25230 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25231 stack_pointer_rtx);
25232
25233 size = offsets->outgoing_args - offsets->saved_args;
25234 if (flag_stack_usage_info)
25235 current_function_static_stack_size = size;
25236
25237 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25238 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25239 || flag_stack_clash_protection)
25240 && size)
25241 sorry ("-fstack-check=specific for Thumb-1");
25242
25243 amount = offsets->outgoing_args - offsets->saved_regs;
25244 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25245 if (amount)
25246 {
25247 if (amount < 512)
25248 {
25249 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25250 GEN_INT (- amount)));
25251 RTX_FRAME_RELATED_P (insn) = 1;
25252 }
25253 else
25254 {
25255 rtx reg, dwarf;
25256
25257 /* The stack decrement is too big for an immediate value in a single
25258 insn. In theory we could issue multiple subtracts, but after
25259 three of them it becomes more space efficient to place the full
25260 value in the constant pool and load into a register. (Also the
25261 ARM debugger really likes to see only one stack decrement per
25262 function). So instead we look for a scratch register into which
25263 we can load the decrement, and then we subtract this from the
25264 stack pointer. Unfortunately on the thumb the only available
25265 scratch registers are the argument registers, and we cannot use
25266 these as they may hold arguments to the function. Instead we
25267 attempt to locate a call preserved register which is used by this
25268 function. If we can find one, then we know that it will have
25269 been pushed at the start of the prologue and so we can corrupt
25270 it now. */
25271 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25272 if (live_regs_mask & (1 << regno))
25273 break;
25274
25275 gcc_assert (regno <= LAST_LO_REGNUM);
25276
25277 reg = gen_rtx_REG (SImode, regno);
25278
25279 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25280
25281 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25282 stack_pointer_rtx, reg));
25283
25284 dwarf = gen_rtx_SET (stack_pointer_rtx,
25285 plus_constant (Pmode, stack_pointer_rtx,
25286 -amount));
25287 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25288 RTX_FRAME_RELATED_P (insn) = 1;
25289 }
25290 }
25291
25292 if (frame_pointer_needed)
25293 thumb_set_frame_pointer (offsets);
25294
25295 /* If we are profiling, make sure no instructions are scheduled before
25296 the call to mcount. Similarly if the user has requested no
25297 scheduling in the prolog. Similarly if we want non-call exceptions
25298 using the EABI unwinder, to prevent faulting instructions from being
25299 swapped with a stack adjustment. */
25300 if (crtl->profile || !TARGET_SCHED_PROLOG
25301 || (arm_except_unwind_info (&global_options) == UI_TARGET
25302 && cfun->can_throw_non_call_exceptions))
25303 emit_insn (gen_blockage ());
25304
25305 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25306 if (live_regs_mask & 0xff)
25307 cfun->machine->lr_save_eliminated = 0;
25308 }
25309
25310 /* Clear caller-saved registers that are not used to pass return values, and
25311 any leaked condition flags, before exiting a cmse_nonsecure_entry function. */
25312
25313 void
25314 cmse_nonsecure_entry_clear_before_return (void)
25315 {
25316 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25317 uint32_t padding_bits_to_clear = 0;
25318 auto_sbitmap to_clear_bitmap (maxregno + 1);
25319 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25320 tree result_type;
25321
25322 bitmap_clear (to_clear_bitmap);
25323 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25324 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25325
25326 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25327 registers. */
25328 if (TARGET_HARD_FLOAT)
25329 {
25330 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25331
25332 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25333
25334 /* Make sure we don't clear the two scratch registers used to clear the
25335 relevant FPSCR bits in output_return_instruction. */
25336 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25337 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25338 emit_use (gen_rtx_REG (SImode, 4));
25339 bitmap_clear_bit (to_clear_bitmap, 4);
25340 }
25341
25342 /* If the user has defined registers to be caller saved, these are no longer
25343 restored by the function before returning and must thus be cleared for
25344 security purposes. */
25345 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25346 {
25347 /* We do not touch registers that can be used to pass arguments as per
25348 the AAPCS, since these should never be made callee-saved by user
25349 options. */
25350 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25351 continue;
25352 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25353 continue;
25354 if (call_used_regs[regno])
25355 bitmap_set_bit (to_clear_bitmap, regno);
25356 }
25357
25358 /* Make sure we do not clear the registers used to return the result in. */
25359 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25360 if (!VOID_TYPE_P (result_type))
25361 {
25362 uint64_t to_clear_return_mask;
25363 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25364
25365 /* No need to check that we return in registers, because we don't
25366 support returning on stack yet. */
25367 gcc_assert (REG_P (result_rtl));
25368 to_clear_return_mask
25369 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25370 &padding_bits_to_clear);
25371 if (to_clear_return_mask)
25372 {
25373 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
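/* to_clear_return_mask is only 64 bits wide, so every register number
   shifted below must be in range.  */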
25374 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25375 {
25376 if (to_clear_return_mask & (1ULL << regno))
25377 bitmap_clear_bit (to_clear_bitmap, regno);
25378 }
25379 }
25380 }
25381
25382 if (padding_bits_to_clear != 0)
25383 {
25384 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25385 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25386
25387 /* Padding_bits_to_clear is not 0, so we know we are dealing with
25388 returning a composite type, which only uses r0. Let's make sure that
25389 r1-r3 are cleared too. */
25390 bitmap_clear (to_clear_arg_regs_bitmap);
25391 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25392 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25393 }
25394
25395 /* Clear full registers that leak before returning. */
25396 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25397 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25398 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25399 clearing_reg);
25400 }
25401
25402 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25403 single POP instruction can be generated. LR should be replaced by PC.
25404 All the checks required are already done by USE_RETURN_INSN (). Hence,
25405 all we really need to check here is whether a single register or
25406 multiple registers are to be popped on return. */
25407 void
25408 thumb2_expand_return (bool simple_return)
25409 {
25410 int i, num_regs;
25411 unsigned long saved_regs_mask;
25412 arm_stack_offsets *offsets;
25413
25414 offsets = arm_get_frame_offsets ();
25415 saved_regs_mask = offsets->saved_regs_mask;
25416
25417 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25418 if (saved_regs_mask & (1 << i))
25419 num_regs++;
25420
25421 if (!simple_return && saved_regs_mask)
25422 {
25423 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25424 functions or adapt code to handle according to ACLE. This path should
25425 not be reachable for cmse_nonsecure_entry functions though we prefer
25426 to assert it for now to ensure that future code changes do not silently
25427 change this behavior. */
25428 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25429 if (num_regs == 1)
25430 {
25431 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25432 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25433 rtx addr = gen_rtx_MEM (SImode,
25434 gen_rtx_POST_INC (SImode,
25435 stack_pointer_rtx));
25436 set_mem_alias_set (addr, get_frame_alias_set ());
25437 XVECEXP (par, 0, 0) = ret_rtx;
25438 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25439 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25440 emit_jump_insn (par);
25441 }
25442 else
25443 {
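/* Replace LR with PC in the pop mask: loading the saved return address
   directly into PC performs the return as part of the pop.  */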
25444 saved_regs_mask &= ~ (1 << LR_REGNUM);
25445 saved_regs_mask |= (1 << PC_REGNUM);
25446 arm_emit_multi_reg_pop (saved_regs_mask);
25447 }
25448 }
25449 else
25450 {
25451 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25452 cmse_nonsecure_entry_clear_before_return ();
25453 emit_jump_insn (simple_return_rtx);
25454 }
25455 }
25456
25457 void
25458 thumb1_expand_epilogue (void)
25459 {
25460 HOST_WIDE_INT amount;
25461 arm_stack_offsets *offsets;
25462 int regno;
25463
25464 /* Naked functions don't have prologues. */
25465 if (IS_NAKED (arm_current_func_type ()))
25466 return;
25467
25468 offsets = arm_get_frame_offsets ();
25469 amount = offsets->outgoing_args - offsets->saved_regs;
25470
25471 if (frame_pointer_needed)
25472 {
25473 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25474 amount = offsets->locals_base - offsets->saved_regs;
25475 }
25476 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25477
25478 gcc_assert (amount >= 0);
25479 if (amount)
25480 {
25481 emit_insn (gen_blockage ());
25482
25483 if (amount < 512)
25484 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25485 GEN_INT (amount)));
25486 else
25487 {
25488 /* r3 is always free in the epilogue. */
25489 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25490
25491 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25492 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25493 }
25494 }
25495
25496 /* Emit a USE (stack_pointer_rtx), so that
25497 the stack adjustment will not be deleted. */
25498 emit_insn (gen_force_register_use (stack_pointer_rtx));
25499
25500 if (crtl->profile || !TARGET_SCHED_PROLOG)
25501 emit_insn (gen_blockage ());
25502
25503 /* Emit a clobber for each register that will be restored in the epilogue,
25504 so that flow2 will get register lifetimes correct. */
25505 for (regno = 0; regno < 13; regno++)
25506 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25507 emit_clobber (gen_rtx_REG (SImode, regno));
25508
25509 if (! df_regs_ever_live_p (LR_REGNUM))
25510 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25511
25512 /* Clear all caller-saved regs that are not used to return. */
25513 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25514 cmse_nonsecure_entry_clear_before_return ();
25515 }
25516
25517 /* Epilogue code for APCS frame. */
25518 static void
25519 arm_expand_epilogue_apcs_frame (bool really_return)
25520 {
25521 unsigned long func_type;
25522 unsigned long saved_regs_mask;
25523 int num_regs = 0;
25524 int i;
25525 int floats_from_frame = 0;
25526 arm_stack_offsets *offsets;
25527
25528 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25529 func_type = arm_current_func_type ();
25530
25531 /* Get frame offsets for ARM. */
25532 offsets = arm_get_frame_offsets ();
25533 saved_regs_mask = offsets->saved_regs_mask;
25534
25535 /* Find the offset of the floating-point save area in the frame. */
25536 floats_from_frame
25537 = (offsets->saved_args
25538 + arm_compute_static_chain_stack_bytes ()
25539 - offsets->frame);
25540
25541 /* Compute how many core registers are saved and how far away the floats are. */
25542 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25543 if (saved_regs_mask & (1 << i))
25544 {
25545 num_regs++;
25546 floats_from_frame += 4;
25547 }
25548
25549 if (TARGET_HARD_FLOAT)
25550 {
25551 int start_reg;
25552 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25553
25554 /* The offset is from IP_REGNUM. */
25555 int saved_size = arm_get_vfp_saved_size ();
25556 if (saved_size > 0)
25557 {
25558 rtx_insn *insn;
25559 floats_from_frame += saved_size;
25560 insn = emit_insn (gen_addsi3 (ip_rtx,
25561 hard_frame_pointer_rtx,
25562 GEN_INT (-floats_from_frame)));
25563 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25564 ip_rtx, hard_frame_pointer_rtx);
25565 }
25566
25567 /* Generate VFP register multi-pop. */
25568 start_reg = FIRST_VFP_REGNUM;
25569
25570 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25571 /* Look for a case where a reg does not need restoring. */
25572 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25573 && (!df_regs_ever_live_p (i + 1)
25574 || call_used_regs[i + 1]))
25575 {
25576 if (start_reg != i)
25577 arm_emit_vfp_multi_reg_pop (start_reg,
25578 (i - start_reg) / 2,
25579 gen_rtx_REG (SImode,
25580 IP_REGNUM));
25581 start_reg = i + 2;
25582 }
25583
25584 /* Restore the remaining regs that we have discovered (or possibly
25585 even all of them, if the conditional in the for loop never
25586 fired). */
25587 if (start_reg != i)
25588 arm_emit_vfp_multi_reg_pop (start_reg,
25589 (i - start_reg) / 2,
25590 gen_rtx_REG (SImode, IP_REGNUM));
25591 }
25592
25593 if (TARGET_IWMMXT)
25594 {
25595 /* The frame pointer is guaranteed to be non-double-word aligned, as
25596 it is set to double-word-aligned old_stack_pointer - 4. */
25597 rtx_insn *insn;
25598 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25599
25600 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25601 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25602 {
25603 rtx addr = gen_frame_mem (V2SImode,
25604 plus_constant (Pmode, hard_frame_pointer_rtx,
25605 - lrm_count * 4));
25606 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25607 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25608 gen_rtx_REG (V2SImode, i),
25609 NULL_RTX);
25610 lrm_count += 2;
25611 }
25612 }
25613
25614 /* saved_regs_mask should contain IP, which holds the old stack pointer
25615 from the time the activation record was created. Since SP and IP are
25616 adjacent registers, we can restore the value directly into SP. */
25617 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25618 saved_regs_mask &= ~(1 << IP_REGNUM);
25619 saved_regs_mask |= (1 << SP_REGNUM);
25620
25621 /* There are two registers left in saved_regs_mask - LR and PC. We
25622 only need to restore LR (the return address), but to
25623 save time we can load it directly into PC, unless we need a
25624 special function exit sequence, or we are not really returning. */
25625 if (really_return
25626 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25627 && !crtl->calls_eh_return)
25628 /* Delete LR from the register mask, so that LR on
25629 the stack is loaded into the PC in the register mask. */
25630 saved_regs_mask &= ~(1 << LR_REGNUM);
25631 else
25632 saved_regs_mask &= ~(1 << PC_REGNUM);
25633
25634 num_regs = bit_count (saved_regs_mask);
25635 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25636 {
25637 rtx_insn *insn;
25638 emit_insn (gen_blockage ());
25639 /* Unwind the stack to just below the saved registers. */
25640 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25641 hard_frame_pointer_rtx,
25642 GEN_INT (- 4 * num_regs)));
25643
25644 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25645 stack_pointer_rtx, hard_frame_pointer_rtx);
25646 }
25647
25648 arm_emit_multi_reg_pop (saved_regs_mask);
25649
25650 if (IS_INTERRUPT (func_type))
25651 {
25652 /* Interrupt handlers will have pushed the
25653 IP onto the stack, so restore it now. */
25654 rtx_insn *insn;
25655 rtx addr = gen_rtx_MEM (SImode,
25656 gen_rtx_POST_INC (SImode,
25657 stack_pointer_rtx));
25658 set_mem_alias_set (addr, get_frame_alias_set ());
25659 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25660 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25661 gen_rtx_REG (SImode, IP_REGNUM),
25662 NULL_RTX);
25663 }
25664
25665 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25666 return;
25667
25668 if (crtl->calls_eh_return)
25669 emit_insn (gen_addsi3 (stack_pointer_rtx,
25670 stack_pointer_rtx,
25671 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25672
25673 if (IS_STACKALIGN (func_type))
25674 /* Restore the original stack pointer. Before prologue, the stack was
25675 realigned and the original stack pointer saved in r0. For details,
25676 see comment in arm_expand_prologue. */
25677 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25678
25679 emit_jump_insn (simple_return_rtx);
25680 }
25681
25682 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25683 function is not a sibcall. */
25684 void
25685 arm_expand_epilogue (bool really_return)
25686 {
25687 unsigned long func_type;
25688 unsigned long saved_regs_mask;
25689 int num_regs = 0;
25690 int i;
25691 int amount;
25692 arm_stack_offsets *offsets;
25693
25694 func_type = arm_current_func_type ();
25695
25696 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25697 and let output_return_instruction take care of any instruction emission. */
25698 if (IS_NAKED (func_type)
25699 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25700 {
25701 if (really_return)
25702 emit_jump_insn (simple_return_rtx);
25703 return;
25704 }
25705
25706 /* If we are throwing an exception, then we really must be doing a
25707 return, so we can't tail-call. */
25708 gcc_assert (!crtl->calls_eh_return || really_return);
25709
25710 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25711 {
25712 arm_expand_epilogue_apcs_frame (really_return);
25713 return;
25714 }
25715
25716 /* Get frame offsets for ARM. */
25717 offsets = arm_get_frame_offsets ();
25718 saved_regs_mask = offsets->saved_regs_mask;
25719 num_regs = bit_count (saved_regs_mask);
25720
25721 if (frame_pointer_needed)
25722 {
25723 rtx_insn *insn;
25724 /* Restore stack pointer if necessary. */
25725 if (TARGET_ARM)
25726 {
25727 /* In ARM mode, the frame pointer points to the first saved register.
25728 Restore the stack pointer to the last saved register. */
25729 amount = offsets->frame - offsets->saved_regs;
25730
25731 /* Force out any pending memory operations that reference stacked data
25732 before stack de-allocation occurs. */
25733 emit_insn (gen_blockage ());
25734 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25735 hard_frame_pointer_rtx,
25736 GEN_INT (amount)));
25737 arm_add_cfa_adjust_cfa_note (insn, amount,
25738 stack_pointer_rtx,
25739 hard_frame_pointer_rtx);
25740
25741 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25742 deleted. */
25743 emit_insn (gen_force_register_use (stack_pointer_rtx));
25744 }
25745 else
25746 {
25747 /* In Thumb-2 mode, the frame pointer points to the last saved
25748 register. */
25749 amount = offsets->locals_base - offsets->saved_regs;
25750 if (amount)
25751 {
25752 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25753 hard_frame_pointer_rtx,
25754 GEN_INT (amount)));
25755 arm_add_cfa_adjust_cfa_note (insn, amount,
25756 hard_frame_pointer_rtx,
25757 hard_frame_pointer_rtx);
25758 }
25759
25760 /* Force out any pending memory operations that reference stacked data
25761 before stack de-allocation occurs. */
25762 emit_insn (gen_blockage ());
25763 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25764 hard_frame_pointer_rtx));
25765 arm_add_cfa_adjust_cfa_note (insn, 0,
25766 stack_pointer_rtx,
25767 hard_frame_pointer_rtx);
25768 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25769 deleted. */
25770 emit_insn (gen_force_register_use (stack_pointer_rtx));
25771 }
25772 }
25773 else
25774 {
25775 /* Pop off outgoing args and local frame to adjust stack pointer to
25776 last saved register. */
25777 amount = offsets->outgoing_args - offsets->saved_regs;
25778 if (amount)
25779 {
25780 rtx_insn *tmp;
25781 /* Force out any pending memory operations that reference stacked data
25782 before stack de-allocation occurs. */
25783 emit_insn (gen_blockage ());
25784 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25785 stack_pointer_rtx,
25786 GEN_INT (amount)));
25787 arm_add_cfa_adjust_cfa_note (tmp, amount,
25788 stack_pointer_rtx, stack_pointer_rtx);
25789 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25790 not deleted. */
25791 emit_insn (gen_force_register_use (stack_pointer_rtx));
25792 }
25793 }
25794
25795 if (TARGET_HARD_FLOAT)
25796 {
25797 /* Generate VFP register multi-pop. */
25798 int end_reg = LAST_VFP_REGNUM + 1;
25799
25800 /* Scan the registers in reverse order. We need to match
25801 any groupings made in the prologue and generate matching
25802 vldm operations. The need to match groups is because,
25803 unlike pop, vldm can only do consecutive regs. */
25804 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25805 /* Look for a case where a reg does not need restoring. */
25806 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25807 && (!df_regs_ever_live_p (i + 1)
25808 || call_used_regs[i + 1]))
25809 {
25810 /* Restore the regs discovered so far (from reg+2 to
25811 end_reg). */
25812 if (end_reg > i + 2)
25813 arm_emit_vfp_multi_reg_pop (i + 2,
25814 (end_reg - (i + 2)) / 2,
25815 stack_pointer_rtx);
25816 end_reg = i;
25817 }
25818
25819 /* Restore the remaining regs that we have discovered (or possibly
25820 even all of them, if the conditional in the for loop never
25821 fired). */
25822 if (end_reg > i + 2)
25823 arm_emit_vfp_multi_reg_pop (i + 2,
25824 (end_reg - (i + 2)) / 2,
25825 stack_pointer_rtx);
25826 }
25827
25828 if (TARGET_IWMMXT)
25829 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25830 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25831 {
25832 rtx_insn *insn;
25833 rtx addr = gen_rtx_MEM (V2SImode,
25834 gen_rtx_POST_INC (SImode,
25835 stack_pointer_rtx));
25836 set_mem_alias_set (addr, get_frame_alias_set ());
25837 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25838 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25839 gen_rtx_REG (V2SImode, i),
25840 NULL_RTX);
25841 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25842 stack_pointer_rtx, stack_pointer_rtx);
25843 }
25844
25845 if (saved_regs_mask)
25846 {
25847 rtx insn;
25848 bool return_in_pc = false;
25849
25850 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25851 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25852 && !IS_CMSE_ENTRY (func_type)
25853 && !IS_STACKALIGN (func_type)
25854 && really_return
25855 && crtl->args.pretend_args_size == 0
25856 && saved_regs_mask & (1 << LR_REGNUM)
25857 && !crtl->calls_eh_return)
25858 {
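/* Pop the saved return address directly into PC instead of restoring LR
   and returning separately.  */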
25859 saved_regs_mask &= ~(1 << LR_REGNUM);
25860 saved_regs_mask |= (1 << PC_REGNUM);
25861 return_in_pc = true;
25862 }
25863
25864 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25865 {
25866 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25867 if (saved_regs_mask & (1 << i))
25868 {
25869 rtx addr = gen_rtx_MEM (SImode,
25870 gen_rtx_POST_INC (SImode,
25871 stack_pointer_rtx));
25872 set_mem_alias_set (addr, get_frame_alias_set ());
25873
25874 if (i == PC_REGNUM)
25875 {
25876 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25877 XVECEXP (insn, 0, 0) = ret_rtx;
25878 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25879 addr);
25880 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25881 insn = emit_jump_insn (insn);
25882 }
25883 else
25884 {
25885 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25886 addr));
25887 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25888 gen_rtx_REG (SImode, i),
25889 NULL_RTX);
25890 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25891 stack_pointer_rtx,
25892 stack_pointer_rtx);
25893 }
25894 }
25895 }
25896 else
25897 {
25898 if (TARGET_LDRD
25899 && current_tune->prefer_ldrd_strd
25900 && !optimize_function_for_size_p (cfun))
25901 {
25902 if (TARGET_THUMB2)
25903 thumb2_emit_ldrd_pop (saved_regs_mask);
25904 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25905 arm_emit_ldrd_pop (saved_regs_mask);
25906 else
25907 arm_emit_multi_reg_pop (saved_regs_mask);
25908 }
25909 else
25910 arm_emit_multi_reg_pop (saved_regs_mask);
25911 }
25912
25913 if (return_in_pc)
25914 return;
25915 }
25916
25917 amount
25918 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
25919 if (amount)
25920 {
25921 int i, j;
25922 rtx dwarf = NULL_RTX;
25923 rtx_insn *tmp =
25924 emit_insn (gen_addsi3 (stack_pointer_rtx,
25925 stack_pointer_rtx,
25926 GEN_INT (amount)));
25927
25928 RTX_FRAME_RELATED_P (tmp) = 1;
25929
25930 if (cfun->machine->uses_anonymous_args)
25931 {
25932 /* Restore pretend args. Refer to arm_expand_prologue for how the
25933 pretend_args are saved on the stack. */
25934 int num_regs = crtl->args.pretend_args_size / 4;
25935 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
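/* (0xf0 >> num_regs) & 0xf selects the last NUM_REGS argument registers,
   e.g. r2 and r3 when two words of pretend args were pushed, matching the
   registers the prologue saved.  */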
25936 for (j = 0, i = 0; j < num_regs; i++)
25937 if (saved_regs_mask & (1 << i))
25938 {
25939 rtx reg = gen_rtx_REG (SImode, i);
25940 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25941 j++;
25942 }
25943 REG_NOTES (tmp) = dwarf;
25944 }
25945 arm_add_cfa_adjust_cfa_note (tmp, amount,
25946 stack_pointer_rtx, stack_pointer_rtx);
25947 }
25948
25949 /* Clear all caller-saved regs that are not used to return. */
25950 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25951 {
25952 /* CMSE_ENTRY always returns. */
25953 gcc_assert (really_return);
25954 cmse_nonsecure_entry_clear_before_return ();
25955 }
25956
25957 if (!really_return)
25958 return;
25959
25960 if (crtl->calls_eh_return)
25961 emit_insn (gen_addsi3 (stack_pointer_rtx,
25962 stack_pointer_rtx,
25963 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25964
25965 if (IS_STACKALIGN (func_type))
25966 /* Restore the original stack pointer. Before prologue, the stack was
25967 realigned and the original stack pointer saved in r0. For details,
25968 see comment in arm_expand_prologue. */
25969 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25970
25971 emit_jump_insn (simple_return_rtx);
25972 }
25973
25974 /* Implementation of insn prologue_thumb1_interwork. This is the first
25975 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
25976
25977 const char *
25978 thumb1_output_interwork (void)
25979 {
25980 const char * name;
25981 FILE *f = asm_out_file;
25982
25983 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25984 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25985 == SYMBOL_REF);
25986 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25987
25988 /* Generate code sequence to switch us into Thumb mode. */
25989 /* The .code 32 directive has already been emitted by
25990 ASM_DECLARE_FUNCTION_NAME. */
25991 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25992 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
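/* In ARM state the PC reads as the address of the current instruction
   plus 8, i.e. the first Thumb instruction after the BX; ORRing in 1 sets
   the Thumb bit so that the BX switches state when it branches there.  */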
25993
25994 /* Generate a label, so that the debugger will notice the
25995 change in instruction sets. This label is also used by
25996 the assembler to bypass the ARM code when this function
25997 is called from a Thumb encoded function elsewhere in the
25998 same file. Hence the definition of STUB_NAME here must
25999 agree with the definition in gas/config/tc-arm.c. */
26000
26001 #define STUB_NAME ".real_start_of"
26002
26003 fprintf (f, "\t.code\t16\n");
26004 #ifdef ARM_PE
26005 if (arm_dllexport_name_p (name))
26006 name = arm_strip_name_encoding (name);
26007 #endif
26008 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26009 fprintf (f, "\t.thumb_func\n");
26010 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26011
26012 return "";
26013 }
26014
26015 /* Handle the case of a double word load into a low register from
26016 a computed memory address. The computed address may involve a
26017 register which is overwritten by the load. */
26018 const char *
26019 thumb_load_double_from_address (rtx *operands)
26020 {
26021 rtx addr;
26022 rtx base;
26023 rtx offset;
26024 rtx arg1;
26025 rtx arg2;
26026
26027 gcc_assert (REG_P (operands[0]));
26028 gcc_assert (MEM_P (operands[1]));
26029
26030 /* Get the memory address. */
26031 addr = XEXP (operands[1], 0);
26032
26033 /* Work out how the memory address is computed. */
26034 switch (GET_CODE (addr))
26035 {
26036 case REG:
26037 operands[2] = adjust_address (operands[1], SImode, 4);
26038
26039 if (REGNO (operands[0]) == REGNO (addr))
26040 {
26041 output_asm_insn ("ldr\t%H0, %2", operands);
26042 output_asm_insn ("ldr\t%0, %1", operands);
26043 }
26044 else
26045 {
26046 output_asm_insn ("ldr\t%0, %1", operands);
26047 output_asm_insn ("ldr\t%H0, %2", operands);
26048 }
26049 break;
26050
26051 case CONST:
26052 /* Compute <address> + 4 for the high order load. */
26053 operands[2] = adjust_address (operands[1], SImode, 4);
26054
26055 output_asm_insn ("ldr\t%0, %1", operands);
26056 output_asm_insn ("ldr\t%H0, %2", operands);
26057 break;
26058
26059 case PLUS:
26060 arg1 = XEXP (addr, 0);
26061 arg2 = XEXP (addr, 1);
26062
26063 if (CONSTANT_P (arg1))
26064 base = arg2, offset = arg1;
26065 else
26066 base = arg1, offset = arg2;
26067
26068 gcc_assert (REG_P (base));
26069
26070 /* Catch the case of <address> = <reg> + <reg> */
26071 if (REG_P (offset))
26072 {
26073 int reg_offset = REGNO (offset);
26074 int reg_base = REGNO (base);
26075 int reg_dest = REGNO (operands[0]);
26076
26077 /* Add the base and offset registers together into the
26078 higher destination register. */
26079 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26080 reg_dest + 1, reg_base, reg_offset);
26081
26082 /* Load the lower destination register from the address in
26083 the higher destination register. */
26084 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26085 reg_dest, reg_dest + 1);
26086
26087 /* Load the higher destination register from its own address
26088 plus 4. */
26089 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26090 reg_dest + 1, reg_dest + 1);
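/* Building the address in the upper half of the destination means the
   first load (into the lower half) cannot clobber it, and the second load
   may safely overwrite it because the address is no longer needed.  */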
26091 }
26092 else
26093 {
26094 /* Compute <address> + 4 for the high order load. */
26095 operands[2] = adjust_address (operands[1], SImode, 4);
26096
26097 /* If the computed address is held in the low order register
26098 then load the high order register first, otherwise always
26099 load the low order register first. */
26100 if (REGNO (operands[0]) == REGNO (base))
26101 {
26102 output_asm_insn ("ldr\t%H0, %2", operands);
26103 output_asm_insn ("ldr\t%0, %1", operands);
26104 }
26105 else
26106 {
26107 output_asm_insn ("ldr\t%0, %1", operands);
26108 output_asm_insn ("ldr\t%H0, %2", operands);
26109 }
26110 }
26111 break;
26112
26113 case LABEL_REF:
26114 /* With no registers to worry about we can just load the value
26115 directly. */
26116 operands[2] = adjust_address (operands[1], SImode, 4);
26117
26118 output_asm_insn ("ldr\t%H0, %2", operands);
26119 output_asm_insn ("ldr\t%0, %1", operands);
26120 break;
26121
26122 default:
26123 gcc_unreachable ();
26124 }
26125
26126 return "";
26127 }
26128
26129 const char *
26130 thumb_output_move_mem_multiple (int n, rtx *operands)
26131 {
26132 switch (n)
26133 {
26134 case 2:
26135 if (REGNO (operands[4]) > REGNO (operands[5]))
26136 std::swap (operands[4], operands[5]);
26137
26138 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26139 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26140 break;
26141
26142 case 3:
26143 if (REGNO (operands[4]) > REGNO (operands[5]))
26144 std::swap (operands[4], operands[5]);
26145 if (REGNO (operands[5]) > REGNO (operands[6]))
26146 std::swap (operands[5], operands[6]);
26147 if (REGNO (operands[4]) > REGNO (operands[5]))
26148 std::swap (operands[4], operands[5]);
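/* The three exchanges above sort the scratch registers into ascending
   order, so that the emitted LDM/STM register lists are ascending as the
   assembler expects.  */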
26149
26150 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26151 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26152 break;
26153
26154 default:
26155 gcc_unreachable ();
26156 }
26157
26158 return "";
26159 }
26160
26161 /* Output a call-via instruction for thumb state. */
26162 const char *
26163 thumb_call_via_reg (rtx reg)
26164 {
26165 int regno = REGNO (reg);
26166 rtx *labelp;
26167
26168 gcc_assert (regno < LR_REGNUM);
26169
26170 /* If we are in the normal text section we can use a single instance
26171 per compilation unit. If we are doing function sections, then we need
26172 an entry per section, since we can't rely on reachability. */
26173 if (in_section == text_section)
26174 {
26175 thumb_call_reg_needed = 1;
26176
26177 if (thumb_call_via_label[regno] == NULL)
26178 thumb_call_via_label[regno] = gen_label_rtx ();
26179 labelp = thumb_call_via_label + regno;
26180 }
26181 else
26182 {
26183 if (cfun->machine->call_via[regno] == NULL)
26184 cfun->machine->call_via[regno] = gen_label_rtx ();
26185 labelp = cfun->machine->call_via + regno;
26186 }
26187
26188 output_asm_insn ("bl\t%a0", labelp);
26189 return "";
26190 }
26191
26192 /* Routines for generating rtl. */
26193 void
26194 thumb_expand_movmemqi (rtx *operands)
26195 {
26196 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26197 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26198 HOST_WIDE_INT len = INTVAL (operands[2]);
26199 HOST_WIDE_INT offset = 0;
26200
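/* Copy the bulk of the block with 12-byte and 8-byte multi-register
   moves, then finish off with word, halfword and byte copies.  */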
26201 while (len >= 12)
26202 {
26203 emit_insn (gen_movmem12b (out, in, out, in));
26204 len -= 12;
26205 }
26206
26207 if (len >= 8)
26208 {
26209 emit_insn (gen_movmem8b (out, in, out, in));
26210 len -= 8;
26211 }
26212
26213 if (len >= 4)
26214 {
26215 rtx reg = gen_reg_rtx (SImode);
26216 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26217 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26218 len -= 4;
26219 offset += 4;
26220 }
26221
26222 if (len >= 2)
26223 {
26224 rtx reg = gen_reg_rtx (HImode);
26225 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26226 plus_constant (Pmode, in,
26227 offset))));
26228 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26229 offset)),
26230 reg));
26231 len -= 2;
26232 offset += 2;
26233 }
26234
26235 if (len)
26236 {
26237 rtx reg = gen_reg_rtx (QImode);
26238 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26239 plus_constant (Pmode, in,
26240 offset))));
26241 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26242 offset)),
26243 reg));
26244 }
26245 }
26246
26247 void
26248 thumb_reload_out_hi (rtx *operands)
26249 {
26250 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26251 }
26252
26253 /* Return the length of a function name prefix
26254 that starts with the character 'c'. */
26255 static int
26256 arm_get_strip_length (int c)
26257 {
26258 switch (c)
26259 {
26260 ARM_NAME_ENCODING_LENGTHS
26261 default: return 0;
26262 }
26263 }
26264
26265 /* Return a pointer to a function's name with any
26266 and all prefix encodings stripped from it. */
26267 const char *
26268 arm_strip_name_encoding (const char *name)
26269 {
26270 int skip;
26271
26272 while ((skip = arm_get_strip_length (* name)))
26273 name += skip;
26274
26275 return name;
26276 }
26277
26278 /* If there is a '*' anywhere in the name's prefix, then
26279 emit the stripped name verbatim, otherwise prepend an
26280 underscore if leading underscores are being used. */
26281 void
26282 arm_asm_output_labelref (FILE *stream, const char *name)
26283 {
26284 int skip;
26285 int verbatim = 0;
26286
26287 while ((skip = arm_get_strip_length (* name)))
26288 {
26289 verbatim |= (*name == '*');
26290 name += skip;
26291 }
26292
26293 if (verbatim)
26294 fputs (name, stream);
26295 else
26296 asm_fprintf (stream, "%U%s", name);
26297 }
26298
26299 /* This function is used to emit an EABI tag and its associated value.
26300 We emit the numerical value of the tag in case the assembler does not
26301 support textual tags (e.g. gas prior to 2.20). If requested we include
26302 the tag name in a comment so that anyone reading the assembler output
26303 will know which tag is being set.
26304
26305 This function is not static because arm-c.c needs it too. */
26306
26307 void
26308 arm_emit_eabi_attribute (const char *name, int num, int val)
26309 {
26310 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26311 if (flag_verbose_asm || flag_debug_asm)
26312 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26313 asm_fprintf (asm_out_file, "\n");
26314 }
26315
26316 /* This function is used to print CPU tuning information as a comment
26317 in the assembler file. Pointers are not printed for now. */
26318
26319 void
26320 arm_print_tune_info (void)
26321 {
26322 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26323 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26324 current_tune->constant_limit);
26325 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26326 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26327 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26328 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26329 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26330 "prefetch.l1_cache_size:\t%d\n",
26331 current_tune->prefetch.l1_cache_size);
26332 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26333 "prefetch.l1_cache_line_size:\t%d\n",
26334 current_tune->prefetch.l1_cache_line_size);
26335 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26336 "prefer_constant_pool:\t%d\n",
26337 (int) current_tune->prefer_constant_pool);
26338 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26339 "branch_cost:\t(s:speed, p:predictable)\n");
26340 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26341 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26342 current_tune->branch_cost (false, false));
26343 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26344 current_tune->branch_cost (false, true));
26345 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26346 current_tune->branch_cost (true, false));
26347 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26348 current_tune->branch_cost (true, true));
26349 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26350 "prefer_ldrd_strd:\t%d\n",
26351 (int) current_tune->prefer_ldrd_strd);
26352 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26353 "logical_op_non_short_circuit:\t[%d,%d]\n",
26354 (int) current_tune->logical_op_non_short_circuit_thumb,
26355 (int) current_tune->logical_op_non_short_circuit_arm);
26356 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26357 "prefer_neon_for_64bits:\t%d\n",
26358 (int) current_tune->prefer_neon_for_64bits);
26359 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26360 "disparage_flag_setting_t16_encodings:\t%d\n",
26361 (int) current_tune->disparage_flag_setting_t16_encodings);
26362 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26363 "string_ops_prefer_neon:\t%d\n",
26364 (int) current_tune->string_ops_prefer_neon);
26365 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26366 "max_insns_inline_memset:\t%d\n",
26367 current_tune->max_insns_inline_memset);
26368 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26369 current_tune->fusible_ops);
26370 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26371 (int) current_tune->sched_autopref);
26372 }
26373
26374 /* Print .arch and .arch_extension directives corresponding to the
26375 current architecture configuration. */
26376 static void
26377 arm_print_asm_arch_directives ()
26378 {
26379 const arch_option *arch
26380 = arm_parse_arch_option_name (all_architectures, "-march",
26381 arm_active_target.arch_name);
26382 auto_sbitmap opt_bits (isa_num_bits);
26383
26384 gcc_assert (arch);
26385
26386 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26387 arm_last_printed_arch_string = arm_active_target.arch_name;
26388 if (!arch->common.extensions)
26389 return;
26390
26391 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26392 opt->name != NULL;
26393 opt++)
26394 {
26395 if (!opt->remove)
26396 {
26397 arm_initialize_isa (opt_bits, opt->isa_bits);
26398
26399 /* If every feature bit of this option is set in the target
26400 ISA specification, print out the option name. However,
26401 don't print anything if all the bits are part of the
26402 FPU specification. */
26403 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26404 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26405 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26406 }
26407 }
26408 }
26409
26410 static void
26411 arm_file_start (void)
26412 {
26413 int val;
26414
26415 if (TARGET_BPABI)
26416 {
26417 /* We don't have a specified CPU. Use the architecture to
26418 generate the tags.
26419
26420 Note: it might be better to do this unconditionally, then the
26421 assembler would not need to know about all new CPU names as
26422 they are added. */
26423 if (!arm_active_target.core_name)
26424 {
26425 /* armv7ve doesn't support any extensions. */
26426 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26427 {
26428 /* Keep backward compatibility for assemblers
26429 which don't support armv7ve. */
26430 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26431 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26432 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26433 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26434 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26435 arm_last_printed_arch_string = "armv7ve";
26436 }
26437 else
26438 arm_print_asm_arch_directives ();
26439 }
26440 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26441 {
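/* Core names of the form "generic-<arch>" map straight onto an .arch
   directive; the + 8 below skips the "generic-" prefix.  */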
26442 asm_fprintf (asm_out_file, "\t.arch %s\n",
26443 arm_active_target.core_name + 8);
26444 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26445 }
26446 else
26447 {
26448 const char* truncated_name
26449 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26450 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26451 }
26452
26453 if (print_tune_info)
26454 arm_print_tune_info ();
26455
26456 if (! TARGET_SOFT_FLOAT)
26457 {
26458 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26459 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26460
26461 if (TARGET_HARD_FLOAT_ABI)
26462 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26463 }
26464
26465 /* Some of these attributes only apply when the corresponding features
26466 are used. However we don't have any easy way of figuring this out.
26467 Conservatively record the setting that would have been used. */
26468
26469 if (flag_rounding_math)
26470 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26471
26472 if (!flag_unsafe_math_optimizations)
26473 {
26474 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26475 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26476 }
26477 if (flag_signaling_nans)
26478 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26479
26480 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26481 flag_finite_math_only ? 1 : 3);
26482
26483 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26484 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
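/* Tag_ABI_enum_size: 1 means enums use the smallest container that fits,
   2 means they are int-sized.  */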
26485 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26486 flag_short_enums ? 1 : 2);
26487
26488 /* Tag_ABI_optimization_goals. */
26489 if (optimize_size)
26490 val = 4;
26491 else if (optimize >= 2)
26492 val = 2;
26493 else if (optimize)
26494 val = 1;
26495 else
26496 val = 6;
26497 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26498
26499 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26500 unaligned_access);
26501
26502 if (arm_fp16_format)
26503 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26504 (int) arm_fp16_format);
26505
26506 if (arm_lang_output_object_attributes_hook)
26507 arm_lang_output_object_attributes_hook ();
26508 }
26509
26510 default_file_start ();
26511 }
26512
26513 static void
26514 arm_file_end (void)
26515 {
26516 int regno;
26517
26518 if (NEED_INDICATE_EXEC_STACK)
26519 /* Add .note.GNU-stack. */
26520 file_end_indicate_exec_stack ();
26521
26522 if (! thumb_call_reg_needed)
26523 return;
26524
26525 switch_to_section (text_section);
26526 asm_fprintf (asm_out_file, "\t.code 16\n");
26527 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26528
26529 for (regno = 0; regno < LR_REGNUM; regno++)
26530 {
26531 rtx label = thumb_call_via_label[regno];
26532
26533 if (label != 0)
26534 {
26535 targetm.asm_out.internal_label (asm_out_file, "L",
26536 CODE_LABEL_NUMBER (label));
26537 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26538 }
26539 }
26540 }
26541
26542 #ifndef ARM_PE
26543 /* Symbols in the text segment can be accessed without indirecting via the
26544 constant pool; it may take an extra binary operation, but this is still
26545 faster than indirecting via memory. Don't do this when not optimizing,
26546 since we won't be calculating all of the offsets necessary to do this
26547 simplification. */
26548
26549 static void
26550 arm_encode_section_info (tree decl, rtx rtl, int first)
26551 {
26552 if (optimize > 0 && TREE_CONSTANT (decl))
26553 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26554
26555 default_encode_section_info (decl, rtl, first);
26556 }
26557 #endif /* !ARM_PE */
26558
26559 static void
26560 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26561 {
26562 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26563 && !strcmp (prefix, "L"))
26564 {
26565 arm_ccfsm_state = 0;
26566 arm_target_insn = NULL;
26567 }
26568 default_internal_label (stream, prefix, labelno);
26569 }
26570
26571 /* Output code to add DELTA to the first argument, and then jump
26572 to FUNCTION. Used for C++ multiple inheritance. */
26573
26574 static void
26575 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26576 HOST_WIDE_INT, tree function)
26577 {
26578 static int thunk_label = 0;
26579 char label[256];
26580 char labelpc[256];
26581 int mi_delta = delta;
26582 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26583 int shift = 0;
26584 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26585 ? 1 : 0);
26586 if (mi_delta < 0)
26587 mi_delta = - mi_delta;
26588
26589 final_start_function (emit_barrier (), file, 1);
26590
26591 if (TARGET_THUMB1)
26592 {
26593 int labelno = thunk_label++;
26594 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26595 /* Thunks are entered in arm mode when available. */
26596 if (TARGET_THUMB1_ONLY)
26597 {
26598 /* push r3 so we can use it as a temporary. */
26599 /* TODO: Omit this save if r3 is not used. */
26600 fputs ("\tpush {r3}\n", file);
26601 fputs ("\tldr\tr3, ", file);
26602 }
26603 else
26604 {
26605 fputs ("\tldr\tr12, ", file);
26606 }
26607 assemble_name (file, label);
26608 fputc ('\n', file);
26609 if (flag_pic)
26610 {
26611 /* If we are generating PIC, the ldr instruction below loads
26612 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26613 the address of the add + 8, so we have:
26614
26615 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26616 = target + 1.
26617
26618 Note that we have "+ 1" because some versions of GNU ld
26619 don't set the low bit of the result for R_ARM_REL32
26620 relocations against thumb function symbols.
26621 On ARMv6M this is +4, not +8. */
26622 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26623 assemble_name (file, labelpc);
26624 fputs (":\n", file);
26625 if (TARGET_THUMB1_ONLY)
26626 {
26627 /* This is 2 insns after the start of the thunk, so we know it
26628 is 4-byte aligned. */
26629 fputs ("\tadd\tr3, pc, r3\n", file);
26630 fputs ("\tmov r12, r3\n", file);
26631 }
26632 else
26633 fputs ("\tadd\tr12, pc, r12\n", file);
26634 }
26635 else if (TARGET_THUMB1_ONLY)
26636 fputs ("\tmov r12, r3\n", file);
26637 }
26638 if (TARGET_THUMB1_ONLY)
26639 {
26640 if (mi_delta > 255)
26641 {
26642 fputs ("\tldr\tr3, ", file);
26643 assemble_name (file, label);
26644 fputs ("+4\n", file);
26645 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26646 mi_op, this_regno, this_regno);
26647 }
26648 else if (mi_delta != 0)
26649 {
26650 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
26651 when one of the operands is an immediate. */
26652 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26653 mi_op, this_regno, this_regno,
26654 mi_delta);
26655 }
26656 }
26657 else
26658 {
26659 /* TODO: Use movw/movt for large constants when available. */
26660 while (mi_delta != 0)
26661 {
26662 if ((mi_delta & (3 << shift)) == 0)
26663 shift += 2;
26664 else
26665 {
26666 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26667 mi_op, this_regno, this_regno,
26668 mi_delta & (0xff << shift));
26669 mi_delta &= ~(0xff << shift);
26670 shift += 8;
26671 }
26672 }
26673 }
26674 if (TARGET_THUMB1)
26675 {
26676 if (TARGET_THUMB1_ONLY)
26677 fputs ("\tpop\t{r3}\n", file);
26678
26679 fprintf (file, "\tbx\tr12\n");
26680 ASM_OUTPUT_ALIGN (file, 2);
26681 assemble_name (file, label);
26682 fputs (":\n", file);
26683 if (flag_pic)
26684 {
26685 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26686 rtx tem = XEXP (DECL_RTL (function), 0);
26687 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26688 pipeline offset is four rather than eight. Adjust the offset
26689 accordingly. */
26690 tem = plus_constant (GET_MODE (tem), tem,
26691 TARGET_THUMB1_ONLY ? -3 : -7);
26692 tem = gen_rtx_MINUS (GET_MODE (tem),
26693 tem,
26694 gen_rtx_SYMBOL_REF (Pmode,
26695 ggc_strdup (labelpc)));
26696 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26697 }
26698 else
26699 /* Output ".word .LTHUNKn". */
26700 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26701
26702 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26703 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
26704 }
26705 else
26706 {
26707 fputs ("\tb\t", file);
26708 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26709 if (NEED_PLT_RELOC)
26710 fputs ("(PLT)", file);
26711 fputc ('\n', file);
26712 }
26713
26714 final_end_function ();
26715 }
26716
26717 /* MI thunk handling for TARGET_32BIT. */
26718
26719 static void
26720 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26721 HOST_WIDE_INT vcall_offset, tree function)
26722 {
26723 /* On ARM, this_regno is R0 or R1 depending on
26724 whether the function returns an aggregate or not.
26725 */
26726 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26727 function)
26728 ? R1_REGNUM : R0_REGNUM);
26729
26730 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26731 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26732 reload_completed = 1;
26733 emit_note (NOTE_INSN_PROLOGUE_END);
26734
26735 /* Add DELTA to THIS_RTX. */
26736 if (delta != 0)
26737 arm_split_constant (PLUS, Pmode, NULL_RTX,
26738 delta, this_rtx, this_rtx, false);
26739
26740 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26741 if (vcall_offset != 0)
26742 {
26743 /* Load *THIS_RTX. */
26744 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26745 /* Compute *THIS_RTX + VCALL_OFFSET. */
26746 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26747 false);
26748 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26749 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26750 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26751 }
26752
26753 /* Generate a tail call to the target function. */
26754 if (!TREE_USED (function))
26755 {
26756 assemble_external (function);
26757 TREE_USED (function) = 1;
26758 }
26759 rtx funexp = XEXP (DECL_RTL (function), 0);
26760 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26761 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26762 SIBLING_CALL_P (insn) = 1;
26763
26764 insn = get_insns ();
26765 shorten_branches (insn);
26766 final_start_function (insn, file, 1);
26767 final (insn, file, 1);
26768 final_end_function ();
26769
26770 /* Stop pretending this is a post-reload pass. */
26771 reload_completed = 0;
26772 }
26773
26774 /* Output code to add DELTA to the first argument, and then jump
26775 to FUNCTION. Used for C++ multiple inheritance. */
26776
26777 static void
26778 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26779 HOST_WIDE_INT vcall_offset, tree function)
26780 {
26781 if (TARGET_32BIT)
26782 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26783 else
26784 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26785 }
26786
26787 int
26788 arm_emit_vector_const (FILE *file, rtx x)
26789 {
26790 int i;
26791 const char * pattern;
26792
26793 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26794
26795 switch (GET_MODE (x))
26796 {
26797 case E_V2SImode: pattern = "%08x"; break;
26798 case E_V4HImode: pattern = "%04x"; break;
26799 case E_V8QImode: pattern = "%02x"; break;
26800 default: gcc_unreachable ();
26801 }
26802
26803 fprintf (file, "0x");
26804 for (i = CONST_VECTOR_NUNITS (x); i--;)
26805 {
26806 rtx element;
26807
26808 element = CONST_VECTOR_ELT (x, i);
26809 fprintf (file, pattern, INTVAL (element));
26810 }
26811
26812 return 1;
26813 }
26814
26815 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26816 HFmode constant pool entries are actually loaded with ldr. */
26817 void
26818 arm_emit_fp16_const (rtx c)
26819 {
26820 long bits;
26821
26822 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26823 if (WORDS_BIG_ENDIAN)
26824 assemble_zeros (2);
26825 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26826 if (!WORDS_BIG_ENDIAN)
26827 assemble_zeros (2);
26828 }
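
/* Illustrative note (not part of the original source): the two-byte zero
   padding above is placed so that an ldr of the whole 4-byte word leaves
   the fp16 bit pattern in the low halfword of the loaded register on both
   endiannesses.  E.g. for the constant 1.0 (bit pattern 0x3c00) the bytes
   emitted are 00 3c 00 00 on little-endian and 00 00 3c 00 on big-endian,
   and the loaded word is 0x00003c00 either way.  */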
26829
26830 const char *
26831 arm_output_load_gr (rtx *operands)
26832 {
26833 rtx reg;
26834 rtx offset;
26835 rtx wcgr;
26836 rtx sum;
26837
26838 if (!MEM_P (operands [1])
26839 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26840 || !REG_P (reg = XEXP (sum, 0))
26841 || !CONST_INT_P (offset = XEXP (sum, 1))
26842 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26843 return "wldrw%?\t%0, %1";
26844
26845 /* Fix up an out-of-range load of a GR register. */
26846 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26847 wcgr = operands[0];
26848 operands[0] = reg;
26849 output_asm_insn ("ldr%?\t%0, %1", operands);
26850
26851 operands[0] = wcgr;
26852 operands[1] = reg;
26853 output_asm_insn ("tmcr%?\t%0, %1", operands);
26854 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26855
26856 return "";
26857 }
26858
26859 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26860
26861 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26862 named arg and all anonymous args onto the stack.
26863 XXX I know the prologue shouldn't be pushing registers, but it is faster
26864 that way. */
26865
26866 static void
26867 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26868 machine_mode mode,
26869 tree type,
26870 int *pretend_size,
26871 int second_time ATTRIBUTE_UNUSED)
26872 {
26873 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26874 int nregs;
26875
26876 cfun->machine->uses_anonymous_args = 1;
26877 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26878 {
26879 nregs = pcum->aapcs_ncrn;
26880 if (nregs & 1)
26881 {
26882 int res = arm_needs_doubleword_align (mode, type);
26883 if (res < 0 && warn_psabi)
26884 inform (input_location, "parameter passing for argument of "
26885 "type %qT changed in GCC 7.1", type);
26886 else if (res > 0)
26887 nregs++;
26888 }
26889 }
26890 else
26891 nregs = pcum->nregs;
26892
26893 if (nregs < NUM_ARG_REGS)
26894 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26895 }
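
/* Worked example (not part of the original source): for a variadic
   function such as  int f (int a, ...)  under AAPCS, one core register
   (r0) is consumed by the named argument, so nregs is 1 and *pretend_size
   becomes (4 - 1) * 4 == 12 bytes; the prologue then pushes r1-r3 so the
   anonymous arguments sit contiguously with any stack-passed ones.  */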
26896
26897 /* We can't rely on the caller doing the proper promotion when
26898 using APCS or ATPCS. */
26899
26900 static bool
26901 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26902 {
26903 return !TARGET_AAPCS_BASED;
26904 }
26905
26906 static machine_mode
26907 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26908 machine_mode mode,
26909 int *punsignedp ATTRIBUTE_UNUSED,
26910 const_tree fntype ATTRIBUTE_UNUSED,
26911 int for_return ATTRIBUTE_UNUSED)
26912 {
26913 if (GET_MODE_CLASS (mode) == MODE_INT
26914 && GET_MODE_SIZE (mode) < 4)
26915 return SImode;
26916
26917 return mode;
26918 }
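
/* For example, a QImode or HImode (char or short) argument or return
   value is widened to SImode here, so it travels in a full 32-bit
   register.  */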
26919
26920
26921 static bool
26922 arm_default_short_enums (void)
26923 {
26924 return ARM_DEFAULT_SHORT_ENUMS;
26925 }
26926
26927
26928 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26929
26930 static bool
26931 arm_align_anon_bitfield (void)
26932 {
26933 return TARGET_AAPCS_BASED;
26934 }
26935
26936
26937 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26938
26939 static tree
26940 arm_cxx_guard_type (void)
26941 {
26942 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26943 }
26944
26945
26946 /* The EABI says test the least significant bit of a guard variable. */
26947
26948 static bool
26949 arm_cxx_guard_mask_bit (void)
26950 {
26951 return TARGET_AAPCS_BASED;
26952 }
26953
26954
26955 /* The EABI specifies that all array cookies are 8 bytes long. */
26956
26957 static tree
26958 arm_get_cookie_size (tree type)
26959 {
26960 tree size;
26961
26962 if (!TARGET_AAPCS_BASED)
26963 return default_cxx_get_cookie_size (type);
26964
26965 size = build_int_cst (sizetype, 8);
26966 return size;
26967 }
26968
26969
26970 /* The EABI says that array cookies should also contain the element size. */
26971
26972 static bool
26973 arm_cookie_has_size (void)
26974 {
26975 return TARGET_AAPCS_BASED;
26976 }
26977
26978
26979 /* The EABI says constructors and destructors should return a pointer to
26980 the object constructed/destroyed. */
26981
26982 static bool
26983 arm_cxx_cdtor_returns_this (void)
26984 {
26985 return TARGET_AAPCS_BASED;
26986 }
26987
26988 /* The EABI says that an inline function may never be the key
26989 method. */
26990
26991 static bool
26992 arm_cxx_key_method_may_be_inline (void)
26993 {
26994 return !TARGET_AAPCS_BASED;
26995 }
26996
26997 static void
26998 arm_cxx_determine_class_data_visibility (tree decl)
26999 {
27000 if (!TARGET_AAPCS_BASED
27001 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27002 return;
27003
27004 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27005 is exported. However, on systems without dynamic vague linkage,
27006 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27007 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27008 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27009 else
27010 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27011 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27012 }
27013
27014 static bool
27015 arm_cxx_class_data_always_comdat (void)
27016 {
27017 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27018 vague linkage if the class has no key function. */
27019 return !TARGET_AAPCS_BASED;
27020 }
27021
27022
27023 /* The EABI says __aeabi_atexit should be used to register static
27024 destructors. */
27025
27026 static bool
27027 arm_cxx_use_aeabi_atexit (void)
27028 {
27029 return TARGET_AAPCS_BASED;
27030 }
27031
27032
27033 void
27034 arm_set_return_address (rtx source, rtx scratch)
27035 {
27036 arm_stack_offsets *offsets;
27037 HOST_WIDE_INT delta;
27038 rtx addr, mem;
27039 unsigned long saved_regs;
27040
27041 offsets = arm_get_frame_offsets ();
27042 saved_regs = offsets->saved_regs_mask;
27043
27044 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27045 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27046 else
27047 {
27048 if (frame_pointer_needed)
27049 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27050 else
27051 {
27052 /* LR will be the first saved register. */
27053 delta = offsets->outgoing_args - (offsets->frame + 4);
27054
27055
27056 if (delta >= 4096)
27057 {
27058 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27059 GEN_INT (delta & ~4095)));
27060 addr = scratch;
27061 delta &= 4095;
27062 }
27063 else
27064 addr = stack_pointer_rtx;
27065
27066 addr = plus_constant (Pmode, addr, delta);
27067 }
27068
27069 /* The store needs to be marked to prevent DSE from deleting
27070 it as dead if it is based on fp. */
27071 mem = gen_frame_mem (Pmode, addr);
27072 MEM_VOLATILE_P (mem) = true;
27073 emit_move_insn (mem, source);
27074 }
27075 }
27076
27077
27078 void
27079 thumb_set_return_address (rtx source, rtx scratch)
27080 {
27081 arm_stack_offsets *offsets;
27082 HOST_WIDE_INT delta;
27083 HOST_WIDE_INT limit;
27084 int reg;
27085 rtx addr, mem;
27086 unsigned long mask;
27087
27088 emit_use (source);
27089
27090 offsets = arm_get_frame_offsets ();
27091 mask = offsets->saved_regs_mask;
27092 if (mask & (1 << LR_REGNUM))
27093 {
27094 limit = 1024;
27095 /* Find the saved regs. */
27096 if (frame_pointer_needed)
27097 {
27098 delta = offsets->soft_frame - offsets->saved_args;
27099 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27100 if (TARGET_THUMB1)
27101 limit = 128;
27102 }
27103 else
27104 {
27105 delta = offsets->outgoing_args - offsets->saved_args;
27106 reg = SP_REGNUM;
27107 }
27108 /* Allow for the stack frame. */
27109 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27110 delta -= 16;
27111 /* The link register is always the first saved register. */
27112 delta -= 4;
27113
27114 /* Construct the address. */
27115 addr = gen_rtx_REG (SImode, reg);
27116 if (delta > limit)
27117 {
27118 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27119 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27120 addr = scratch;
27121 }
27122 else
27123 addr = plus_constant (Pmode, addr, delta);
27124
27125 /* The store needs to be marked to prevent DSE from deleting
27126 it as dead if it is based on fp. */
27127 mem = gen_frame_mem (Pmode, addr);
27128 MEM_VOLATILE_P (mem) = true;
27129 emit_move_insn (mem, source);
27130 }
27131 else
27132 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27133 }
27134
27135 /* Implements target hook vector_mode_supported_p. */
27136 bool
27137 arm_vector_mode_supported_p (machine_mode mode)
27138 {
27139 /* Neon also supports V2SImode, etc. listed in the clause below. */
27140 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27141 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27142 || mode == V2DImode || mode == V8HFmode))
27143 return true;
27144
27145 if ((TARGET_NEON || TARGET_IWMMXT)
27146 && ((mode == V2SImode)
27147 || (mode == V4HImode)
27148 || (mode == V8QImode)))
27149 return true;
27150
27151 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27152 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27153 || mode == V2HAmode))
27154 return true;
27155
27156 return false;
27157 }
27158
27159 /* Implements target hook array_mode_supported_p. */
27160
27161 static bool
27162 arm_array_mode_supported_p (machine_mode mode,
27163 unsigned HOST_WIDE_INT nelems)
27164 {
27165 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27166 for now, as the lane-swapping logic needs to be extended in the expanders.
27167 See PR target/82518. */
27168 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27169 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27170 && (nelems >= 2 && nelems <= 4))
27171 return true;
27172
27173 return false;
27174 }
27175
27176 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27177 registers when autovectorizing for Neon, at least until multiple vector
27178 widths are supported properly by the middle-end. */
27179
27180 static machine_mode
27181 arm_preferred_simd_mode (scalar_mode mode)
27182 {
27183 if (TARGET_NEON)
27184 switch (mode)
27185 {
27186 case E_SFmode:
27187 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27188 case E_SImode:
27189 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27190 case E_HImode:
27191 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27192 case E_QImode:
27193 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27194 case E_DImode:
27195 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27196 return V2DImode;
27197 break;
27198
27199 default:;
27200 }
27201
27202 if (TARGET_REALLY_IWMMXT)
27203 switch (mode)
27204 {
27205 case E_SImode:
27206 return V2SImode;
27207 case E_HImode:
27208 return V4HImode;
27209 case E_QImode:
27210 return V8QImode;
27211
27212 default:;
27213 }
27214
27215 return word_mode;
27216 }
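
/* For example, with Neon enabled SImode data is autovectorized as
   V4SImode (a 128-bit q register) by default, or as V2SImode (a 64-bit
   d register) when -mvectorize-with-neon-double is given.  */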
27217
27218 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27219
27220 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27221 using r0-r4 for function arguments, r7 for the stack frame and not have
27222 enough left over to do doubleword arithmetic. For Thumb-2 all the
27223 potentially problematic instructions accept high registers so this is not
27224 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27225 that require many low registers. */
27226 static bool
27227 arm_class_likely_spilled_p (reg_class_t rclass)
27228 {
27229 if ((TARGET_THUMB1 && rclass == LO_REGS)
27230 || rclass == CC_REG)
27231 return true;
27232
27233 return false;
27234 }
27235
27236 /* Implements target hook small_register_classes_for_mode_p. */
27237 bool
27238 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27239 {
27240 return TARGET_THUMB1;
27241 }
27242
27243 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27244 ARM insns and therefore guarantee that the shift count is modulo 256.
27245 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27246 guarantee no particular behavior for out-of-range counts. */
27247
27248 static unsigned HOST_WIDE_INT
27249 arm_shift_truncation_mask (machine_mode mode)
27250 {
27251 return mode == SImode ? 255 : 0;
27252 }
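
/* Illustrative sketch (not part of the original source): a mask of 255
   for SImode lets the compiler drop an explicit masking of a variable
   shift count, because ARM register-specified shifts already use only
   the bottom byte of the count register.  For instance, in

     unsigned int f (unsigned int x, unsigned int n)
     {
       return x << (n & 255);
     }

   the `& 255' can be optimized away.  For DImode the mask is 0, so no
   such assumption is made about 64-bit shifts, which may be implemented
   by libcalls or multi-insn sequences.  */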
27253
27254
27255 /* Map internal gcc register numbers to DWARF2 register numbers. */
27256
27257 unsigned int
27258 arm_dbx_register_number (unsigned int regno)
27259 {
27260 if (regno < 16)
27261 return regno;
27262
27263 if (IS_VFP_REGNUM (regno))
27264 {
27265 /* See comment in arm_dwarf_register_span. */
27266 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27267 return 64 + regno - FIRST_VFP_REGNUM;
27268 else
27269 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27270 }
27271
27272 if (IS_IWMMXT_GR_REGNUM (regno))
27273 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27274
27275 if (IS_IWMMXT_REGNUM (regno))
27276 return 112 + regno - FIRST_IWMMXT_REGNUM;
27277
27278 return DWARF_FRAME_REGISTERS;
27279 }
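
/* Worked examples (not part of the original source), using the usual
   arm.h register numbering: core registers map to themselves (r0-r15 ->
   0-15); s5 maps to 64 + 5 = 69 in the legacy VFP range; a double-only
   register such as d20 (internal regno FIRST_VFP_REGNUM + 40) maps to
   256 + 20 = 276; iWMMXt wCGR and wR registers start at 104 and 112.  */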
27280
27281 /* DWARF models VFPv3 registers as 32 64-bit registers.
27282 GCC models them as 64 32-bit registers, so we need to describe this to
27283 the DWARF generation code. Other registers can use the default. */
27284 static rtx
27285 arm_dwarf_register_span (rtx rtl)
27286 {
27287 machine_mode mode;
27288 unsigned regno;
27289 rtx parts[16];
27290 int nregs;
27291 int i;
27292
27293 regno = REGNO (rtl);
27294 if (!IS_VFP_REGNUM (regno))
27295 return NULL_RTX;
27296
27297 /* XXX FIXME: The EABI defines two VFP register ranges:
27298 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27299 256-287: D0-D31
27300 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27301 corresponding D register. Until GDB supports this, we shall use the
27302 legacy encodings. We also use these encodings for D0-D15 for
27303 compatibility with older debuggers. */
27304 mode = GET_MODE (rtl);
27305 if (GET_MODE_SIZE (mode) < 8)
27306 return NULL_RTX;
27307
27308 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27309 {
27310 nregs = GET_MODE_SIZE (mode) / 4;
27311 for (i = 0; i < nregs; i += 2)
27312 if (TARGET_BIG_END)
27313 {
27314 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27315 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27316 }
27317 else
27318 {
27319 parts[i] = gen_rtx_REG (SImode, regno + i);
27320 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27321 }
27322 }
27323 else
27324 {
27325 nregs = GET_MODE_SIZE (mode) / 8;
27326 for (i = 0; i < nregs; i++)
27327 parts[i] = gen_rtx_REG (DImode, regno + i);
27328 }
27329
27330 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27331 }
27332
27333 #if ARM_UNWIND_INFO
27334 /* Emit unwind directives for a store-multiple instruction or stack pointer
27335 push during alignment.
27336 These should only ever be generated by the function prologue code, so
27337 expect them to have a particular form.
27338 The store-multiple instruction sometimes pushes pc as the last register,
27339 although it should not be tracked in the unwind information, or for -Os
27340 sometimes pushes some dummy registers before the first register that needs
27341 to be tracked in the unwind information; such dummy registers exist only
27342 to avoid a separate stack adjustment, and will not be restored in the
27343 epilogue. */
27344
27345 static void
27346 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27347 {
27348 int i;
27349 HOST_WIDE_INT offset;
27350 HOST_WIDE_INT nregs;
27351 int reg_size;
27352 unsigned reg;
27353 unsigned lastreg;
27354 unsigned padfirst = 0, padlast = 0;
27355 rtx e;
27356
27357 e = XVECEXP (p, 0, 0);
27358 gcc_assert (GET_CODE (e) == SET);
27359
27360 /* First insn will adjust the stack pointer. */
27361 gcc_assert (GET_CODE (e) == SET
27362 && REG_P (SET_DEST (e))
27363 && REGNO (SET_DEST (e)) == SP_REGNUM
27364 && GET_CODE (SET_SRC (e)) == PLUS);
27365
27366 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27367 nregs = XVECLEN (p, 0) - 1;
27368 gcc_assert (nregs);
27369
27370 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27371 if (reg < 16)
27372 {
27373 /* For -Os dummy registers can be pushed at the beginning to
27374 avoid separate stack pointer adjustment. */
27375 e = XVECEXP (p, 0, 1);
27376 e = XEXP (SET_DEST (e), 0);
27377 if (GET_CODE (e) == PLUS)
27378 padfirst = INTVAL (XEXP (e, 1));
27379 gcc_assert (padfirst == 0 || optimize_size);
27380 /* The function prologue may also push pc, but not annotate it as it is
27381 never restored. We turn this into a stack pointer adjustment. */
27382 e = XVECEXP (p, 0, nregs);
27383 e = XEXP (SET_DEST (e), 0);
27384 if (GET_CODE (e) == PLUS)
27385 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27386 else
27387 padlast = offset - 4;
27388 gcc_assert (padlast == 0 || padlast == 4);
27389 if (padlast == 4)
27390 fprintf (asm_out_file, "\t.pad #4\n");
27391 reg_size = 4;
27392 fprintf (asm_out_file, "\t.save {");
27393 }
27394 else if (IS_VFP_REGNUM (reg))
27395 {
27396 reg_size = 8;
27397 fprintf (asm_out_file, "\t.vsave {");
27398 }
27399 else
27400 /* Unknown register type. */
27401 gcc_unreachable ();
27402
27403 /* If the stack increment doesn't match the size of the saved registers,
27404 something has gone horribly wrong. */
27405 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27406
27407 offset = padfirst;
27408 lastreg = 0;
27409 /* The remaining insns will describe the stores. */
27410 for (i = 1; i <= nregs; i++)
27411 {
27412 /* Expect (set (mem <addr>) (reg)).
27413 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27414 e = XVECEXP (p, 0, i);
27415 gcc_assert (GET_CODE (e) == SET
27416 && MEM_P (SET_DEST (e))
27417 && REG_P (SET_SRC (e)));
27418
27419 reg = REGNO (SET_SRC (e));
27420 gcc_assert (reg >= lastreg);
27421
27422 if (i != 1)
27423 fprintf (asm_out_file, ", ");
27424 /* We can't use %r for vfp because we need to use the
27425 double precision register names. */
27426 if (IS_VFP_REGNUM (reg))
27427 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27428 else
27429 asm_fprintf (asm_out_file, "%r", reg);
27430
27431 if (flag_checking)
27432 {
27433 /* Check that the addresses are consecutive. */
27434 e = XEXP (SET_DEST (e), 0);
27435 if (GET_CODE (e) == PLUS)
27436 gcc_assert (REG_P (XEXP (e, 0))
27437 && REGNO (XEXP (e, 0)) == SP_REGNUM
27438 && CONST_INT_P (XEXP (e, 1))
27439 && offset == INTVAL (XEXP (e, 1)));
27440 else
27441 gcc_assert (i == 1
27442 && REG_P (e)
27443 && REGNO (e) == SP_REGNUM);
27444 offset += reg_size;
27445 }
27446 }
27447 fprintf (asm_out_file, "}\n");
27448 if (padfirst)
27449 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27450 }
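
/* Illustrative example (not part of the original source): for a prologue
   store-multiple such as

     push {r4, r5, lr}

   this routine emits

     .save {r4, r5, lr}

   and, when the instruction also pushed pc or leading dummy registers to
   fold a stack adjustment into the push, an additional

     .pad #<bytes>

   before or after the .save as appropriate.  */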
27451
27452 /* Emit unwind directives for a SET. */
27453
27454 static void
27455 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27456 {
27457 rtx e0;
27458 rtx e1;
27459 unsigned reg;
27460
27461 e0 = XEXP (p, 0);
27462 e1 = XEXP (p, 1);
27463 switch (GET_CODE (e0))
27464 {
27465 case MEM:
27466 /* Pushing a single register. */
27467 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27468 || !REG_P (XEXP (XEXP (e0, 0), 0))
27469 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27470 abort ();
27471
27472 asm_fprintf (asm_out_file, "\t.save ");
27473 if (IS_VFP_REGNUM (REGNO (e1)))
27474 asm_fprintf(asm_out_file, "{d%d}\n",
27475 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27476 else
27477 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27478 break;
27479
27480 case REG:
27481 if (REGNO (e0) == SP_REGNUM)
27482 {
27483 /* A stack increment. */
27484 if (GET_CODE (e1) != PLUS
27485 || !REG_P (XEXP (e1, 0))
27486 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27487 || !CONST_INT_P (XEXP (e1, 1)))
27488 abort ();
27489
27490 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27491 -INTVAL (XEXP (e1, 1)));
27492 }
27493 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27494 {
27495 HOST_WIDE_INT offset;
27496
27497 if (GET_CODE (e1) == PLUS)
27498 {
27499 if (!REG_P (XEXP (e1, 0))
27500 || !CONST_INT_P (XEXP (e1, 1)))
27501 abort ();
27502 reg = REGNO (XEXP (e1, 0));
27503 offset = INTVAL (XEXP (e1, 1));
27504 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27505 HARD_FRAME_POINTER_REGNUM, reg,
27506 offset);
27507 }
27508 else if (REG_P (e1))
27509 {
27510 reg = REGNO (e1);
27511 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27512 HARD_FRAME_POINTER_REGNUM, reg);
27513 }
27514 else
27515 abort ();
27516 }
27517 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27518 {
27519 /* Move from sp to reg. */
27520 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27521 }
27522 else if (GET_CODE (e1) == PLUS
27523 && REG_P (XEXP (e1, 0))
27524 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27525 && CONST_INT_P (XEXP (e1, 1)))
27526 {
27527 /* Set reg to offset from sp. */
27528 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27529 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27530 }
27531 else
27532 abort ();
27533 break;
27534
27535 default:
27536 abort ();
27537 }
27538 }
27539
27540
27541 /* Emit unwind directives for the given insn. */
27542
27543 static void
27544 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27545 {
27546 rtx note, pat;
27547 bool handled_one = false;
27548
27549 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27550 return;
27551
27552 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27553 && (TREE_NOTHROW (current_function_decl)
27554 || crtl->all_throwers_are_sibcalls))
27555 return;
27556
27557 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27558 return;
27559
27560 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27561 {
27562 switch (REG_NOTE_KIND (note))
27563 {
27564 case REG_FRAME_RELATED_EXPR:
27565 pat = XEXP (note, 0);
27566 goto found;
27567
27568 case REG_CFA_REGISTER:
27569 pat = XEXP (note, 0);
27570 if (pat == NULL)
27571 {
27572 pat = PATTERN (insn);
27573 if (GET_CODE (pat) == PARALLEL)
27574 pat = XVECEXP (pat, 0, 0);
27575 }
27576
27577 /* Only emitted for IS_STACKALIGN re-alignment. */
27578 {
27579 rtx dest, src;
27580 unsigned reg;
27581
27582 src = SET_SRC (pat);
27583 dest = SET_DEST (pat);
27584
27585 gcc_assert (src == stack_pointer_rtx);
27586 reg = REGNO (dest);
27587 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27588 reg + 0x90, reg);
27589 }
27590 handled_one = true;
27591 break;
27592
27593 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27594 to get correct dwarf information for shrink-wrapping. We should not
27595 emit unwind information for it because these notes are used either for
27596 pretend arguments or to adjust sp and restore registers from the
27597 stack. */
27598 case REG_CFA_DEF_CFA:
27599 case REG_CFA_ADJUST_CFA:
27600 case REG_CFA_RESTORE:
27601 return;
27602
27603 case REG_CFA_EXPRESSION:
27604 case REG_CFA_OFFSET:
27605 /* ??? Only handling here what we actually emit. */
27606 gcc_unreachable ();
27607
27608 default:
27609 break;
27610 }
27611 }
27612 if (handled_one)
27613 return;
27614 pat = PATTERN (insn);
27615 found:
27616
27617 switch (GET_CODE (pat))
27618 {
27619 case SET:
27620 arm_unwind_emit_set (asm_out_file, pat);
27621 break;
27622
27623 case SEQUENCE:
27624 /* Store multiple. */
27625 arm_unwind_emit_sequence (asm_out_file, pat);
27626 break;
27627
27628 default:
27629 abort();
27630 }
27631 }
27632
27633
27634 /* Output a reference from a function exception table to the type_info
27635 object X. The EABI specifies that the symbol should be relocated by
27636 an R_ARM_TARGET2 relocation. */
27637
27638 static bool
27639 arm_output_ttype (rtx x)
27640 {
27641 fputs ("\t.word\t", asm_out_file);
27642 output_addr_const (asm_out_file, x);
27643 /* Use special relocations for symbol references. */
27644 if (!CONST_INT_P (x))
27645 fputs ("(TARGET2)", asm_out_file);
27646 fputc ('\n', asm_out_file);
27647
27648 return TRUE;
27649 }
27650
27651 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27652
27653 static void
27654 arm_asm_emit_except_personality (rtx personality)
27655 {
27656 fputs ("\t.personality\t", asm_out_file);
27657 output_addr_const (asm_out_file, personality);
27658 fputc ('\n', asm_out_file);
27659 }
27660 #endif /* ARM_UNWIND_INFO */
27661
27662 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27663
27664 static void
27665 arm_asm_init_sections (void)
27666 {
27667 #if ARM_UNWIND_INFO
27668 exception_section = get_unnamed_section (0, output_section_asm_op,
27669 "\t.handlerdata");
27670 #endif /* ARM_UNWIND_INFO */
27671
27672 #ifdef OBJECT_FORMAT_ELF
27673 if (target_pure_code)
27674 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27675 #endif
27676 }
27677
27678 /* Output unwind directives for the start/end of a function. */
27679
27680 void
27681 arm_output_fn_unwind (FILE * f, bool prologue)
27682 {
27683 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27684 return;
27685
27686 if (prologue)
27687 fputs ("\t.fnstart\n", f);
27688 else
27689 {
27690 /* If this function will never be unwound, then mark it as such.
27691 The same condition is used in arm_unwind_emit to suppress
27692 the frame annotations. */
27693 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27694 && (TREE_NOTHROW (current_function_decl)
27695 || crtl->all_throwers_are_sibcalls))
27696 fputs("\t.cantunwind\n", f);
27697
27698 fputs ("\t.fnend\n", f);
27699 }
27700 }
27701
27702 static bool
27703 arm_emit_tls_decoration (FILE *fp, rtx x)
27704 {
27705 enum tls_reloc reloc;
27706 rtx val;
27707
27708 val = XVECEXP (x, 0, 0);
27709 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27710
27711 output_addr_const (fp, val);
27712
27713 switch (reloc)
27714 {
27715 case TLS_GD32:
27716 fputs ("(tlsgd)", fp);
27717 break;
27718 case TLS_LDM32:
27719 fputs ("(tlsldm)", fp);
27720 break;
27721 case TLS_LDO32:
27722 fputs ("(tlsldo)", fp);
27723 break;
27724 case TLS_IE32:
27725 fputs ("(gottpoff)", fp);
27726 break;
27727 case TLS_LE32:
27728 fputs ("(tpoff)", fp);
27729 break;
27730 case TLS_DESCSEQ:
27731 fputs ("(tlsdesc)", fp);
27732 break;
27733 default:
27734 gcc_unreachable ();
27735 }
27736
27737 switch (reloc)
27738 {
27739 case TLS_GD32:
27740 case TLS_LDM32:
27741 case TLS_IE32:
27742 case TLS_DESCSEQ:
27743 fputs (" + (. - ", fp);
27744 output_addr_const (fp, XVECEXP (x, 0, 2));
27745 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27746 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27747 output_addr_const (fp, XVECEXP (x, 0, 3));
27748 fputc (')', fp);
27749 break;
27750 default:
27751 break;
27752 }
27753
27754 return TRUE;
27755 }
27756
27757 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27758
27759 static void
27760 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27761 {
27762 gcc_assert (size == 4);
27763 fputs ("\t.word\t", file);
27764 output_addr_const (file, x);
27765 fputs ("(tlsldo)", file);
27766 }
27767
27768 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27769
27770 static bool
27771 arm_output_addr_const_extra (FILE *fp, rtx x)
27772 {
27773 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27774 return arm_emit_tls_decoration (fp, x);
27775 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27776 {
27777 char label[256];
27778 int labelno = INTVAL (XVECEXP (x, 0, 0));
27779
27780 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27781 assemble_name_raw (fp, label);
27782
27783 return TRUE;
27784 }
27785 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27786 {
27787 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27788 if (GOT_PCREL)
27789 fputs ("+.", fp);
27790 fputs ("-(", fp);
27791 output_addr_const (fp, XVECEXP (x, 0, 0));
27792 fputc (')', fp);
27793 return TRUE;
27794 }
27795 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27796 {
27797 output_addr_const (fp, XVECEXP (x, 0, 0));
27798 if (GOT_PCREL)
27799 fputs ("+.", fp);
27800 fputs ("-(", fp);
27801 output_addr_const (fp, XVECEXP (x, 0, 1));
27802 fputc (')', fp);
27803 return TRUE;
27804 }
27805 else if (GET_CODE (x) == CONST_VECTOR)
27806 return arm_emit_vector_const (fp, x);
27807
27808 return FALSE;
27809 }
27810
27811 /* Output assembly for a shift instruction.
27812 SET_FLAGS determines how the instruction modifies the condition codes.
27813 0 - Do not set condition codes.
27814 1 - Set condition codes.
27815 2 - Use smallest instruction. */
27816 const char *
27817 arm_output_shift(rtx * operands, int set_flags)
27818 {
27819 char pattern[100];
27820 static const char flag_chars[3] = {'?', '.', '!'};
27821 const char *shift;
27822 HOST_WIDE_INT val;
27823 char c;
27824
27825 c = flag_chars[set_flags];
27826 shift = shift_op(operands[3], &val);
27827 if (shift)
27828 {
27829 if (val != -1)
27830 operands[2] = GEN_INT(val);
27831 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27832 }
27833 else
27834 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27835
27836 output_asm_insn (pattern, operands);
27837 return "";
27838 }
27839
27840 /* Output assembly for a WMMX immediate shift instruction. */
27841 const char *
27842 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27843 {
27844 int shift = INTVAL (operands[2]);
27845 char templ[50];
27846 machine_mode opmode = GET_MODE (operands[0]);
27847
27848 gcc_assert (shift >= 0);
27849
27850 /* Handle shift values that exceed the maximum for the element width:
27851 > 63 (for the D qualifier), > 31 (for W) or > 15 (for H). */
27852 if (((opmode == V4HImode) && (shift > 15))
27853 || ((opmode == V2SImode) && (shift > 31))
27854 || ((opmode == DImode) && (shift > 63)))
27855 {
27856 if (wror_or_wsra)
27857 {
27858 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27859 output_asm_insn (templ, operands);
27860 if (opmode == DImode)
27861 {
27862 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27863 output_asm_insn (templ, operands);
27864 }
27865 }
27866 else
27867 {
27868 /* The destination register will contain all zeros. */
27869 sprintf (templ, "wzero\t%%0");
27870 output_asm_insn (templ, operands);
27871 }
27872 return "";
27873 }
27874
27875 if ((opmode == DImode) && (shift > 32))
27876 {
27877 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27878 output_asm_insn (templ, operands);
27879 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27880 output_asm_insn (templ, operands);
27881 }
27882 else
27883 {
27884 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27885 output_asm_insn (templ, operands);
27886 }
27887 return "";
27888 }
27889
27890 /* Output assembly for a WMMX tinsr instruction. */
27891 const char *
27892 arm_output_iwmmxt_tinsr (rtx *operands)
27893 {
27894 int mask = INTVAL (operands[3]);
27895 int i;
27896 char templ[50];
27897 int units = mode_nunits[GET_MODE (operands[0])];
27898 gcc_assert ((mask & (mask - 1)) == 0);
27899 for (i = 0; i < units; ++i)
27900 {
27901 if ((mask & 0x01) == 1)
27902 {
27903 break;
27904 }
27905 mask >>= 1;
27906 }
27907 gcc_assert (i < units);
27908 {
27909 switch (GET_MODE (operands[0]))
27910 {
27911 case E_V8QImode:
27912 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27913 break;
27914 case E_V4HImode:
27915 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27916 break;
27917 case E_V2SImode:
27918 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27919 break;
27920 default:
27921 gcc_unreachable ();
27922 break;
27923 }
27924 output_asm_insn (templ, operands);
27925 }
27926 return "";
27927 }
27928
27929 /* Output a Thumb-1 casesi dispatch sequence. */
27930 const char *
27931 thumb1_output_casesi (rtx *operands)
27932 {
27933 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27934
27935 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27936
27937 switch (GET_MODE(diff_vec))
27938 {
27939 case E_QImode:
27940 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27941 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27942 case E_HImode:
27943 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27944 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27945 case E_SImode:
27946 return "bl\t%___gnu_thumb1_case_si";
27947 default:
27948 gcc_unreachable ();
27949 }
27950 }
27951
27952 /* Output a Thumb-2 casesi instruction. */
27953 const char *
27954 thumb2_output_casesi (rtx *operands)
27955 {
27956 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27957
27958 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27959
27960 output_asm_insn ("cmp\t%0, %1", operands);
27961 output_asm_insn ("bhi\t%l3", operands);
27962 switch (GET_MODE(diff_vec))
27963 {
27964 case E_QImode:
27965 return "tbb\t[%|pc, %0]";
27966 case E_HImode:
27967 return "tbh\t[%|pc, %0, lsl #1]";
27968 case E_SImode:
27969 if (flag_pic)
27970 {
27971 output_asm_insn ("adr\t%4, %l2", operands);
27972 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27973 output_asm_insn ("add\t%4, %4, %5", operands);
27974 return "bx\t%4";
27975 }
27976 else
27977 {
27978 output_asm_insn ("adr\t%4, %l2", operands);
27979 return "ldr\t%|pc, [%4, %0, lsl #2]";
27980 }
27981 default:
27982 gcc_unreachable ();
27983 }
27984 }
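
/* Illustrative example (not part of the original source): for a QImode
   (byte-offset) dispatch table the sequence emitted above has the form

     cmp  r0, <bound>
     bhi  .Ldefault
     tbb  [pc, r0]

   with the table of case-label offsets following the tbb.  HImode tables
   use tbh instead, and SImode tables use an explicit adr/ldr sequence
   (plus an add when compiling with -fpic).  */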
27985
27986 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27987 per-core tuning structs. */
27988 static int
27989 arm_issue_rate (void)
27990 {
27991 return current_tune->issue_rate;
27992 }
27993
27994 /* Return how many instructions the scheduler should look ahead to choose
27995 the best one. */
27996 static int
27997 arm_first_cycle_multipass_dfa_lookahead (void)
27998 {
27999 int issue_rate = arm_issue_rate ();
28000
28001 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28002 }
28003
28004 /* Enable modeling of L2 auto-prefetcher. */
28005 static int
28006 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28007 {
28008 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28009 }
28010
28011 const char *
28012 arm_mangle_type (const_tree type)
28013 {
28014 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28015 has to be mangled as if it is in the "std" namespace. */
28016 if (TARGET_AAPCS_BASED
28017 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28018 return "St9__va_list";
28019
28020 /* Half-precision float. */
28021 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28022 return "Dh";
28023
28024 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28025 builtin type. */
28026 if (TYPE_NAME (type) != NULL)
28027 return arm_mangle_builtin_type (type);
28028
28029 /* Use the default mangling. */
28030 return NULL;
28031 }
28032
28033 /* Order of allocation of core registers for Thumb: this allocation is
28034 written over the corresponding initial entries of the array
28035 initialized with REG_ALLOC_ORDER. We allocate all low registers
28036 first. Saving and restoring a low register is usually cheaper than
28037 using a call-clobbered high register. */
28038
28039 static const int thumb_core_reg_alloc_order[] =
28040 {
28041 3, 2, 1, 0, 4, 5, 6, 7,
28042 12, 14, 8, 9, 10, 11
28043 };
28044
28045 /* Adjust register allocation order when compiling for Thumb. */
28046
28047 void
28048 arm_order_regs_for_local_alloc (void)
28049 {
28050 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28051 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28052 if (TARGET_THUMB)
28053 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28054 sizeof (thumb_core_reg_alloc_order));
28055 }
28056
28057 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28058
28059 bool
28060 arm_frame_pointer_required (void)
28061 {
28062 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28063 return true;
28064
28065 /* If the function receives nonlocal gotos, it needs to save the frame
28066 pointer in the nonlocal_goto_save_area object. */
28067 if (cfun->has_nonlocal_label)
28068 return true;
28069
28070 /* The frame pointer is required for non-leaf APCS frames. */
28071 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28072 return true;
28073
28074 /* If we are probing the stack in the prologue, we will have a faulting
28075 instruction prior to the stack adjustment and this requires a frame
28076 pointer if we want to catch the exception using the EABI unwinder. */
28077 if (!IS_INTERRUPT (arm_current_func_type ())
28078 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28079 || flag_stack_clash_protection)
28080 && arm_except_unwind_info (&global_options) == UI_TARGET
28081 && cfun->can_throw_non_call_exceptions)
28082 {
28083 HOST_WIDE_INT size = get_frame_size ();
28084
28085 /* That's irrelevant if there is no stack adjustment. */
28086 if (size <= 0)
28087 return false;
28088
28089 /* That's relevant only if there is a stack probe. */
28090 if (crtl->is_leaf && !cfun->calls_alloca)
28091 {
28092 /* We don't have the final size of the frame so adjust. */
28093 size += 32 * UNITS_PER_WORD;
28094 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28095 return true;
28096 }
28097 else
28098 return true;
28099 }
28100
28101 return false;
28102 }
28103
28104 /* Only Thumb-1 lacks support for conditional execution, so return true if
28105 the target is not Thumb-1. */
28106 static bool
28107 arm_have_conditional_execution (void)
28108 {
28109 return !TARGET_THUMB1;
28110 }
28111
28112 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28113 static HOST_WIDE_INT
28114 arm_vector_alignment (const_tree type)
28115 {
28116 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28117
28118 if (TARGET_AAPCS_BASED)
28119 align = MIN (align, 64);
28120
28121 return align;
28122 }
28123
28124 static void
28125 arm_autovectorize_vector_sizes (vector_sizes *sizes)
28126 {
28127 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28128 {
28129 sizes->safe_push (16);
28130 sizes->safe_push (8);
28131 }
28132 }
28133
28134 static bool
28135 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28136 {
28137 /* Vectors which aren't in packed structures will not be less aligned than
28138 the natural alignment of their element type, so this is safe. */
28139 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28140 return !is_packed;
28141
28142 return default_builtin_vector_alignment_reachable (type, is_packed);
28143 }
28144
28145 static bool
28146 arm_builtin_support_vector_misalignment (machine_mode mode,
28147 const_tree type, int misalignment,
28148 bool is_packed)
28149 {
28150 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28151 {
28152 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28153
28154 if (is_packed)
28155 return align == 1;
28156
28157 /* If the misalignment is unknown, we should be able to handle the access
28158 so long as it is not to a member of a packed data structure. */
28159 if (misalignment == -1)
28160 return true;
28161
28162 /* Return true if the misalignment is a multiple of the natural alignment
28163 of the vector's element type. This is probably always going to be
28164 true in practice, since we've already established that this isn't a
28165 packed access. */
28166 return ((misalignment % align) == 0);
28167 }
28168
28169 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28170 is_packed);
28171 }
28172
28173 static void
28174 arm_conditional_register_usage (void)
28175 {
28176 int regno;
28177
28178 if (TARGET_THUMB1 && optimize_size)
28179 {
28180 /* When optimizing for size on Thumb-1, it's better not
28181 to use the HI regs, because of the overhead of
28182 stacking them. */
28183 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28184 fixed_regs[regno] = call_used_regs[regno] = 1;
28185 }
28186
28187 /* The link register can be clobbered by any branch insn,
28188 but we have no way to track that at present, so mark
28189 it as unavailable. */
28190 if (TARGET_THUMB1)
28191 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28192
28193 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28194 {
28195 /* VFPv3 registers are disabled when earlier VFP
28196 versions are selected due to the definition of
28197 LAST_VFP_REGNUM. */
28198 for (regno = FIRST_VFP_REGNUM;
28199 regno <= LAST_VFP_REGNUM; ++ regno)
28200 {
28201 fixed_regs[regno] = 0;
28202 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28203 || regno >= FIRST_VFP_REGNUM + 32;
28204 }
28205 }
28206
28207 if (TARGET_REALLY_IWMMXT)
28208 {
28209 regno = FIRST_IWMMXT_GR_REGNUM;
28210 /* The 2002/10/09 revision of the XScale ABI has wCG0
28211 and wCG1 as call-preserved registers. The 2002/11/21
28212 revision changed this so that all wCG registers are
28213 scratch registers. */
28214 for (regno = FIRST_IWMMXT_GR_REGNUM;
28215 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28216 fixed_regs[regno] = 0;
28217 /* The XScale ABI has wR0 - wR9 as scratch registers,
28218 the rest as call-preserved registers. */
28219 for (regno = FIRST_IWMMXT_REGNUM;
28220 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28221 {
28222 fixed_regs[regno] = 0;
28223 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28224 }
28225 }
28226
28227 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28228 {
28229 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28230 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28231 }
28232 else if (TARGET_APCS_STACK)
28233 {
28234 fixed_regs[10] = 1;
28235 call_used_regs[10] = 1;
28236 }
28237 /* -mcaller-super-interworking reserves r11 for calls to
28238 _interwork_r11_call_via_rN(). Making the register global
28239 is an easy way of ensuring that it remains valid for all
28240 calls. */
28241 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28242 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28243 {
28244 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28245 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28246 if (TARGET_CALLER_INTERWORKING)
28247 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28248 }
28249 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28250 }
28251
28252 static reg_class_t
28253 arm_preferred_rename_class (reg_class_t rclass)
28254 {
28255 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28256 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
28257 so that code size can be reduced. */
28258 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28259 return LO_REGS;
28260 else
28261 return NO_REGS;
28262 }
28263
28264 /* Compute the attribute "length" of insn "*push_multi".
28265 So this function MUST be kept in sync with that insn pattern. */
28266 int
28267 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28268 {
28269 int i, regno, hi_reg;
28270 int num_saves = XVECLEN (parallel_op, 0);
28271
28272 /* ARM mode. */
28273 if (TARGET_ARM)
28274 return 4;
28275 /* Thumb1 mode. */
28276 if (TARGET_THUMB1)
28277 return 2;
28278
28279 /* Thumb2 mode. */
28280 regno = REGNO (first_op);
28281 /* For PUSH/STM in Thumb2 mode, we can use a 16-bit encoding if the register
28282 list fits in 8 bits. Normally this means all registers in the list must be
28283 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use a 32-bit
28284 encoding. There is one exception: for PUSH, LR (a HI_REG) can be used
28285 with the 16-bit encoding. */
28286 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28287 for (i = 1; i < num_saves && !hi_reg; i++)
28288 {
28289 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28290 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28291 }
28292
28293 if (!hi_reg)
28294 return 2;
28295 return 4;
28296 }
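
/* Worked examples (not part of the original source), Thumb-2:

     push {r0-r7, lr}   all LO_REGS plus LR, 16-bit encoding, length 2
     push {r4, r8}      r8 is a HI_REG other than LR, 32-bit, length 4

   In ARM state the length is always 4, in Thumb-1 always 2.  */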
28297
28298 /* Compute the attribute "length" of an insn. Currently, this function is used
28299 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28300 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28301 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28302 true if OPERANDS contains an insn which explicitly updates the base register. */
28303
28304 int
28305 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28306 {
28307 /* ARM mode. */
28308 if (TARGET_ARM)
28309 return 4;
28310 /* Thumb1 mode. */
28311 if (TARGET_THUMB1)
28312 return 2;
28313
28314 rtx parallel_op = operands[0];
28315 /* Start at the index of the last element of the PARALLEL. */
28316 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28317 /* The base register number. */
28318 unsigned regno = REGNO (operands[1]);
28319 /* Skip the return and write-back patterns.
28320 We only need the register pop patterns for the later analysis. */
28321 unsigned first_indx = 0;
28322 first_indx += return_pc ? 1 : 0;
28323 first_indx += write_back_p ? 1 : 0;
28324
28325 /* A pop operation can be done through LDM or POP. If the base register is SP
28326 and write-back is used, then an LDM is an alias of POP. */
28327 bool pop_p = (regno == SP_REGNUM && write_back_p);
28328 bool ldm_p = !pop_p;
28329
28330 /* Check base register for LDM. */
28331 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28332 return 4;
28333
28334 /* Check each register in the list. */
28335 for (; indx >= first_indx; indx--)
28336 {
28337 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28338 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28339 comment in arm_attr_length_push_multi. */
28340 if (REGNO_REG_CLASS (regno) == HI_REGS
28341 && (regno != PC_REGNUM || ldm_p))
28342 return 4;
28343 }
28344
28345 return 2;
28346 }
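
/* Worked examples (not part of the original source), Thumb-2:

     pop  {r4-r7, pc}       SP base with write-back, so the 16-bit POP
                            encoding may be used even with PC: length 2
     ldmia r8!, {r4, r5}    high base register forces the 32-bit
                            encoding: length 4  */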
28347
28348 /* Compute the number of instructions emitted by output_move_double. */
28349 int
28350 arm_count_output_move_double_insns (rtx *operands)
28351 {
28352 int count;
28353 rtx ops[2];
28354 /* output_move_double may modify the operands array, so call it
28355 here on a copy of the array. */
28356 ops[0] = operands[0];
28357 ops[1] = operands[1];
28358 output_move_double (ops, false, &count);
28359 return count;
28360 }
28361
28362 int
28363 vfp3_const_double_for_fract_bits (rtx operand)
28364 {
28365 REAL_VALUE_TYPE r0;
28366
28367 if (!CONST_DOUBLE_P (operand))
28368 return 0;
28369
28370 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28371 if (exact_real_inverse (DFmode, &r0)
28372 && !REAL_VALUE_NEGATIVE (r0))
28373 {
28374 if (exact_real_truncate (DFmode, &r0))
28375 {
28376 HOST_WIDE_INT value = real_to_integer (&r0);
28377 value = value & 0xffffffff;
28378 if ((value != 0) && ( (value & (value - 1)) == 0))
28379 {
28380 int ret = exact_log2 (value);
28381 gcc_assert (IN_RANGE (ret, 0, 31));
28382 return ret;
28383 }
28384 }
28385 }
28386 return 0;
28387 }
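
/* Illustrative standalone sketch (not part of the original source): the
   routine above accepts constants whose reciprocal is an exact power of
   two, e.g. 0.125 -> 1/0.125 == 8 == 2^3, giving 3 fraction bits.  A
   hypothetical C helper with the same intent:

     #include <math.h>

     static int fract_bits_of (double x)
     {
       int e;
       if (x <= 0.0)
         return 0;
       if (frexp (1.0 / x, &e) != 0.5)
         return 0;             // 1/x is not an exact power of two
       return (e - 1 >= 0 && e - 1 <= 31) ? e - 1 : 0;
     }
   */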
28388
28389 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28390 log2 is in [1, 32], return that log2. Otherwise return -1.
28391 This is used in the patterns for vcvt.s32.f32 floating-point to
28392 fixed-point conversions. */
28393
28394 int
28395 vfp3_const_double_for_bits (rtx x)
28396 {
28397 const REAL_VALUE_TYPE *r;
28398
28399 if (!CONST_DOUBLE_P (x))
28400 return -1;
28401
28402 r = CONST_DOUBLE_REAL_VALUE (x);
28403
28404 if (REAL_VALUE_NEGATIVE (*r)
28405 || REAL_VALUE_ISNAN (*r)
28406 || REAL_VALUE_ISINF (*r)
28407 || !real_isinteger (r, SFmode))
28408 return -1;
28409
28410 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28411
28412 /* The exact_log2 above will have returned -1 if this is
28413 not an exact log2. */
28414 if (!IN_RANGE (hwint, 1, 32))
28415 return -1;
28416
28417 return hwint;
28418 }
28419
28420 \f
28421 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28422
28423 static void
28424 arm_pre_atomic_barrier (enum memmodel model)
28425 {
28426 if (need_atomic_barrier_p (model, true))
28427 emit_insn (gen_memory_barrier ());
28428 }
28429
28430 static void
28431 arm_post_atomic_barrier (enum memmodel model)
28432 {
28433 if (need_atomic_barrier_p (model, false))
28434 emit_insn (gen_memory_barrier ());
28435 }
28436
28437 /* Emit the load-exclusive and store-exclusive instructions.
28438 Use acquire and release versions if necessary. */
28439
28440 static void
28441 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28442 {
28443 rtx (*gen) (rtx, rtx);
28444
28445 if (acq)
28446 {
28447 switch (mode)
28448 {
28449 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28450 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28451 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28452 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28453 default:
28454 gcc_unreachable ();
28455 }
28456 }
28457 else
28458 {
28459 switch (mode)
28460 {
28461 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28462 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28463 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28464 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28465 default:
28466 gcc_unreachable ();
28467 }
28468 }
28469
28470 emit_insn (gen (rval, mem));
28471 }
28472
28473 static void
28474 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28475 rtx mem, bool rel)
28476 {
28477 rtx (*gen) (rtx, rtx, rtx);
28478
28479 if (rel)
28480 {
28481 switch (mode)
28482 {
28483 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28484 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28485 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28486 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28487 default:
28488 gcc_unreachable ();
28489 }
28490 }
28491 else
28492 {
28493 switch (mode)
28494 {
28495 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28496 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28497 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28498 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28499 default:
28500 gcc_unreachable ();
28501 }
28502 }
28503
28504 emit_insn (gen (bval, rval, mem));
28505 }
28506
28507 /* Mark the previous jump instruction as unlikely. */
28508
28509 static void
28510 emit_unlikely_jump (rtx insn)
28511 {
28512 rtx_insn *jump = emit_jump_insn (insn);
28513 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28514 }
28515
28516 /* Expand a compare and swap pattern. */
28517
28518 void
28519 arm_expand_compare_and_swap (rtx operands[])
28520 {
28521 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28522 machine_mode mode;
28523 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28524
28525 bval = operands[0];
28526 rval = operands[1];
28527 mem = operands[2];
28528 oldval = operands[3];
28529 newval = operands[4];
28530 is_weak = operands[5];
28531 mod_s = operands[6];
28532 mod_f = operands[7];
28533 mode = GET_MODE (mem);
28534
28535 /* Normally the succ memory model must be stronger than fail, but in the
28536 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28537 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28538
28539 if (TARGET_HAVE_LDACQ
28540 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28541 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28542 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28543
28544 switch (mode)
28545 {
28546 case E_QImode:
28547 case E_HImode:
28548 /* For narrow modes, we're going to perform the comparison in SImode,
28549 so do the zero-extension now. */
28550 rval = gen_reg_rtx (SImode);
28551 oldval = convert_modes (SImode, mode, oldval, true);
28552 /* FALLTHRU */
28553
28554 case E_SImode:
28555 /* Force the value into a register if needed. We waited until after
28556 the zero-extension above to do this properly. */
28557 if (!arm_add_operand (oldval, SImode))
28558 oldval = force_reg (SImode, oldval);
28559 break;
28560
28561 case E_DImode:
28562 if (!cmpdi_operand (oldval, mode))
28563 oldval = force_reg (mode, oldval);
28564 break;
28565
28566 default:
28567 gcc_unreachable ();
28568 }
28569
28570 if (TARGET_THUMB1)
28571 {
28572 switch (mode)
28573 {
28574 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28575 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28576 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28577 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28578 default:
28579 gcc_unreachable ();
28580 }
28581 }
28582 else
28583 {
28584 switch (mode)
28585 {
28586 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28587 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28588 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28589 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28590 default:
28591 gcc_unreachable ();
28592 }
28593 }
28594
28595 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28596 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28597
28598 if (mode == QImode || mode == HImode)
28599 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28600
28601 /* In all cases, we arrange for success to be signaled by Z being set.
28602 This arrangement allows the boolean result to be used directly
28603 in a subsequent branch, post optimization. For Thumb-1 targets, the
28604 boolean negation of the result is also stored in bval because the Thumb-1
28605 backend lacks dependency tracking for the CC flag, flag-setting not
28606 being represented at the RTL level. */
28607 if (TARGET_THUMB1)
28608 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28609 else
28610 {
28611 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28612 emit_insn (gen_rtx_SET (bval, x));
28613 }
28614 }
28615
28616 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28617 another memory store between the load-exclusive and store-exclusive can
28618 reset the monitor from Exclusive to Open state. This means we must wait
28619 until after reload to split the pattern, lest we get a register spill in
28620 the middle of the atomic sequence. Success of the compare and swap is
28621 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28622 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28623 by the atomic_compare_and_swapmode standard pattern in operand 0). */
28624
28625 void
28626 arm_split_compare_and_swap (rtx operands[])
28627 {
28628 rtx rval, mem, oldval, newval, neg_bval;
28629 machine_mode mode;
28630 enum memmodel mod_s, mod_f;
28631 bool is_weak;
28632 rtx_code_label *label1, *label2;
28633 rtx x, cond;
28634
28635 rval = operands[1];
28636 mem = operands[2];
28637 oldval = operands[3];
28638 newval = operands[4];
28639 is_weak = (operands[5] != const0_rtx);
28640 mod_s = memmodel_from_int (INTVAL (operands[6]));
28641 mod_f = memmodel_from_int (INTVAL (operands[7]));
28642 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28643 mode = GET_MODE (mem);
28644
28645 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28646
28647 bool use_acquire = TARGET_HAVE_LDACQ
28648 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28649 || is_mm_release (mod_s));
28650
28651 bool use_release = TARGET_HAVE_LDACQ
28652 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28653 || is_mm_acquire (mod_s));
28654
28655 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28656 a full barrier is emitted after the store-release. */
28657 if (is_armv8_sync)
28658 use_acquire = false;
28659
28660 /* Checks whether a barrier is needed and emits one accordingly. */
28661 if (!(use_acquire || use_release))
28662 arm_pre_atomic_barrier (mod_s);
28663
28664 label1 = NULL;
28665 if (!is_weak)
28666 {
28667 label1 = gen_label_rtx ();
28668 emit_label (label1);
28669 }
28670 label2 = gen_label_rtx ();
28671
28672 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28673
28674 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28675 as required to communicate with arm_expand_compare_and_swap. */
28676 if (TARGET_32BIT)
28677 {
28678 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28679 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28680 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28681 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28682 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28683 }
28684 else
28685 {
28686 emit_move_insn (neg_bval, const1_rtx);
28687 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28688 if (thumb1_cmpneg_operand (oldval, SImode))
28689 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28690 label2, cond));
28691 else
28692 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28693 }
28694
28695 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28696
28697 /* Weak or strong, we want EQ to be true for success, so that we
28698 match the flags that we got from the compare above. */
28699 if (TARGET_32BIT)
28700 {
28701 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28702 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28703 emit_insn (gen_rtx_SET (cond, x));
28704 }
28705
28706 if (!is_weak)
28707 {
28708 /* Z is set to boolean value of !neg_bval, as required to communicate
28709 with arm_expand_compare_and_swap. */
28710 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28711 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28712 }
28713
28714 if (!is_mm_relaxed (mod_f))
28715 emit_label (label2);
28716
28717 /* Checks whether a barrier is needed and emits one accordingly. */
28718 if (is_armv8_sync
28719 || !(use_acquire || use_release))
28720 arm_post_atomic_barrier (mod_s);
28721
28722 if (is_mm_relaxed (mod_f))
28723 emit_label (label2);
28724 }
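
/* Illustrative only: a minimal C-level sketch, not part of the backend, of
   the semantics the splitter above implements.  A strong compare-and-swap
   is a weak one wrapped in a retry loop, which corresponds to the loop back
   to label1 emitted for the !is_weak case.  It uses the GCC __atomic
   builtins; the function name is made up for the example.  */

#include <stdbool.h>
#include <stdint.h>

static bool
example_strong_cas (uint32_t *mem, uint32_t *expected, uint32_t desired)
{
  for (;;)
    {
      uint32_t old = *expected;
      if (__atomic_compare_exchange_n (mem, expected, desired,
                                       /* weak */ true,
                                       __ATOMIC_SEQ_CST, __ATOMIC_RELAXED))
        return true;            /* The store-exclusive succeeded.  */
      if (*expected != old)
        return false;           /* Genuine value mismatch.  */
      /* Spurious failure (e.g. the exclusive monitor was reset between the
         load-exclusive and the store-exclusive): retry, just as the RTL
         branches back to label1.  */
    }
}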
28725
28726 /* Split an atomic operation pattern.  The operation is given by CODE and is
28727    one of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a
28728    nand operation).  The operation is performed on the content at MEM and on
28729    VALUE following the memory model MODEL_RTX.  The content at MEM before and
28730    after the operation is returned in OLD_OUT and NEW_OUT respectively, while
28731    the success of the operation is returned in COND.  Using a scratch register
28732    or an operand register for these determines what result is returned for
28733    that pattern.  */
28734
28735 void
28736 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28737 rtx value, rtx model_rtx, rtx cond)
28738 {
28739 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28740 machine_mode mode = GET_MODE (mem);
28741 machine_mode wmode = (mode == DImode ? DImode : SImode);
28742 rtx_code_label *label;
28743 bool all_low_regs, bind_old_new;
28744 rtx x;
28745
28746 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28747
28748 bool use_acquire = TARGET_HAVE_LDACQ
28749 && !(is_mm_relaxed (model) || is_mm_consume (model)
28750 || is_mm_release (model));
28751
28752 bool use_release = TARGET_HAVE_LDACQ
28753 && !(is_mm_relaxed (model) || is_mm_consume (model)
28754 || is_mm_acquire (model));
28755
28756 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28757 a full barrier is emitted after the store-release. */
28758 if (is_armv8_sync)
28759 use_acquire = false;
28760
28761 /* Checks whether a barrier is needed and emits one accordingly. */
28762 if (!(use_acquire || use_release))
28763 arm_pre_atomic_barrier (model);
28764
28765 label = gen_label_rtx ();
28766 emit_label (label);
28767
28768 if (new_out)
28769 new_out = gen_lowpart (wmode, new_out);
28770 if (old_out)
28771 old_out = gen_lowpart (wmode, old_out);
28772 else
28773 old_out = new_out;
28774 value = simplify_gen_subreg (wmode, value, mode, 0);
28775
28776 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28777
28778 /* Does the operation require destination and first operand to use the same
28779 register? This is decided by register constraints of relevant insn
28780 patterns in thumb1.md. */
28781 gcc_assert (!new_out || REG_P (new_out));
28782 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28783 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28784 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28785 bind_old_new =
28786 (TARGET_THUMB1
28787 && code != SET
28788 && code != MINUS
28789 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28790
28791 /* We want to return the old value while putting the result of the operation
28792 in the same register as the old value so copy the old value over to the
28793 destination register and use that register for the operation. */
28794 if (old_out && bind_old_new)
28795 {
28796 emit_move_insn (new_out, old_out);
28797 old_out = new_out;
28798 }
28799
28800 switch (code)
28801 {
28802 case SET:
28803 new_out = value;
28804 break;
28805
28806 case NOT:
28807 x = gen_rtx_AND (wmode, old_out, value);
28808 emit_insn (gen_rtx_SET (new_out, x));
28809 x = gen_rtx_NOT (wmode, new_out);
28810 emit_insn (gen_rtx_SET (new_out, x));
28811 break;
28812
28813 case MINUS:
28814 if (CONST_INT_P (value))
28815 {
28816 value = GEN_INT (-INTVAL (value));
28817 code = PLUS;
28818 }
28819 /* FALLTHRU */
28820
28821 case PLUS:
28822 if (mode == DImode)
28823 {
28824 /* DImode plus/minus need to clobber flags. */
28825 /* The adddi3 and subdi3 patterns are incorrectly written so that
28826 they require matching operands, even when we could easily support
28827 three operands. Thankfully, this can be fixed up post-splitting,
28828 as the individual add+adc patterns do accept three operands and
28829 post-reload cprop can make these moves go away. */
28830 emit_move_insn (new_out, old_out);
28831 if (code == PLUS)
28832 x = gen_adddi3 (new_out, new_out, value);
28833 else
28834 x = gen_subdi3 (new_out, new_out, value);
28835 emit_insn (x);
28836 break;
28837 }
28838 /* FALLTHRU */
28839
28840 default:
28841 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28842 emit_insn (gen_rtx_SET (new_out, x));
28843 break;
28844 }
28845
28846 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28847 use_release);
28848
28849 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28850 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28851
28852 /* Checks whether a barrier is needed and emits one accordingly. */
28853 if (is_armv8_sync
28854 || !(use_acquire || use_release))
28855 arm_post_atomic_barrier (model);
28856 }
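
/* Illustrative only: a C-level model, not part of the backend, of the
   load-exclusive/operate/store-exclusive retry loop generated above, shown
   for the NOT (nand) case where new = ~(old & value).  The store-exclusive
   is modelled by a weak compare-exchange; the function name is made up.  */

#include <stdint.h>

static uint32_t
example_atomic_nand (uint32_t *mem, uint32_t value)
{
  uint32_t old = __atomic_load_n (mem, __ATOMIC_RELAXED);
  uint32_t new_val;
  do
    new_val = ~(old & value);   /* Matches the AND-then-NOT sequence.  */
  while (!__atomic_compare_exchange_n (mem, &old, new_val,
                                       /* weak */ true,
                                       __ATOMIC_SEQ_CST, __ATOMIC_RELAXED));
  return old;                   /* OLD_OUT: the content at MEM before the op.  */
}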
28857 \f
28858 #define MAX_VECT_LEN 16
28859
28860 struct expand_vec_perm_d
28861 {
28862 rtx target, op0, op1;
28863 vec_perm_indices perm;
28864 machine_mode vmode;
28865 bool one_vector_p;
28866 bool testing_p;
28867 };
28868
28869 /* Generate a variable permutation. */
28870
28871 static void
28872 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28873 {
28874 machine_mode vmode = GET_MODE (target);
28875 bool one_vector_p = rtx_equal_p (op0, op1);
28876
28877 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28878 gcc_checking_assert (GET_MODE (op0) == vmode);
28879 gcc_checking_assert (GET_MODE (op1) == vmode);
28880 gcc_checking_assert (GET_MODE (sel) == vmode);
28881 gcc_checking_assert (TARGET_NEON);
28882
28883 if (one_vector_p)
28884 {
28885 if (vmode == V8QImode)
28886 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28887 else
28888 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28889 }
28890 else
28891 {
28892 rtx pair;
28893
28894 if (vmode == V8QImode)
28895 {
28896 pair = gen_reg_rtx (V16QImode);
28897 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28898 pair = gen_lowpart (TImode, pair);
28899 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28900 }
28901 else
28902 {
28903 pair = gen_reg_rtx (OImode);
28904 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28905 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28906 }
28907 }
28908 }
28909
28910 void
28911 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28912 {
28913 machine_mode vmode = GET_MODE (target);
28914 unsigned int nelt = GET_MODE_NUNITS (vmode);
28915 bool one_vector_p = rtx_equal_p (op0, op1);
28916 rtx mask;
28917
28918 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28919 numbering of elements for big-endian, we must reverse the order. */
28920 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28921
28922 /* The VTBL instruction does not use a modulo index, so we must take care
28923 of that ourselves. */
28924 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28925 mask = gen_const_vec_duplicate (vmode, mask);
28926 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28927
28928 arm_expand_vec_perm_1 (target, op0, op1, sel);
28929 }
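
/* Illustrative only: a scalar C model, not part of the backend, of the
   variable byte permutation expanded above.  Each selector element is
   masked to NELT - 1 (one input) or 2 * NELT - 1 (two inputs) before the
   table lookup, matching the AND emitted before arm_expand_vec_perm_1.  */

static void
example_vec_perm_bytes (unsigned char *target, const unsigned char *op0,
                        const unsigned char *op1, const unsigned char *sel,
                        unsigned int nelt, int one_vector_p)
{
  unsigned int mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (unsigned int i = 0; i < nelt; i++)
    {
      unsigned int idx = sel[i] & mask;
      target[i] = idx < nelt ? op0[idx] : op1[idx - nelt];
    }
}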
28930
28931 /* Map lane ordering between architectural lane order, and GCC lane order,
28932 taking into account ABI. See comment above output_move_neon for details. */
28933
28934 static int
28935 neon_endian_lane_map (machine_mode mode, int lane)
28936 {
28937 if (BYTES_BIG_ENDIAN)
28938 {
28939 int nelems = GET_MODE_NUNITS (mode);
28940 /* Reverse lane order. */
28941 lane = (nelems - 1 - lane);
28942 /* Reverse D register order, to match ABI. */
28943 if (GET_MODE_SIZE (mode) == 16)
28944 lane = lane ^ (nelems / 2);
28945 }
28946 return lane;
28947 }
28948
28949 /* Some permutations index into pairs of vectors, this is a helper function
28950 to map indexes into those pairs of vectors. */
28951
28952 static int
28953 neon_pair_endian_lane_map (machine_mode mode, int lane)
28954 {
28955 int nelem = GET_MODE_NUNITS (mode);
28956 if (BYTES_BIG_ENDIAN)
28957 lane =
28958 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28959 return lane;
28960 }
28961
28962 /* Generate or test for an insn that supports a constant permutation. */
28963
28964 /* Recognize patterns for the VUZP insns. */
28965
28966 static bool
28967 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28968 {
28969 unsigned int i, odd, mask, nelt = d->perm.length ();
28970 rtx out0, out1, in0, in1;
28971 rtx (*gen)(rtx, rtx, rtx, rtx);
28972 int first_elem;
28973 int swap_nelt;
28974
28975 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28976 return false;
28977
28978   /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28979      big-endian pattern on 64-bit vectors, so we correct for that.  */
28980 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28981 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28982
28983 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28984
28985 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28986 odd = 0;
28987 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28988 odd = 1;
28989 else
28990 return false;
28991 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28992
28993 for (i = 0; i < nelt; i++)
28994 {
28995 unsigned elt =
28996 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28997 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28998 return false;
28999 }
29000
29001 /* Success! */
29002 if (d->testing_p)
29003 return true;
29004
29005 switch (d->vmode)
29006 {
29007 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29008 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29009 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29010 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29011 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
29012 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
29013 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29014 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29015 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29016 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29017 default:
29018 gcc_unreachable ();
29019 }
29020
29021 in0 = d->op0;
29022 in1 = d->op1;
29023 if (swap_nelt != 0)
29024 std::swap (in0, in1);
29025
29026 out0 = d->target;
29027 out1 = gen_reg_rtx (d->vmode);
29028 if (odd)
29029 std::swap (out0, out1);
29030
29031 emit_insn (gen (out0, in0, in1, out1));
29032 return true;
29033 }
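
/* Illustrative only: a scalar, little-endian model, not part of the
   backend, of the selection matched above.  VUZP picks the even (ODD == 0)
   or odd (ODD == 1) numbered elements of the concatenation of the two
   inputs.  */

static void
example_vuzp_select (unsigned int *out, const unsigned int *in0,
                     const unsigned int *in1, unsigned int nelt,
                     unsigned int odd)
{
  for (unsigned int i = 0; i < nelt; i++)
    {
      unsigned int src = 2 * i + odd;   /* Index into in0 followed by in1.  */
      out[i] = src < nelt ? in0[src] : in1[src - nelt];
    }
}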
29034
29035 /* Recognize patterns for the VZIP insns. */
29036
29037 static bool
29038 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29039 {
29040 unsigned int i, high, mask, nelt = d->perm.length ();
29041 rtx out0, out1, in0, in1;
29042 rtx (*gen)(rtx, rtx, rtx, rtx);
29043 int first_elem;
29044 bool is_swapped;
29045
29046 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29047 return false;
29048
29049 is_swapped = BYTES_BIG_ENDIAN;
29050
29051 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29052
29053 high = nelt / 2;
29054 if (first_elem == neon_endian_lane_map (d->vmode, high))
29055 ;
29056 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29057 high = 0;
29058 else
29059 return false;
29060 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29061
29062 for (i = 0; i < nelt / 2; i++)
29063 {
29064 unsigned elt =
29065 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29066 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29067 != elt)
29068 return false;
29069 elt =
29070 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29071 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29072 != elt)
29073 return false;
29074 }
29075
29076 /* Success! */
29077 if (d->testing_p)
29078 return true;
29079
29080 switch (d->vmode)
29081 {
29082 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29083 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29084 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29085 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29086 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
29087 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
29088 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
29089 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
29090 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29091 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29092 default:
29093 gcc_unreachable ();
29094 }
29095
29096 in0 = d->op0;
29097 in1 = d->op1;
29098 if (is_swapped)
29099 std::swap (in0, in1);
29100
29101 out0 = d->target;
29102 out1 = gen_reg_rtx (d->vmode);
29103 if (high)
29104 std::swap (out0, out1);
29105
29106 emit_insn (gen (out0, in0, in1, out1));
29107 return true;
29108 }
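
/* Illustrative only: a scalar, little-endian model, not part of the
   backend, of the selection matched above.  VZIP interleaves the low
   (HIGH == 0) or high (HIGH == NELT / 2) halves of the two inputs.  */

static void
example_vzip_select (unsigned int *out, const unsigned int *in0,
                     const unsigned int *in1, unsigned int nelt,
                     unsigned int high)
{
  for (unsigned int i = 0; i < nelt / 2; i++)
    {
      out[2 * i] = in0[i + high];
      out[2 * i + 1] = in1[i + high];
    }
}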
29109
29110 /* Recognize patterns for the VREV insns. */
29111
29112 static bool
29113 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29114 {
29115 unsigned int i, j, diff, nelt = d->perm.length ();
29116 rtx (*gen)(rtx, rtx);
29117
29118 if (!d->one_vector_p)
29119 return false;
29120
29121 diff = d->perm[0];
29122 switch (diff)
29123 {
29124 case 7:
29125 switch (d->vmode)
29126 {
29127 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29128 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29129 default:
29130 return false;
29131 }
29132 break;
29133 case 3:
29134 switch (d->vmode)
29135 {
29136 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29137 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29138 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29139 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29140 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29141 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29142 default:
29143 return false;
29144 }
29145 break;
29146 case 1:
29147 switch (d->vmode)
29148 {
29149 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29150 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29151 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29152 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29153 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29154 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29155 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29156 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29157 default:
29158 return false;
29159 }
29160 break;
29161 default:
29162 return false;
29163 }
29164
29165 for (i = 0; i < nelt ; i += diff + 1)
29166 for (j = 0; j <= diff; j += 1)
29167 {
29168 	/* This is guaranteed to be true because diff is
29169 	   7, 3 or 1, so we always have enough elements left
29170 	   to generate this.  Getting a vector mask with a
29171 	   value of diff other than these implies that
29172 	   something has gone wrong by the time we get here.  */
29173 gcc_assert (i + j < nelt);
29174 if (d->perm[i + j] != i + diff - j)
29175 return false;
29176 }
29177
29178 /* Success! */
29179 if (d->testing_p)
29180 return true;
29181
29182 emit_insn (gen (d->target, d->op0));
29183 return true;
29184 }
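
/* Illustrative only: a scalar model, not part of the backend, of the
   reversal matched above.  VREV reverses the elements within consecutive
   groups of DIFF + 1 elements, DIFF being 7, 3 or 1 as checked above.  */

static void
example_vrev_select (unsigned char *out, const unsigned char *in,
                     unsigned int nelt, unsigned int diff)
{
  for (unsigned int i = 0; i < nelt; i += diff + 1)
    for (unsigned int j = 0; j <= diff; j++)
      out[i + j] = in[i + diff - j];
}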
29185
29186 /* Recognize patterns for the VTRN insns. */
29187
29188 static bool
29189 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29190 {
29191 unsigned int i, odd, mask, nelt = d->perm.length ();
29192 rtx out0, out1, in0, in1;
29193 rtx (*gen)(rtx, rtx, rtx, rtx);
29194
29195 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29196 return false;
29197
29198 /* Note that these are little-endian tests. Adjust for big-endian later. */
29199 if (d->perm[0] == 0)
29200 odd = 0;
29201 else if (d->perm[0] == 1)
29202 odd = 1;
29203 else
29204 return false;
29205 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29206
29207 for (i = 0; i < nelt; i += 2)
29208 {
29209 if (d->perm[i] != i + odd)
29210 return false;
29211 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29212 return false;
29213 }
29214
29215 /* Success! */
29216 if (d->testing_p)
29217 return true;
29218
29219 switch (d->vmode)
29220 {
29221 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29222 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29223 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29224 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29225 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29226 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29227 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29228 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29229 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29230 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29231 default:
29232 gcc_unreachable ();
29233 }
29234
29235 in0 = d->op0;
29236 in1 = d->op1;
29237 if (BYTES_BIG_ENDIAN)
29238 {
29239 std::swap (in0, in1);
29240 odd = !odd;
29241 }
29242
29243 out0 = d->target;
29244 out1 = gen_reg_rtx (d->vmode);
29245 if (odd)
29246 std::swap (out0, out1);
29247
29248 emit_insn (gen (out0, in0, in1, out1));
29249 return true;
29250 }
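
/* Illustrative only: a scalar, little-endian model, not part of the
   backend, of the selection matched above for two distinct inputs.  VTRN
   transposes the even (ODD == 0) or odd (ODD == 1) lane of each pair
   between the two inputs.  */

static void
example_vtrn_select (unsigned int *out, const unsigned int *in0,
                     const unsigned int *in1, unsigned int nelt,
                     unsigned int odd)
{
  for (unsigned int i = 0; i < nelt; i += 2)
    {
      out[i] = in0[i + odd];
      out[i + 1] = in1[i + odd];
    }
}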
29251
29252 /* Recognize patterns for the VEXT insns. */
29253
29254 static bool
29255 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29256 {
29257 unsigned int i, nelt = d->perm.length ();
29258 rtx (*gen) (rtx, rtx, rtx, rtx);
29259 rtx offset;
29260
29261 unsigned int location;
29262
29263 unsigned int next = d->perm[0] + 1;
29264
29265 /* TODO: Handle GCC's numbering of elements for big-endian. */
29266 if (BYTES_BIG_ENDIAN)
29267 return false;
29268
29269 /* Check if the extracted indexes are increasing by one. */
29270 for (i = 1; i < nelt; next++, i++)
29271 {
29272 /* If we hit the most significant element of the 2nd vector in
29273 the previous iteration, no need to test further. */
29274 if (next == 2 * nelt)
29275 return false;
29276
29277 /* If we are operating on only one vector: it could be a
29278 rotation. If there are only two elements of size < 64, let
29279 arm_evpc_neon_vrev catch it. */
29280 if (d->one_vector_p && (next == nelt))
29281 {
29282 if ((nelt == 2) && (d->vmode != V2DImode))
29283 return false;
29284 else
29285 next = 0;
29286 }
29287
29288 if (d->perm[i] != next)
29289 return false;
29290 }
29291
29292 location = d->perm[0];
29293
29294 switch (d->vmode)
29295 {
29296 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29297 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29298 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29299 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29300 case E_V2SImode: gen = gen_neon_vextv2si; break;
29301 case E_V4SImode: gen = gen_neon_vextv4si; break;
29302 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29303 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29304 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29305 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29306 case E_V2DImode: gen = gen_neon_vextv2di; break;
29307 default:
29308 return false;
29309 }
29310
29311 /* Success! */
29312 if (d->testing_p)
29313 return true;
29314
29315 offset = GEN_INT (location);
29316 emit_insn (gen (d->target, d->op0, d->op1, offset));
29317 return true;
29318 }
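
/* Illustrative only: a scalar, little-endian model, not part of the
   backend, of the selection matched above for two distinct inputs.  VEXT
   extracts a window of NELT consecutive elements starting at LOCATION
   from the concatenation of the two inputs.  */

static void
example_vext_select (unsigned int *out, const unsigned int *in0,
                     const unsigned int *in1, unsigned int nelt,
                     unsigned int location)
{
  for (unsigned int i = 0; i < nelt; i++)
    {
      unsigned int src = location + i;  /* Always below 2 * NELT here.  */
      out[i] = src < nelt ? in0[src] : in1[src - nelt];
    }
}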
29319
29320 /* The NEON VTBL instruction is a fully variable permutation that's even
29321 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29322 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29323 can do slightly better by expanding this as a constant where we don't
29324 have to apply a mask. */
29325
29326 static bool
29327 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29328 {
29329 rtx rperm[MAX_VECT_LEN], sel;
29330 machine_mode vmode = d->vmode;
29331 unsigned int i, nelt = d->perm.length ();
29332
29333 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29334 numbering of elements for big-endian, we must reverse the order. */
29335 if (BYTES_BIG_ENDIAN)
29336 return false;
29337
29338 if (d->testing_p)
29339 return true;
29340
29341   /* Generic code will try constant permutation twice: once with the
29342      original mode and again with the elements lowered to QImode.
29343      So wait, and don't do the selector expansion ourselves.  */
29344 if (vmode != V8QImode && vmode != V16QImode)
29345 return false;
29346
29347 for (i = 0; i < nelt; ++i)
29348 rperm[i] = GEN_INT (d->perm[i]);
29349 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29350 sel = force_reg (vmode, sel);
29351
29352 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29353 return true;
29354 }
29355
29356 static bool
29357 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29358 {
29359 /* Check if the input mask matches vext before reordering the
29360 operands. */
29361 if (TARGET_NEON)
29362 if (arm_evpc_neon_vext (d))
29363 return true;
29364
29365 /* The pattern matching functions above are written to look for a small
29366 number to begin the sequence (0, 1, N/2). If we begin with an index
29367 from the second operand, we can swap the operands. */
29368 unsigned int nelt = d->perm.length ();
29369 if (d->perm[0] >= nelt)
29370 {
29371 d->perm.rotate_inputs (1);
29372 std::swap (d->op0, d->op1);
29373 }
29374
29375 if (TARGET_NEON)
29376 {
29377 if (arm_evpc_neon_vuzp (d))
29378 return true;
29379 if (arm_evpc_neon_vzip (d))
29380 return true;
29381 if (arm_evpc_neon_vrev (d))
29382 return true;
29383 if (arm_evpc_neon_vtrn (d))
29384 return true;
29385 return arm_evpc_neon_vtbl (d);
29386 }
29387 return false;
29388 }
29389
29390 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29391
29392 static bool
29393 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29394 const vec_perm_indices &sel)
29395 {
29396 struct expand_vec_perm_d d;
29397 int i, nelt, which;
29398
29399 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29400 return false;
29401
29402 d.target = target;
29403 d.op0 = op0;
29404 d.op1 = op1;
29405
29406 d.vmode = vmode;
29407 gcc_assert (VECTOR_MODE_P (d.vmode));
29408 d.testing_p = !target;
29409
29410 nelt = GET_MODE_NUNITS (d.vmode);
29411 for (i = which = 0; i < nelt; ++i)
29412 {
29413 int ei = sel[i] & (2 * nelt - 1);
29414 which |= (ei < nelt ? 1 : 2);
29415 }
29416
29417 switch (which)
29418 {
29419 default:
29420 gcc_unreachable();
29421
29422 case 3:
29423 d.one_vector_p = false;
29424 if (d.testing_p || !rtx_equal_p (op0, op1))
29425 break;
29426
29427 /* The elements of PERM do not suggest that only the first operand
29428 is used, but both operands are identical. Allow easier matching
29429 of the permutation by folding the permutation into the single
29430 input vector. */
29431 /* FALLTHRU */
29432 case 2:
29433 d.op0 = op1;
29434 d.one_vector_p = true;
29435 break;
29436
29437 case 1:
29438 d.op1 = op0;
29439 d.one_vector_p = true;
29440 break;
29441 }
29442
29443 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29444
29445 if (!d.testing_p)
29446 return arm_expand_vec_perm_const_1 (&d);
29447
29448 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29449 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29450 if (!d.one_vector_p)
29451 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29452
29453 start_sequence ();
29454 bool ret = arm_expand_vec_perm_const_1 (&d);
29455 end_sequence ();
29456
29457 return ret;
29458 }
29459
29460 bool
29461 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29462 {
29463   /* For soft-float targets, all auto-increment forms are OK if we
29464      have LDRD or the mode fits in a single word.  */
29465 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29466 return true;
29467
29468 switch (code)
29469 {
29470     /* Post-increment and pre-decrement are supported for all
29471        instruction forms except for vector forms.  */
29472 case ARM_POST_INC:
29473 case ARM_PRE_DEC:
29474 if (VECTOR_MODE_P (mode))
29475 {
29476 if (code != ARM_PRE_DEC)
29477 return true;
29478 else
29479 return false;
29480 }
29481
29482 return true;
29483
29484 case ARM_POST_DEC:
29485 case ARM_PRE_INC:
29486       /* Without LDRD, and with a mode size greater than the
29487          word size, there is no point in auto-incrementing
29488          because ldm and stm do not have these forms.  */
29489 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29490 return false;
29491
29492 /* Vector and floating point modes do not support
29493 these auto increment forms. */
29494 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29495 return false;
29496
29497 return true;
29498
29499 default:
29500 return false;
29501
29502 }
29503
29504 return false;
29505 }
29506
29507 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29508    on ARM, since we know that shifts by negative amounts are no-ops.
29509 Additionally, the default expansion code is not available or suitable
29510 for post-reload insn splits (this can occur when the register allocator
29511 chooses not to do a shift in NEON).
29512
29513 This function is used in both initial expand and post-reload splits, and
29514 handles all kinds of 64-bit shifts.
29515
29516 Input requirements:
29517 - It is safe for the input and output to be the same register, but
29518 early-clobber rules apply for the shift amount and scratch registers.
29519 - Shift by register requires both scratch registers. In all other cases
29520 the scratch registers may be NULL.
29521 - Ashiftrt by a register also clobbers the CC register. */
29522 void
29523 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29524 rtx amount, rtx scratch1, rtx scratch2)
29525 {
29526 rtx out_high = gen_highpart (SImode, out);
29527 rtx out_low = gen_lowpart (SImode, out);
29528 rtx in_high = gen_highpart (SImode, in);
29529 rtx in_low = gen_lowpart (SImode, in);
29530
29531 /* Terminology:
29532 in = the register pair containing the input value.
29533 out = the destination register pair.
29534 up = the high- or low-part of each pair.
29535 down = the opposite part to "up".
29536 In a shift, we can consider bits to shift from "up"-stream to
29537 "down"-stream, so in a left-shift "up" is the low-part and "down"
29538 is the high-part of each register pair. */
29539
29540 rtx out_up = code == ASHIFT ? out_low : out_high;
29541 rtx out_down = code == ASHIFT ? out_high : out_low;
29542 rtx in_up = code == ASHIFT ? in_low : in_high;
29543 rtx in_down = code == ASHIFT ? in_high : in_low;
29544
29545 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29546 gcc_assert (out
29547 && (REG_P (out) || GET_CODE (out) == SUBREG)
29548 && GET_MODE (out) == DImode);
29549 gcc_assert (in
29550 && (REG_P (in) || GET_CODE (in) == SUBREG)
29551 && GET_MODE (in) == DImode);
29552 gcc_assert (amount
29553 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29554 && GET_MODE (amount) == SImode)
29555 || CONST_INT_P (amount)));
29556 gcc_assert (scratch1 == NULL
29557 || (GET_CODE (scratch1) == SCRATCH)
29558 || (GET_MODE (scratch1) == SImode
29559 && REG_P (scratch1)));
29560 gcc_assert (scratch2 == NULL
29561 || (GET_CODE (scratch2) == SCRATCH)
29562 || (GET_MODE (scratch2) == SImode
29563 && REG_P (scratch2)));
29564 gcc_assert (!REG_P (out) || !REG_P (amount)
29565 || !HARD_REGISTER_P (out)
29566 || (REGNO (out) != REGNO (amount)
29567 && REGNO (out) + 1 != REGNO (amount)));
29568
29569 /* Macros to make following code more readable. */
29570 #define SUB_32(DEST,SRC) \
29571 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29572 #define RSB_32(DEST,SRC) \
29573 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29574 #define SUB_S_32(DEST,SRC) \
29575 gen_addsi3_compare0 ((DEST), (SRC), \
29576 GEN_INT (-32))
29577 #define SET(DEST,SRC) \
29578 gen_rtx_SET ((DEST), (SRC))
29579 #define SHIFT(CODE,SRC,AMOUNT) \
29580 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29581 #define LSHIFT(CODE,SRC,AMOUNT) \
29582 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29583 SImode, (SRC), (AMOUNT))
29584 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29585 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29586 SImode, (SRC), (AMOUNT))
29587 #define ORR(A,B) \
29588 gen_rtx_IOR (SImode, (A), (B))
29589 #define BRANCH(COND,LABEL) \
29590 gen_arm_cond_branch ((LABEL), \
29591 gen_rtx_ ## COND (CCmode, cc_reg, \
29592 const0_rtx), \
29593 cc_reg)
29594
29595 /* Shifts by register and shifts by constant are handled separately. */
29596 if (CONST_INT_P (amount))
29597 {
29598 /* We have a shift-by-constant. */
29599
29600 /* First, handle out-of-range shift amounts.
29601 In both cases we try to match the result an ARM instruction in a
29602 shift-by-register would give. This helps reduce execution
29603 differences between optimization levels, but it won't stop other
29604 	 parts of the compiler doing different things.  This is "undefined
29605 	 behavior", in any case.  */
29606 if (INTVAL (amount) <= 0)
29607 emit_insn (gen_movdi (out, in));
29608 else if (INTVAL (amount) >= 64)
29609 {
29610 if (code == ASHIFTRT)
29611 {
29612 rtx const31_rtx = GEN_INT (31);
29613 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29614 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29615 }
29616 else
29617 emit_insn (gen_movdi (out, const0_rtx));
29618 }
29619
29620 /* Now handle valid shifts. */
29621 else if (INTVAL (amount) < 32)
29622 {
29623 /* Shifts by a constant less than 32. */
29624 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29625
29626 /* Clearing the out register in DImode first avoids lots
29627 of spilling and results in less stack usage.
29628 Later this redundant insn is completely removed.
29629 Do that only if "in" and "out" are different registers. */
29630 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29631 emit_insn (SET (out, const0_rtx));
29632 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29633 emit_insn (SET (out_down,
29634 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29635 out_down)));
29636 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29637 }
29638 else
29639 {
29640 /* Shifts by a constant greater than 31. */
29641 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29642
29643 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29644 emit_insn (SET (out, const0_rtx));
29645 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29646 if (code == ASHIFTRT)
29647 emit_insn (gen_ashrsi3 (out_up, in_up,
29648 GEN_INT (31)));
29649 else
29650 emit_insn (SET (out_up, const0_rtx));
29651 }
29652 }
29653 else
29654 {
29655 /* We have a shift-by-register. */
29656 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29657
29658 /* This alternative requires the scratch registers. */
29659 gcc_assert (scratch1 && REG_P (scratch1));
29660 gcc_assert (scratch2 && REG_P (scratch2));
29661
29662 /* We will need the values "amount-32" and "32-amount" later.
29663 Swapping them around now allows the later code to be more general. */
29664 switch (code)
29665 {
29666 case ASHIFT:
29667 emit_insn (SUB_32 (scratch1, amount));
29668 emit_insn (RSB_32 (scratch2, amount));
29669 break;
29670 case ASHIFTRT:
29671 emit_insn (RSB_32 (scratch1, amount));
29672 /* Also set CC = amount > 32. */
29673 emit_insn (SUB_S_32 (scratch2, amount));
29674 break;
29675 case LSHIFTRT:
29676 emit_insn (RSB_32 (scratch1, amount));
29677 emit_insn (SUB_32 (scratch2, amount));
29678 break;
29679 default:
29680 gcc_unreachable ();
29681 }
29682
29683 /* Emit code like this:
29684
29685 arithmetic-left:
29686 out_down = in_down << amount;
29687 out_down = (in_up << (amount - 32)) | out_down;
29688 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29689 out_up = in_up << amount;
29690
29691 arithmetic-right:
29692 out_down = in_down >> amount;
29693 out_down = (in_up << (32 - amount)) | out_down;
29694 if (amount < 32)
29695 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29696 	    out_up = in_up >> amount;
29697
29698 logical-right:
29699 out_down = in_down >> amount;
29700 out_down = (in_up << (32 - amount)) | out_down;
29701 if (amount < 32)
29702 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29703 	    out_up = (unsigned)in_up >> amount;
29704
29705 The ARM and Thumb2 variants are the same but implemented slightly
29706 differently. If this were only called during expand we could just
29707 use the Thumb2 case and let combine do the right thing, but this
29708 can also be called from post-reload splitters. */
29709
29710 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29711
29712 if (!TARGET_THUMB2)
29713 {
29714 /* Emit code for ARM mode. */
29715 emit_insn (SET (out_down,
29716 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29717 if (code == ASHIFTRT)
29718 {
29719 rtx_code_label *done_label = gen_label_rtx ();
29720 emit_jump_insn (BRANCH (LT, done_label));
29721 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29722 out_down)));
29723 emit_label (done_label);
29724 }
29725 else
29726 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29727 out_down)));
29728 }
29729 else
29730 {
29731 /* Emit code for Thumb2 mode.
29732 Thumb2 can't do shift and or in one insn. */
29733 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29734 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29735
29736 if (code == ASHIFTRT)
29737 {
29738 rtx_code_label *done_label = gen_label_rtx ();
29739 emit_jump_insn (BRANCH (LT, done_label));
29740 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29741 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29742 emit_label (done_label);
29743 }
29744 else
29745 {
29746 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29747 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29748 }
29749 }
29750
29751 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29752 }
29753
29754 #undef SUB_32
29755 #undef RSB_32
29756 #undef SUB_S_32
29757 #undef SET
29758 #undef SHIFT
29759 #undef LSHIFT
29760 #undef REV_LSHIFT
29761 #undef ORR
29762 #undef BRANCH
29763 }
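
/* Illustrative only: a plain C model, not part of the backend, of the
   decomposition above for a 64-bit logical right shift built from 32-bit
   halves.  C shifts by 32 or more are undefined, so the cases are guarded
   explicitly here; the RTL sequence instead relies on the behaviour of
   ARM register-specified shifts.  */

#include <stdint.h>

static uint64_t
example_lshr64 (uint32_t in_low, uint32_t in_high, unsigned int amount)
{
  uint32_t out_low, out_high;

  if (amount == 0)
    {
      out_low = in_low;
      out_high = in_high;
    }
  else if (amount < 32)
    {
      out_low = (in_low >> amount) | (in_high << (32 - amount));
      out_high = in_high >> amount;
    }
  else                          /* amount in [32, 63].  */
    {
      out_low = in_high >> (amount - 32);
      out_high = 0;
    }
  return ((uint64_t) out_high << 32) | out_low;
}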
29764
29765 /* Returns true if the pattern is a valid symbolic address, which is either a
29766 symbol_ref or (symbol_ref + addend).
29767
29768 According to the ARM ELF ABI, the initial addend of REL-type relocations
29769 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29770 literal field of the instruction as a 16-bit signed value in the range
29771 -32768 <= A < 32768. */
29772
29773 bool
29774 arm_valid_symbolic_address_p (rtx addr)
29775 {
29776 rtx xop0, xop1 = NULL_RTX;
29777 rtx tmp = addr;
29778
29779 if (target_word_relocations)
29780 return false;
29781
29782 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29783 return true;
29784
29785 /* (const (plus: symbol_ref const_int)) */
29786 if (GET_CODE (addr) == CONST)
29787 tmp = XEXP (addr, 0);
29788
29789 if (GET_CODE (tmp) == PLUS)
29790 {
29791 xop0 = XEXP (tmp, 0);
29792 xop1 = XEXP (tmp, 1);
29793
29794 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29795 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29796 }
29797
29798 return false;
29799 }
29800
29801 /* Returns true if this is a valid comparison operation, and puts
29802    the operands into a form that is valid for it.  */
29803 bool
29804 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29805 {
29806 enum rtx_code code = GET_CODE (*comparison);
29807 int code_int;
29808 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29809 ? GET_MODE (*op2) : GET_MODE (*op1);
29810
29811 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29812
29813 if (code == UNEQ || code == LTGT)
29814 return false;
29815
29816 code_int = (int)code;
29817 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29818 PUT_CODE (*comparison, (enum rtx_code)code_int);
29819
29820 switch (mode)
29821 {
29822 case E_SImode:
29823 if (!arm_add_operand (*op1, mode))
29824 *op1 = force_reg (mode, *op1);
29825 if (!arm_add_operand (*op2, mode))
29826 *op2 = force_reg (mode, *op2);
29827 return true;
29828
29829 case E_DImode:
29830 if (!cmpdi_operand (*op1, mode))
29831 *op1 = force_reg (mode, *op1);
29832 if (!cmpdi_operand (*op2, mode))
29833 *op2 = force_reg (mode, *op2);
29834 return true;
29835
29836 case E_HFmode:
29837 if (!TARGET_VFP_FP16INST)
29838 break;
29839 /* FP16 comparisons are done in SF mode. */
29840 mode = SFmode;
29841 *op1 = convert_to_mode (mode, *op1, 1);
29842 *op2 = convert_to_mode (mode, *op2, 1);
29843 /* Fall through. */
29844 case E_SFmode:
29845 case E_DFmode:
29846 if (!vfp_compare_operand (*op1, mode))
29847 *op1 = force_reg (mode, *op1);
29848 if (!vfp_compare_operand (*op2, mode))
29849 *op2 = force_reg (mode, *op2);
29850 return true;
29851 default:
29852 break;
29853 }
29854
29855 return false;
29856
29857 }
29858
29859 /* Maximum number of instructions to use when setting a block of memory.  */
29860 static int
29861 arm_block_set_max_insns (void)
29862 {
29863 if (optimize_function_for_size_p (cfun))
29864 return 4;
29865 else
29866 return current_tune->max_insns_inline_memset;
29867 }
29868
29869 /* Return TRUE if it's profitable to set a block of memory for the
29870    non-vectorized case.  VAL is the value to set the memory
29871 with. LENGTH is the number of bytes to set. ALIGN is the
29872 alignment of the destination memory in bytes. UNALIGNED_P
29873 is TRUE if we can only set the memory with instructions
29874 meeting alignment requirements. USE_STRD_P is TRUE if we
29875 can use strd to set the memory. */
29876 static bool
29877 arm_block_set_non_vect_profit_p (rtx val,
29878 unsigned HOST_WIDE_INT length,
29879 unsigned HOST_WIDE_INT align,
29880 bool unaligned_p, bool use_strd_p)
29881 {
29882 int num = 0;
29883   /* For a leftover of 0-7 bytes, we can set the memory block using
29884      strb/strh/str with the minimum number of instructions.  */
29885 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29886
29887 if (unaligned_p)
29888 {
29889 num = arm_const_inline_cost (SET, val);
29890 num += length / align + length % align;
29891 }
29892 else if (use_strd_p)
29893 {
29894 num = arm_const_double_inline_cost (val);
29895 num += (length >> 3) + leftover[length & 7];
29896 }
29897 else
29898 {
29899 num = arm_const_inline_cost (SET, val);
29900 num += (length >> 2) + leftover[length & 3];
29901 }
29902
29903 /* We may be able to combine last pair STRH/STRB into a single STR
29904 by shifting one byte back. */
29905 if (unaligned_access && length > 3 && (length & 3) == 3)
29906 num--;
29907
29908 return (num <= arm_block_set_max_insns ());
29909 }
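
/* Illustrative only: a minimal sketch, not backend code, of the store
   count used above for the aligned word-store (non-strd) case: one word
   store per 4 bytes plus the strb/strh/str combination covering the 0-3
   leftover bytes, before the constant-loading cost is added.  */

static int
example_word_store_count (unsigned int length)
{
  static const int leftover[4] = {0, 1, 1, 2};
  return (length >> 2) + leftover[length & 3];
}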
29910
29911 /* Return TRUE if it's profitable to set a block of memory for the
29912    vectorized case.  LENGTH is the number of bytes to set.
29913 ALIGN is the alignment of destination memory in bytes.
29914 MODE is the vector mode used to set the memory. */
29915 static bool
29916 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29917 unsigned HOST_WIDE_INT align,
29918 machine_mode mode)
29919 {
29920 int num;
29921 bool unaligned_p = ((align & 3) != 0);
29922 unsigned int nelt = GET_MODE_NUNITS (mode);
29923
29924 /* Instruction loading constant value. */
29925 num = 1;
29926 /* Instructions storing the memory. */
29927 num += (length + nelt - 1) / nelt;
29928   /* Instructions adjusting the address expression.  We only need to
29929      adjust the address expression if it's 4-byte aligned and the leftover
29930      bytes can only be stored by a misaligned store instruction.  */
29931 if (!unaligned_p && (length & 3) != 0)
29932 num++;
29933
29934 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29935 if (!unaligned_p && mode == V16QImode)
29936 num--;
29937
29938 return (num <= arm_block_set_max_insns ());
29939 }
29940
29941 /* Set a block of memory using vectorization instructions for the
29942 unaligned case. We fill the first LENGTH bytes of the memory
29943 area starting from DSTBASE with byte constant VALUE. ALIGN is
29944 the alignment requirement of memory. Return TRUE if succeeded. */
29945 static bool
29946 arm_block_set_unaligned_vect (rtx dstbase,
29947 unsigned HOST_WIDE_INT length,
29948 unsigned HOST_WIDE_INT value,
29949 unsigned HOST_WIDE_INT align)
29950 {
29951 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29952 rtx dst, mem;
29953 rtx val_vec, reg;
29954 rtx (*gen_func) (rtx, rtx);
29955 machine_mode mode;
29956 unsigned HOST_WIDE_INT v = value;
29957 unsigned int offset = 0;
29958 gcc_assert ((align & 0x3) != 0);
29959 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29960 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29961 if (length >= nelt_v16)
29962 {
29963 mode = V16QImode;
29964 gen_func = gen_movmisalignv16qi;
29965 }
29966 else
29967 {
29968 mode = V8QImode;
29969 gen_func = gen_movmisalignv8qi;
29970 }
29971 nelt_mode = GET_MODE_NUNITS (mode);
29972 gcc_assert (length >= nelt_mode);
29973 /* Skip if it isn't profitable. */
29974 if (!arm_block_set_vect_profit_p (length, align, mode))
29975 return false;
29976
29977 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29978 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29979
29980 v = sext_hwi (v, BITS_PER_WORD);
29981
29982 reg = gen_reg_rtx (mode);
29983 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29984 /* Emit instruction loading the constant value. */
29985 emit_move_insn (reg, val_vec);
29986
29987 /* Handle nelt_mode bytes in a vector. */
29988 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29989 {
29990 emit_insn ((*gen_func) (mem, reg));
29991 if (i + 2 * nelt_mode <= length)
29992 {
29993 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29994 offset += nelt_mode;
29995 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29996 }
29997 }
29998
29999   /* If at least nelt_v8 bytes are left over, we must be in
30000      V16QImode.  */
30001 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30002
30003 /* Handle (8, 16) bytes leftover. */
30004 if (i + nelt_v8 < length)
30005 {
30006 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30007 offset += length - i;
30008 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30009
30010 /* We are shifting bytes back, set the alignment accordingly. */
30011 if ((length & 1) != 0 && align >= 2)
30012 set_mem_align (mem, BITS_PER_UNIT);
30013
30014 emit_insn (gen_movmisalignv16qi (mem, reg));
30015 }
30016 /* Handle (0, 8] bytes leftover. */
30017 else if (i < length && i + nelt_v8 >= length)
30018 {
30019 if (mode == V16QImode)
30020 reg = gen_lowpart (V8QImode, reg);
30021
30022 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30023 + (nelt_mode - nelt_v8))));
30024 offset += (length - i) + (nelt_mode - nelt_v8);
30025 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30026
30027 /* We are shifting bytes back, set the alignment accordingly. */
30028 if ((length & 1) != 0 && align >= 2)
30029 set_mem_align (mem, BITS_PER_UNIT);
30030
30031 emit_insn (gen_movmisalignv8qi (mem, reg));
30032 }
30033
30034 return true;
30035 }
30036
30037 /* Set a block of memory using vectorization instructions for the
30038 aligned case. We fill the first LENGTH bytes of the memory area
30039 starting from DSTBASE with byte constant VALUE. ALIGN is the
30040 alignment requirement of memory. Return TRUE if succeeded. */
30041 static bool
30042 arm_block_set_aligned_vect (rtx dstbase,
30043 unsigned HOST_WIDE_INT length,
30044 unsigned HOST_WIDE_INT value,
30045 unsigned HOST_WIDE_INT align)
30046 {
30047 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30048 rtx dst, addr, mem;
30049 rtx val_vec, reg;
30050 machine_mode mode;
30051 unsigned HOST_WIDE_INT v = value;
30052 unsigned int offset = 0;
30053
30054 gcc_assert ((align & 0x3) == 0);
30055 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30056 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30057 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30058 mode = V16QImode;
30059 else
30060 mode = V8QImode;
30061
30062 nelt_mode = GET_MODE_NUNITS (mode);
30063 gcc_assert (length >= nelt_mode);
30064 /* Skip if it isn't profitable. */
30065 if (!arm_block_set_vect_profit_p (length, align, mode))
30066 return false;
30067
30068 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30069
30070 v = sext_hwi (v, BITS_PER_WORD);
30071
30072 reg = gen_reg_rtx (mode);
30073 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30074 /* Emit instruction loading the constant value. */
30075 emit_move_insn (reg, val_vec);
30076
30077 i = 0;
30078 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30079 if (mode == V16QImode)
30080 {
30081 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30082 emit_insn (gen_movmisalignv16qi (mem, reg));
30083 i += nelt_mode;
30084 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30085 if (i + nelt_v8 < length && i + nelt_v16 > length)
30086 {
30087 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30088 offset += length - nelt_mode;
30089 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30090 /* We are shifting bytes back, set the alignment accordingly. */
30091 if ((length & 0x3) == 0)
30092 set_mem_align (mem, BITS_PER_UNIT * 4);
30093 else if ((length & 0x1) == 0)
30094 set_mem_align (mem, BITS_PER_UNIT * 2);
30095 else
30096 set_mem_align (mem, BITS_PER_UNIT);
30097
30098 emit_insn (gen_movmisalignv16qi (mem, reg));
30099 return true;
30100 }
30101 /* Fall through for bytes leftover. */
30102 mode = V8QImode;
30103 nelt_mode = GET_MODE_NUNITS (mode);
30104 reg = gen_lowpart (V8QImode, reg);
30105 }
30106
30107 /* Handle 8 bytes in a vector. */
30108 for (; (i + nelt_mode <= length); i += nelt_mode)
30109 {
30110 addr = plus_constant (Pmode, dst, i);
30111 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30112 emit_move_insn (mem, reg);
30113 }
30114
30115 /* Handle single word leftover by shifting 4 bytes back. We can
30116 use aligned access for this case. */
30117 if (i + UNITS_PER_WORD == length)
30118 {
30119 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30120 offset += i - UNITS_PER_WORD;
30121 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30122 /* We are shifting 4 bytes back, set the alignment accordingly. */
30123 if (align > UNITS_PER_WORD)
30124 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30125
30126 emit_move_insn (mem, reg);
30127 }
30128 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30129 We have to use unaligned access for this case. */
30130 else if (i < length)
30131 {
30132 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30133 offset += length - nelt_mode;
30134 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30135 /* We are shifting bytes back, set the alignment accordingly. */
30136 if ((length & 1) == 0)
30137 set_mem_align (mem, BITS_PER_UNIT * 2);
30138 else
30139 set_mem_align (mem, BITS_PER_UNIT);
30140
30141 emit_insn (gen_movmisalignv8qi (mem, reg));
30142 }
30143
30144 return true;
30145 }
30146
30147 /* Set a block of memory using plain strh/strb instructions, only
30148    using instructions allowed by ALIGN on the processor.  We fill the
30149 first LENGTH bytes of the memory area starting from DSTBASE
30150 with byte constant VALUE. ALIGN is the alignment requirement
30151 of memory. */
30152 static bool
30153 arm_block_set_unaligned_non_vect (rtx dstbase,
30154 unsigned HOST_WIDE_INT length,
30155 unsigned HOST_WIDE_INT value,
30156 unsigned HOST_WIDE_INT align)
30157 {
30158 unsigned int i;
30159 rtx dst, addr, mem;
30160 rtx val_exp, val_reg, reg;
30161 machine_mode mode;
30162 HOST_WIDE_INT v = value;
30163
30164 gcc_assert (align == 1 || align == 2);
30165
30166 if (align == 2)
30167 v |= (value << BITS_PER_UNIT);
30168
30169 v = sext_hwi (v, BITS_PER_WORD);
30170 val_exp = GEN_INT (v);
30171 /* Skip if it isn't profitable. */
30172 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30173 align, true, false))
30174 return false;
30175
30176 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30177 mode = (align == 2 ? HImode : QImode);
30178 val_reg = force_reg (SImode, val_exp);
30179 reg = gen_lowpart (mode, val_reg);
30180
30181 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30182 {
30183 addr = plus_constant (Pmode, dst, i);
30184 mem = adjust_automodify_address (dstbase, mode, addr, i);
30185 emit_move_insn (mem, reg);
30186 }
30187
30188 /* Handle single byte leftover. */
30189 if (i + 1 == length)
30190 {
30191 reg = gen_lowpart (QImode, val_reg);
30192 addr = plus_constant (Pmode, dst, i);
30193 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30194 emit_move_insn (mem, reg);
30195 i++;
30196 }
30197
30198 gcc_assert (i == length);
30199 return true;
30200 }
30201
30202 /* Set a block of memory using plain strd/str/strh/strb instructions,
30203 to permit unaligned copies on processors which support unaligned
30204 semantics for those instructions. We fill the first LENGTH bytes
30205 of the memory area starting from DSTBASE with byte constant VALUE.
30206 ALIGN is the alignment requirement of memory. */
30207 static bool
30208 arm_block_set_aligned_non_vect (rtx dstbase,
30209 unsigned HOST_WIDE_INT length,
30210 unsigned HOST_WIDE_INT value,
30211 unsigned HOST_WIDE_INT align)
30212 {
30213 unsigned int i;
30214 rtx dst, addr, mem;
30215 rtx val_exp, val_reg, reg;
30216 unsigned HOST_WIDE_INT v;
30217 bool use_strd_p;
30218
30219 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30220 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30221
30222 v = (value | (value << 8) | (value << 16) | (value << 24));
30223 if (length < UNITS_PER_WORD)
30224 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30225
30226 if (use_strd_p)
30227 v |= (v << BITS_PER_WORD);
30228 else
30229 v = sext_hwi (v, BITS_PER_WORD);
30230
30231 val_exp = GEN_INT (v);
30232 /* Skip if it isn't profitable. */
30233 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30234 align, false, use_strd_p))
30235 {
30236 if (!use_strd_p)
30237 return false;
30238
30239 /* Try without strd. */
30240 v = (v >> BITS_PER_WORD);
30241 v = sext_hwi (v, BITS_PER_WORD);
30242 val_exp = GEN_INT (v);
30243 use_strd_p = false;
30244 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30245 align, false, use_strd_p))
30246 return false;
30247 }
30248
30249 i = 0;
30250 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30251 /* Handle double words using strd if possible. */
30252 if (use_strd_p)
30253 {
30254 val_reg = force_reg (DImode, val_exp);
30255 reg = val_reg;
30256 for (; (i + 8 <= length); i += 8)
30257 {
30258 addr = plus_constant (Pmode, dst, i);
30259 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30260 emit_move_insn (mem, reg);
30261 }
30262 }
30263 else
30264 val_reg = force_reg (SImode, val_exp);
30265
30266 /* Handle words. */
30267 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30268 for (; (i + 4 <= length); i += 4)
30269 {
30270 addr = plus_constant (Pmode, dst, i);
30271 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30272 if ((align & 3) == 0)
30273 emit_move_insn (mem, reg);
30274 else
30275 emit_insn (gen_unaligned_storesi (mem, reg));
30276 }
30277
30278 /* Merge last pair of STRH and STRB into a STR if possible. */
30279 if (unaligned_access && i > 0 && (i + 3) == length)
30280 {
30281 addr = plus_constant (Pmode, dst, i - 1);
30282 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30283 /* We are shifting one byte back, set the alignment accordingly. */
30284 if ((align & 1) == 0)
30285 set_mem_align (mem, BITS_PER_UNIT);
30286
30287 /* Most likely this is an unaligned access, and we can't tell at
30288 compilation time. */
30289 emit_insn (gen_unaligned_storesi (mem, reg));
30290 return true;
30291 }
30292
30293 /* Handle half word leftover. */
30294 if (i + 2 <= length)
30295 {
30296 reg = gen_lowpart (HImode, val_reg);
30297 addr = plus_constant (Pmode, dst, i);
30298 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30299 if ((align & 1) == 0)
30300 emit_move_insn (mem, reg);
30301 else
30302 emit_insn (gen_unaligned_storehi (mem, reg));
30303
30304 i += 2;
30305 }
30306
30307 /* Handle single byte leftover. */
30308 if (i + 1 == length)
30309 {
30310 reg = gen_lowpart (QImode, val_reg);
30311 addr = plus_constant (Pmode, dst, i);
30312 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30313 emit_move_insn (mem, reg);
30314 }
30315
30316 return true;
30317 }
30318
30319 /* Set a block of memory using vectorization instructions for both
30320 aligned and unaligned cases. We fill the first LENGTH bytes of
30321 the memory area starting from DSTBASE with byte constant VALUE.
30322 ALIGN is the alignment requirement of memory. */
30323 static bool
30324 arm_block_set_vect (rtx dstbase,
30325 unsigned HOST_WIDE_INT length,
30326 unsigned HOST_WIDE_INT value,
30327 unsigned HOST_WIDE_INT align)
30328 {
30329 /* Check whether we need to use unaligned store instruction. */
30330 if (((align & 3) != 0 || (length & 3) != 0)
30331 /* Check whether unaligned store instruction is available. */
30332 && (!unaligned_access || BYTES_BIG_ENDIAN))
30333 return false;
30334
30335 if ((align & 3) == 0)
30336 return arm_block_set_aligned_vect (dstbase, length, value, align);
30337 else
30338 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30339 }
30340
30341 /* Expand a string store operation.  First we try to do it using
30342    vectorization instructions, then fall back to ARM unaligned access and
30343    double-word stores if profitable.  OPERANDS[0] is the destination,
30344    OPERANDS[1] is the number of bytes, OPERANDS[2] is the value used to
30345    initialize the memory, and OPERANDS[3] is the known alignment of the
30346    destination.  */
30347 bool
30348 arm_gen_setmem (rtx *operands)
30349 {
30350 rtx dstbase = operands[0];
30351 unsigned HOST_WIDE_INT length;
30352 unsigned HOST_WIDE_INT value;
30353 unsigned HOST_WIDE_INT align;
30354
30355 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30356 return false;
30357
30358 length = UINTVAL (operands[1]);
30359 if (length > 64)
30360 return false;
30361
30362 value = (UINTVAL (operands[2]) & 0xFF);
30363 align = UINTVAL (operands[3]);
30364 if (TARGET_NEON && length >= 8
30365 && current_tune->string_ops_prefer_neon
30366 && arm_block_set_vect (dstbase, length, value, align))
30367 return true;
30368
30369 if (!unaligned_access && (align & 3) != 0)
30370 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30371
30372 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30373 }
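
/* Illustrative only: the kind of source-level code whose memset expansion
   reaches arm_gen_setmem, i.e. a constant value and a small constant
   length (at most 64 bytes) with known alignment.  The struct and
   function names are made up.  */

#include <string.h>

struct example_buf
{
  char bytes[24];
};

static void
example_clear (struct example_buf *b)
{
  /* Constant length 24, constant value 0: a candidate for the inline
     str/strh/strb, strd or Neon vst1 sequences chosen above.  */
  memset (b->bytes, 0, sizeof b->bytes);
}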
30374
30375
30376 static bool
30377 arm_macro_fusion_p (void)
30378 {
30379 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30380 }
30381
30382 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30383 for MOVW / MOVT macro fusion. */
30384
30385 static bool
30386 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30387 {
30388 /* We are trying to fuse
30389 movw imm / movt imm
30390 instructions as a group that gets scheduled together. */
30391
30392 rtx set_dest = SET_DEST (curr_set);
30393
30394 if (GET_MODE (set_dest) != SImode)
30395 return false;
30396
30397 /* We are trying to match:
30398 prev (movw) == (set (reg r0) (const_int imm16))
30399 curr (movt) == (set (zero_extract (reg r0)
30400 (const_int 16)
30401 (const_int 16))
30402 (const_int imm16_1))
30403 or
30404 prev (movw) == (set (reg r1)
30405 (high (symbol_ref ("SYM"))))
30406 curr (movt) == (set (reg r0)
30407 (lo_sum (reg r1)
30408 (symbol_ref ("SYM")))) */
30409
30410 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30411 {
30412 if (CONST_INT_P (SET_SRC (curr_set))
30413 && CONST_INT_P (SET_SRC (prev_set))
30414 && REG_P (XEXP (set_dest, 0))
30415 && REG_P (SET_DEST (prev_set))
30416 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30417 return true;
30418
30419 }
30420 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30421 && REG_P (SET_DEST (curr_set))
30422 && REG_P (SET_DEST (prev_set))
30423 && GET_CODE (SET_SRC (prev_set)) == HIGH
30424 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30425 return true;
30426
30427 return false;
30428 }
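/* Editor's note (illustrative, not part of the original sources): the two
   shapes above correspond to instruction pairs such as
       movw  r0, #:lower16:sym        movw  r0, #0x1234
       movt  r0, #:upper16:sym        movt  r0, #0x5678
   which build a 32-bit address or constant in two halves; tunings that set
   FUSE_MOVW_MOVT keep such pairs adjacent so the core can fuse them.  */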
30429
30430 static bool
30431 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30432 {
30433 rtx prev_set = single_set (prev);
30434 rtx curr_set = single_set (curr);
30435
30436 if (!prev_set
30437 || !curr_set)
30438 return false;
30439
30440 if (any_condjump_p (curr))
30441 return false;
30442
30443 if (!arm_macro_fusion_p ())
30444 return false;
30445
30446 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30447 && aarch_crypto_can_dual_issue (prev, curr))
30448 return true;
30449
30450 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30451 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30452 return true;
30453
30454 return false;
30455 }
30456
30457 /* Return true iff the instruction fusion described by OP is enabled. */
30458 bool
30459 arm_fusion_enabled_p (tune_params::fuse_ops op)
30460 {
30461 return current_tune->fusible_ops & op;
30462 }
30463
30464 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30465 scheduled for speculative execution. Reject the long-running division
30466 and square-root instructions. */
30467
30468 static bool
30469 arm_sched_can_speculate_insn (rtx_insn *insn)
30470 {
30471 switch (get_attr_type (insn))
30472 {
30473 case TYPE_SDIV:
30474 case TYPE_UDIV:
30475 case TYPE_FDIVS:
30476 case TYPE_FDIVD:
30477 case TYPE_FSQRTS:
30478 case TYPE_FSQRTD:
30479 case TYPE_NEON_FP_SQRT_S:
30480 case TYPE_NEON_FP_SQRT_D:
30481 case TYPE_NEON_FP_SQRT_S_Q:
30482 case TYPE_NEON_FP_SQRT_D_Q:
30483 case TYPE_NEON_FP_DIV_S:
30484 case TYPE_NEON_FP_DIV_D:
30485 case TYPE_NEON_FP_DIV_S_Q:
30486 case TYPE_NEON_FP_DIV_D_Q:
30487 return false;
30488 default:
30489 return true;
30490 }
30491 }
30492
30493 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30494
30495 static unsigned HOST_WIDE_INT
30496 arm_asan_shadow_offset (void)
30497 {
30498 return HOST_WIDE_INT_1U << 29;
30499 }
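/* Editor's note (illustrative, not part of the original sources): with this
   offset, AddressSanitizer computes the shadow address roughly as
       shadow = (addr >> 3) + 0x20000000
   i.e. the shadow region starts at 512 MiB; only the offset is configured
   here, the shift of 3 comes from the generic sanitizer code.  */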
30500
30501
30502 /* This is a temporary fix for PR60655.  Ideally we should
30503 handle most of these cases in the generic parts, but
30504 currently we reject (minus (..) (sym_ref)).  We try to
30505 ameliorate the case of (minus (sym_ref1) (sym_ref2))
30506 where both symbols are in the same section. */
30507
30508 static bool
30509 arm_const_not_ok_for_debug_p (rtx p)
30510 {
30511 tree decl_op0 = NULL;
30512 tree decl_op1 = NULL;
30513
30514 if (GET_CODE (p) == UNSPEC)
30515 return true;
30516 if (GET_CODE (p) == MINUS)
30517 {
30518 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30519 {
30520 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30521 if (decl_op1
30522 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30523 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30524 {
30525 if ((VAR_P (decl_op1)
30526 || TREE_CODE (decl_op1) == CONST_DECL)
30527 && (VAR_P (decl_op0)
30528 || TREE_CODE (decl_op0) == CONST_DECL))
30529 return (get_variable_section (decl_op1, false)
30530 != get_variable_section (decl_op0, false));
30531
30532 if (TREE_CODE (decl_op1) == LABEL_DECL
30533 && TREE_CODE (decl_op0) == LABEL_DECL)
30534 return (DECL_CONTEXT (decl_op1)
30535 != DECL_CONTEXT (decl_op0));
30536 }
30537
30538 return true;
30539 }
30540 }
30541
30542 return false;
30543 }
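/* Illustrative example (editor's note, not part of the original sources):
   for a difference of two symbols such as
       (minus (symbol_ref "a") (symbol_ref "b"))
   where both variables end up in the same data section, the function above
   returns false and the expression is kept for debug output; if the symbols
   (or labels) belong to different sections (or functions), it returns true
   and the expression is rejected.  */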
30544
30545 /* Return TRUE if X is a reference to a value in a constant pool. */
30546 extern bool
30547 arm_is_constant_pool_ref (rtx x)
30548 {
30549 return (MEM_P (x)
30550 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30551 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30552 }
30553
30554 /* Remember the last target of arm_set_current_function. */
30555 static GTY(()) tree arm_previous_fndecl;
30556
30557 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30558
30559 void
30560 save_restore_target_globals (tree new_tree)
30561 {
30562 /* If we have a previous state, use it. */
30563 if (TREE_TARGET_GLOBALS (new_tree))
30564 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30565 else if (new_tree == target_option_default_node)
30566 restore_target_globals (&default_target_globals);
30567 else
30568 {
30569 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30570 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30571 }
30572
30573 arm_option_params_internal ();
30574 }
30575
30576 /* Invalidate arm_previous_fndecl. */
30577
30578 void
30579 arm_reset_previous_fndecl (void)
30580 {
30581 arm_previous_fndecl = NULL_TREE;
30582 }
30583
30584 /* Establish appropriate back-end context for processing the function
30585 FNDECL. The argument might be NULL to indicate processing at top
30586 level, outside of any function scope. */
30587
30588 static void
30589 arm_set_current_function (tree fndecl)
30590 {
30591 if (!fndecl || fndecl == arm_previous_fndecl)
30592 return;
30593
30594 tree old_tree = (arm_previous_fndecl
30595 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30596 : NULL_TREE);
30597
30598 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30599
30600 /* If the current function has no attributes but the previous one did,
30601 use the default node. */
30602 if (! new_tree && old_tree)
30603 new_tree = target_option_default_node;
30604
30605 /* If there is nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
30606 the default has been handled by save_restore_target_globals from
30607 arm_pragma_target_parse. */
30608 if (old_tree == new_tree)
30609 return;
30610
30611 arm_previous_fndecl = fndecl;
30612
30613 /* First set the target options. */
30614 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30615
30616 save_restore_target_globals (new_tree);
30617 }
30618
30619 /* Implement TARGET_OPTION_PRINT. */
30620
30621 static void
30622 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30623 {
30624 int flags = ptr->x_target_flags;
30625 const char *fpu_name;
30626
30627 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30628 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30629
30630 fprintf (file, "%*sselected isa %s\n", indent, "",
30631 TARGET_THUMB2_P (flags) ? "thumb2" :
30632 TARGET_THUMB_P (flags) ? "thumb1" :
30633 "arm");
30634
30635 if (ptr->x_arm_arch_string)
30636 fprintf (file, "%*sselected architecture %s\n", indent, "",
30637 ptr->x_arm_arch_string);
30638
30639 if (ptr->x_arm_cpu_string)
30640 fprintf (file, "%*sselected CPU %s\n", indent, "",
30641 ptr->x_arm_cpu_string);
30642
30643 if (ptr->x_arm_tune_string)
30644 fprintf (file, "%*sselected tune %s\n", indent, "",
30645 ptr->x_arm_tune_string);
30646
30647 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30648 }
30649
30650 /* Hook to determine if one function can safely inline another. */
30651
30652 static bool
30653 arm_can_inline_p (tree caller, tree callee)
30654 {
30655 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30656 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30657 bool can_inline = true;
30658
30659 struct cl_target_option *caller_opts
30660 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30661 : target_option_default_node);
30662
30663 struct cl_target_option *callee_opts
30664 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30665 : target_option_default_node);
30666
30667 if (callee_opts == caller_opts)
30668 return true;
30669
30670 /* Callee's ISA features should be a subset of the caller's. */
30671 struct arm_build_target caller_target;
30672 struct arm_build_target callee_target;
30673 caller_target.isa = sbitmap_alloc (isa_num_bits);
30674 callee_target.isa = sbitmap_alloc (isa_num_bits);
30675
30676 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30677 false);
30678 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30679 false);
30680 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30681 can_inline = false;
30682
30683 sbitmap_free (caller_target.isa);
30684 sbitmap_free (callee_target.isa);
30685
30686 /* It is OK to inline between different modes.
30687 Functions with mode-specific instructions, e.g. using asm,
30688 must be explicitly protected with noinline. */
30689 return can_inline;
30690 }
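/* Editor's note (illustrative assumption, not part of the original sources):
   as an example of the ISA-subset rule above, a callee carrying
   __attribute__ ((target ("fpu=neon"))) would be inlinable into a caller
   built with -mfpu=neon-vfpv4 (whose feature set should include all of
   Neon's bits), but not into a caller built with -mfpu=vfpv3-d16, which
   lacks the Advanced SIMD feature bits.  */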
30691
30692 /* Hook to fix a function's alignment affected by a target attribute. */
30693
30694 static void
30695 arm_relayout_function (tree fndecl)
30696 {
30697 if (DECL_USER_ALIGN (fndecl))
30698 return;
30699
30700 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30701
30702 if (!callee_tree)
30703 callee_tree = target_option_default_node;
30704
30705 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30706 SET_DECL_ALIGN
30707 (fndecl,
30708 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30709 }
30710
30711 /* Inner function to process the attribute ((target ("..."))): take an argument
30712 and set the current options from it.  If the argument is a list, recursively
30713 process each entry. */
30714
30715 static bool
30716 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30717 {
30718 if (TREE_CODE (args) == TREE_LIST)
30719 {
30720 bool ret = true;
30721
30722 for (; args; args = TREE_CHAIN (args))
30723 if (TREE_VALUE (args)
30724 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30725 ret = false;
30726 return ret;
30727 }
30728
30729 else if (TREE_CODE (args) != STRING_CST)
30730 {
30731 error ("attribute %<target%> argument not a string");
30732 return false;
30733 }
30734
30735 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30736 char *q;
30737
30738 while ((q = strtok (argstr, ",")) != NULL)
30739 {
30740 while (ISSPACE (*q)) ++q;
30741
30742 argstr = NULL;
30743 if (!strncmp (q, "thumb", 5))
30744 opts->x_target_flags |= MASK_THUMB;
30745
30746 else if (!strncmp (q, "arm", 3))
30747 opts->x_target_flags &= ~MASK_THUMB;
30748
30749 else if (!strncmp (q, "fpu=", 4))
30750 {
30751 int fpu_index;
30752 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30753 &fpu_index, CL_TARGET))
30754 {
30755 error ("invalid fpu for target attribute or pragma %qs", q);
30756 return false;
30757 }
30758 if (fpu_index == TARGET_FPU_auto)
30759 {
30760 /* This doesn't really make sense until we support
30761 general dynamic selection of the architecture and all
30762 sub-features. */
30763 sorry ("auto fpu selection not currently permitted here");
30764 return false;
30765 }
30766 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30767 }
30768 else if (!strncmp (q, "arch=", 5))
30769 {
30770 char* arch = q+5;
30771 const arch_option *arm_selected_arch
30772 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30773
30774 if (!arm_selected_arch)
30775 {
30776 error ("invalid architecture for target attribute or pragma %qs",
30777 q);
30778 return false;
30779 }
30780
30781 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30782 }
30783 else if (q[0] == '+')
30784 {
30785 opts->x_arm_arch_string
30786 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30787 }
30788 else
30789 {
30790 error ("unknown target attribute or pragma %qs", q);
30791 return false;
30792 }
30793 }
30794
30795 return true;
30796 }
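/* Editor's note (illustrative, not part of the original sources): the parser
   above accepts comma-separated items such as
       __attribute__ ((target ("thumb")))
       __attribute__ ((target ("arm,fpu=vfpv3-d16")))
       __attribute__ ((target ("arch=armv7-a,thumb")))
   and "+<ext>" items, which are simply appended to the current architecture
   string (for instance "+crc" on an architecture that supports it; editor's
   example).  "fpu=auto" is rejected with a sorry () because dynamic FPU
   selection is not supported in attributes or pragmas.  */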
30797
30798 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30799
30800 tree
30801 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30802 struct gcc_options *opts_set)
30803 {
30804 struct cl_target_option cl_opts;
30805
30806 if (!arm_valid_target_attribute_rec (args, opts))
30807 return NULL_TREE;
30808
30809 cl_target_option_save (&cl_opts, opts);
30810 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30811 arm_option_check_internal (opts);
30812 /* Do any overrides, such as global options arch=xxx.
30813 We do this since arm_active_target was overridden. */
30814 arm_option_reconfigure_globals ();
30815 arm_options_perform_arch_sanity_checks ();
30816 arm_option_override_internal (opts, opts_set);
30817
30818 return build_target_option_node (opts);
30819 }
30820
30821 static void
30822 add_attribute (const char * mode, tree *attributes)
30823 {
30824 size_t len = strlen (mode);
30825 tree value = build_string (len, mode);
30826
30827 TREE_TYPE (value) = build_array_type (char_type_node,
30828 build_index_type (size_int (len)));
30829
30830 *attributes = tree_cons (get_identifier ("target"),
30831 build_tree_list (NULL_TREE, value),
30832 *attributes);
30833 }
30834
30835 /* For testing.  Insert thumb or arm modes alternately on functions. */
30836
30837 static void
30838 arm_insert_attributes (tree fndecl, tree * attributes)
30839 {
30840 const char *mode;
30841
30842 if (! TARGET_FLIP_THUMB)
30843 return;
30844
30845 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30846 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30847 return;
30848
30849 /* Nested definitions must inherit mode. */
30850 if (current_function_decl)
30851 {
30852 mode = TARGET_THUMB ? "thumb" : "arm";
30853 add_attribute (mode, attributes);
30854 return;
30855 }
30856
30857 /* If there is already a setting don't change it. */
30858 if (lookup_attribute ("target", *attributes) != NULL)
30859 return;
30860
30861 mode = thumb_flipper ? "thumb" : "arm";
30862 add_attribute (mode, attributes);
30863
30864 thumb_flipper = !thumb_flipper;
30865 }
30866
30867 /* Hook to validate attribute((target("string"))). */
30868
30869 static bool
30870 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30871 tree args, int ARG_UNUSED (flags))
30872 {
30873 bool ret = true;
30874 struct gcc_options func_options;
30875 tree cur_tree, new_optimize;
30876 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30877
30878 /* Get the optimization options of the current function. */
30879 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30880
30881 /* If the function changed the optimization levels as well as setting target
30882 options, start with the optimizations specified. */
30883 if (!func_optimize)
30884 func_optimize = optimization_default_node;
30885
30886 /* Init func_options. */
30887 memset (&func_options, 0, sizeof (func_options));
30888 init_options_struct (&func_options, NULL);
30889 lang_hooks.init_options_struct (&func_options);
30890
30891 /* Initialize func_options to the defaults. */
30892 cl_optimization_restore (&func_options,
30893 TREE_OPTIMIZATION (func_optimize));
30894
30895 cl_target_option_restore (&func_options,
30896 TREE_TARGET_OPTION (target_option_default_node));
30897
30898 /* Set func_options flags with new target mode. */
30899 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30900 &global_options_set);
30901
30902 if (cur_tree == NULL_TREE)
30903 ret = false;
30904
30905 new_optimize = build_optimization_node (&func_options);
30906
30907 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30908
30909 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30910
30911 finalize_options_struct (&func_options);
30912
30913 return ret;
30914 }
30915
30916 /* Match an ISA feature bitmap to a named FPU. We always use the
30917 first entry that exactly matches the feature set, so that we
30918 effectively canonicalize the FPU name for the assembler. */
30919 static const char*
30920 arm_identify_fpu_from_isa (sbitmap isa)
30921 {
30922 auto_sbitmap fpubits (isa_num_bits);
30923 auto_sbitmap cand_fpubits (isa_num_bits);
30924
30925 bitmap_and (fpubits, isa, isa_all_fpubits);
30926
30927 /* If there are no ISA feature bits relating to the FPU, we must be
30928 doing soft-float. */
30929 if (bitmap_empty_p (fpubits))
30930 return "softvfp";
30931
30932 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30933 {
30934 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30935 if (bitmap_equal_p (fpubits, cand_fpubits))
30936 return all_fpus[i].name;
30937 }
30938 /* We must find an entry, or things have gone wrong. */
30939 gcc_unreachable ();
30940 }
30941
30942 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30943 by the function fndecl. */
30944 void
30945 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30946 {
30947 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30948
30949 struct cl_target_option *targ_options;
30950 if (target_parts)
30951 targ_options = TREE_TARGET_OPTION (target_parts);
30952 else
30953 targ_options = TREE_TARGET_OPTION (target_option_current_node);
30954 gcc_assert (targ_options);
30955
30956 /* Only update the assembler .arch string if it is distinct from the last
30957 such string we printed.  arch_to_print is set conditionally in case
30958 targ_options->x_arm_arch_string is NULL, which can happen
30959 when cc1 is invoked directly without passing the -march option. */
30960 std::string arch_to_print;
30961 if (targ_options->x_arm_arch_string)
30962 arch_to_print = targ_options->x_arm_arch_string;
30963
30964 if (arch_to_print != arm_last_printed_arch_string)
30965 {
30966 std::string arch_name
30967 = arch_to_print.substr (0, arch_to_print.find ("+"));
30968 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
30969 const arch_option *arch
30970 = arm_parse_arch_option_name (all_architectures, "-march",
30971 targ_options->x_arm_arch_string);
30972 auto_sbitmap opt_bits (isa_num_bits);
30973
30974 gcc_assert (arch);
30975 if (arch->common.extensions)
30976 {
30977 for (const struct cpu_arch_extension *opt = arch->common.extensions;
30978 opt->name != NULL;
30979 opt++)
30980 {
30981 if (!opt->remove)
30982 {
30983 arm_initialize_isa (opt_bits, opt->isa_bits);
30984 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
30985 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
30986 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
30987 opt->name);
30988 }
30989 }
30990 }
30991
30992 arm_last_printed_arch_string = arch_to_print;
30993 }
30994
30995 fprintf (stream, "\t.syntax unified\n");
30996
30997 if (TARGET_THUMB)
30998 {
30999 if (is_called_in_ARM_mode (decl)
31000 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31001 && cfun->is_thunk))
31002 fprintf (stream, "\t.code 32\n");
31003 else if (TARGET_THUMB1)
31004 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31005 else
31006 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31007 }
31008 else
31009 fprintf (stream, "\t.arm\n");
31010
31011 std::string fpu_to_print
31012 = TARGET_SOFT_FLOAT
31013 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31014
31015 if (fpu_to_print != arm_last_printed_fpu_string)
31016 {
31017 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31018 arm_last_printed_fpu_string = fpu_to_print;
31019 }
31020
31021 if (TARGET_POKE_FUNCTION_NAME)
31022 arm_poke_function_name (stream, (const char *) name);
31023 }
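/* Example output (editor's sketch, not part of the original sources): for a
   Thumb-2 function compiled with -march=armv7-a -mfpu=neon this hook would
   typically emit
       .arch armv7-a
       .syntax unified
       .thumb
       .thumb_func
       .fpu neon
   with the .arch and .fpu directives omitted when they would repeat the
   ones most recently printed.  */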
31024
31025 /* If MEM's address is of the form [base + offset], extract the two
31026 parts and store them in BASE and OFFSET; otherwise clear BASE and
31027 OFFSET and return false. */
31028
31029 static bool
31030 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31031 {
31032 rtx addr;
31033
31034 gcc_assert (MEM_P (mem));
31035
31036 addr = XEXP (mem, 0);
31037
31038 /* Strip off const from addresses like (const (addr)). */
31039 if (GET_CODE (addr) == CONST)
31040 addr = XEXP (addr, 0);
31041
31042 if (GET_CODE (addr) == REG)
31043 {
31044 *base = addr;
31045 *offset = const0_rtx;
31046 return true;
31047 }
31048
31049 if (GET_CODE (addr) == PLUS
31050 && GET_CODE (XEXP (addr, 0)) == REG
31051 && CONST_INT_P (XEXP (addr, 1)))
31052 {
31053 *base = XEXP (addr, 0);
31054 *offset = XEXP (addr, 1);
31055 return true;
31056 }
31057
31058 *base = NULL_RTX;
31059 *offset = NULL_RTX;
31060
31061 return false;
31062 }
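/* Illustrative example (editor's note, not part of the original sources):
   for (mem (reg r1)) this stores r1 in BASE and const0_rtx in OFFSET; for
   (mem (plus (reg r1) (const_int 8))) it stores r1 and 8.  Any other
   address form, e.g. an auto-increment, clears both and returns false.  */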
31063
31064 /* If INSN is a load or store whose address has the form [base + offset],
31065 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
31066 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
31067 otherwise return FALSE. */
31068
31069 static bool
31070 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31071 {
31072 rtx x, dest, src;
31073
31074 gcc_assert (INSN_P (insn));
31075 x = PATTERN (insn);
31076 if (GET_CODE (x) != SET)
31077 return false;
31078
31079 src = SET_SRC (x);
31080 dest = SET_DEST (x);
31081 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31082 {
31083 *is_load = false;
31084 extract_base_offset_in_addr (dest, base, offset);
31085 }
31086 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31087 {
31088 *is_load = true;
31089 extract_base_offset_in_addr (src, base, offset);
31090 }
31091 else
31092 return false;
31093
31094 return (*base != NULL_RTX && *offset != NULL_RTX);
31095 }
31096
31097 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31098
31099 Currently we only support fusing ldr and str instructions, so FUSION_PRI and
31100 PRI are only calculated for those instructions.  For any other instruction,
31101 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
31102 instruction fusion can be supported by returning different priorities.
31103
31104 It's important that irrelevant instructions get the largest FUSION_PRI. */
31105
31106 static void
31107 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31108 int *fusion_pri, int *pri)
31109 {
31110 int tmp, off_val;
31111 bool is_load;
31112 rtx base, offset;
31113
31114 gcc_assert (INSN_P (insn));
31115
31116 tmp = max_pri - 1;
31117 if (!fusion_load_store (insn, &base, &offset, &is_load))
31118 {
31119 *pri = tmp;
31120 *fusion_pri = tmp;
31121 return;
31122 }
31123
31124 /* Load goes first. */
31125 if (is_load)
31126 *fusion_pri = tmp - 1;
31127 else
31128 *fusion_pri = tmp - 2;
31129
31130 tmp /= 2;
31131
31132 /* INSN with smaller base register goes first. */
31133 tmp -= ((REGNO (base) & 0xff) << 20);
31134
31135 /* INSN with smaller offset goes first. */
31136 off_val = (int)(INTVAL (offset));
31137 if (off_val >= 0)
31138 tmp -= (off_val & 0xfffff);
31139 else
31140 tmp += ((- off_val) & 0xfffff);
31141
31142 *pri = tmp;
31143 return;
31144 }
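/* Editor's note (illustrative, not part of the original sources): the effect
   of the scheme above is that unrelated instructions keep the largest
   priority (max_pri - 1), loads and stores fall into two separate fusion
   groups, and within a group an access with a smaller base register number,
   and then a smaller offset, gets a larger PRI and is placed earlier.  That
   brings accesses such as [r0, #0] and [r0, #4] next to each other so later
   passes can combine them.  */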
31145
31146
31147 /* Construct and return a PARALLEL RTX vector with elements numbering the
31148 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31149 the vector, from the perspective of the architecture.  This does not
31150 line up with GCC's perspective on lane numbers, so we end up with
31151 different masks depending on the target's endianness.  The diagram
31152 below may help.  We must draw the distinction when building masks
31153 which select one half of the vector.  An instruction selecting
31154 architectural low-lanes for a big-endian target must be described using
31155 a mask selecting GCC high-lanes.
31156
31157 Big-Endian Little-Endian
31158
31159 GCC 0 1 2 3 3 2 1 0
31160 | x | x | x | x | | x | x | x | x |
31161 Architecture 3 2 1 0 3 2 1 0
31162
31163 Low Mask: { 2, 3 } { 0, 1 }
31164 High Mask: { 0, 1 } { 2, 3 }
31165 */
31166
31167 rtx
31168 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31169 {
31170 int nunits = GET_MODE_NUNITS (mode);
31171 rtvec v = rtvec_alloc (nunits / 2);
31172 int high_base = nunits / 2;
31173 int low_base = 0;
31174 int base;
31175 rtx t1;
31176 int i;
31177
31178 if (BYTES_BIG_ENDIAN)
31179 base = high ? low_base : high_base;
31180 else
31181 base = high ? high_base : low_base;
31182
31183 for (i = 0; i < nunits / 2; i++)
31184 RTVEC_ELT (v, i) = GEN_INT (base + i);
31185
31186 t1 = gen_rtx_PARALLEL (mode, v);
31187 return t1;
31188 }
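/* Worked example (editor's note, not part of the original sources): for
   V4SImode, nunits == 4, so low_base == 0 and high_base == 2.  On a
   little-endian target HIGH == true gives (parallel [2 3]) and HIGH == false
   gives (parallel [0 1]); on big-endian the two results are swapped, in line
   with the Low/High mask table in the comment above.  */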
31189
31190 /* Check OP for validity as a PARALLEL RTX vector with elements
31191 numbering the lanes of either the high (HIGH == TRUE) or the low
31192 (HIGH == FALSE) half of the vector, from the perspective of the architecture.
31193 See the diagram above arm_simd_vect_par_cnst_half for more details. */
31194
31195 bool
31196 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31197 bool high)
31198 {
31199 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31200 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31201 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31202 int i = 0;
31203
31204 if (!VECTOR_MODE_P (mode))
31205 return false;
31206
31207 if (count_op != count_ideal)
31208 return false;
31209
31210 for (i = 0; i < count_ideal; i++)
31211 {
31212 rtx elt_op = XVECEXP (op, 0, i);
31213 rtx elt_ideal = XVECEXP (ideal, 0, i);
31214
31215 if (!CONST_INT_P (elt_op)
31216 || INTVAL (elt_ideal) != INTVAL (elt_op))
31217 return false;
31218 }
31219 return true;
31220 }
31221
31222 /* We can output an mi_thunk for all cases except when VCALL_OFFSET is
31223 non-zero in Thumb-1 mode. */
31224 static bool
31225 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31226 const_tree)
31227 {
31228 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31229 if (vcall_offset && TARGET_THUMB1)
31230 return false;
31231
31232 /* Otherwise ok. */
31233 return true;
31234 }
31235
31236 /* Generate RTL for a conditional branch with rtx comparison CODE in
31237 mode CC_MODE. The destination of the unlikely conditional branch
31238 is LABEL_REF. */
31239
31240 void
31241 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31242 rtx label_ref)
31243 {
31244 rtx x;
31245 x = gen_rtx_fmt_ee (code, VOIDmode,
31246 gen_rtx_REG (cc_mode, CC_REGNUM),
31247 const0_rtx);
31248
31249 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31250 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31251 pc_rtx);
31252 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31253 }
31254
31255 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31256
31257 For pure-code sections there is no letter code for this attribute, so
31258 output all the section flags numerically when this is needed. */
31259
31260 static bool
31261 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31262 {
31263
31264 if (flags & SECTION_ARM_PURECODE)
31265 {
31266 *num = 0x20000000;
31267
31268 if (!(flags & SECTION_DEBUG))
31269 *num |= 0x2;
31270 if (flags & SECTION_EXCLUDE)
31271 *num |= 0x80000000;
31272 if (flags & SECTION_WRITE)
31273 *num |= 0x1;
31274 if (flags & SECTION_CODE)
31275 *num |= 0x4;
31276 if (flags & SECTION_MERGE)
31277 *num |= 0x10;
31278 if (flags & SECTION_STRINGS)
31279 *num |= 0x20;
31280 if (flags & SECTION_TLS)
31281 *num |= 0x400;
31282 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31283 *num |= 0x200;
31284
31285 return true;
31286 }
31287
31288 return false;
31289 }
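/* Worked example (editor's note, not part of the original sources): a
   pure-code section that is allocated, executable, not writable and not a
   debug section gets *NUM == 0x20000000 | 0x2 | 0x4 == 0x20000006, i.e.
   SHF_ARM_PURECODE plus the numeric forms of the SHF_ALLOC and
   SHF_EXECINSTR flags.  */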
31290
31291 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31292
31293 If pure-code is passed as an option, make sure all functions are in
31294 sections that have the SHF_ARM_PURECODE attribute. */
31295
31296 static section *
31297 arm_function_section (tree decl, enum node_frequency freq,
31298 bool startup, bool exit)
31299 {
31300 const char * section_name;
31301 section * sec;
31302
31303 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31304 return default_function_section (decl, freq, startup, exit);
31305
31306 if (!target_pure_code)
31307 return default_function_section (decl, freq, startup, exit);
31308
31309
31310 section_name = DECL_SECTION_NAME (decl);
31311
31312 /* If a function is not in a named section then it falls under the 'default'
31313 text section, also known as '.text'. We can preserve previous behavior as
31314 the default text section already has the SHF_ARM_PURECODE section
31315 attribute. */
31316 if (!section_name)
31317 {
31318 section *default_sec = default_function_section (decl, freq, startup,
31319 exit);
31320
31321 /* If default_sec is not null, then it must be a special section like for
31322 example .text.startup. We set the pure-code attribute and return the
31323 same section to preserve existing behavior. */
31324 if (default_sec)
31325 default_sec->common.flags |= SECTION_ARM_PURECODE;
31326 return default_sec;
31327 }
31328
31329 /* Otherwise look whether a section has already been created with
31330 'section_name'. */
31331 sec = get_named_section (decl, section_name, 0);
31332 if (!sec)
31333 /* If that is not the case passing NULL as the section's name to
31334 'get_named_section' will create a section with the declaration's
31335 section name. */
31336 sec = get_named_section (decl, NULL, 0);
31337
31338 /* Set the SHF_ARM_PURECODE attribute. */
31339 sec->common.flags |= SECTION_ARM_PURECODE;
31340
31341 return sec;
31342 }
31343
31344 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31345
31346 If DECL is a function declaration and pure-code is passed as an option
31347 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31348 section's name and RELOC indicates whether the declaration's initializer may
31349 contain runtime relocations. */
31350
31351 static unsigned int
31352 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31353 {
31354 unsigned int flags = default_section_type_flags (decl, name, reloc);
31355
31356 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31357 flags |= SECTION_ARM_PURECODE;
31358
31359 return flags;
31360 }
31361
31362 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31363
31364 static void
31365 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31366 rtx op0, rtx op1,
31367 rtx *quot_p, rtx *rem_p)
31368 {
31369 if (mode == SImode)
31370 gcc_assert (!TARGET_IDIV);
31371
31372 scalar_int_mode libval_mode
31373 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31374
31375 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31376 libval_mode,
31377 op0, GET_MODE (op0),
31378 op1, GET_MODE (op1));
31379
31380 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31381 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31382 GET_MODE_SIZE (mode));
31383
31384 gcc_assert (quotient);
31385 gcc_assert (remainder);
31386
31387 *quot_p = quotient;
31388 *rem_p = remainder;
31389 }
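/* Worked example (editor's note, not part of the original sources): for
   MODE == SImode this emits a call to __aeabi_idivmod or __aeabi_uidivmod,
   LIBVAL_MODE is DImode, and the combined 64-bit result is split with
   simplify_gen_subreg: the quotient is the subreg at byte offset 0 and the
   remainder the subreg at byte offset 4 (GET_MODE_SIZE (SImode)).  */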
31390
31391 /* This function checks for the availability of the coprocessor builtin passed
31392 in BUILTIN for the current target.  Returns true if it is available and
31393 false otherwise.  If a BUILTIN is passed for which this function has not
31394 been implemented, it will cause an internal compiler error. */
31395
31396 bool
31397 arm_coproc_builtin_available (enum unspecv builtin)
31398 {
31399 /* None of these builtins are available in Thumb mode if the target only
31400 supports Thumb-1. */
31401 if (TARGET_THUMB1)
31402 return false;
31403
31404 switch (builtin)
31405 {
31406 case VUNSPEC_CDP:
31407 case VUNSPEC_LDC:
31408 case VUNSPEC_LDCL:
31409 case VUNSPEC_STC:
31410 case VUNSPEC_STCL:
31411 case VUNSPEC_MCR:
31412 case VUNSPEC_MRC:
31413 if (arm_arch4)
31414 return true;
31415 break;
31416 case VUNSPEC_CDP2:
31417 case VUNSPEC_LDC2:
31418 case VUNSPEC_LDC2L:
31419 case VUNSPEC_STC2:
31420 case VUNSPEC_STC2L:
31421 case VUNSPEC_MCR2:
31422 case VUNSPEC_MRC2:
31423 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31424 ARMv8-{A,M}. */
31425 if (arm_arch5t)
31426 return true;
31427 break;
31428 case VUNSPEC_MCRR:
31429 case VUNSPEC_MRRC:
31430 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31431 ARMv8-{A,M}. */
31432 if (arm_arch6 || arm_arch5te)
31433 return true;
31434 break;
31435 case VUNSPEC_MCRR2:
31436 case VUNSPEC_MRRC2:
31437 if (arm_arch6)
31438 return true;
31439 break;
31440 default:
31441 gcc_unreachable ();
31442 }
31443 return false;
31444 }
31445
31446 /* This function returns true if OP is a valid memory operand for the ldc and
31447 stc coprocessor instructions and false otherwise. */
31448
31449 bool
31450 arm_coproc_ldc_stc_legitimate_address (rtx op)
31451 {
31452 HOST_WIDE_INT range;
31453 /* Has to be a memory operand. */
31454 if (!MEM_P (op))
31455 return false;
31456
31457 op = XEXP (op, 0);
31458
31459 /* We accept registers. */
31460 if (REG_P (op))
31461 return true;
31462
31463 switch (GET_CODE (op))
31464 {
31465 case PLUS:
31466 {
31467 /* Or registers with an offset. */
31468 if (!REG_P (XEXP (op, 0)))
31469 return false;
31470
31471 op = XEXP (op, 1);
31472
31473 /* The offset must be an immediate though. */
31474 if (!CONST_INT_P (op))
31475 return false;
31476
31477 range = INTVAL (op);
31478
31479 /* Within the range of [-1020,1020]. */
31480 if (!IN_RANGE (range, -1020, 1020))
31481 return false;
31482
31483 /* And a multiple of 4. */
31484 return (range % 4) == 0;
31485 }
31486 case PRE_INC:
31487 case POST_INC:
31488 case PRE_DEC:
31489 case POST_DEC:
31490 return REG_P (XEXP (op, 0));
31491 default:
31492 gcc_unreachable ();
31493 }
31494 return false;
31495 }
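/* Illustrative examples (editor's note, not part of the original sources):
   (mem (reg r2)), (mem (plus (reg r2) (const_int 1016))) and
   (mem (post_inc (reg r2))) are all accepted above, whereas an offset of
   1022 is rejected for not being a multiple of four and an offset of 1024
   for being outside the [-1020, 1020] range.  */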
31496
31497 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31498
31499 In VFPv1, VFP registers could only be accessed in the mode they were
31500 set, so subregs would be invalid there. However, we don't support
31501 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31502
31503 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31504 VFP registers in little-endian order. We can't describe that accurately to
31505 GCC, so avoid taking subregs of such values.
31506
31507 The only exception is going from a 128-bit to a 64-bit type. In that
31508 case the data layout happens to be consistent for big-endian, so we
31509 explicitly allow that case. */
31510
31511 static bool
31512 arm_can_change_mode_class (machine_mode from, machine_mode to,
31513 reg_class_t rclass)
31514 {
31515 if (TARGET_BIG_END
31516 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31517 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31518 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31519 && reg_classes_intersect_p (VFP_REGS, rclass))
31520 return false;
31521 return true;
31522 }
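/* Illustrative example (editor's note, not part of the original sources): on
   a big-endian target, taking an SImode subreg of a DFmode value that may
   live in a VFP register is rejected here, because the two words are stored
   in the opposite order from what GCC's subreg model expects, while the
   128-bit to 64-bit case (e.g. V2DImode to DImode) is explicitly allowed.  */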
31523
31524 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31525 strcpy from constants will be faster. */
31526
31527 static HOST_WIDE_INT
31528 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31529 {
31530 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31531 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31532 return MAX (align, BITS_PER_WORD * factor);
31533 return align;
31534 }
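/* Worked example (editor's note, not part of the original sources): when not
   optimising for size, a string constant gets at least BITS_PER_WORD (32-bit)
   alignment, doubled to 64 bits when tuning for XScale in ARM mode, so that
   word accesses used to copy from the constant stay aligned.  */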
31535
31536 #if CHECKING_P
31537 namespace selftest {
31538
31539 /* Scan the static data tables generated by parsecpu.awk looking for
31540 potential issues with the data. We primarily check for
31541 inconsistencies in the option extensions at present (extensions
31542 that duplicate others but aren't marked as aliases). Furthermore,
31543 for correct canonicalization later options must never be a subset
31544 of an earlier option. Any extension should also only specify other
31545 feature bits and never an architecture bit. The architecture is inferred
31546 from the declaration of the extension. */
31547 static void
31548 arm_test_cpu_arch_data (void)
31549 {
31550 const arch_option *arch;
31551 const cpu_option *cpu;
31552 auto_sbitmap target_isa (isa_num_bits);
31553 auto_sbitmap isa1 (isa_num_bits);
31554 auto_sbitmap isa2 (isa_num_bits);
31555
31556 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31557 {
31558 const cpu_arch_extension *ext1, *ext2;
31559
31560 if (arch->common.extensions == NULL)
31561 continue;
31562
31563 arm_initialize_isa (target_isa, arch->common.isa_bits);
31564
31565 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31566 {
31567 if (ext1->alias)
31568 continue;
31569
31570 arm_initialize_isa (isa1, ext1->isa_bits);
31571 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31572 {
31573 if (ext2->alias || ext1->remove != ext2->remove)
31574 continue;
31575
31576 arm_initialize_isa (isa2, ext2->isa_bits);
31577 /* If the option is a subset of the parent option, it doesn't
31578 add anything and so isn't useful. */
31579 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31580
31581 /* If the extension specifies any architectural bits then
31582 disallow it. Extensions should only specify feature bits. */
31583 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31584 }
31585 }
31586 }
31587
31588 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31589 {
31590 const cpu_arch_extension *ext1, *ext2;
31591
31592 if (cpu->common.extensions == NULL)
31593 continue;
31594
31595 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31596
31597 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31598 {
31599 if (ext1->alias)
31600 continue;
31601
31602 arm_initialize_isa (isa1, ext1->isa_bits);
31603 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31604 {
31605 if (ext2->alias || ext1->remove != ext2->remove)
31606 continue;
31607
31608 arm_initialize_isa (isa2, ext2->isa_bits);
31609 /* If the option is a subset of the parent option, it doesn't
31610 add anything and so isn't useful. */
31611 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31612
31613 /* If the extension specifies any architectural bits then
31614 disallow it. Extensions should only specify feature bits. */
31615 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31616 }
31617 }
31618 }
31619 }
31620
31621 /* Scan the static data tables generated by parsecpu.awk looking for
31622 potential issues with the data.  Here we check the consistency of the
31623 FPU feature bits; in particular we check that ISA_ALL_FPU_INTERNAL does not
31624 contain a feature bit that is not set by any FPU. */
31625 static void
31626 arm_test_fpu_data (void)
31627 {
31628 auto_sbitmap isa_all_fpubits (isa_num_bits);
31629 auto_sbitmap fpubits (isa_num_bits);
31630 auto_sbitmap tmpset (isa_num_bits);
31631
31632 static const enum isa_feature fpu_bitlist[]
31633 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31634 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31635
31636 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31637 {
31638 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31639 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31640 bitmap_clear (isa_all_fpubits);
31641 bitmap_copy (isa_all_fpubits, tmpset);
31642 }
31643
31644 if (!bitmap_empty_p (isa_all_fpubits))
31645 {
31646 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31647 " group that are not defined by any FPU.\n"
31648 " Check your arm-cpus.in.\n");
31649 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31650 }
31651 }
31652
31653 static void
31654 arm_run_selftests (void)
31655 {
31656 arm_test_cpu_arch_data ();
31657 arm_test_fpu_data ();
31658 }
31659 } /* Namespace selftest. */
31660
31661 #undef TARGET_RUN_TARGET_SELFTESTS
31662 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31663 #endif /* CHECKING_P */
31664
31665 struct gcc_target targetm = TARGET_INITIALIZER;
31666
31667 #include "gt-arm.h"