1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_insn_cost (rtx_insn *, bool);
185 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
186 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
187 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
188 static void emit_constant_insn (rtx cond, rtx pattern);
189 static rtx_insn *emit_set_insn (rtx, rtx);
190 static rtx emit_multi_reg_push (unsigned long, unsigned long);
191 static int arm_arg_partial_bytes (cumulative_args_t,
192 const function_arg_info &);
193 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
194 static void arm_function_arg_advance (cumulative_args_t,
195 const function_arg_info &);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t,
216 const function_arg_info &, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 const function_arg_info &);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (const function_arg_info &);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
262 static int arm_first_cycle_multipass_dfa_lookahead (void);
263 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
264 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
265 static bool arm_output_addr_const_extra (FILE *, rtx);
266 static bool arm_allocate_stack_slots_for_args (void);
267 static bool arm_warn_func_return (tree);
268 static tree arm_promoted_type (const_tree t);
269 static bool arm_scalar_mode_supported_p (scalar_mode);
270 static bool arm_frame_pointer_required (void);
271 static bool arm_can_eliminate (const int, const int);
272 static void arm_asm_trampoline_template (FILE *);
273 static void arm_trampoline_init (rtx, tree, rtx);
274 static rtx arm_trampoline_adjust_address (rtx);
275 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
276 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool arm_array_mode_supported_p (machine_mode,
280 unsigned HOST_WIDE_INT);
281 static machine_mode arm_preferred_simd_mode (scalar_mode);
282 static bool arm_class_likely_spilled_p (reg_class_t);
283 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
284 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
285 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
286 const_tree type,
287 int misalignment,
288 bool is_packed);
289 static void arm_conditional_register_usage (void);
290 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
291 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
292 static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
293 static int arm_default_branch_cost (bool, bool);
294 static int arm_cortex_a5_branch_cost (bool, bool);
295 static int arm_cortex_m_branch_cost (bool, bool);
296 static int arm_cortex_m7_branch_cost (bool, bool);
297
298 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
299 const vec_perm_indices &);
300
301 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
302
303 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
304 tree vectype,
305 int misalign ATTRIBUTE_UNUSED);
306 static unsigned arm_add_stmt_cost (void *data, int count,
307 enum vect_cost_for_stmt kind,
308 struct _stmt_vec_info *stmt_info,
309 int misalign,
310 enum vect_cost_model_location where);
311
312 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
313 bool op0_preserve_value);
314 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
315
316 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
317 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
318 const_tree);
319 static section *arm_function_section (tree, enum node_frequency, bool, bool);
320 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
321 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
322 int reloc);
323 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
324 static opt_scalar_float_mode arm_floatn_mode (int, bool);
325 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
326 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
327 static bool arm_modes_tieable_p (machine_mode, machine_mode);
328 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
329 \f
330 /* Table of machine attributes. */
331 static const struct attribute_spec arm_attribute_table[] =
332 {
333 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
334 affects_type_identity, handler, exclude } */
335 /* Function calls made to this symbol must be done indirectly, because
336 it may lie outside of the 26 bit addressing range of a normal function
337 call. */
338 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
339 /* Whereas these functions are always known to reside within the 26 bit
340 addressing range. */
341 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
342 /* Specify the procedure call conventions for a function. */
343 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
344 NULL },
345 /* Interrupt Service Routines have special prologue and epilogue requirements. */
346 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
347 NULL },
348 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
349 NULL },
350 { "naked", 0, 0, true, false, false, false,
351 arm_handle_fndecl_attribute, NULL },
352 #ifdef ARM_PE
353 /* ARM/PE has three new attributes:
354 interfacearm - ?
355 dllexport - for exporting a function/variable that will live in a dll
356 dllimport - for importing a function/variable from a dll
357
358 Microsoft allows multiple declspecs in one __declspec, separating
359 them with spaces. We do NOT support this. Instead, use __declspec
360 multiple times.
361 */
362 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
363 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
364 { "interfacearm", 0, 0, true, false, false, false,
365 arm_handle_fndecl_attribute, NULL },
366 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
367 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
368 NULL },
369 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
370 NULL },
371 { "notshared", 0, 0, false, true, false, false,
372 arm_handle_notshared_attribute, NULL },
373 #endif
374 /* ARMv8-M Security Extensions support. */
375 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
376 arm_handle_cmse_nonsecure_entry, NULL },
377 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
378 arm_handle_cmse_nonsecure_call, NULL },
379 { NULL, 0, 0, false, false, false, false, NULL, NULL }
380 };
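/* Illustrative only (these declarations are not part of GCC itself): in user
   code the attributes registered above are written as, e.g.,
     extern void far_func (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
   The table merely registers the attribute names and their handlers with
   GCC's generic attribute machinery.  */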
381 \f
382 /* Initialize the GCC target structure. */
383 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
384 #undef TARGET_MERGE_DECL_ATTRIBUTES
385 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
386 #endif
387
388 #undef TARGET_LEGITIMIZE_ADDRESS
389 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
390
391 #undef TARGET_ATTRIBUTE_TABLE
392 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
393
394 #undef TARGET_INSERT_ATTRIBUTES
395 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
396
397 #undef TARGET_ASM_FILE_START
398 #define TARGET_ASM_FILE_START arm_file_start
399 #undef TARGET_ASM_FILE_END
400 #define TARGET_ASM_FILE_END arm_file_end
401
402 #undef TARGET_ASM_ALIGNED_SI_OP
403 #define TARGET_ASM_ALIGNED_SI_OP NULL
404 #undef TARGET_ASM_INTEGER
405 #define TARGET_ASM_INTEGER arm_assemble_integer
406
407 #undef TARGET_PRINT_OPERAND
408 #define TARGET_PRINT_OPERAND arm_print_operand
409 #undef TARGET_PRINT_OPERAND_ADDRESS
410 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
411 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
412 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
413
414 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
415 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
416
417 #undef TARGET_ASM_FUNCTION_PROLOGUE
418 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
419
420 #undef TARGET_ASM_FUNCTION_EPILOGUE
421 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
422
423 #undef TARGET_CAN_INLINE_P
424 #define TARGET_CAN_INLINE_P arm_can_inline_p
425
426 #undef TARGET_RELAYOUT_FUNCTION
427 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
428
429 #undef TARGET_OPTION_OVERRIDE
430 #define TARGET_OPTION_OVERRIDE arm_option_override
431
432 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
433 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
434
435 #undef TARGET_OPTION_SAVE
436 #define TARGET_OPTION_SAVE arm_option_save
437
438 #undef TARGET_OPTION_RESTORE
439 #define TARGET_OPTION_RESTORE arm_option_restore
440
441 #undef TARGET_OPTION_PRINT
442 #define TARGET_OPTION_PRINT arm_option_print
443
444 #undef TARGET_COMP_TYPE_ATTRIBUTES
445 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
446
447 #undef TARGET_SCHED_CAN_SPECULATE_INSN
448 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
449
450 #undef TARGET_SCHED_MACRO_FUSION_P
451 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
452
453 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
454 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
455
456 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
457 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
458
459 #undef TARGET_SCHED_ADJUST_COST
460 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
461
462 #undef TARGET_SET_CURRENT_FUNCTION
463 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
464
465 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
466 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
467
468 #undef TARGET_SCHED_REORDER
469 #define TARGET_SCHED_REORDER arm_sched_reorder
470
471 #undef TARGET_REGISTER_MOVE_COST
472 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
473
474 #undef TARGET_MEMORY_MOVE_COST
475 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
476
477 #undef TARGET_ENCODE_SECTION_INFO
478 #ifdef ARM_PE
479 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
480 #else
481 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
482 #endif
483
484 #undef TARGET_STRIP_NAME_ENCODING
485 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
486
487 #undef TARGET_ASM_INTERNAL_LABEL
488 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
489
490 #undef TARGET_FLOATN_MODE
491 #define TARGET_FLOATN_MODE arm_floatn_mode
492
493 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
494 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
495
496 #undef TARGET_FUNCTION_VALUE
497 #define TARGET_FUNCTION_VALUE arm_function_value
498
499 #undef TARGET_LIBCALL_VALUE
500 #define TARGET_LIBCALL_VALUE arm_libcall_value
501
502 #undef TARGET_FUNCTION_VALUE_REGNO_P
503 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
504
505 #undef TARGET_ASM_OUTPUT_MI_THUNK
506 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
507 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
508 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
509
510 #undef TARGET_RTX_COSTS
511 #define TARGET_RTX_COSTS arm_rtx_costs
512 #undef TARGET_ADDRESS_COST
513 #define TARGET_ADDRESS_COST arm_address_cost
514 #undef TARGET_INSN_COST
515 #define TARGET_INSN_COST arm_insn_cost
516
517 #undef TARGET_SHIFT_TRUNCATION_MASK
518 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
519 #undef TARGET_VECTOR_MODE_SUPPORTED_P
520 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
521 #undef TARGET_ARRAY_MODE_SUPPORTED_P
522 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
523 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
524 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
525 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
526 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
527 arm_autovectorize_vector_sizes
528
529 #undef TARGET_MACHINE_DEPENDENT_REORG
530 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
531
532 #undef TARGET_INIT_BUILTINS
533 #define TARGET_INIT_BUILTINS arm_init_builtins
534 #undef TARGET_EXPAND_BUILTIN
535 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
536 #undef TARGET_BUILTIN_DECL
537 #define TARGET_BUILTIN_DECL arm_builtin_decl
538
539 #undef TARGET_INIT_LIBFUNCS
540 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
541
542 #undef TARGET_PROMOTE_FUNCTION_MODE
543 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
544 #undef TARGET_PROMOTE_PROTOTYPES
545 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
546 #undef TARGET_PASS_BY_REFERENCE
547 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
548 #undef TARGET_ARG_PARTIAL_BYTES
549 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
550 #undef TARGET_FUNCTION_ARG
551 #define TARGET_FUNCTION_ARG arm_function_arg
552 #undef TARGET_FUNCTION_ARG_ADVANCE
553 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
554 #undef TARGET_FUNCTION_ARG_PADDING
555 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
556 #undef TARGET_FUNCTION_ARG_BOUNDARY
557 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
558
559 #undef TARGET_SETUP_INCOMING_VARARGS
560 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
561
562 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
563 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
564
565 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
566 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
567 #undef TARGET_TRAMPOLINE_INIT
568 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
569 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
570 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
571
572 #undef TARGET_WARN_FUNC_RETURN
573 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
574
575 #undef TARGET_DEFAULT_SHORT_ENUMS
576 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
577
578 #undef TARGET_ALIGN_ANON_BITFIELD
579 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
580
581 #undef TARGET_NARROW_VOLATILE_BITFIELD
582 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
583
584 #undef TARGET_CXX_GUARD_TYPE
585 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
586
587 #undef TARGET_CXX_GUARD_MASK_BIT
588 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
589
590 #undef TARGET_CXX_GET_COOKIE_SIZE
591 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
592
593 #undef TARGET_CXX_COOKIE_HAS_SIZE
594 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
595
596 #undef TARGET_CXX_CDTOR_RETURNS_THIS
597 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
598
599 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
600 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
601
602 #undef TARGET_CXX_USE_AEABI_ATEXIT
603 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
604
605 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
606 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
607 arm_cxx_determine_class_data_visibility
608
609 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
610 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
611
612 #undef TARGET_RETURN_IN_MSB
613 #define TARGET_RETURN_IN_MSB arm_return_in_msb
614
615 #undef TARGET_RETURN_IN_MEMORY
616 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
617
618 #undef TARGET_MUST_PASS_IN_STACK
619 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
620
621 #if ARM_UNWIND_INFO
622 #undef TARGET_ASM_UNWIND_EMIT
623 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
624
625 /* EABI unwinding tables use a different format for the typeinfo tables. */
626 #undef TARGET_ASM_TTYPE
627 #define TARGET_ASM_TTYPE arm_output_ttype
628
629 #undef TARGET_ARM_EABI_UNWINDER
630 #define TARGET_ARM_EABI_UNWINDER true
631
632 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
633 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
634
635 #endif /* ARM_UNWIND_INFO */
636
637 #undef TARGET_ASM_INIT_SECTIONS
638 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
639
640 #undef TARGET_DWARF_REGISTER_SPAN
641 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
642
643 #undef TARGET_CANNOT_COPY_INSN_P
644 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
645
646 #ifdef HAVE_AS_TLS
647 #undef TARGET_HAVE_TLS
648 #define TARGET_HAVE_TLS true
649 #endif
650
651 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
652 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
653
654 #undef TARGET_LEGITIMATE_CONSTANT_P
655 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
656
657 #undef TARGET_CANNOT_FORCE_CONST_MEM
658 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
659
660 #undef TARGET_MAX_ANCHOR_OFFSET
661 #define TARGET_MAX_ANCHOR_OFFSET 4095
662
663 /* The minimum is set such that the total size of the block
664 for a particular anchor is -4088 + 1 + 4095 bytes, which is
665 divisible by eight, ensuring natural spacing of anchors. */
666 #undef TARGET_MIN_ANCHOR_OFFSET
667 #define TARGET_MIN_ANCHOR_OFFSET -4088
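/* Spelling out the arithmetic in the comment above: anchored offsets run
   from -4088 to +4095 inclusive, so one block spans 4088 + 1 + 4095
   = 8184 bytes, and 8184 = 8 * 1023 is indeed divisible by eight.  */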
668
669 #undef TARGET_SCHED_ISSUE_RATE
670 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
671
672 #undef TARGET_SCHED_VARIABLE_ISSUE
673 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
674
675 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
676 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
677 arm_first_cycle_multipass_dfa_lookahead
678
679 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
680 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
681 arm_first_cycle_multipass_dfa_lookahead_guard
682
683 #undef TARGET_MANGLE_TYPE
684 #define TARGET_MANGLE_TYPE arm_mangle_type
685
686 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
687 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
688
689 #undef TARGET_BUILD_BUILTIN_VA_LIST
690 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
691 #undef TARGET_EXPAND_BUILTIN_VA_START
692 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
693 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
694 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
695
696 #ifdef HAVE_AS_TLS
697 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
698 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
699 #endif
700
701 #undef TARGET_LEGITIMATE_ADDRESS_P
702 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
703
704 #undef TARGET_PREFERRED_RELOAD_CLASS
705 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
706
707 #undef TARGET_PROMOTED_TYPE
708 #define TARGET_PROMOTED_TYPE arm_promoted_type
709
710 #undef TARGET_SCALAR_MODE_SUPPORTED_P
711 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
712
713 #undef TARGET_COMPUTE_FRAME_LAYOUT
714 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
715
716 #undef TARGET_FRAME_POINTER_REQUIRED
717 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
718
719 #undef TARGET_CAN_ELIMINATE
720 #define TARGET_CAN_ELIMINATE arm_can_eliminate
721
722 #undef TARGET_CONDITIONAL_REGISTER_USAGE
723 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
724
725 #undef TARGET_CLASS_LIKELY_SPILLED_P
726 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
727
728 #undef TARGET_VECTORIZE_BUILTINS
729 #define TARGET_VECTORIZE_BUILTINS
730
731 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
732 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
733 arm_builtin_vectorized_function
734
735 #undef TARGET_VECTOR_ALIGNMENT
736 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
737
738 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
739 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
740 arm_vector_alignment_reachable
741
742 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
743 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
744 arm_builtin_support_vector_misalignment
745
746 #undef TARGET_PREFERRED_RENAME_CLASS
747 #define TARGET_PREFERRED_RENAME_CLASS \
748 arm_preferred_rename_class
749
750 #undef TARGET_VECTORIZE_VEC_PERM_CONST
751 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
752
753 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
754 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
755 arm_builtin_vectorization_cost
756 #undef TARGET_VECTORIZE_ADD_STMT_COST
757 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
758
759 #undef TARGET_CANONICALIZE_COMPARISON
760 #define TARGET_CANONICALIZE_COMPARISON \
761 arm_canonicalize_comparison
762
763 #undef TARGET_ASAN_SHADOW_OFFSET
764 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
765
766 #undef MAX_INSN_PER_IT_BLOCK
767 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
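/* arm_restrict_it corresponds to -mrestrict-it: with ARMv8-A style
   restricted IT blocks only a single conditional instruction is assumed
   to be allowed per IT, hence 1 rather than the usual Thumb-2 limit of 4.  */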
768
769 #undef TARGET_CAN_USE_DOLOOP_P
770 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
771
772 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
773 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
774
775 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
776 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
777
778 #undef TARGET_SCHED_FUSION_PRIORITY
779 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
780
781 #undef TARGET_ASM_FUNCTION_SECTION
782 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
783
784 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
785 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
786
787 #undef TARGET_SECTION_TYPE_FLAGS
788 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
789
790 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
791 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
792
793 #undef TARGET_C_EXCESS_PRECISION
794 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
795
796 /* Although the architecture reserves bits 0 and 1, only the former is
797 used for ARM/Thumb ISA selection in v7 and earlier versions. */
798 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
799 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
800
801 #undef TARGET_FIXED_CONDITION_CODE_REGS
802 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
803
804 #undef TARGET_HARD_REGNO_NREGS
805 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
806 #undef TARGET_HARD_REGNO_MODE_OK
807 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
808
809 #undef TARGET_MODES_TIEABLE_P
810 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
811
812 #undef TARGET_CAN_CHANGE_MODE_CLASS
813 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
814
815 #undef TARGET_CONSTANT_ALIGNMENT
816 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
817 \f
818 /* Obstack for minipool constant handling. */
819 static struct obstack minipool_obstack;
820 static char * minipool_startobj;
821
822 /* The maximum number of insns skipped which
823 will be conditionalised if possible. */
824 static int max_insns_skipped = 5;
825
826 extern FILE * asm_out_file;
827
828 /* True if we are currently building a constant table. */
829 int making_const_table;
830
831 /* The processor for which instructions should be scheduled. */
832 enum processor_type arm_tune = TARGET_CPU_arm_none;
833
834 /* The current tuning set. */
835 const struct tune_params *current_tune;
836
837 /* Which floating point hardware to schedule for. */
838 int arm_fpu_attr;
839
840 /* Used for Thumb call_via trampolines. */
841 rtx thumb_call_via_label[14];
842 static int thumb_call_reg_needed;
843
844 /* The bits in this mask specify which instruction scheduling options should
845 be used. */
846 unsigned int tune_flags = 0;
847
848 /* The highest ARM architecture version supported by the
849 target. */
850 enum base_architecture arm_base_arch = BASE_ARCH_0;
851
852 /* Active target architecture and tuning. */
853
854 struct arm_build_target arm_active_target;
855
856 /* The following are used in the arm.md file as equivalents to bits
857 in the above two flag variables. */
858
859 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
860 int arm_arch4 = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
863 int arm_arch4t = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
866 int arm_arch5t = 0;
867
868 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
869 int arm_arch5te = 0;
870
871 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
872 int arm_arch6 = 0;
873
874 /* Nonzero if this chip supports the ARM 6K extensions. */
875 int arm_arch6k = 0;
876
877 /* Nonzero if this chip supports the ARM 6KZ extensions. */
878 int arm_arch6kz = 0;
879
880 /* Nonzero if instructions present in ARMv6-M can be used. */
881 int arm_arch6m = 0;
882
883 /* Nonzero if this chip supports the ARM 7 extensions. */
884 int arm_arch7 = 0;
885
886 /* Nonzero if this chip supports the Large Physical Address Extension. */
887 int arm_arch_lpae = 0;
888
889 /* Nonzero if instructions not present in the 'M' profile can be used. */
890 int arm_arch_notm = 0;
891
892 /* Nonzero if instructions present in ARMv7E-M can be used. */
893 int arm_arch7em = 0;
894
895 /* Nonzero if instructions present in ARMv8 can be used. */
896 int arm_arch8 = 0;
897
898 /* Nonzero if this chip supports the ARMv8.1 extensions. */
899 int arm_arch8_1 = 0;
900
901 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
902 int arm_arch8_2 = 0;
903
904 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
905 int arm_arch8_3 = 0;
906
907 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
908 int arm_arch8_4 = 0;
909
910 /* Nonzero if this chip supports the FP16 instructions extension of ARM
911 Architecture 8.2. */
912 int arm_fp16_inst = 0;
913
914 /* Nonzero if this chip can benefit from load scheduling. */
915 int arm_ld_sched = 0;
916
917 /* Nonzero if this chip is a StrongARM. */
918 int arm_tune_strongarm = 0;
919
920 /* Nonzero if this chip supports Intel Wireless MMX technology. */
921 int arm_arch_iwmmxt = 0;
922
923 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
924 int arm_arch_iwmmxt2 = 0;
925
926 /* Nonzero if this chip is an XScale. */
927 int arm_arch_xscale = 0;
928
929 /* Nonzero if tuning for XScale */
930 int arm_tune_xscale = 0;
931
932 /* Nonzero if we want to tune for stores that access the write-buffer.
933 This typically means an ARM6 or ARM7 with MMU or MPU. */
934 int arm_tune_wbuf = 0;
935
936 /* Nonzero if tuning for Cortex-A9. */
937 int arm_tune_cortex_a9 = 0;
938
939 /* Nonzero if we should define __THUMB_INTERWORK__ in the
940 preprocessor.
941 XXX This is a bit of a hack, it's intended to help work around
942 problems in GLD which doesn't understand that armv5t code is
943 interworking clean. */
944 int arm_cpp_interwork = 0;
945
946 /* Nonzero if chip supports Thumb 1. */
947 int arm_arch_thumb1;
948
949 /* Nonzero if chip supports Thumb 2. */
950 int arm_arch_thumb2;
951
952 /* Nonzero if chip supports integer division instruction. */
953 int arm_arch_arm_hwdiv;
954 int arm_arch_thumb_hwdiv;
955
956 /* Nonzero if chip disallows volatile memory access in IT block. */
957 int arm_arch_no_volatile_ce;
958
959 /* Nonzero if we shouldn't use literal pools. */
960 bool arm_disable_literal_pool = false;
961
962 /* The register number to be used for the PIC offset register. */
963 unsigned arm_pic_register = INVALID_REGNUM;
964
965 enum arm_pcs arm_pcs_default;
966
967 /* For an explanation of these variables, see final_prescan_insn below. */
968 int arm_ccfsm_state;
969 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
970 enum arm_cond_code arm_current_cc;
971
972 rtx arm_target_insn;
973 int arm_target_label;
974 /* The number of conditionally executed insns, including the current insn. */
975 int arm_condexec_count = 0;
976 /* A bitmask specifying the patterns for the IT block.
977 Zero means do not output an IT block before this insn. */
978 int arm_condexec_mask = 0;
979 /* The number of bits used in arm_condexec_mask. */
980 int arm_condexec_masklen = 0;
981
982 /* Nonzero if chip supports the ARMv8 CRC instructions. */
983 int arm_arch_crc = 0;
984
985 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
986 int arm_arch_dotprod = 0;
987
988 /* Nonzero if chip supports the ARMv8-M security extensions. */
989 int arm_arch_cmse = 0;
990
991 /* Nonzero if the core has a very small, high-latency, multiply unit. */
992 int arm_m_profile_small_mul = 0;
993
994 /* The condition codes of the ARM, and the inverse function. */
995 static const char * const arm_condition_codes[] =
996 {
997 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
998 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
999 };
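/* The "inverse function" mentioned above falls out of the ordering: the
   codes are laid out in complementary pairs (eq/ne, cs/cc, mi/pl, ...),
   so the inverse of condition code N is simply N ^ 1.  */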
1000
1001 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1002 int arm_regs_in_sequence[] =
1003 {
1004 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1005 };
1006
1007 #define ARM_LSL_NAME "lsl"
1008 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1009
1010 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1011 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1012 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1013 \f
1014 /* Initialization code. */
1015
1016 struct cpu_tune
1017 {
1018 enum processor_type scheduler;
1019 unsigned int tune_flags;
1020 const struct tune_params *tune;
1021 };
1022
1023 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1024 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1025 { \
1026 num_slots, \
1027 l1_size, \
1028 l1_line_size \
1029 }
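/* For illustration (the numbers below are made up, not a real tuning):
     ARM_PREFETCH_BENEFICIAL (4, 32768, 64)
   expands to { 4, 32768, 64 }, i.e. 4 prefetch slots, a 32K L1 cache and
   64-byte cache lines, while ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
   presumably encodes "no useful slots, cache geometry unknown".  */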
1030
1031 /* arm generic vectorizer costs. */
1032 static const
1033 struct cpu_vec_costs arm_default_vec_cost = {
1034 1, /* scalar_stmt_cost. */
1035 1, /* scalar load_cost. */
1036 1, /* scalar_store_cost. */
1037 1, /* vec_stmt_cost. */
1038 1, /* vec_to_scalar_cost. */
1039 1, /* scalar_to_vec_cost. */
1040 1, /* vec_align_load_cost. */
1041 1, /* vec_unalign_load_cost. */
1042 1, /* vec_unalign_store_cost. */
1043 1, /* vec_store_cost. */
1044 3, /* cond_taken_branch_cost. */
1045 1, /* cond_not_taken_branch_cost. */
1046 };
1047
1048 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1049 #include "aarch-cost-tables.h"
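/* A reminder when reading the tables below: COSTS_N_INSNS (N) (from rtl.h,
   where it is defined as (N) * 4) expresses the cost of N instructions in
   the units used by the RTL cost hooks, so a bare 0 entry means no extra
   cost beyond the single-instruction baseline.  */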
1050
1051
1052
1053 const struct cpu_cost_table cortexa9_extra_costs =
1054 {
1055 /* ALU */
1056 {
1057 0, /* arith. */
1058 0, /* logical. */
1059 0, /* shift. */
1060 COSTS_N_INSNS (1), /* shift_reg. */
1061 COSTS_N_INSNS (1), /* arith_shift. */
1062 COSTS_N_INSNS (2), /* arith_shift_reg. */
1063 0, /* log_shift. */
1064 COSTS_N_INSNS (1), /* log_shift_reg. */
1065 COSTS_N_INSNS (1), /* extend. */
1066 COSTS_N_INSNS (2), /* extend_arith. */
1067 COSTS_N_INSNS (1), /* bfi. */
1068 COSTS_N_INSNS (1), /* bfx. */
1069 0, /* clz. */
1070 0, /* rev. */
1071 0, /* non_exec. */
1072 true /* non_exec_costs_exec. */
1073 },
1074 {
1075 /* MULT SImode */
1076 {
1077 COSTS_N_INSNS (3), /* simple. */
1078 COSTS_N_INSNS (3), /* flag_setting. */
1079 COSTS_N_INSNS (2), /* extend. */
1080 COSTS_N_INSNS (3), /* add. */
1081 COSTS_N_INSNS (2), /* extend_add. */
1082 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1083 },
1084 /* MULT DImode */
1085 {
1086 0, /* simple (N/A). */
1087 0, /* flag_setting (N/A). */
1088 COSTS_N_INSNS (4), /* extend. */
1089 0, /* add (N/A). */
1090 COSTS_N_INSNS (4), /* extend_add. */
1091 0 /* idiv (N/A). */
1092 }
1093 },
1094 /* LD/ST */
1095 {
1096 COSTS_N_INSNS (2), /* load. */
1097 COSTS_N_INSNS (2), /* load_sign_extend. */
1098 COSTS_N_INSNS (2), /* ldrd. */
1099 COSTS_N_INSNS (2), /* ldm_1st. */
1100 1, /* ldm_regs_per_insn_1st. */
1101 2, /* ldm_regs_per_insn_subsequent. */
1102 COSTS_N_INSNS (5), /* loadf. */
1103 COSTS_N_INSNS (5), /* loadd. */
1104 COSTS_N_INSNS (1), /* load_unaligned. */
1105 COSTS_N_INSNS (2), /* store. */
1106 COSTS_N_INSNS (2), /* strd. */
1107 COSTS_N_INSNS (2), /* stm_1st. */
1108 1, /* stm_regs_per_insn_1st. */
1109 2, /* stm_regs_per_insn_subsequent. */
1110 COSTS_N_INSNS (1), /* storef. */
1111 COSTS_N_INSNS (1), /* stored. */
1112 COSTS_N_INSNS (1), /* store_unaligned. */
1113 COSTS_N_INSNS (1), /* loadv. */
1114 COSTS_N_INSNS (1) /* storev. */
1115 },
1116 {
1117 /* FP SFmode */
1118 {
1119 COSTS_N_INSNS (14), /* div. */
1120 COSTS_N_INSNS (4), /* mult. */
1121 COSTS_N_INSNS (7), /* mult_addsub. */
1122 COSTS_N_INSNS (30), /* fma. */
1123 COSTS_N_INSNS (3), /* addsub. */
1124 COSTS_N_INSNS (1), /* fpconst. */
1125 COSTS_N_INSNS (1), /* neg. */
1126 COSTS_N_INSNS (3), /* compare. */
1127 COSTS_N_INSNS (3), /* widen. */
1128 COSTS_N_INSNS (3), /* narrow. */
1129 COSTS_N_INSNS (3), /* toint. */
1130 COSTS_N_INSNS (3), /* fromint. */
1131 COSTS_N_INSNS (3) /* roundint. */
1132 },
1133 /* FP DFmode */
1134 {
1135 COSTS_N_INSNS (24), /* div. */
1136 COSTS_N_INSNS (5), /* mult. */
1137 COSTS_N_INSNS (8), /* mult_addsub. */
1138 COSTS_N_INSNS (30), /* fma. */
1139 COSTS_N_INSNS (3), /* addsub. */
1140 COSTS_N_INSNS (1), /* fpconst. */
1141 COSTS_N_INSNS (1), /* neg. */
1142 COSTS_N_INSNS (3), /* compare. */
1143 COSTS_N_INSNS (3), /* widen. */
1144 COSTS_N_INSNS (3), /* narrow. */
1145 COSTS_N_INSNS (3), /* toint. */
1146 COSTS_N_INSNS (3), /* fromint. */
1147 COSTS_N_INSNS (3) /* roundint. */
1148 }
1149 },
1150 /* Vector */
1151 {
1152 COSTS_N_INSNS (1) /* alu. */
1153 }
1154 };
1155
1156 const struct cpu_cost_table cortexa8_extra_costs =
1157 {
1158 /* ALU */
1159 {
1160 0, /* arith. */
1161 0, /* logical. */
1162 COSTS_N_INSNS (1), /* shift. */
1163 0, /* shift_reg. */
1164 COSTS_N_INSNS (1), /* arith_shift. */
1165 0, /* arith_shift_reg. */
1166 COSTS_N_INSNS (1), /* log_shift. */
1167 0, /* log_shift_reg. */
1168 0, /* extend. */
1169 0, /* extend_arith. */
1170 0, /* bfi. */
1171 0, /* bfx. */
1172 0, /* clz. */
1173 0, /* rev. */
1174 0, /* non_exec. */
1175 true /* non_exec_costs_exec. */
1176 },
1177 {
1178 /* MULT SImode */
1179 {
1180 COSTS_N_INSNS (1), /* simple. */
1181 COSTS_N_INSNS (1), /* flag_setting. */
1182 COSTS_N_INSNS (1), /* extend. */
1183 COSTS_N_INSNS (1), /* add. */
1184 COSTS_N_INSNS (1), /* extend_add. */
1185 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1186 },
1187 /* MULT DImode */
1188 {
1189 0, /* simple (N/A). */
1190 0, /* flag_setting (N/A). */
1191 COSTS_N_INSNS (2), /* extend. */
1192 0, /* add (N/A). */
1193 COSTS_N_INSNS (2), /* extend_add. */
1194 0 /* idiv (N/A). */
1195 }
1196 },
1197 /* LD/ST */
1198 {
1199 COSTS_N_INSNS (1), /* load. */
1200 COSTS_N_INSNS (1), /* load_sign_extend. */
1201 COSTS_N_INSNS (1), /* ldrd. */
1202 COSTS_N_INSNS (1), /* ldm_1st. */
1203 1, /* ldm_regs_per_insn_1st. */
1204 2, /* ldm_regs_per_insn_subsequent. */
1205 COSTS_N_INSNS (1), /* loadf. */
1206 COSTS_N_INSNS (1), /* loadd. */
1207 COSTS_N_INSNS (1), /* load_unaligned. */
1208 COSTS_N_INSNS (1), /* store. */
1209 COSTS_N_INSNS (1), /* strd. */
1210 COSTS_N_INSNS (1), /* stm_1st. */
1211 1, /* stm_regs_per_insn_1st. */
1212 2, /* stm_regs_per_insn_subsequent. */
1213 COSTS_N_INSNS (1), /* storef. */
1214 COSTS_N_INSNS (1), /* stored. */
1215 COSTS_N_INSNS (1), /* store_unaligned. */
1216 COSTS_N_INSNS (1), /* loadv. */
1217 COSTS_N_INSNS (1) /* storev. */
1218 },
1219 {
1220 /* FP SFmode */
1221 {
1222 COSTS_N_INSNS (36), /* div. */
1223 COSTS_N_INSNS (11), /* mult. */
1224 COSTS_N_INSNS (20), /* mult_addsub. */
1225 COSTS_N_INSNS (30), /* fma. */
1226 COSTS_N_INSNS (9), /* addsub. */
1227 COSTS_N_INSNS (3), /* fpconst. */
1228 COSTS_N_INSNS (3), /* neg. */
1229 COSTS_N_INSNS (6), /* compare. */
1230 COSTS_N_INSNS (4), /* widen. */
1231 COSTS_N_INSNS (4), /* narrow. */
1232 COSTS_N_INSNS (8), /* toint. */
1233 COSTS_N_INSNS (8), /* fromint. */
1234 COSTS_N_INSNS (8) /* roundint. */
1235 },
1236 /* FP DFmode */
1237 {
1238 COSTS_N_INSNS (64), /* div. */
1239 COSTS_N_INSNS (16), /* mult. */
1240 COSTS_N_INSNS (25), /* mult_addsub. */
1241 COSTS_N_INSNS (30), /* fma. */
1242 COSTS_N_INSNS (9), /* addsub. */
1243 COSTS_N_INSNS (3), /* fpconst. */
1244 COSTS_N_INSNS (3), /* neg. */
1245 COSTS_N_INSNS (6), /* compare. */
1246 COSTS_N_INSNS (6), /* widen. */
1247 COSTS_N_INSNS (6), /* narrow. */
1248 COSTS_N_INSNS (8), /* toint. */
1249 COSTS_N_INSNS (8), /* fromint. */
1250 COSTS_N_INSNS (8) /* roundint. */
1251 }
1252 },
1253 /* Vector */
1254 {
1255 COSTS_N_INSNS (1) /* alu. */
1256 }
1257 };
1258
1259 const struct cpu_cost_table cortexa5_extra_costs =
1260 {
1261 /* ALU */
1262 {
1263 0, /* arith. */
1264 0, /* logical. */
1265 COSTS_N_INSNS (1), /* shift. */
1266 COSTS_N_INSNS (1), /* shift_reg. */
1267 COSTS_N_INSNS (1), /* arith_shift. */
1268 COSTS_N_INSNS (1), /* arith_shift_reg. */
1269 COSTS_N_INSNS (1), /* log_shift. */
1270 COSTS_N_INSNS (1), /* log_shift_reg. */
1271 COSTS_N_INSNS (1), /* extend. */
1272 COSTS_N_INSNS (1), /* extend_arith. */
1273 COSTS_N_INSNS (1), /* bfi. */
1274 COSTS_N_INSNS (1), /* bfx. */
1275 COSTS_N_INSNS (1), /* clz. */
1276 COSTS_N_INSNS (1), /* rev. */
1277 0, /* non_exec. */
1278 true /* non_exec_costs_exec. */
1279 },
1280
1281 {
1282 /* MULT SImode */
1283 {
1284 0, /* simple. */
1285 COSTS_N_INSNS (1), /* flag_setting. */
1286 COSTS_N_INSNS (1), /* extend. */
1287 COSTS_N_INSNS (1), /* add. */
1288 COSTS_N_INSNS (1), /* extend_add. */
1289 COSTS_N_INSNS (7) /* idiv. */
1290 },
1291 /* MULT DImode */
1292 {
1293 0, /* simple (N/A). */
1294 0, /* flag_setting (N/A). */
1295 COSTS_N_INSNS (1), /* extend. */
1296 0, /* add. */
1297 COSTS_N_INSNS (2), /* extend_add. */
1298 0 /* idiv (N/A). */
1299 }
1300 },
1301 /* LD/ST */
1302 {
1303 COSTS_N_INSNS (1), /* load. */
1304 COSTS_N_INSNS (1), /* load_sign_extend. */
1305 COSTS_N_INSNS (6), /* ldrd. */
1306 COSTS_N_INSNS (1), /* ldm_1st. */
1307 1, /* ldm_regs_per_insn_1st. */
1308 2, /* ldm_regs_per_insn_subsequent. */
1309 COSTS_N_INSNS (2), /* loadf. */
1310 COSTS_N_INSNS (4), /* loadd. */
1311 COSTS_N_INSNS (1), /* load_unaligned. */
1312 COSTS_N_INSNS (1), /* store. */
1313 COSTS_N_INSNS (3), /* strd. */
1314 COSTS_N_INSNS (1), /* stm_1st. */
1315 1, /* stm_regs_per_insn_1st. */
1316 2, /* stm_regs_per_insn_subsequent. */
1317 COSTS_N_INSNS (2), /* storef. */
1318 COSTS_N_INSNS (2), /* stored. */
1319 COSTS_N_INSNS (1), /* store_unaligned. */
1320 COSTS_N_INSNS (1), /* loadv. */
1321 COSTS_N_INSNS (1) /* storev. */
1322 },
1323 {
1324 /* FP SFmode */
1325 {
1326 COSTS_N_INSNS (15), /* div. */
1327 COSTS_N_INSNS (3), /* mult. */
1328 COSTS_N_INSNS (7), /* mult_addsub. */
1329 COSTS_N_INSNS (7), /* fma. */
1330 COSTS_N_INSNS (3), /* addsub. */
1331 COSTS_N_INSNS (3), /* fpconst. */
1332 COSTS_N_INSNS (3), /* neg. */
1333 COSTS_N_INSNS (3), /* compare. */
1334 COSTS_N_INSNS (3), /* widen. */
1335 COSTS_N_INSNS (3), /* narrow. */
1336 COSTS_N_INSNS (3), /* toint. */
1337 COSTS_N_INSNS (3), /* fromint. */
1338 COSTS_N_INSNS (3) /* roundint. */
1339 },
1340 /* FP DFmode */
1341 {
1342 COSTS_N_INSNS (30), /* div. */
1343 COSTS_N_INSNS (6), /* mult. */
1344 COSTS_N_INSNS (10), /* mult_addsub. */
1345 COSTS_N_INSNS (7), /* fma. */
1346 COSTS_N_INSNS (3), /* addsub. */
1347 COSTS_N_INSNS (3), /* fpconst. */
1348 COSTS_N_INSNS (3), /* neg. */
1349 COSTS_N_INSNS (3), /* compare. */
1350 COSTS_N_INSNS (3), /* widen. */
1351 COSTS_N_INSNS (3), /* narrow. */
1352 COSTS_N_INSNS (3), /* toint. */
1353 COSTS_N_INSNS (3), /* fromint. */
1354 COSTS_N_INSNS (3) /* roundint. */
1355 }
1356 },
1357 /* Vector */
1358 {
1359 COSTS_N_INSNS (1) /* alu. */
1360 }
1361 };
1362
1363
1364 const struct cpu_cost_table cortexa7_extra_costs =
1365 {
1366 /* ALU */
1367 {
1368 0, /* arith. */
1369 0, /* logical. */
1370 COSTS_N_INSNS (1), /* shift. */
1371 COSTS_N_INSNS (1), /* shift_reg. */
1372 COSTS_N_INSNS (1), /* arith_shift. */
1373 COSTS_N_INSNS (1), /* arith_shift_reg. */
1374 COSTS_N_INSNS (1), /* log_shift. */
1375 COSTS_N_INSNS (1), /* log_shift_reg. */
1376 COSTS_N_INSNS (1), /* extend. */
1377 COSTS_N_INSNS (1), /* extend_arith. */
1378 COSTS_N_INSNS (1), /* bfi. */
1379 COSTS_N_INSNS (1), /* bfx. */
1380 COSTS_N_INSNS (1), /* clz. */
1381 COSTS_N_INSNS (1), /* rev. */
1382 0, /* non_exec. */
1383 true /* non_exec_costs_exec. */
1384 },
1385
1386 {
1387 /* MULT SImode */
1388 {
1389 0, /* simple. */
1390 COSTS_N_INSNS (1), /* flag_setting. */
1391 COSTS_N_INSNS (1), /* extend. */
1392 COSTS_N_INSNS (1), /* add. */
1393 COSTS_N_INSNS (1), /* extend_add. */
1394 COSTS_N_INSNS (7) /* idiv. */
1395 },
1396 /* MULT DImode */
1397 {
1398 0, /* simple (N/A). */
1399 0, /* flag_setting (N/A). */
1400 COSTS_N_INSNS (1), /* extend. */
1401 0, /* add. */
1402 COSTS_N_INSNS (2), /* extend_add. */
1403 0 /* idiv (N/A). */
1404 }
1405 },
1406 /* LD/ST */
1407 {
1408 COSTS_N_INSNS (1), /* load. */
1409 COSTS_N_INSNS (1), /* load_sign_extend. */
1410 COSTS_N_INSNS (3), /* ldrd. */
1411 COSTS_N_INSNS (1), /* ldm_1st. */
1412 1, /* ldm_regs_per_insn_1st. */
1413 2, /* ldm_regs_per_insn_subsequent. */
1414 COSTS_N_INSNS (2), /* loadf. */
1415 COSTS_N_INSNS (2), /* loadd. */
1416 COSTS_N_INSNS (1), /* load_unaligned. */
1417 COSTS_N_INSNS (1), /* store. */
1418 COSTS_N_INSNS (3), /* strd. */
1419 COSTS_N_INSNS (1), /* stm_1st. */
1420 1, /* stm_regs_per_insn_1st. */
1421 2, /* stm_regs_per_insn_subsequent. */
1422 COSTS_N_INSNS (2), /* storef. */
1423 COSTS_N_INSNS (2), /* stored. */
1424 COSTS_N_INSNS (1), /* store_unaligned. */
1425 COSTS_N_INSNS (1), /* loadv. */
1426 COSTS_N_INSNS (1) /* storev. */
1427 },
1428 {
1429 /* FP SFmode */
1430 {
1431 COSTS_N_INSNS (15), /* div. */
1432 COSTS_N_INSNS (3), /* mult. */
1433 COSTS_N_INSNS (7), /* mult_addsub. */
1434 COSTS_N_INSNS (7), /* fma. */
1435 COSTS_N_INSNS (3), /* addsub. */
1436 COSTS_N_INSNS (3), /* fpconst. */
1437 COSTS_N_INSNS (3), /* neg. */
1438 COSTS_N_INSNS (3), /* compare. */
1439 COSTS_N_INSNS (3), /* widen. */
1440 COSTS_N_INSNS (3), /* narrow. */
1441 COSTS_N_INSNS (3), /* toint. */
1442 COSTS_N_INSNS (3), /* fromint. */
1443 COSTS_N_INSNS (3) /* roundint. */
1444 },
1445 /* FP DFmode */
1446 {
1447 COSTS_N_INSNS (30), /* div. */
1448 COSTS_N_INSNS (6), /* mult. */
1449 COSTS_N_INSNS (10), /* mult_addsub. */
1450 COSTS_N_INSNS (7), /* fma. */
1451 COSTS_N_INSNS (3), /* addsub. */
1452 COSTS_N_INSNS (3), /* fpconst. */
1453 COSTS_N_INSNS (3), /* neg. */
1454 COSTS_N_INSNS (3), /* compare. */
1455 COSTS_N_INSNS (3), /* widen. */
1456 COSTS_N_INSNS (3), /* narrow. */
1457 COSTS_N_INSNS (3), /* toint. */
1458 COSTS_N_INSNS (3), /* fromint. */
1459 COSTS_N_INSNS (3) /* roundint. */
1460 }
1461 },
1462 /* Vector */
1463 {
1464 COSTS_N_INSNS (1) /* alu. */
1465 }
1466 };
1467
1468 const struct cpu_cost_table cortexa12_extra_costs =
1469 {
1470 /* ALU */
1471 {
1472 0, /* arith. */
1473 0, /* logical. */
1474 0, /* shift. */
1475 COSTS_N_INSNS (1), /* shift_reg. */
1476 COSTS_N_INSNS (1), /* arith_shift. */
1477 COSTS_N_INSNS (1), /* arith_shift_reg. */
1478 COSTS_N_INSNS (1), /* log_shift. */
1479 COSTS_N_INSNS (1), /* log_shift_reg. */
1480 0, /* extend. */
1481 COSTS_N_INSNS (1), /* extend_arith. */
1482 0, /* bfi. */
1483 COSTS_N_INSNS (1), /* bfx. */
1484 COSTS_N_INSNS (1), /* clz. */
1485 COSTS_N_INSNS (1), /* rev. */
1486 0, /* non_exec. */
1487 true /* non_exec_costs_exec. */
1488 },
1489 /* MULT SImode */
1490 {
1491 {
1492 COSTS_N_INSNS (2), /* simple. */
1493 COSTS_N_INSNS (3), /* flag_setting. */
1494 COSTS_N_INSNS (2), /* extend. */
1495 COSTS_N_INSNS (3), /* add. */
1496 COSTS_N_INSNS (2), /* extend_add. */
1497 COSTS_N_INSNS (18) /* idiv. */
1498 },
1499 /* MULT DImode */
1500 {
1501 0, /* simple (N/A). */
1502 0, /* flag_setting (N/A). */
1503 COSTS_N_INSNS (3), /* extend. */
1504 0, /* add (N/A). */
1505 COSTS_N_INSNS (3), /* extend_add. */
1506 0 /* idiv (N/A). */
1507 }
1508 },
1509 /* LD/ST */
1510 {
1511 COSTS_N_INSNS (3), /* load. */
1512 COSTS_N_INSNS (3), /* load_sign_extend. */
1513 COSTS_N_INSNS (3), /* ldrd. */
1514 COSTS_N_INSNS (3), /* ldm_1st. */
1515 1, /* ldm_regs_per_insn_1st. */
1516 2, /* ldm_regs_per_insn_subsequent. */
1517 COSTS_N_INSNS (3), /* loadf. */
1518 COSTS_N_INSNS (3), /* loadd. */
1519 0, /* load_unaligned. */
1520 0, /* store. */
1521 0, /* strd. */
1522 0, /* stm_1st. */
1523 1, /* stm_regs_per_insn_1st. */
1524 2, /* stm_regs_per_insn_subsequent. */
1525 COSTS_N_INSNS (2), /* storef. */
1526 COSTS_N_INSNS (2), /* stored. */
1527 0, /* store_unaligned. */
1528 COSTS_N_INSNS (1), /* loadv. */
1529 COSTS_N_INSNS (1) /* storev. */
1530 },
1531 {
1532 /* FP SFmode */
1533 {
1534 COSTS_N_INSNS (17), /* div. */
1535 COSTS_N_INSNS (4), /* mult. */
1536 COSTS_N_INSNS (8), /* mult_addsub. */
1537 COSTS_N_INSNS (8), /* fma. */
1538 COSTS_N_INSNS (4), /* addsub. */
1539 COSTS_N_INSNS (2), /* fpconst. */
1540 COSTS_N_INSNS (2), /* neg. */
1541 COSTS_N_INSNS (2), /* compare. */
1542 COSTS_N_INSNS (4), /* widen. */
1543 COSTS_N_INSNS (4), /* narrow. */
1544 COSTS_N_INSNS (4), /* toint. */
1545 COSTS_N_INSNS (4), /* fromint. */
1546 COSTS_N_INSNS (4) /* roundint. */
1547 },
1548 /* FP DFmode */
1549 {
1550 COSTS_N_INSNS (31), /* div. */
1551 COSTS_N_INSNS (4), /* mult. */
1552 COSTS_N_INSNS (8), /* mult_addsub. */
1553 COSTS_N_INSNS (8), /* fma. */
1554 COSTS_N_INSNS (4), /* addsub. */
1555 COSTS_N_INSNS (2), /* fpconst. */
1556 COSTS_N_INSNS (2), /* neg. */
1557 COSTS_N_INSNS (2), /* compare. */
1558 COSTS_N_INSNS (4), /* widen. */
1559 COSTS_N_INSNS (4), /* narrow. */
1560 COSTS_N_INSNS (4), /* toint. */
1561 COSTS_N_INSNS (4), /* fromint. */
1562 COSTS_N_INSNS (4) /* roundint. */
1563 }
1564 },
1565 /* Vector */
1566 {
1567 COSTS_N_INSNS (1) /* alu. */
1568 }
1569 };
1570
1571 const struct cpu_cost_table cortexa15_extra_costs =
1572 {
1573 /* ALU */
1574 {
1575 0, /* arith. */
1576 0, /* logical. */
1577 0, /* shift. */
1578 0, /* shift_reg. */
1579 COSTS_N_INSNS (1), /* arith_shift. */
1580 COSTS_N_INSNS (1), /* arith_shift_reg. */
1581 COSTS_N_INSNS (1), /* log_shift. */
1582 COSTS_N_INSNS (1), /* log_shift_reg. */
1583 0, /* extend. */
1584 COSTS_N_INSNS (1), /* extend_arith. */
1585 COSTS_N_INSNS (1), /* bfi. */
1586 0, /* bfx. */
1587 0, /* clz. */
1588 0, /* rev. */
1589 0, /* non_exec. */
1590 true /* non_exec_costs_exec. */
1591 },
1592 /* MULT SImode */
1593 {
1594 {
1595 COSTS_N_INSNS (2), /* simple. */
1596 COSTS_N_INSNS (3), /* flag_setting. */
1597 COSTS_N_INSNS (2), /* extend. */
1598 COSTS_N_INSNS (2), /* add. */
1599 COSTS_N_INSNS (2), /* extend_add. */
1600 COSTS_N_INSNS (18) /* idiv. */
1601 },
1602 /* MULT DImode */
1603 {
1604 0, /* simple (N/A). */
1605 0, /* flag_setting (N/A). */
1606 COSTS_N_INSNS (3), /* extend. */
1607 0, /* add (N/A). */
1608 COSTS_N_INSNS (3), /* extend_add. */
1609 0 /* idiv (N/A). */
1610 }
1611 },
1612 /* LD/ST */
1613 {
1614 COSTS_N_INSNS (3), /* load. */
1615 COSTS_N_INSNS (3), /* load_sign_extend. */
1616 COSTS_N_INSNS (3), /* ldrd. */
1617 COSTS_N_INSNS (4), /* ldm_1st. */
1618 1, /* ldm_regs_per_insn_1st. */
1619 2, /* ldm_regs_per_insn_subsequent. */
1620 COSTS_N_INSNS (4), /* loadf. */
1621 COSTS_N_INSNS (4), /* loadd. */
1622 0, /* load_unaligned. */
1623 0, /* store. */
1624 0, /* strd. */
1625 COSTS_N_INSNS (1), /* stm_1st. */
1626 1, /* stm_regs_per_insn_1st. */
1627 2, /* stm_regs_per_insn_subsequent. */
1628 0, /* storef. */
1629 0, /* stored. */
1630 0, /* store_unaligned. */
1631 COSTS_N_INSNS (1), /* loadv. */
1632 COSTS_N_INSNS (1) /* storev. */
1633 },
1634 {
1635 /* FP SFmode */
1636 {
1637 COSTS_N_INSNS (17), /* div. */
1638 COSTS_N_INSNS (4), /* mult. */
1639 COSTS_N_INSNS (8), /* mult_addsub. */
1640 COSTS_N_INSNS (8), /* fma. */
1641 COSTS_N_INSNS (4), /* addsub. */
1642 COSTS_N_INSNS (2), /* fpconst. */
1643 COSTS_N_INSNS (2), /* neg. */
1644 COSTS_N_INSNS (5), /* compare. */
1645 COSTS_N_INSNS (4), /* widen. */
1646 COSTS_N_INSNS (4), /* narrow. */
1647 COSTS_N_INSNS (4), /* toint. */
1648 COSTS_N_INSNS (4), /* fromint. */
1649 COSTS_N_INSNS (4) /* roundint. */
1650 },
1651 /* FP DFmode */
1652 {
1653 COSTS_N_INSNS (31), /* div. */
1654 COSTS_N_INSNS (4), /* mult. */
1655 COSTS_N_INSNS (8), /* mult_addsub. */
1656 COSTS_N_INSNS (8), /* fma. */
1657 COSTS_N_INSNS (4), /* addsub. */
1658 COSTS_N_INSNS (2), /* fpconst. */
1659 COSTS_N_INSNS (2), /* neg. */
1660 COSTS_N_INSNS (2), /* compare. */
1661 COSTS_N_INSNS (4), /* widen. */
1662 COSTS_N_INSNS (4), /* narrow. */
1663 COSTS_N_INSNS (4), /* toint. */
1664 COSTS_N_INSNS (4), /* fromint. */
1665 COSTS_N_INSNS (4) /* roundint. */
1666 }
1667 },
1668 /* Vector */
1669 {
1670 COSTS_N_INSNS (1) /* alu. */
1671 }
1672 };
1673
1674 const struct cpu_cost_table v7m_extra_costs =
1675 {
1676 /* ALU */
1677 {
1678 0, /* arith. */
1679 0, /* logical. */
1680 0, /* shift. */
1681 0, /* shift_reg. */
1682 0, /* arith_shift. */
1683 COSTS_N_INSNS (1), /* arith_shift_reg. */
1684 0, /* log_shift. */
1685 COSTS_N_INSNS (1), /* log_shift_reg. */
1686 0, /* extend. */
1687 COSTS_N_INSNS (1), /* extend_arith. */
1688 0, /* bfi. */
1689 0, /* bfx. */
1690 0, /* clz. */
1691 0, /* rev. */
1692 COSTS_N_INSNS (1), /* non_exec. */
1693 false /* non_exec_costs_exec. */
1694 },
1695 {
1696 /* MULT SImode */
1697 {
1698 COSTS_N_INSNS (1), /* simple. */
1699 COSTS_N_INSNS (1), /* flag_setting. */
1700 COSTS_N_INSNS (2), /* extend. */
1701 COSTS_N_INSNS (1), /* add. */
1702 COSTS_N_INSNS (3), /* extend_add. */
1703 COSTS_N_INSNS (8) /* idiv. */
1704 },
1705 /* MULT DImode */
1706 {
1707 0, /* simple (N/A). */
1708 0, /* flag_setting (N/A). */
1709 COSTS_N_INSNS (2), /* extend. */
1710 0, /* add (N/A). */
1711 COSTS_N_INSNS (3), /* extend_add. */
1712 0 /* idiv (N/A). */
1713 }
1714 },
1715 /* LD/ST */
1716 {
1717 COSTS_N_INSNS (2), /* load. */
1718 0, /* load_sign_extend. */
1719 COSTS_N_INSNS (3), /* ldrd. */
1720 COSTS_N_INSNS (2), /* ldm_1st. */
1721 1, /* ldm_regs_per_insn_1st. */
1722 1, /* ldm_regs_per_insn_subsequent. */
1723 COSTS_N_INSNS (2), /* loadf. */
1724 COSTS_N_INSNS (3), /* loadd. */
1725 COSTS_N_INSNS (1), /* load_unaligned. */
1726 COSTS_N_INSNS (2), /* store. */
1727 COSTS_N_INSNS (3), /* strd. */
1728 COSTS_N_INSNS (2), /* stm_1st. */
1729 1, /* stm_regs_per_insn_1st. */
1730 1, /* stm_regs_per_insn_subsequent. */
1731 COSTS_N_INSNS (2), /* storef. */
1732 COSTS_N_INSNS (3), /* stored. */
1733 COSTS_N_INSNS (1), /* store_unaligned. */
1734 COSTS_N_INSNS (1), /* loadv. */
1735 COSTS_N_INSNS (1) /* storev. */
1736 },
1737 {
1738 /* FP SFmode */
1739 {
1740 COSTS_N_INSNS (7), /* div. */
1741 COSTS_N_INSNS (2), /* mult. */
1742 COSTS_N_INSNS (5), /* mult_addsub. */
1743 COSTS_N_INSNS (3), /* fma. */
1744 COSTS_N_INSNS (1), /* addsub. */
1745 0, /* fpconst. */
1746 0, /* neg. */
1747 0, /* compare. */
1748 0, /* widen. */
1749 0, /* narrow. */
1750 0, /* toint. */
1751 0, /* fromint. */
1752 0 /* roundint. */
1753 },
1754 /* FP DFmode */
1755 {
1756 COSTS_N_INSNS (15), /* div. */
1757 COSTS_N_INSNS (5), /* mult. */
1758 COSTS_N_INSNS (7), /* mult_addsub. */
1759 COSTS_N_INSNS (7), /* fma. */
1760 COSTS_N_INSNS (3), /* addsub. */
1761 0, /* fpconst. */
1762 0, /* neg. */
1763 0, /* compare. */
1764 0, /* widen. */
1765 0, /* narrow. */
1766 0, /* toint. */
1767 0, /* fromint. */
1768 0 /* roundint. */
1769 }
1770 },
1771 /* Vector */
1772 {
1773 COSTS_N_INSNS (1) /* alu. */
1774 }
1775 };
1776
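/* A note on reading the tables above: COSTS_N_INSNS (N) is the generic RTL
   cost macro from rtl.h and simply scales N by the cost of one fast insn
   (it expands to N * 4).  These per-CPU tables hold *extra* costs: roughly
   speaking, the rtx-cost hooks add the relevant entry on top of a
   one-instruction baseline, so a 0 entry means "no cost beyond a single
   insn" rather than "free".  */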
1777 const struct addr_mode_cost_table generic_addr_mode_costs =
1778 {
1779 /* int. */
1780 {
1781 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1782 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1783 COSTS_N_INSNS (0) /* AMO_WB. */
1784 },
1785 /* float. */
1786 {
1787 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1788 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1789 COSTS_N_INSNS (0) /* AMO_WB. */
1790 },
1791 /* vector. */
1792 {
1793 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1794 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1795 COSTS_N_INSNS (0) /* AMO_WB. */
1796 }
1797 };
1798
1799 const struct tune_params arm_slowmul_tune =
1800 {
1801 &generic_extra_costs, /* Insn extra costs. */
1802 &generic_addr_mode_costs, /* Addressing mode costs. */
1803 NULL, /* Sched adj cost. */
1804 arm_default_branch_cost,
1805 &arm_default_vec_cost,
1806 3, /* Constant limit. */
1807 5, /* Max cond insns. */
1808 8, /* Memset max inline. */
1809 1, /* Issue rate. */
1810 ARM_PREFETCH_NOT_BENEFICIAL,
1811 tune_params::PREF_CONST_POOL_TRUE,
1812 tune_params::PREF_LDRD_FALSE,
1813 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1814 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1815 tune_params::DISPARAGE_FLAGS_NEITHER,
1816 tune_params::PREF_NEON_STRINGOPS_FALSE,
1817 tune_params::FUSE_NOTHING,
1818 tune_params::SCHED_AUTOPREF_OFF
1819 };
1820
1821 const struct tune_params arm_fastmul_tune =
1822 {
1823 &generic_extra_costs, /* Insn extra costs. */
1824 &generic_addr_mode_costs, /* Addressing mode costs. */
1825 NULL, /* Sched adj cost. */
1826 arm_default_branch_cost,
1827 &arm_default_vec_cost,
1828 1, /* Constant limit. */
1829 5, /* Max cond insns. */
1830 8, /* Memset max inline. */
1831 1, /* Issue rate. */
1832 ARM_PREFETCH_NOT_BENEFICIAL,
1833 tune_params::PREF_CONST_POOL_TRUE,
1834 tune_params::PREF_LDRD_FALSE,
1835 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1836 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1837 tune_params::DISPARAGE_FLAGS_NEITHER,
1838 tune_params::PREF_NEON_STRINGOPS_FALSE,
1839 tune_params::FUSE_NOTHING,
1840 tune_params::SCHED_AUTOPREF_OFF
1841 };
1842
1843 /* StrongARM has early execution of branches, so a sequence that is worth
1844 skipping is shorter. Set max_insns_skipped to a lower value. */
1845
1846 const struct tune_params arm_strongarm_tune =
1847 {
1848 &generic_extra_costs, /* Insn extra costs. */
1849 &generic_addr_mode_costs, /* Addressing mode costs. */
1850 NULL, /* Sched adj cost. */
1851 arm_default_branch_cost,
1852 &arm_default_vec_cost,
1853 1, /* Constant limit. */
1854 3, /* Max cond insns. */
1855 8, /* Memset max inline. */
1856 1, /* Issue rate. */
1857 ARM_PREFETCH_NOT_BENEFICIAL,
1858 tune_params::PREF_CONST_POOL_TRUE,
1859 tune_params::PREF_LDRD_FALSE,
1860 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1862 tune_params::DISPARAGE_FLAGS_NEITHER,
1863 tune_params::PREF_NEON_STRINGOPS_FALSE,
1864 tune_params::FUSE_NOTHING,
1865 tune_params::SCHED_AUTOPREF_OFF
1866 };
1867
1868 const struct tune_params arm_xscale_tune =
1869 {
1870 &generic_extra_costs, /* Insn extra costs. */
1871 &generic_addr_mode_costs, /* Addressing mode costs. */
1872 xscale_sched_adjust_cost,
1873 arm_default_branch_cost,
1874 &arm_default_vec_cost,
1875 2, /* Constant limit. */
1876 3, /* Max cond insns. */
1877 8, /* Memset max inline. */
1878 1, /* Issue rate. */
1879 ARM_PREFETCH_NOT_BENEFICIAL,
1880 tune_params::PREF_CONST_POOL_TRUE,
1881 tune_params::PREF_LDRD_FALSE,
1882 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1884 tune_params::DISPARAGE_FLAGS_NEITHER,
1885 tune_params::PREF_NEON_STRINGOPS_FALSE,
1886 tune_params::FUSE_NOTHING,
1887 tune_params::SCHED_AUTOPREF_OFF
1888 };
1889
1890 const struct tune_params arm_9e_tune =
1891 {
1892 &generic_extra_costs, /* Insn extra costs. */
1893 &generic_addr_mode_costs, /* Addressing mode costs. */
1894 NULL, /* Sched adj cost. */
1895 arm_default_branch_cost,
1896 &arm_default_vec_cost,
1897 1, /* Constant limit. */
1898 5, /* Max cond insns. */
1899 8, /* Memset max inline. */
1900 1, /* Issue rate. */
1901 ARM_PREFETCH_NOT_BENEFICIAL,
1902 tune_params::PREF_CONST_POOL_TRUE,
1903 tune_params::PREF_LDRD_FALSE,
1904 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1905 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1906 tune_params::DISPARAGE_FLAGS_NEITHER,
1907 tune_params::PREF_NEON_STRINGOPS_FALSE,
1908 tune_params::FUSE_NOTHING,
1909 tune_params::SCHED_AUTOPREF_OFF
1910 };
1911
1912 const struct tune_params arm_marvell_pj4_tune =
1913 {
1914 &generic_extra_costs, /* Insn extra costs. */
1915 &generic_addr_mode_costs, /* Addressing mode costs. */
1916 NULL, /* Sched adj cost. */
1917 arm_default_branch_cost,
1918 &arm_default_vec_cost,
1919 1, /* Constant limit. */
1920 5, /* Max cond insns. */
1921 8, /* Memset max inline. */
1922 2, /* Issue rate. */
1923 ARM_PREFETCH_NOT_BENEFICIAL,
1924 tune_params::PREF_CONST_POOL_TRUE,
1925 tune_params::PREF_LDRD_FALSE,
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1927 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1928 tune_params::DISPARAGE_FLAGS_NEITHER,
1929 tune_params::PREF_NEON_STRINGOPS_FALSE,
1930 tune_params::FUSE_NOTHING,
1931 tune_params::SCHED_AUTOPREF_OFF
1932 };
1933
1934 const struct tune_params arm_v6t2_tune =
1935 {
1936 &generic_extra_costs, /* Insn extra costs. */
1937 &generic_addr_mode_costs, /* Addressing mode costs. */
1938 NULL, /* Sched adj cost. */
1939 arm_default_branch_cost,
1940 &arm_default_vec_cost,
1941 1, /* Constant limit. */
1942 5, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 1, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL,
1946 tune_params::PREF_CONST_POOL_FALSE,
1947 tune_params::PREF_LDRD_FALSE,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_NEITHER,
1951 tune_params::PREF_NEON_STRINGOPS_FALSE,
1952 tune_params::FUSE_NOTHING,
1953 tune_params::SCHED_AUTOPREF_OFF
1954 };
1955
1956
1957 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1958 const struct tune_params arm_cortex_tune =
1959 {
1960 &generic_extra_costs,
1961 &generic_addr_mode_costs, /* Addressing mode costs. */
1962 NULL, /* Sched adj cost. */
1963 arm_default_branch_cost,
1964 &arm_default_vec_cost,
1965 1, /* Constant limit. */
1966 5, /* Max cond insns. */
1967 8, /* Memset max inline. */
1968 2, /* Issue rate. */
1969 ARM_PREFETCH_NOT_BENEFICIAL,
1970 tune_params::PREF_CONST_POOL_FALSE,
1971 tune_params::PREF_LDRD_FALSE,
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1974 tune_params::DISPARAGE_FLAGS_NEITHER,
1975 tune_params::PREF_NEON_STRINGOPS_FALSE,
1976 tune_params::FUSE_NOTHING,
1977 tune_params::SCHED_AUTOPREF_OFF
1978 };
1979
1980 const struct tune_params arm_cortex_a8_tune =
1981 {
1982 &cortexa8_extra_costs,
1983 &generic_addr_mode_costs, /* Addressing mode costs. */
1984 NULL, /* Sched adj cost. */
1985 arm_default_branch_cost,
1986 &arm_default_vec_cost,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL,
1992 tune_params::PREF_CONST_POOL_FALSE,
1993 tune_params::PREF_LDRD_FALSE,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER,
1997 tune_params::PREF_NEON_STRINGOPS_TRUE,
1998 tune_params::FUSE_NOTHING,
1999 tune_params::SCHED_AUTOPREF_OFF
2000 };
2001
2002 const struct tune_params arm_cortex_a7_tune =
2003 {
2004 &cortexa7_extra_costs,
2005 &generic_addr_mode_costs, /* Addressing mode costs. */
2006 NULL, /* Sched adj cost. */
2007 arm_default_branch_cost,
2008 &arm_default_vec_cost,
2009 1, /* Constant limit. */
2010 5, /* Max cond insns. */
2011 8, /* Memset max inline. */
2012 2, /* Issue rate. */
2013 ARM_PREFETCH_NOT_BENEFICIAL,
2014 tune_params::PREF_CONST_POOL_FALSE,
2015 tune_params::PREF_LDRD_FALSE,
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2018 tune_params::DISPARAGE_FLAGS_NEITHER,
2019 tune_params::PREF_NEON_STRINGOPS_TRUE,
2020 tune_params::FUSE_NOTHING,
2021 tune_params::SCHED_AUTOPREF_OFF
2022 };
2023
2024 const struct tune_params arm_cortex_a15_tune =
2025 {
2026 &cortexa15_extra_costs,
2027 &generic_addr_mode_costs, /* Addressing mode costs. */
2028 NULL, /* Sched adj cost. */
2029 arm_default_branch_cost,
2030 &arm_default_vec_cost,
2031 1, /* Constant limit. */
2032 2, /* Max cond insns. */
2033 8, /* Memset max inline. */
2034 3, /* Issue rate. */
2035 ARM_PREFETCH_NOT_BENEFICIAL,
2036 tune_params::PREF_CONST_POOL_FALSE,
2037 tune_params::PREF_LDRD_TRUE,
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2039 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2040 tune_params::DISPARAGE_FLAGS_ALL,
2041 tune_params::PREF_NEON_STRINGOPS_TRUE,
2042 tune_params::FUSE_NOTHING,
2043 tune_params::SCHED_AUTOPREF_FULL
2044 };
2045
2046 const struct tune_params arm_cortex_a35_tune =
2047 {
2048 &cortexa53_extra_costs,
2049 &generic_addr_mode_costs, /* Addressing mode costs. */
2050 NULL, /* Sched adj cost. */
2051 arm_default_branch_cost,
2052 &arm_default_vec_cost,
2053 1, /* Constant limit. */
2054 5, /* Max cond insns. */
2055 8, /* Memset max inline. */
2056 1, /* Issue rate. */
2057 ARM_PREFETCH_NOT_BENEFICIAL,
2058 tune_params::PREF_CONST_POOL_FALSE,
2059 tune_params::PREF_LDRD_FALSE,
2060 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2062 tune_params::DISPARAGE_FLAGS_NEITHER,
2063 tune_params::PREF_NEON_STRINGOPS_TRUE,
2064 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2065 tune_params::SCHED_AUTOPREF_OFF
2066 };
2067
2068 const struct tune_params arm_cortex_a53_tune =
2069 {
2070 &cortexa53_extra_costs,
2071 &generic_addr_mode_costs, /* Addressing mode costs. */
2072 NULL, /* Sched adj cost. */
2073 arm_default_branch_cost,
2074 &arm_default_vec_cost,
2075 1, /* Constant limit. */
2076 5, /* Max cond insns. */
2077 8, /* Memset max inline. */
2078 2, /* Issue rate. */
2079 ARM_PREFETCH_NOT_BENEFICIAL,
2080 tune_params::PREF_CONST_POOL_FALSE,
2081 tune_params::PREF_LDRD_FALSE,
2082 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2084 tune_params::DISPARAGE_FLAGS_NEITHER,
2085 tune_params::PREF_NEON_STRINGOPS_TRUE,
2086 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2087 tune_params::SCHED_AUTOPREF_OFF
2088 };
2089
2090 const struct tune_params arm_cortex_a57_tune =
2091 {
2092 &cortexa57_extra_costs,
2094 &generic_addr_mode_costs, /* Addressing mode costs. */
2094 NULL, /* Sched adj cost. */
2095 arm_default_branch_cost,
2096 &arm_default_vec_cost,
2097 1, /* Constant limit. */
2098 2, /* Max cond insns. */
2099 8, /* Memset max inline. */
2100 3, /* Issue rate. */
2101 ARM_PREFETCH_NOT_BENEFICIAL,
2102 tune_params::PREF_CONST_POOL_FALSE,
2103 tune_params::PREF_LDRD_TRUE,
2104 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2106 tune_params::DISPARAGE_FLAGS_ALL,
2107 tune_params::PREF_NEON_STRINGOPS_TRUE,
2108 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2109 tune_params::SCHED_AUTOPREF_FULL
2110 };
2111
2112 const struct tune_params arm_exynosm1_tune =
2113 {
2114 &exynosm1_extra_costs,
2115 &generic_addr_mode_costs, /* Addressing mode costs. */
2116 NULL, /* Sched adj cost. */
2117 arm_default_branch_cost,
2118 &arm_default_vec_cost,
2119 1, /* Constant limit. */
2120 2, /* Max cond insns. */
2121 8, /* Memset max inline. */
2122 3, /* Issue rate. */
2123 ARM_PREFETCH_NOT_BENEFICIAL,
2124 tune_params::PREF_CONST_POOL_FALSE,
2125 tune_params::PREF_LDRD_TRUE,
2126 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2128 tune_params::DISPARAGE_FLAGS_ALL,
2129 tune_params::PREF_NEON_STRINGOPS_TRUE,
2130 tune_params::FUSE_NOTHING,
2131 tune_params::SCHED_AUTOPREF_OFF
2132 };
2133
2134 const struct tune_params arm_xgene1_tune =
2135 {
2136 &xgene1_extra_costs,
2137 &generic_addr_mode_costs, /* Addressing mode costs. */
2138 NULL, /* Sched adj cost. */
2139 arm_default_branch_cost,
2140 &arm_default_vec_cost,
2141 1, /* Constant limit. */
2142 2, /* Max cond insns. */
2143 32, /* Memset max inline. */
2144 4, /* Issue rate. */
2145 ARM_PREFETCH_NOT_BENEFICIAL,
2146 tune_params::PREF_CONST_POOL_FALSE,
2147 tune_params::PREF_LDRD_TRUE,
2148 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2150 tune_params::DISPARAGE_FLAGS_ALL,
2151 tune_params::PREF_NEON_STRINGOPS_FALSE,
2152 tune_params::FUSE_NOTHING,
2153 tune_params::SCHED_AUTOPREF_OFF
2154 };
2155
2156 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2157 less appealing. Set max_insns_skipped to a low value. */
2158
2159 const struct tune_params arm_cortex_a5_tune =
2160 {
2161 &cortexa5_extra_costs,
2162 &generic_addr_mode_costs, /* Addressing mode costs. */
2163 NULL, /* Sched adj cost. */
2164 arm_cortex_a5_branch_cost,
2165 &arm_default_vec_cost,
2166 1, /* Constant limit. */
2167 1, /* Max cond insns. */
2168 8, /* Memset max inline. */
2169 2, /* Issue rate. */
2170 ARM_PREFETCH_NOT_BENEFICIAL,
2171 tune_params::PREF_CONST_POOL_FALSE,
2172 tune_params::PREF_LDRD_FALSE,
2173 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2175 tune_params::DISPARAGE_FLAGS_NEITHER,
2176 tune_params::PREF_NEON_STRINGOPS_TRUE,
2177 tune_params::FUSE_NOTHING,
2178 tune_params::SCHED_AUTOPREF_OFF
2179 };
2180
2181 const struct tune_params arm_cortex_a9_tune =
2182 {
2183 &cortexa9_extra_costs,
2184 &generic_addr_mode_costs, /* Addressing mode costs. */
2185 cortex_a9_sched_adjust_cost,
2186 arm_default_branch_cost,
2187 &arm_default_vec_cost,
2188 1, /* Constant limit. */
2189 5, /* Max cond insns. */
2190 8, /* Memset max inline. */
2191 2, /* Issue rate. */
2192 ARM_PREFETCH_BENEFICIAL(4,32,32),
2193 tune_params::PREF_CONST_POOL_FALSE,
2194 tune_params::PREF_LDRD_FALSE,
2195 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2197 tune_params::DISPARAGE_FLAGS_NEITHER,
2198 tune_params::PREF_NEON_STRINGOPS_FALSE,
2199 tune_params::FUSE_NOTHING,
2200 tune_params::SCHED_AUTOPREF_OFF
2201 };
2202
2203 const struct tune_params arm_cortex_a12_tune =
2204 {
2205 &cortexa12_extra_costs,
2206 &generic_addr_mode_costs, /* Addressing mode costs. */
2207 NULL, /* Sched adj cost. */
2208 arm_default_branch_cost,
2209 &arm_default_vec_cost, /* Vectorizer costs. */
2210 1, /* Constant limit. */
2211 2, /* Max cond insns. */
2212 8, /* Memset max inline. */
2213 2, /* Issue rate. */
2214 ARM_PREFETCH_NOT_BENEFICIAL,
2215 tune_params::PREF_CONST_POOL_FALSE,
2216 tune_params::PREF_LDRD_TRUE,
2217 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2218 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2219 tune_params::DISPARAGE_FLAGS_ALL,
2220 tune_params::PREF_NEON_STRINGOPS_TRUE,
2221 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2222 tune_params::SCHED_AUTOPREF_OFF
2223 };
2224
2225 const struct tune_params arm_cortex_a73_tune =
2226 {
2227 &cortexa57_extra_costs,
2228 &generic_addr_mode_costs, /* Addressing mode costs. */
2229 NULL, /* Sched adj cost. */
2230 arm_default_branch_cost,
2231 &arm_default_vec_cost, /* Vectorizer costs. */
2232 1, /* Constant limit. */
2233 2, /* Max cond insns. */
2234 8, /* Memset max inline. */
2235 2, /* Issue rate. */
2236 ARM_PREFETCH_NOT_BENEFICIAL,
2237 tune_params::PREF_CONST_POOL_FALSE,
2238 tune_params::PREF_LDRD_TRUE,
2239 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2240 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2241 tune_params::DISPARAGE_FLAGS_ALL,
2242 tune_params::PREF_NEON_STRINGOPS_TRUE,
2243 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2244 tune_params::SCHED_AUTOPREF_FULL
2245 };
2246
2247 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2248 single cycle, so materialising a 32-bit constant with the pair costs two
2249 cycles. An LDR from the constant pool likewise takes two cycles to execute,
2250 but mildly increases pipelining opportunity (consecutive loads/stores can be
2251 pipelined together, saving one cycle), and may also improve icache
2252 utilisation. Hence we prefer the constant pool for such processors. */
2253
2254 const struct tune_params arm_v7m_tune =
2255 {
2256 &v7m_extra_costs,
2257 &generic_addr_mode_costs, /* Addressing mode costs. */
2258 NULL, /* Sched adj cost. */
2259 arm_cortex_m_branch_cost,
2260 &arm_default_vec_cost,
2261 1, /* Constant limit. */
2262 2, /* Max cond insns. */
2263 8, /* Memset max inline. */
2264 1, /* Issue rate. */
2265 ARM_PREFETCH_NOT_BENEFICIAL,
2266 tune_params::PREF_CONST_POOL_TRUE,
2267 tune_params::PREF_LDRD_FALSE,
2268 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2269 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2270 tune_params::DISPARAGE_FLAGS_NEITHER,
2271 tune_params::PREF_NEON_STRINGOPS_FALSE,
2272 tune_params::FUSE_NOTHING,
2273 tune_params::SCHED_AUTOPREF_OFF
2274 };
2275
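/* To illustrate the trade-off described before arm_v7m_tune: loading a
   32-bit constant with MOVW/MOVT needs two 32-bit Thumb-2 encodings
   (8 bytes, two cycles), while a PC-relative LDR is a 2- or 4-byte
   instruction plus a 4-byte literal-pool slot and also takes two cycles,
   but can pipeline with a neighbouring load or store.  The byte counts are
   typical figures given for illustration; the tuning tables only encode
   the constant-pool preference itself.  */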
2276 /* Cortex-M7 tuning. */
2277
2278 const struct tune_params arm_cortex_m7_tune =
2279 {
2280 &v7m_extra_costs,
2281 &generic_addr_mode_costs, /* Addressing mode costs. */
2282 NULL, /* Sched adj cost. */
2283 arm_cortex_m7_branch_cost,
2284 &arm_default_vec_cost,
2285 0, /* Constant limit. */
2286 1, /* Max cond insns. */
2287 8, /* Memset max inline. */
2288 2, /* Issue rate. */
2289 ARM_PREFETCH_NOT_BENEFICIAL,
2290 tune_params::PREF_CONST_POOL_TRUE,
2291 tune_params::PREF_LDRD_FALSE,
2292 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2294 tune_params::DISPARAGE_FLAGS_NEITHER,
2295 tune_params::PREF_NEON_STRINGOPS_FALSE,
2296 tune_params::FUSE_NOTHING,
2297 tune_params::SCHED_AUTOPREF_OFF
2298 };
2299
2300 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2301 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2302 cortex-m23. */
2303 const struct tune_params arm_v6m_tune =
2304 {
2305 &generic_extra_costs, /* Insn extra costs. */
2306 &generic_addr_mode_costs, /* Addressing mode costs. */
2307 NULL, /* Sched adj cost. */
2308 arm_default_branch_cost,
2309 &arm_default_vec_cost, /* Vectorizer costs. */
2310 1, /* Constant limit. */
2311 5, /* Max cond insns. */
2312 8, /* Memset max inline. */
2313 1, /* Issue rate. */
2314 ARM_PREFETCH_NOT_BENEFICIAL,
2315 tune_params::PREF_CONST_POOL_FALSE,
2316 tune_params::PREF_LDRD_FALSE,
2317 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2318 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2319 tune_params::DISPARAGE_FLAGS_NEITHER,
2320 tune_params::PREF_NEON_STRINGOPS_FALSE,
2321 tune_params::FUSE_NOTHING,
2322 tune_params::SCHED_AUTOPREF_OFF
2323 };
2324
2325 const struct tune_params arm_fa726te_tune =
2326 {
2327 &generic_extra_costs, /* Insn extra costs. */
2328 &generic_addr_mode_costs, /* Addressing mode costs. */
2329 fa726te_sched_adjust_cost,
2330 arm_default_branch_cost,
2331 &arm_default_vec_cost,
2332 1, /* Constant limit. */
2333 5, /* Max cond insns. */
2334 8, /* Memset max inline. */
2335 2, /* Issue rate. */
2336 ARM_PREFETCH_NOT_BENEFICIAL,
2337 tune_params::PREF_CONST_POOL_TRUE,
2338 tune_params::PREF_LDRD_FALSE,
2339 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2340 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2341 tune_params::DISPARAGE_FLAGS_NEITHER,
2342 tune_params::PREF_NEON_STRINGOPS_FALSE,
2343 tune_params::FUSE_NOTHING,
2344 tune_params::SCHED_AUTOPREF_OFF
2345 };
2346
2347 /* Auto-generated CPU, FPU and architecture tables. */
2348 #include "arm-cpu-data.h"
2349
2350 /* The name of the preprocessor macro to define for this architecture. PROFILE
2351 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2352 is thus chosen to be big enough to hold the longest architecture name. */
2353
2354 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
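/* For example, after substitution this becomes "__ARM_ARCH_7M__" when
   compiling for -march=armv7-m and "__ARM_ARCH_8A__" for -march=armv8-a;
   the PROFILE placeholder merely reserves space for the longest name.  */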
2355
2356 /* Supported TLS relocations. */
2357
2358 enum tls_reloc {
2359 TLS_GD32,
2360 TLS_GD32_FDPIC,
2361 TLS_LDM32,
2362 TLS_LDM32_FDPIC,
2363 TLS_LDO32,
2364 TLS_IE32,
2365 TLS_IE32_FDPIC,
2366 TLS_LE32,
2367 TLS_DESCSEQ /* GNU scheme */
2368 };
2369
2370 /* The maximum number of insns to be used when loading a constant. */
2371 inline static int
2372 arm_constant_limit (bool size_p)
2373 {
2374 return size_p ? 1 : current_tune->constant_limit;
2375 }
2376
2377 /* Emit an insn that's a simple single-set. Both the operands must be known
2378 to be valid. */
2379 inline static rtx_insn *
2380 emit_set_insn (rtx x, rtx y)
2381 {
2382 return emit_insn (gen_rtx_SET (x, y));
2383 }
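/* For example, emit_set_insn (reg, const0_rtx) emits one insn whose
   pattern is (set (reg) (const_int 0)).  */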
2384
2385 /* Return the number of bits set in VALUE. */
2386 static unsigned
2387 bit_count (unsigned long value)
2388 {
2389 unsigned long count = 0;
2390
2391 while (value)
2392 {
2393 count++;
2394 value &= value - 1; /* Clear the least-significant set bit. */
2395 }
2396
2397 return count;
2398 }
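/* The loop above is the classic clear-lowest-set-bit trick.  Where the
   host compiler provides the builtin it is equivalent to the hypothetical
   helper below, which is kept out of the build and shown for reference
   only.  */
#if 0
static unsigned
bit_count_via_builtin (unsigned long value)
{
  /* __builtin_popcountl returns the number of set bits in VALUE.  */
  return __builtin_popcountl (value);
}
#endif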
2399
2400 /* Return the number of bits set in BMAP. */
2401 static unsigned
2402 bitmap_popcount (const sbitmap bmap)
2403 {
2404 unsigned int count = 0;
2405 unsigned int n = 0;
2406 sbitmap_iterator sbi;
2407
2408 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2409 count++;
2410 return count;
2411 }
2412
2413 typedef struct
2414 {
2415 machine_mode mode;
2416 const char *name;
2417 } arm_fixed_mode_set;
2418
2419 /* A small helper for setting the fixed-point libfuncs. */
2420
2421 static void
2422 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2423 const char *funcname, const char *modename,
2424 int num_suffix)
2425 {
2426 char buffer[50];
2427
2428 if (num_suffix == 0)
2429 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2430 else
2431 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2432
2433 set_optab_libfunc (optable, mode, buffer);
2434 }
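/* For example, given the sprintf format above, the call
   arm_set_fixed_optab_libfunc (ssadd_optab, E_SQmode, "ssadd", "sq", 3)
   registers the libcall name "__gnu_ssaddsq3"; a num_suffix of 0 simply
   drops the trailing digit.  */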
2435
2436 static void
2437 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2438 machine_mode from, const char *funcname,
2439 const char *toname, const char *fromname)
2440 {
2441 char buffer[50];
2442 const char *maybe_suffix_2 = "";
2443
2444 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2445 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2446 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2447 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2448 maybe_suffix_2 = "2";
2449
2450 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2451 maybe_suffix_2);
2452
2453 set_conv_libfunc (optable, to, from, buffer);
2454 }
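/* For example, a conversion from SQ to DF picks the name "__gnu_fractsqdf"
   (no "2" suffix, because the destination is not a fixed-point mode),
   whereas SQ to DQ (both signed fract modes) yields "__gnu_fractsqdq2",
   following the fixed-bit.h convention referenced above.  */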
2455
2456 static GTY(()) rtx speculation_barrier_libfunc;
2457
2458 /* Set up library functions unique to ARM. */
2459 static void
2460 arm_init_libfuncs (void)
2461 {
2462 /* For Linux, we have access to kernel support for atomic operations. */
2463 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2464 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2465
2466 /* There are no special library functions unless we are using the
2467 ARM BPABI. */
2468 if (!TARGET_BPABI)
2469 return;
2470
2471 /* The functions below are described in Section 4 of the "Run-Time
2472 ABI for the ARM architecture", Version 1.0. */
2473
2474 /* Double-precision floating-point arithmetic. Table 2. */
2475 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2476 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2477 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2478 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2479 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2480
2481 /* Double-precision comparisons. Table 3. */
2482 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
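/* The run-time ABI defines no __aeabi_dcmpne entry point (Table 3 stops at
   eq/lt/le/ge/gt/un), so the NE optab below is cleared rather than pointed
   at a helper, leaving NE comparisons to be derived from the EQ result.
   The single-precision table further down does the same.  */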
2483 set_optab_libfunc (ne_optab, DFmode, NULL);
2484 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2485 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2486 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2487 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2488 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2489
2490 /* Single-precision floating-point arithmetic. Table 4. */
2491 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2492 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2493 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2494 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2495 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2496
2497 /* Single-precision comparisons. Table 5. */
2498 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2499 set_optab_libfunc (ne_optab, SFmode, NULL);
2500 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2501 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2502 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2503 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2504 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2505
2506 /* Floating-point to integer conversions. Table 6. */
2507 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2508 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2509 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2510 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2511 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2512 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2513 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2514 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2515
2516 /* Conversions between floating types. Table 7. */
2517 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2518 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2519
2520 /* Integer to floating-point conversions. Table 8. */
2521 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2522 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2523 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2524 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2525 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2526 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2527 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2528 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2529
2530 /* Long long. Table 9. */
2531 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2532 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2533 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2534 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2535 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2536 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2537 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2538 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2539
2540 /* Integer (32/32->32) division. \S 4.3.1. */
2541 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2542 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2543
2544 /* The divmod functions are designed so that they can be used for
2545 plain division, even though they return both the quotient and the
2546 remainder. The quotient is returned in the usual location (i.e.,
2547 r0 for SImode, {r0, r1} for DImode), just as would be expected
2548 for an ordinary division routine. Because the AAPCS calling
2549 conventions specify that all of { r0, r1, r2, r3 } are
2550 call-clobbered registers, there is no need to tell the compiler
2551 explicitly that those registers are clobbered by these
2552 routines. */
2553 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2554 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
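/* For reference, the run-time ABI describes __aeabi_idivmod in its own
   notation as "__value_in_regs idiv_return __aeabi_idivmod (int, int)",
   returning the quotient in r0 and the remainder in r1, while
   __aeabi_ldivmod returns the quotient in {r0, r1} and the remainder in
   {r2, r3}.  That register layout is what makes the substitution above
   safe: plain division just ignores the remainder registers.  */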
2555
2556 /* For SImode division the ABI provides div-without-mod routines,
2557 which are faster. */
2558 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2559 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2560
2561 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2562 divmod libcalls instead. */
2563 set_optab_libfunc (smod_optab, DImode, NULL);
2564 set_optab_libfunc (umod_optab, DImode, NULL);
2565 set_optab_libfunc (smod_optab, SImode, NULL);
2566 set_optab_libfunc (umod_optab, SImode, NULL);
2567
2568 /* Half-precision float operations. The compiler handles all operations
2569 with NULL libfuncs by converting to SFmode. */
2570 switch (arm_fp16_format)
2571 {
2572 case ARM_FP16_FORMAT_IEEE:
2573 case ARM_FP16_FORMAT_ALTERNATIVE:
2574
2575 /* Conversions. */
2576 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2577 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2578 ? "__gnu_f2h_ieee"
2579 : "__gnu_f2h_alternative"));
2580 set_conv_libfunc (sext_optab, SFmode, HFmode,
2581 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2582 ? "__gnu_h2f_ieee"
2583 : "__gnu_h2f_alternative"));
2584
2585 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2586 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2587 ? "__gnu_d2h_ieee"
2588 : "__gnu_d2h_alternative"));
2589
2590 /* Arithmetic. */
2591 set_optab_libfunc (add_optab, HFmode, NULL);
2592 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2593 set_optab_libfunc (smul_optab, HFmode, NULL);
2594 set_optab_libfunc (neg_optab, HFmode, NULL);
2595 set_optab_libfunc (sub_optab, HFmode, NULL);
2596
2597 /* Comparisons. */
2598 set_optab_libfunc (eq_optab, HFmode, NULL);
2599 set_optab_libfunc (ne_optab, HFmode, NULL);
2600 set_optab_libfunc (lt_optab, HFmode, NULL);
2601 set_optab_libfunc (le_optab, HFmode, NULL);
2602 set_optab_libfunc (ge_optab, HFmode, NULL);
2603 set_optab_libfunc (gt_optab, HFmode, NULL);
2604 set_optab_libfunc (unord_optab, HFmode, NULL);
2605 break;
2606
2607 default:
2608 break;
2609 }
2610
2611 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2612 {
2613 const arm_fixed_mode_set fixed_arith_modes[] =
2614 {
2615 { E_QQmode, "qq" },
2616 { E_UQQmode, "uqq" },
2617 { E_HQmode, "hq" },
2618 { E_UHQmode, "uhq" },
2619 { E_SQmode, "sq" },
2620 { E_USQmode, "usq" },
2621 { E_DQmode, "dq" },
2622 { E_UDQmode, "udq" },
2623 { E_TQmode, "tq" },
2624 { E_UTQmode, "utq" },
2625 { E_HAmode, "ha" },
2626 { E_UHAmode, "uha" },
2627 { E_SAmode, "sa" },
2628 { E_USAmode, "usa" },
2629 { E_DAmode, "da" },
2630 { E_UDAmode, "uda" },
2631 { E_TAmode, "ta" },
2632 { E_UTAmode, "uta" }
2633 };
2634 const arm_fixed_mode_set fixed_conv_modes[] =
2635 {
2636 { E_QQmode, "qq" },
2637 { E_UQQmode, "uqq" },
2638 { E_HQmode, "hq" },
2639 { E_UHQmode, "uhq" },
2640 { E_SQmode, "sq" },
2641 { E_USQmode, "usq" },
2642 { E_DQmode, "dq" },
2643 { E_UDQmode, "udq" },
2644 { E_TQmode, "tq" },
2645 { E_UTQmode, "utq" },
2646 { E_HAmode, "ha" },
2647 { E_UHAmode, "uha" },
2648 { E_SAmode, "sa" },
2649 { E_USAmode, "usa" },
2650 { E_DAmode, "da" },
2651 { E_UDAmode, "uda" },
2652 { E_TAmode, "ta" },
2653 { E_UTAmode, "uta" },
2654 { E_QImode, "qi" },
2655 { E_HImode, "hi" },
2656 { E_SImode, "si" },
2657 { E_DImode, "di" },
2658 { E_TImode, "ti" },
2659 { E_SFmode, "sf" },
2660 { E_DFmode, "df" }
2661 };
2662 unsigned int i, j;
2663
2664 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2665 {
2666 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2667 "add", fixed_arith_modes[i].name, 3);
2668 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2669 "ssadd", fixed_arith_modes[i].name, 3);
2670 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2671 "usadd", fixed_arith_modes[i].name, 3);
2672 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2673 "sub", fixed_arith_modes[i].name, 3);
2674 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2675 "sssub", fixed_arith_modes[i].name, 3);
2676 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2677 "ussub", fixed_arith_modes[i].name, 3);
2678 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2679 "mul", fixed_arith_modes[i].name, 3);
2680 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2681 "ssmul", fixed_arith_modes[i].name, 3);
2682 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2683 "usmul", fixed_arith_modes[i].name, 3);
2684 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2685 "div", fixed_arith_modes[i].name, 3);
2686 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2687 "udiv", fixed_arith_modes[i].name, 3);
2688 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2689 "ssdiv", fixed_arith_modes[i].name, 3);
2690 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2691 "usdiv", fixed_arith_modes[i].name, 3);
2692 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2693 "neg", fixed_arith_modes[i].name, 2);
2694 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2695 "ssneg", fixed_arith_modes[i].name, 2);
2696 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2697 "usneg", fixed_arith_modes[i].name, 2);
2698 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2699 "ashl", fixed_arith_modes[i].name, 3);
2700 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2701 "ashr", fixed_arith_modes[i].name, 3);
2702 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2703 "lshr", fixed_arith_modes[i].name, 3);
2704 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2705 "ssashl", fixed_arith_modes[i].name, 3);
2706 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2707 "usashl", fixed_arith_modes[i].name, 3);
2708 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2709 "cmp", fixed_arith_modes[i].name, 2);
2710 }
2711
2712 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2713 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2714 {
2715 if (i == j
2716 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2717 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2718 continue;
2719
2720 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2721 fixed_conv_modes[j].mode, "fract",
2722 fixed_conv_modes[i].name,
2723 fixed_conv_modes[j].name);
2724 arm_set_fixed_conv_libfunc (satfract_optab,
2725 fixed_conv_modes[i].mode,
2726 fixed_conv_modes[j].mode, "satfract",
2727 fixed_conv_modes[i].name,
2728 fixed_conv_modes[j].name);
2729 arm_set_fixed_conv_libfunc (fractuns_optab,
2730 fixed_conv_modes[i].mode,
2731 fixed_conv_modes[j].mode, "fractuns",
2732 fixed_conv_modes[i].name,
2733 fixed_conv_modes[j].name);
2734 arm_set_fixed_conv_libfunc (satfractuns_optab,
2735 fixed_conv_modes[i].mode,
2736 fixed_conv_modes[j].mode, "satfractuns",
2737 fixed_conv_modes[i].name,
2738 fixed_conv_modes[j].name);
2739 }
2740 }
2741
2742 if (TARGET_AAPCS_BASED)
2743 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2744
2745 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2746 }
2747
2748 /* On AAPCS systems, this is the "struct __va_list". */
2749 static GTY(()) tree va_list_type;
2750
2751 /* Return the type to use as __builtin_va_list. */
2752 static tree
2753 arm_build_builtin_va_list (void)
2754 {
2755 tree va_list_name;
2756 tree ap_field;
2757
2758 if (!TARGET_AAPCS_BASED)
2759 return std_build_builtin_va_list ();
2760
2761 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2762 defined as:
2763
2764 struct __va_list
2765 {
2766 void *__ap;
2767 };
2768
2769 The C Library ABI further reinforces this definition in \S
2770 4.1.
2771
2772 We must follow this definition exactly. The structure tag
2773 name is visible in C++ mangled names, and thus forms a part
2774 of the ABI. The field name may be used by people who
2775 #include <stdarg.h>. */
2776 /* Create the type. */
2777 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2778 /* Give it the required name. */
2779 va_list_name = build_decl (BUILTINS_LOCATION,
2780 TYPE_DECL,
2781 get_identifier ("__va_list"),
2782 va_list_type);
2783 DECL_ARTIFICIAL (va_list_name) = 1;
2784 TYPE_NAME (va_list_type) = va_list_name;
2785 TYPE_STUB_DECL (va_list_type) = va_list_name;
2786 /* Create the __ap field. */
2787 ap_field = build_decl (BUILTINS_LOCATION,
2788 FIELD_DECL,
2789 get_identifier ("__ap"),
2790 ptr_type_node);
2791 DECL_ARTIFICIAL (ap_field) = 1;
2792 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2793 TYPE_FIELDS (va_list_type) = ap_field;
2794 /* Compute its layout. */
2795 layout_type (va_list_type);
2796
2797 return va_list_type;
2798 }
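/* A concrete consequence of the ABI-mandated tag: in C++ a va_list
   parameter on AAPCS targets mangles as "St9__va_list", as if the
   structure were declared in namespace std, so the tag chosen above is
   part of the mangled-name ABI as well.  */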
2799
2800 /* Return an expression of type "void *" pointing to the next
2801 available argument in a variable-argument list. VALIST is the
2802 user-level va_list object, of type __builtin_va_list. */
2803 static tree
2804 arm_extract_valist_ptr (tree valist)
2805 {
2806 if (TREE_TYPE (valist) == error_mark_node)
2807 return error_mark_node;
2808
2809 /* On an AAPCS target, the pointer is stored within "struct
2810 va_list". */
2811 if (TARGET_AAPCS_BASED)
2812 {
2813 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2814 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2815 valist, ap_field, NULL_TREE);
2816 }
2817
2818 return valist;
2819 }
2820
2821 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2822 static void
2823 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2824 {
2825 valist = arm_extract_valist_ptr (valist);
2826 std_expand_builtin_va_start (valist, nextarg);
2827 }
2828
2829 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2830 static tree
2831 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2832 gimple_seq *post_p)
2833 {
2834 valist = arm_extract_valist_ptr (valist);
2835 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2836 }
2837
2838 /* Check any incompatible options that the user has specified. */
2839 static void
2840 arm_option_check_internal (struct gcc_options *opts)
2841 {
2842 int flags = opts->x_target_flags;
2843
2844 /* iWMMXt and NEON are incompatible. */
2845 if (TARGET_IWMMXT
2846 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2847 error ("iWMMXt and NEON are incompatible");
2848
2849 /* Make sure that the processor choice does not conflict with any of the
2850 other command line choices. */
2851 if (TARGET_ARM_P (flags)
2852 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2853 error ("target CPU does not support ARM mode");
2854
2855 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2856 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2857 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2858
2859 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2860 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2861
2862 /* If this target is normally configured to use APCS frames, warn if they
2863 are turned off and debugging is turned on. */
2864 if (TARGET_ARM_P (flags)
2865 && write_symbols != NO_DEBUG
2866 && !TARGET_APCS_FRAME
2867 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2868 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2869 "debugging");
2870
2871 /* iWMMXt unsupported under Thumb mode. */
2872 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2873 error ("iWMMXt unsupported under Thumb mode");
2874
2875 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2876 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2877
2878 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2879 {
2880 error ("RTP PIC is incompatible with Thumb");
2881 flag_pic = 0;
2882 }
2883
2884 if (target_pure_code || target_slow_flash_data)
2885 {
2886 const char *flag = (target_pure_code ? "-mpure-code" :
2887 "-mslow-flash-data");
2888
2889 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2890 with MOVT. */
2891 if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
2892 error ("%s only supports non-pic code on M-profile targets with the "
2893 "MOVT instruction", flag);
2894
2895 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2896 -mword-relocations forbids relocation of MOVT/MOVW. */
2897 if (target_word_relocations)
2898 error ("%s incompatible with %<-mword-relocations%>", flag);
2899 }
2900 }
2901
2902 /* Recompute the global settings depending on target attribute options. */
2903
2904 static void
2905 arm_option_params_internal (void)
2906 {
2907 /* If we are not using the default (ARM mode) section anchor offset
2908 ranges, then set the correct ranges now. */
2909 if (TARGET_THUMB1)
2910 {
2911 /* Thumb-1 LDR instructions cannot have negative offsets.
2912 Permissible positive offset ranges are 5-bit (for byte loads),
2913 6-bit (for halfword loads), or 7-bit (for word loads).
2914 Empirical results suggest a 7-bit anchor range gives the best
2915 overall code size. */
2916 targetm.min_anchor_offset = 0;
2917 targetm.max_anchor_offset = 127;
2918 }
2919 else if (TARGET_THUMB2)
2920 {
2921 /* The minimum is set such that the total size of the block
2922 for a particular anchor is 248 + 1 + 4095 bytes, which is
2923 divisible by eight, ensuring natural spacing of anchors. */
2924 targetm.min_anchor_offset = -248;
2925 targetm.max_anchor_offset = 4095;
2926 }
2927 else
2928 {
2929 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2930 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2931 }
2932
2933 /* Increase the number of conditional instructions with -Os. */
2934 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2935
2936 /* For THUMB2, we limit the conditional sequence to one IT block. */
2937 if (TARGET_THUMB2)
2938 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2939 }
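/* Working through the Thumb-1 numbers above: LDRB takes a 5-bit immediate
   (byte offsets 0-31), LDRH a 5-bit immediate scaled by 2 (0-62) and word
   LDR a 5-bit immediate scaled by 4 (0-124), so the 0..127 setting is the
   7-bit range sized to what word loads can reach.  The Thumb-2 range spans
   248 + 1 + 4095 = 4344 bytes, which is the multiple of eight mentioned in
   the comment.  */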
2940
2941 /* True if -mflip-thumb should next add an attribute for the default
2942 mode, false if it should next add an attribute for the opposite mode. */
2943 static GTY(()) bool thumb_flipper;
2944
2945 /* Options after initial target override. */
2946 static GTY(()) tree init_optimize;
2947
2948 static void
2949 arm_override_options_after_change_1 (struct gcc_options *opts)
2950 {
2951 /* -falign-functions without argument: supply one. */
2952 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
2953 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2954 && opts->x_optimize_size ? "2" : "4";
2955 }
2956
2957 /* Implement targetm.override_options_after_change. */
2958
2959 static void
2960 arm_override_options_after_change (void)
2961 {
2962 arm_configure_build_target (&arm_active_target,
2963 TREE_TARGET_OPTION (target_option_default_node),
2964 &global_options_set, false);
2965
2966 arm_override_options_after_change_1 (&global_options);
2967 }
2968
2969 /* Implement TARGET_OPTION_SAVE. */
2970 static void
2971 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2972 {
2973 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2974 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2975 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2976 }
2977
2978 /* Implement TARGET_OPTION_RESTORE. */
2979 static void
2980 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2981 {
2982 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2983 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2984 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2985 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2986 false);
2987 }
2988
2989 /* Reset the mode-dependent options, honouring any that the user has explicitly specified. */
2990 static void
2991 arm_option_override_internal (struct gcc_options *opts,
2992 struct gcc_options *opts_set)
2993 {
2994 arm_override_options_after_change_1 (opts);
2995
2996 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2997 {
2998 /* The default is to enable interworking, so this warning message would
2999 be confusing to users who have just compiled with
3000 e.g., -march=armv4. */
3001 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3002 opts->x_target_flags &= ~MASK_INTERWORK;
3003 }
3004
3005 if (TARGET_THUMB_P (opts->x_target_flags)
3006 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3007 {
3008 warning (0, "target CPU does not support THUMB instructions");
3009 opts->x_target_flags &= ~MASK_THUMB;
3010 }
3011
3012 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3013 {
3014 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3015 opts->x_target_flags &= ~MASK_APCS_FRAME;
3016 }
3017
3018 /* Callee super interworking implies thumb interworking. Adding
3019 this to the flags here simplifies the logic elsewhere. */
3020 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3021 opts->x_target_flags |= MASK_INTERWORK;
3022
3023 /* We need to remember the initial values so that combinations of options like
3024 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3025 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3026
3027 if (! opts_set->x_arm_restrict_it)
3028 opts->x_arm_restrict_it = arm_arch8;
3029
3030 /* ARM execution state and M profile have no notion of restricted IT blocks. */
3031 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3032 opts->x_arm_restrict_it = 0;
3033
3034 /* Enable -munaligned-access by default for
3035 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3036 i.e. Thumb2 and ARM state only.
3037 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3038 - ARMv8 architecture-based processors.
3039
3040 Disable -munaligned-access by default for
3041 - all pre-ARMv6 architecture-based processors
3042 - ARMv6-M architecture-based processors
3043 - ARMv8-M Baseline processors. */
3044
3045 if (! opts_set->x_unaligned_access)
3046 {
3047 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3048 && arm_arch6 && (arm_arch_notm || arm_arch7));
3049 }
3050 else if (opts->x_unaligned_access == 1
3051 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3052 {
3053 warning (0, "target CPU does not support unaligned accesses");
3054 opts->x_unaligned_access = 0;
3055 }
3056
3057 /* Don't warn when disabling insn scheduling for Thumb-1, since it is on by default at -O2. */
3058 if (TARGET_THUMB1_P (opts->x_target_flags))
3059 opts->x_flag_schedule_insns = 0;
3060 else
3061 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3062
3063 /* Disable shrink-wrap when optimizing function for size, since it tends to
3064 generate additional returns. */
3065 if (optimize_function_for_size_p (cfun)
3066 && TARGET_THUMB2_P (opts->x_target_flags))
3067 opts->x_flag_shrink_wrap = false;
3068 else
3069 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3070
3071 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3072 - epilogue_insns - does not accurately model the corresponding insns
3073 emitted in the asm file. In particular, see the comment in thumb_exit
3074 'Find out how many of the (return) argument registers we can corrupt'.
3075 As a consequence, the epilogue may clobber registers without fipa-ra
3076 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3077 TODO: Accurately model clobbers for epilogue_insns and reenable
3078 fipa-ra. */
3079 if (TARGET_THUMB1_P (opts->x_target_flags))
3080 opts->x_flag_ipa_ra = 0;
3081 else
3082 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3083
3084 /* Thumb2 inline assembly code should always use unified syntax.
3085 This will apply to ARM and Thumb1 eventually. */
3086 if (TARGET_THUMB2_P (opts->x_target_flags))
3087 opts->x_inline_asm_unified = true;
3088
3089 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3090 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3091 #endif
3092 }
3093
3094 static sbitmap isa_all_fpubits;
3095 static sbitmap isa_quirkbits;
3096
3097 /* Configure a build target TARGET from the user-specified options OPTS and
3098 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3099 architecture have been specified, but the two are not identical. */
3100 void
3101 arm_configure_build_target (struct arm_build_target *target,
3102 struct cl_target_option *opts,
3103 struct gcc_options *opts_set,
3104 bool warn_compatible)
3105 {
3106 const cpu_option *arm_selected_tune = NULL;
3107 const arch_option *arm_selected_arch = NULL;
3108 const cpu_option *arm_selected_cpu = NULL;
3109 const arm_fpu_desc *arm_selected_fpu = NULL;
3110 const char *tune_opts = NULL;
3111 const char *arch_opts = NULL;
3112 const char *cpu_opts = NULL;
3113
3114 bitmap_clear (target->isa);
3115 target->core_name = NULL;
3116 target->arch_name = NULL;
3117
3118 if (opts_set->x_arm_arch_string)
3119 {
3120 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3121 "-march",
3122 opts->x_arm_arch_string);
3123 arch_opts = strchr (opts->x_arm_arch_string, '+');
3124 }
3125
3126 if (opts_set->x_arm_cpu_string)
3127 {
3128 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3129 opts->x_arm_cpu_string);
3130 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3131 arm_selected_tune = arm_selected_cpu;
3132 /* If taking the tuning from -mcpu, we don't need to rescan the
3133 options for tuning. */
3134 }
3135
3136 if (opts_set->x_arm_tune_string)
3137 {
3138 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3139 opts->x_arm_tune_string);
3140 tune_opts = strchr (opts->x_arm_tune_string, '+');
3141 }
3142
3143 if (arm_selected_arch)
3144 {
3145 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3146 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3147 arch_opts);
3148
3149 if (arm_selected_cpu)
3150 {
3151 auto_sbitmap cpu_isa (isa_num_bits);
3152 auto_sbitmap isa_delta (isa_num_bits);
3153
3154 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3155 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3156 cpu_opts);
3157 bitmap_xor (isa_delta, cpu_isa, target->isa);
3158 /* Ignore any bits that are quirk bits. */
3159 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3160 /* Ignore (for now) any bits that might be set by -mfpu. */
3161 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3162
3163 if (!bitmap_empty_p (isa_delta))
3164 {
3165 if (warn_compatible)
3166 warning (0, "switch %<-mcpu=%s%> conflicts "
3167 "with %<-march=%s%> switch",
3168 arm_selected_cpu->common.name,
3169 arm_selected_arch->common.name);
3170 /* -march wins for code generation.
3171 -mcpu wins for default tuning. */
3172 if (!arm_selected_tune)
3173 arm_selected_tune = arm_selected_cpu;
3174
3175 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3176 target->arch_name = arm_selected_arch->common.name;
3177 }
3178 else
3179 {
3180 /* Architecture and CPU are essentially the same.
3181 Prefer the CPU setting. */
3182 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3183 target->core_name = arm_selected_cpu->common.name;
3184 /* Copy the CPU's capabilities, so that we inherit the
3185 appropriate extensions and quirks. */
3186 bitmap_copy (target->isa, cpu_isa);
3187 }
3188 }
3189 else
3190 {
3191 /* Pick a CPU based on the architecture. */
3192 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3193 target->arch_name = arm_selected_arch->common.name;
3194 /* Note: target->core_name is left unset in this path. */
3195 }
3196 }
3197 else if (arm_selected_cpu)
3198 {
3199 target->core_name = arm_selected_cpu->common.name;
3200 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3201 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3202 cpu_opts);
3203 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3204 }
3205 /* If the user did not specify a processor or architecture, choose
3206 one for them. */
3207 else
3208 {
3209 const cpu_option *sel;
3210 auto_sbitmap sought_isa (isa_num_bits);
3211 bitmap_clear (sought_isa);
3212 auto_sbitmap default_isa (isa_num_bits);
3213
3214 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3215 TARGET_CPU_DEFAULT);
3216 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3217 gcc_assert (arm_selected_cpu->common.name);
3218
3219 /* RWE: All of the selection logic below (to the end of this
3220 'if' clause) looks somewhat suspect. It appears to be mostly
3221 there to support forcing thumb support when the default CPU
3222 does not have thumb (somewhat dubious in terms of what the
3223 user might be expecting). I think it should be removed once
3224 support for the pre-thumb era cores is removed. */
3225 sel = arm_selected_cpu;
3226 arm_initialize_isa (default_isa, sel->common.isa_bits);
3227 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3228 cpu_opts);
3229
3230 /* Now check to see if the user has specified any command line
3231 switches that require certain abilities from the cpu. */
3232
3233 if (TARGET_INTERWORK || TARGET_THUMB)
3234 bitmap_set_bit (sought_isa, isa_bit_thumb);
3235
3236 /* If there are such requirements and the default CPU does not
3237 satisfy them, we need to run over the complete list of
3238 cores looking for one that is satisfactory. */
3239 if (!bitmap_empty_p (sought_isa)
3240 && !bitmap_subset_p (sought_isa, default_isa))
3241 {
3242 auto_sbitmap candidate_isa (isa_num_bits);
3243 /* We're only interested in a CPU with at least the
3244 capabilities of the default CPU and the required
3245 additional features. */
3246 bitmap_ior (default_isa, default_isa, sought_isa);
3247
3248 /* Try to locate a CPU type that supports all of the abilities
3249 of the default CPU, plus the extra abilities requested by
3250 the user. */
3251 for (sel = all_cores; sel->common.name != NULL; sel++)
3252 {
3253 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3254 /* An exact match? */
3255 if (bitmap_equal_p (default_isa, candidate_isa))
3256 break;
3257 }
3258
3259 if (sel->common.name == NULL)
3260 {
3261 unsigned current_bit_count = isa_num_bits;
3262 const cpu_option *best_fit = NULL;
3263
3264 /* Ideally we would like to issue an error message here
3265 saying that it was not possible to find a CPU compatible
3266 with the default CPU, but which also supports the command
3267 line options specified by the programmer, and so they
3268 ought to use the -mcpu=<name> command line option to
3269 override the default CPU type.
3270
3271 If we cannot find a CPU that has exactly the
3272 characteristics of the default CPU and the given
3273 command line options we scan the array again looking
3274 for a best match. The best match must have at least
3275 the capabilities of the perfect match. */
3276 for (sel = all_cores; sel->common.name != NULL; sel++)
3277 {
3278 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3279
3280 if (bitmap_subset_p (default_isa, candidate_isa))
3281 {
3282 unsigned count;
3283
3284 bitmap_and_compl (candidate_isa, candidate_isa,
3285 default_isa);
3286 count = bitmap_popcount (candidate_isa);
3287
3288 if (count < current_bit_count)
3289 {
3290 best_fit = sel;
3291 current_bit_count = count;
3292 }
3293 }
3294
3295 gcc_assert (best_fit);
3296 sel = best_fit;
3297 }
3298 }
3299 arm_selected_cpu = sel;
3300 }
3301
3302 /* Now we know the CPU, we can finally initialize the target
3303 structure. */
3304 target->core_name = arm_selected_cpu->common.name;
3305 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3306 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3307 cpu_opts);
3308 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3309 }
3310
3311 gcc_assert (arm_selected_cpu);
3312 gcc_assert (arm_selected_arch);
3313
3314 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3315 {
3316 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3317 auto_sbitmap fpu_bits (isa_num_bits);
3318
3319 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3320 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3321 bitmap_ior (target->isa, target->isa, fpu_bits);
3322 }
3323
3324 if (!arm_selected_tune)
3325 arm_selected_tune = arm_selected_cpu;
3326 else /* Validate the features passed to -mtune. */
3327 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3328
3329 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3330
3331 /* Finish initializing the target structure. */
3332 target->arch_pp_name = arm_selected_arch->arch;
3333 target->base_arch = arm_selected_arch->base_arch;
3334 target->profile = arm_selected_arch->profile;
3335
3336 target->tune_flags = tune_data->tune_flags;
3337 target->tune = tune_data->tune;
3338 target->tune_core = tune_data->scheduler;
3339 arm_option_reconfigure_globals ();
3340 }
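
/* A worked example of the combination rules above (illustrative only;
   the exact ISA deltas come from the arm-cpus.in descriptions):

     -march=armv7-m -mcpu=cortex-a8

   Cortex-A8 is an ARMv7-A core, so its ISA bits differ from ARMv7-M even
   after masking out the FPU and quirk bits; with WARN_COMPATIBLE set this
   produces "switch -mcpu=cortex-a8 conflicts with -march=armv7-m switch",
   -march then drives code generation and -mcpu only supplies the default
   tuning.  By contrast

     -march=armv8-a+crc -mcpu=cortex-a53

   should resolve silently: the CPU already implements the requested
   architecture (Cortex-A53 includes CRC32), so the CPU setting is
   preferred, and the "+crc" text after the '+' separator is handed to
   arm_parse_option_features as an extension string.  */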
3341
3342 /* Fix up any incompatible options that the user has specified. */
3343 static void
3344 arm_option_override (void)
3345 {
3346 static const enum isa_feature fpu_bitlist[]
3347 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3348 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3349 cl_target_option opts;
3350
3351 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3352 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3353
3354 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3355 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3356
3357 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3358
3359 if (!global_options_set.x_arm_fpu_index)
3360 {
3361 bool ok;
3362 int fpu_index;
3363
3364 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3365 CL_TARGET);
3366 gcc_assert (ok);
3367 arm_fpu_index = (enum fpu_type) fpu_index;
3368 }
3369
3370 cl_target_option_save (&opts, &global_options);
3371 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3372 true);
3373
3374 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3375 SUBTARGET_OVERRIDE_OPTIONS;
3376 #endif
3377
3378 /* Initialize boolean versions of the architectural flags, for use
3379 in the arm.md file and for enabling feature flags. */
3380 arm_option_reconfigure_globals ();
3381
3382 arm_tune = arm_active_target.tune_core;
3383 tune_flags = arm_active_target.tune_flags;
3384 current_tune = arm_active_target.tune;
3385
3386 /* TBD: Dwarf info for apcs frame is not handled yet. */
3387 if (TARGET_APCS_FRAME)
3388 flag_shrink_wrap = false;
3389
3390 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3391 {
3392 warning (0, "%<-mapcs-stack-check%> incompatible with "
3393 "%<-mno-apcs-frame%>");
3394 target_flags |= MASK_APCS_FRAME;
3395 }
3396
3397 if (TARGET_POKE_FUNCTION_NAME)
3398 target_flags |= MASK_APCS_FRAME;
3399
3400 if (TARGET_APCS_REENT && flag_pic)
3401 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3402
3403 if (TARGET_APCS_REENT)
3404 warning (0, "APCS reentrant code not supported. Ignored");
3405
3406 /* Set up some tuning parameters. */
3407 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3408 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3409 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3410 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3411 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3412 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3413
3414 /* For arm2/3 there is no need to do any scheduling if we are doing
3415 software floating-point. */
3416 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3417 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3418
3419 /* Override the default structure alignment for AAPCS ABI. */
3420 if (!global_options_set.x_arm_structure_size_boundary)
3421 {
3422 if (TARGET_AAPCS_BASED)
3423 arm_structure_size_boundary = 8;
3424 }
3425 else
3426 {
3427 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3428
3429 if (arm_structure_size_boundary != 8
3430 && arm_structure_size_boundary != 32
3431 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3432 {
3433 if (ARM_DOUBLEWORD_ALIGN)
3434 warning (0,
3435 "structure size boundary can only be set to 8, 32 or 64");
3436 else
3437 warning (0, "structure size boundary can only be set to 8 or 32");
3438 arm_structure_size_boundary
3439 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3440 }
3441 }
3442
3443 if (TARGET_VXWORKS_RTP)
3444 {
3445 if (!global_options_set.x_arm_pic_data_is_text_relative)
3446 arm_pic_data_is_text_relative = 0;
3447 }
3448 else if (flag_pic
3449 && !arm_pic_data_is_text_relative
3450 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3451 /* When text & data segments don't have a fixed displacement, the
3452 intended use is with a single, read only, pic base register.
3453 Unless the user explicitly requested not to do that, set
3454 it. */
3455 target_flags |= MASK_SINGLE_PIC_BASE;
3456
3457 /* If stack checking is disabled, we can use r10 as the PIC register,
3458 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3459 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3460 {
3461 if (TARGET_VXWORKS_RTP)
3462 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3463 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3464 }
3465
3466 if (flag_pic && TARGET_VXWORKS_RTP)
3467 arm_pic_register = 9;
3468
3469 /* If in FDPIC mode then force arm_pic_register to be r9. */
3470 if (TARGET_FDPIC)
3471 {
3472 arm_pic_register = FDPIC_REGNUM;
3473 if (TARGET_THUMB1)
3474 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3475 }
3476
3477 if (arm_pic_register_string != NULL)
3478 {
3479 int pic_register = decode_reg_name (arm_pic_register_string);
3480
3481 if (!flag_pic)
3482 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3483
3484 /* Prevent the user from choosing an obviously stupid PIC register. */
3485 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3486 || pic_register == HARD_FRAME_POINTER_REGNUM
3487 || pic_register == STACK_POINTER_REGNUM
3488 || pic_register >= PC_REGNUM
3489 || (TARGET_VXWORKS_RTP
3490 && (unsigned int) pic_register != arm_pic_register))
3491 error ("unable to use %qs for PIC register", arm_pic_register_string);
3492 else
3493 arm_pic_register = pic_register;
3494 }
3495
3496 if (flag_pic)
3497 target_word_relocations = 1;
3498
3499 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3500 if (fix_cm3_ldrd == 2)
3501 {
3502 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3503 fix_cm3_ldrd = 1;
3504 else
3505 fix_cm3_ldrd = 0;
3506 }
3507
3508 /* Hot/Cold partitioning is not currently supported, since we can't
3509 handle literal pool placement in that case. */
3510 if (flag_reorder_blocks_and_partition)
3511 {
3512 inform (input_location,
3513 "%<-freorder-blocks-and-partition%> not supported "
3514 "on this architecture");
3515 flag_reorder_blocks_and_partition = 0;
3516 flag_reorder_blocks = 1;
3517 }
3518
3519 if (flag_pic)
3520 /* Hoisting PIC address calculations more aggressively provides a small,
3521 but measurable, size reduction for PIC code. Therefore, we decrease
3522 the bar for unrestricted expression hoisting to the cost of PIC address
3523 calculation, which is 2 instructions. */
3524 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3525 global_options.x_param_values,
3526 global_options_set.x_param_values);
3527
3528 /* ARM EABI defaults to strict volatile bitfields. */
3529 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3530 && abi_version_at_least(2))
3531 flag_strict_volatile_bitfields = 1;
3532
3533 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3534 where we have deemed it beneficial (signified by setting
3535 prefetch.num_slots to 1 or more). */
3536 if (flag_prefetch_loop_arrays < 0
3537 && HAVE_prefetch
3538 && optimize >= 3
3539 && current_tune->prefetch.num_slots > 0)
3540 flag_prefetch_loop_arrays = 1;
3541
3542 /* Set up parameters to be used in prefetching algorithm. Do not
3543 override the defaults unless we are tuning for a core we have
3544 researched values for. */
3545 if (current_tune->prefetch.num_slots > 0)
3546 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3547 current_tune->prefetch.num_slots,
3548 global_options.x_param_values,
3549 global_options_set.x_param_values);
3550 if (current_tune->prefetch.l1_cache_line_size >= 0)
3551 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3552 current_tune->prefetch.l1_cache_line_size,
3553 global_options.x_param_values,
3554 global_options_set.x_param_values);
3555 if (current_tune->prefetch.l1_cache_size >= 0)
3556 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3557 current_tune->prefetch.l1_cache_size,
3558 global_options.x_param_values,
3559 global_options_set.x_param_values);
3560
3561 /* Look through ready list and all of queue for instructions
3562 relevant for L2 auto-prefetcher. */
3563 int param_sched_autopref_queue_depth;
3564
3565 switch (current_tune->sched_autopref)
3566 {
3567 case tune_params::SCHED_AUTOPREF_OFF:
3568 param_sched_autopref_queue_depth = -1;
3569 break;
3570
3571 case tune_params::SCHED_AUTOPREF_RANK:
3572 param_sched_autopref_queue_depth = 0;
3573 break;
3574
3575 case tune_params::SCHED_AUTOPREF_FULL:
3576 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3577 break;
3578
3579 default:
3580 gcc_unreachable ();
3581 }
3582
3583 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3584 param_sched_autopref_queue_depth,
3585 global_options.x_param_values,
3586 global_options_set.x_param_values);
3587
3588 /* Currently, for slow flash data, we just disable literal pools. We also
3589 disable them for pure-code. */
3590 if (target_slow_flash_data || target_pure_code)
3591 arm_disable_literal_pool = true;
3592
3593 /* Disable scheduling fusion by default unless the core implements ARMv7
3594 and the tuning prefers ldrd/strd. */
3595 if (flag_schedule_fusion == 2
3596 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3597 flag_schedule_fusion = 0;
3598
3599 /* Need to remember the initial options before they are overridden. */
3600 init_optimize = build_optimization_node (&global_options);
3601
3602 arm_options_perform_arch_sanity_checks ();
3603 arm_option_override_internal (&global_options, &global_options_set);
3604 arm_option_check_internal (&global_options);
3605 arm_option_params_internal ();
3606
3607 /* Create the default target_options structure. */
3608 target_option_default_node = target_option_current_node
3609 = build_target_option_node (&global_options);
3610
3611 /* Register global variables with the garbage collector. */
3612 arm_add_gc_roots ();
3613
3614 /* Init initial mode for testing. */
3615 thumb_flipper = TARGET_THUMB;
3616 }
3617
3618
3619 /* Reconfigure global status flags from the active_target.isa. */
3620 void
3621 arm_option_reconfigure_globals (void)
3622 {
3623 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3624 arm_base_arch = arm_active_target.base_arch;
3625
3626 /* Initialize boolean versions of the architectural flags, for use
3627 in the arm.md file. */
3628 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3629 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3630 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3631 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3632 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3633 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3634 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3635 arm_arch6m = arm_arch6 && !arm_arch_notm;
3636 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3637 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3638 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3639 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3640 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3641 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3642 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3643 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3644 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3645 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3646 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3647 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3648 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3649 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3650 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3651 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3652 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3653 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3654 if (arm_fp16_inst)
3655 {
3656 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3657 error ("selected fp16 options are incompatible");
3658 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3659 }
3660
3661 /* And finally, set up some quirks. */
3662 arm_arch_no_volatile_ce
3663 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3664 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3665 isa_bit_quirk_armv6kz);
3666
3667 /* Use the cp15 method if it is available. */
3668 if (target_thread_pointer == TP_AUTO)
3669 {
3670 if (arm_arch6k && !TARGET_THUMB1)
3671 target_thread_pointer = TP_CP15;
3672 else
3673 target_thread_pointer = TP_SOFT;
3674 }
3675 }
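
/* For instance (illustrative), with an active target of armv7-a the
   assignments above leave arm_arch4 through arm_arch7, arm_arch_thumb1,
   arm_arch_thumb2 and arm_arch_notm set while arm_arch8 and arm_arch_cmse
   stay clear; the patterns in arm.md and the feature tests elsewhere in
   this file consult these cached booleans rather than re-querying the
   ISA bitmap each time.  */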
3676
3677 /* Perform some validation between the desired architecture and the rest of the
3678 options. */
3679 void
3680 arm_options_perform_arch_sanity_checks (void)
3681 {
3682 /* V5T code we generate is completely interworking capable, so we turn off
3683 TARGET_INTERWORK here to avoid many tests later on. */
3684
3685 /* XXX However, we must pass the right pre-processor defines to CPP
3686 or GLD can get confused. This is a hack. */
3687 if (TARGET_INTERWORK)
3688 arm_cpp_interwork = 1;
3689
3690 if (arm_arch5t)
3691 target_flags &= ~MASK_INTERWORK;
3692
3693 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3694 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3695
3696 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3697 error ("iwmmxt abi requires an iwmmxt capable cpu");
3698
3699 /* BPABI targets use linker tricks to allow interworking on cores
3700 without thumb support. */
3701 if (TARGET_INTERWORK
3702 && !TARGET_BPABI
3703 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3704 {
3705 warning (0, "target CPU does not support interworking" );
3706 target_flags &= ~MASK_INTERWORK;
3707 }
3708
3709 /* If soft-float is specified then don't use FPU. */
3710 if (TARGET_SOFT_FLOAT)
3711 arm_fpu_attr = FPU_NONE;
3712 else
3713 arm_fpu_attr = FPU_VFP;
3714
3715 if (TARGET_AAPCS_BASED)
3716 {
3717 if (TARGET_CALLER_INTERWORKING)
3718 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3719 else
3720 if (TARGET_CALLEE_INTERWORKING)
3721 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3722 }
3723
3724 /* __fp16 support currently assumes the core has ldrh. */
3725 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3726 sorry ("__fp16 and no ldrh");
3727
3728 if (use_cmse && !arm_arch_cmse)
3729 error ("target CPU does not support ARMv8-M Security Extensions");
3730
3731 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3732 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3733 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3734 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3735
3736
3737 if (TARGET_AAPCS_BASED)
3738 {
3739 if (arm_abi == ARM_ABI_IWMMXT)
3740 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3741 else if (TARGET_HARD_FLOAT_ABI)
3742 {
3743 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3744 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3745 error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3746 }
3747 else
3748 arm_pcs_default = ARM_PCS_AAPCS;
3749 }
3750 else
3751 {
3752 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3753 sorry ("%<-mfloat-abi=hard%> and VFP");
3754
3755 if (arm_abi == ARM_ABI_APCS)
3756 arm_pcs_default = ARM_PCS_APCS;
3757 else
3758 arm_pcs_default = ARM_PCS_ATPCS;
3759 }
3760 }
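
/* Example diagnostics from the checks above (illustrative): on an AAPCS
   target, -mfloat-abi=hard with a CPU that provides no VFP at all (say
   -mcpu=cortex-m0) fails the isa_bit_vfpv2 test and gives
   "-mfloat-abi=hard: selected processor lacks an FPU", while -mcmse on a
   core without the ARMv8-M Security Extensions gives "target CPU does not
   support ARMv8-M Security Extensions".  */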
3761
3762 /* Test whether a local function descriptor is canonical, i.e.,
3763 whether we can use GOTOFFFUNCDESC to compute the address of the
3764 function. */
3765 static bool
3766 arm_fdpic_local_funcdesc_p (rtx fnx)
3767 {
3768 tree fn;
3769 enum symbol_visibility vis;
3770 bool ret;
3771
3772 if (!TARGET_FDPIC)
3773 return true;
3774
3775 if (! SYMBOL_REF_LOCAL_P (fnx))
3776 return false;
3777
3778 fn = SYMBOL_REF_DECL (fnx);
3779
3780 if (! fn)
3781 return false;
3782
3783 vis = DECL_VISIBILITY (fn);
3784
3785 if (vis == VISIBILITY_PROTECTED)
3786 /* Private function descriptors for protected functions are not
3787 canonical. Temporarily change the visibility to global so that
3788 we can ensure uniqueness of funcdesc pointers. */
3789 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3790
3791 ret = default_binds_local_p_1 (fn, flag_pic);
3792
3793 DECL_VISIBILITY (fn) = vis;
3794
3795 return ret;
3796 }
3797
3798 static void
3799 arm_add_gc_roots (void)
3800 {
3801 gcc_obstack_init(&minipool_obstack);
3802 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3803 }
3804 \f
3805 /* A table of known ARM exception types.
3806 For use with the interrupt function attribute. */
3807
3808 typedef struct
3809 {
3810 const char *const arg;
3811 const unsigned long return_value;
3812 }
3813 isr_attribute_arg;
3814
3815 static const isr_attribute_arg isr_attribute_args [] =
3816 {
3817 { "IRQ", ARM_FT_ISR },
3818 { "irq", ARM_FT_ISR },
3819 { "FIQ", ARM_FT_FIQ },
3820 { "fiq", ARM_FT_FIQ },
3821 { "ABORT", ARM_FT_ISR },
3822 { "abort", ARM_FT_ISR },
3823 { "ABORT", ARM_FT_ISR },
3824 { "abort", ARM_FT_ISR },
3825 { "UNDEF", ARM_FT_EXCEPTION },
3826 { "undef", ARM_FT_EXCEPTION },
3827 { "SWI", ARM_FT_EXCEPTION },
3828 { "swi", ARM_FT_EXCEPTION },
3829 { NULL, ARM_FT_NORMAL }
3830 };
3831
3832 /* Returns the (interrupt) function type of the current
3833 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3834
3835 static unsigned long
3836 arm_isr_value (tree argument)
3837 {
3838 const isr_attribute_arg * ptr;
3839 const char * arg;
3840
3841 if (!arm_arch_notm)
3842 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3843
3844 /* No argument - default to IRQ. */
3845 if (argument == NULL_TREE)
3846 return ARM_FT_ISR;
3847
3848 /* Get the value of the argument. */
3849 if (TREE_VALUE (argument) == NULL_TREE
3850 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3851 return ARM_FT_UNKNOWN;
3852
3853 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3854
3855 /* Check it against the list of known arguments. */
3856 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3857 if (streq (arg, ptr->arg))
3858 return ptr->return_value;
3859
3860 /* An unrecognized interrupt type. */
3861 return ARM_FT_UNKNOWN;
3862 }
3863
3864 /* Computes the type of the current function. */
3865
3866 static unsigned long
3867 arm_compute_func_type (void)
3868 {
3869 unsigned long type = ARM_FT_UNKNOWN;
3870 tree a;
3871 tree attr;
3872
3873 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3874
3875 /* Decide if the current function is volatile. Such functions
3876 never return, and many memory cycles can be saved by not storing
3877 register values that will never be needed again. This optimization
3878 was added to speed up context switching in a kernel application. */
3879 if (optimize > 0
3880 && (TREE_NOTHROW (current_function_decl)
3881 || !(flag_unwind_tables
3882 || (flag_exceptions
3883 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3884 && TREE_THIS_VOLATILE (current_function_decl))
3885 type |= ARM_FT_VOLATILE;
3886
3887 if (cfun->static_chain_decl != NULL)
3888 type |= ARM_FT_NESTED;
3889
3890 attr = DECL_ATTRIBUTES (current_function_decl);
3891
3892 a = lookup_attribute ("naked", attr);
3893 if (a != NULL_TREE)
3894 type |= ARM_FT_NAKED;
3895
3896 a = lookup_attribute ("isr", attr);
3897 if (a == NULL_TREE)
3898 a = lookup_attribute ("interrupt", attr);
3899
3900 if (a == NULL_TREE)
3901 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3902 else
3903 type |= arm_isr_value (TREE_VALUE (a));
3904
3905 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3906 type |= ARM_FT_CMSE_ENTRY;
3907
3908 return type;
3909 }
3910
3911 /* Returns the type of the current function. */
3912
3913 unsigned long
3914 arm_current_func_type (void)
3915 {
3916 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3917 cfun->machine->func_type = arm_compute_func_type ();
3918
3919 return cfun->machine->func_type;
3920 }
3921
3922 bool
3923 arm_allocate_stack_slots_for_args (void)
3924 {
3925 /* Naked functions should not allocate stack slots for arguments. */
3926 return !IS_NAKED (arm_current_func_type ());
3927 }
3928
3929 static bool
3930 arm_warn_func_return (tree decl)
3931 {
3932 /* Naked functions are implemented entirely in assembly, including the
3933 return sequence, so suppress warnings about this. */
3934 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3935 }
3936
3937 \f
3938 /* Output assembler code for a block containing the constant parts
3939 of a trampoline, leaving space for the variable parts.
3940
3941 On the ARM, (if r8 is the static chain regnum, and remembering that
3942 referencing pc adds an offset of 8) the trampoline looks like:
3943 ldr r8, [pc, #0]
3944 ldr pc, [pc]
3945 .word static chain value
3946 .word function's address
3947 XXX FIXME: When the trampoline returns, r8 will be clobbered.
3948
3949 In FDPIC mode, the trampoline looks like:
3950 .word trampoline address
3951 .word trampoline GOT address
3952 ldr r12, [pc, #8] ; #4 for Arm mode
3953 ldr r9, [pc, #8] ; #4 for Arm mode
3954 ldr pc, [pc, #8] ; #4 for Arm mode
3955 .word static chain value
3956 .word GOT address
3957 .word function's address
3958 */
3959
3960 static void
3961 arm_asm_trampoline_template (FILE *f)
3962 {
3963 fprintf (f, "\t.syntax unified\n");
3964
3965 if (TARGET_FDPIC)
3966 {
3967 /* The first two words are a function descriptor pointing to the
3968 trampoline code just below. */
3969 if (TARGET_ARM)
3970 fprintf (f, "\t.arm\n");
3971 else if (TARGET_THUMB2)
3972 fprintf (f, "\t.thumb\n");
3973 else
3974 /* Only ARM and Thumb-2 are supported. */
3975 gcc_unreachable ();
3976
3977 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3978 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3979 /* Trampoline code which sets the static chain register and also the
3980 PIC register before jumping into the real code. */
3981 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
3982 STATIC_CHAIN_REGNUM, PC_REGNUM,
3983 TARGET_THUMB2 ? 8 : 4);
3984 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
3985 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
3986 TARGET_THUMB2 ? 8 : 4);
3987 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
3988 PC_REGNUM, PC_REGNUM,
3989 TARGET_THUMB2 ? 8 : 4);
3990 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3991 }
3992 else if (TARGET_ARM)
3993 {
3994 fprintf (f, "\t.arm\n");
3995 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3996 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3997 }
3998 else if (TARGET_THUMB2)
3999 {
4000 fprintf (f, "\t.thumb\n");
4001 /* The Thumb-2 trampoline is similar to the arm implementation.
4002 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4003 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4004 STATIC_CHAIN_REGNUM, PC_REGNUM);
4005 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4006 }
4007 else
4008 {
4009 ASM_OUTPUT_ALIGN (f, 2);
4010 fprintf (f, "\t.code\t16\n");
4011 fprintf (f, ".Ltrampoline_start:\n");
4012 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4013 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4014 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4015 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4016 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4017 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4018 }
4019 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4020 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4021 }
4022
4023 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4024
4025 static void
4026 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4027 {
4028 rtx fnaddr, mem, a_tramp;
4029
4030 emit_block_move (m_tramp, assemble_trampoline_template (),
4031 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4032
4033 if (TARGET_FDPIC)
4034 {
4035 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4036 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4037 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4038 /* The function start address is at offset 8, but in Thumb mode
4039 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4040 below. */
4041 rtx trampoline_code_start
4042 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4043
4044 /* Write initial funcdesc which points to the trampoline. */
4045 mem = adjust_address (m_tramp, SImode, 0);
4046 emit_move_insn (mem, trampoline_code_start);
4047 mem = adjust_address (m_tramp, SImode, 4);
4048 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4049 /* Setup static chain. */
4050 mem = adjust_address (m_tramp, SImode, 20);
4051 emit_move_insn (mem, chain_value);
4052 /* GOT + real function entry point. */
4053 mem = adjust_address (m_tramp, SImode, 24);
4054 emit_move_insn (mem, gotaddr);
4055 mem = adjust_address (m_tramp, SImode, 28);
4056 emit_move_insn (mem, fnaddr);
4057 }
4058 else
4059 {
4060 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4061 emit_move_insn (mem, chain_value);
4062
4063 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4064 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4065 emit_move_insn (mem, fnaddr);
4066 }
4067
4068 a_tramp = XEXP (m_tramp, 0);
4069 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4070 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
4071 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
4072 }
4073
4074 /* Thumb trampolines should be entered in thumb mode, so set
4075 the bottom bit of the address. */
4076
4077 static rtx
4078 arm_trampoline_adjust_address (rtx addr)
4079 {
4080 /* For FDPIC don't fix trampoline address since it's a function
4081 descriptor and not a function address. */
4082 if (TARGET_THUMB && !TARGET_FDPIC)
4083 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4084 NULL, 0, OPTAB_LIB_WIDEN);
4085 return addr;
4086 }
4087 \f
4088 /* Return 1 if it is possible to return using a single instruction.
4089 If SIBLING is non-null, this is a test for a return before a sibling
4090 call. SIBLING is the call insn, so we can examine its register usage. */
4091
4092 int
4093 use_return_insn (int iscond, rtx sibling)
4094 {
4095 int regno;
4096 unsigned int func_type;
4097 unsigned long saved_int_regs;
4098 unsigned HOST_WIDE_INT stack_adjust;
4099 arm_stack_offsets *offsets;
4100
4101 /* Never use a return instruction before reload has run. */
4102 if (!reload_completed)
4103 return 0;
4104
4105 func_type = arm_current_func_type ();
4106
4107 /* Naked, volatile and stack alignment functions need special
4108 consideration. */
4109 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4110 return 0;
4111
4112 /* So do interrupt functions that use the frame pointer and Thumb
4113 interrupt functions. */
4114 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4115 return 0;
4116
4117 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4118 && !optimize_function_for_size_p (cfun))
4119 return 0;
4120
4121 offsets = arm_get_frame_offsets ();
4122 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4123
4124 /* As do variadic functions. */
4125 if (crtl->args.pretend_args_size
4126 || cfun->machine->uses_anonymous_args
4127 /* Or if the function calls __builtin_eh_return () */
4128 || crtl->calls_eh_return
4129 /* Or if the function calls alloca */
4130 || cfun->calls_alloca
4131 /* Or if there is a stack adjustment. However, if the stack pointer
4132 is saved on the stack, we can use a pre-incrementing stack load. */
4133 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4134 && stack_adjust == 4))
4135 /* Or if the static chain register was saved above the frame, under the
4136 assumption that the stack pointer isn't saved on the stack. */
4137 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4138 && arm_compute_static_chain_stack_bytes() != 0))
4139 return 0;
4140
4141 saved_int_regs = offsets->saved_regs_mask;
4142
4143 /* Unfortunately, the insn
4144
4145 ldmib sp, {..., sp, ...}
4146
4147 triggers a bug on most SA-110 based devices, such that the stack
4148 pointer won't be correctly restored if the instruction takes a
4149 page fault. We work around this problem by popping r3 along with
4150 the other registers, since that is never slower than executing
4151 another instruction.
4152
4153 We test for !arm_arch5t here, because code for any architecture
4154 less than this could potentially be run on one of the buggy
4155 chips. */
4156 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4157 {
4158 /* Validate that r3 is a call-clobbered register (always true in
4159 the default abi) ... */
4160 if (!call_used_or_fixed_reg_p (3))
4161 return 0;
4162
4163 /* ... that it isn't being used for a return value ... */
4164 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4165 return 0;
4166
4167 /* ... or for a tail-call argument ... */
4168 if (sibling)
4169 {
4170 gcc_assert (CALL_P (sibling));
4171
4172 if (find_regno_fusage (sibling, USE, 3))
4173 return 0;
4174 }
4175
4176 /* ... and that there are no call-saved registers in r0-r2
4177 (always true in the default ABI). */
4178 if (saved_int_regs & 0x7)
4179 return 0;
4180 }
4181
4182 /* Can't be done if interworking with Thumb, and any registers have been
4183 stacked. */
4184 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4185 return 0;
4186
4187 /* On StrongARM, conditional returns are expensive if they aren't
4188 taken and multiple registers have been stacked. */
4189 if (iscond && arm_tune_strongarm)
4190 {
4191 /* Conditional return when just the LR is stored is a simple
4192 conditional-load instruction, that's not expensive. */
4193 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4194 return 0;
4195
4196 if (flag_pic
4197 && arm_pic_register != INVALID_REGNUM
4198 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4199 return 0;
4200 }
4201
4202 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4203 several instructions if anything needs to be popped. */
4204 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4205 return 0;
4206
4207 /* If there are saved registers but the LR isn't saved, then we need
4208 two instructions for the return. */
4209 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4210 return 0;
4211
4212 /* Can't be done if any of the VFP regs are pushed,
4213 since this also requires an insn. */
4214 if (TARGET_HARD_FLOAT)
4215 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4216 if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
4217 return 0;
4218
4219 if (TARGET_REALLY_IWMMXT)
4220 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4221 if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
4222 return 0;
4223
4224 return 1;
4225 }
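
/* As an illustration of the tests above: a simple ARM-mode function whose
   only saved registers are {r4, lr}, with no stack adjustment and no VFP
   or iWMMXt saves, can fold its epilogue into the single instruction
   "pop {r4, pc}", so use_return_insn returns 1 for it; give the same
   function the cmse_nonsecure_entry attribute and the saved registers
   force a multi-instruction bxns return sequence, so it returns 0.  */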
4226
4227 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4228 shrink-wrapping if possible. This is the case if we need to emit a
4229 prologue, which we can test by looking at the offsets. */
4230 bool
4231 use_simple_return_p (void)
4232 {
4233 arm_stack_offsets *offsets;
4234
4235 /* Note this function can be called before or after reload. */
4236 if (!reload_completed)
4237 arm_compute_frame_layout ();
4238
4239 offsets = arm_get_frame_offsets ();
4240 return offsets->outgoing_args != 0;
4241 }
4242
4243 /* Return TRUE if int I is a valid immediate ARM constant. */
4244
4245 int
4246 const_ok_for_arm (HOST_WIDE_INT i)
4247 {
4248 int lowbit;
4249
4250 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4251 be all zero, or all one. */
4252 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4253 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4254 != ((~(unsigned HOST_WIDE_INT) 0)
4255 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4256 return FALSE;
4257
4258 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4259
4260 /* Fast return for 0 and small values. We must do this for zero, since
4261 the code below can't handle that one case. */
4262 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4263 return TRUE;
4264
4265 /* Get the number of trailing zeros. */
4266 lowbit = ffs((int) i) - 1;
4267
4268 /* Only even shifts are allowed in ARM mode so round down to the
4269 nearest even number. */
4270 if (TARGET_ARM)
4271 lowbit &= ~1;
4272
4273 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4274 return TRUE;
4275
4276 if (TARGET_ARM)
4277 {
4278 /* Allow rotated constants in ARM mode. */
4279 if (lowbit <= 4
4280 && ((i & ~0xc000003f) == 0
4281 || (i & ~0xf000000f) == 0
4282 || (i & ~0xfc000003) == 0))
4283 return TRUE;
4284 }
4285 else if (TARGET_THUMB2)
4286 {
4287 HOST_WIDE_INT v;
4288
4289 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4290 v = i & 0xff;
4291 v |= v << 16;
4292 if (i == v || i == (v | (v << 8)))
4293 return TRUE;
4294
4295 /* Allow repeated pattern 0xXY00XY00. */
4296 v = i & 0xff00;
4297 v |= v << 16;
4298 if (i == v)
4299 return TRUE;
4300 }
4301 else if (TARGET_HAVE_MOVT)
4302 {
4303 /* Thumb-1 Targets with MOVT. */
4304 if (i > 0xffff)
4305 return FALSE;
4306 else
4307 return TRUE;
4308 }
4309
4310 return FALSE;
4311 }
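
/* A few data points for the encodings handled above (illustrative):
   0x000000ff, 0x0000ff00 and 0xff000000 are all valid ARM immediates
   (an 8-bit value rotated by an even amount), whereas 0x000001fe would
   need an odd rotation and is therefore rejected in ARM mode but accepted
   for Thumb-2, which allows arbitrary shifts as well as the replicated
   patterns 0x00ab00ab, 0xab00ab00 and 0xabababab checked explicitly
   above.  */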
4312
4313 /* Return true if I is a valid constant for the operation CODE. */
4314 int
4315 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4316 {
4317 if (const_ok_for_arm (i))
4318 return 1;
4319
4320 switch (code)
4321 {
4322 case SET:
4323 /* See if we can use movw. */
4324 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4325 return 1;
4326 else
4327 /* Otherwise, try mvn. */
4328 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4329
4330 case PLUS:
4331 /* See if we can use addw or subw. */
4332 if (TARGET_THUMB2
4333 && ((i & 0xfffff000) == 0
4334 || ((-i) & 0xfffff000) == 0))
4335 return 1;
4336 /* Fall through. */
4337 case COMPARE:
4338 case EQ:
4339 case NE:
4340 case GT:
4341 case LE:
4342 case LT:
4343 case GE:
4344 case GEU:
4345 case LTU:
4346 case GTU:
4347 case LEU:
4348 case UNORDERED:
4349 case ORDERED:
4350 case UNEQ:
4351 case UNGE:
4352 case UNLT:
4353 case UNGT:
4354 case UNLE:
4355 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4356
4357 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4358 case XOR:
4359 return 0;
4360
4361 case IOR:
4362 if (TARGET_THUMB2)
4363 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4364 return 0;
4365
4366 case AND:
4367 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4368
4369 default:
4370 gcc_unreachable ();
4371 }
4372 }
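
/* Concrete cases for the switch above (illustrative): AND with
   0xffffff00 is not itself a valid immediate, but ~0xffffff00 == 0xff is,
   so the operation is accepted and ends up as a single BIC; a SET of
   0xffffff00 likewise succeeds via the MVN path; and on Thumb-2 a PLUS of
   any constant in the range -4095..4095 is accepted because it can be
   emitted as ADDW or SUBW.  */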
4373
4374 /* Return true if I is a valid di mode constant for the operation CODE. */
4375 int
4376 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4377 {
4378 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4379 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4380 rtx hi = GEN_INT (hi_val);
4381 rtx lo = GEN_INT (lo_val);
4382
4383 if (TARGET_THUMB1)
4384 return 0;
4385
4386 switch (code)
4387 {
4388 case AND:
4389 case IOR:
4390 case XOR:
4391 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4392 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4393 case PLUS:
4394 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4395
4396 default:
4397 return 0;
4398 }
4399 }
4400
4401 /* Emit a sequence of insns to handle a large constant.
4402 CODE is the code of the operation required, it can be any of SET, PLUS,
4403 IOR, AND, XOR, MINUS;
4404 MODE is the mode in which the operation is being performed;
4405 VAL is the integer to operate on;
4406 SOURCE is the other operand (a register, or a null-pointer for SET);
4407 SUBTARGETS means it is safe to create scratch registers if that will
4408 either produce a simpler sequence, or we will want to cse the values.
4409 Return value is the number of insns emitted. */
4410
4411 /* ??? Tweak this for thumb2. */
4412 int
4413 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4414 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4415 {
4416 rtx cond;
4417
4418 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4419 cond = COND_EXEC_TEST (PATTERN (insn));
4420 else
4421 cond = NULL_RTX;
4422
4423 if (subtargets || code == SET
4424 || (REG_P (target) && REG_P (source)
4425 && REGNO (target) != REGNO (source)))
4426 {
4427 /* After arm_reorg has been called, we can't fix up expensive
4428 constants by pushing them into memory so we must synthesize
4429 them in-line, regardless of the cost. This is only likely to
4430 be more costly on chips that have load delay slots and we are
4431 compiling without running the scheduler (so no splitting
4432 occurred before the final instruction emission).
4433
4434 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4435 */
4436 if (!cfun->machine->after_arm_reorg
4437 && !cond
4438 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4439 1, 0)
4440 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4441 + (code != SET))))
4442 {
4443 if (code == SET)
4444 {
4445 /* Currently SET is the only monadic value for CODE, all
4446 the rest are dyadic. */
4447 if (TARGET_USE_MOVT)
4448 arm_emit_movpair (target, GEN_INT (val));
4449 else
4450 emit_set_insn (target, GEN_INT (val));
4451
4452 return 1;
4453 }
4454 else
4455 {
4456 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4457
4458 if (TARGET_USE_MOVT)
4459 arm_emit_movpair (temp, GEN_INT (val));
4460 else
4461 emit_set_insn (temp, GEN_INT (val));
4462
4463 /* For MINUS, the constant is the value we subtract from (VAL - source),
4464 since we never have subtraction of a constant. */
4465 if (code == MINUS)
4466 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4467 else
4468 emit_set_insn (target,
4469 gen_rtx_fmt_ee (code, mode, source, temp));
4470 return 2;
4471 }
4472 }
4473 }
4474
4475 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4476 1);
4477 }
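
/* A typical two-instruction split produced through this path
   (illustrative; the exact sequence depends on the target and on
   optimal_immediate_sequence): setting a register to 0x0000ff0f, which
   is not a valid immediate, can be synthesized as

       mov     rD, #0x0f
       orr     rD, rD, #0xff00

   while on a core with MOVT the same constant is emitted directly by
   arm_emit_movpair as a single movw (plus a movt when the upper half
   is non-zero).  */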
4478
4479 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4480 ARM/THUMB2 immediates, and add up to VAL.
4481 The function return value gives the number of insns required. */
4482 static int
4483 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4484 struct four_ints *return_sequence)
4485 {
4486 int best_consecutive_zeros = 0;
4487 int i;
4488 int best_start = 0;
4489 int insns1, insns2;
4490 struct four_ints tmp_sequence;
4491
4492 /* If we aren't targeting ARM, the best place to start is always at
4493 the bottom, otherwise look more closely. */
4494 if (TARGET_ARM)
4495 {
4496 for (i = 0; i < 32; i += 2)
4497 {
4498 int consecutive_zeros = 0;
4499
4500 if (!(val & (3 << i)))
4501 {
4502 while ((i < 32) && !(val & (3 << i)))
4503 {
4504 consecutive_zeros += 2;
4505 i += 2;
4506 }
4507 if (consecutive_zeros > best_consecutive_zeros)
4508 {
4509 best_consecutive_zeros = consecutive_zeros;
4510 best_start = i - consecutive_zeros;
4511 }
4512 i -= 2;
4513 }
4514 }
4515 }
4516
4517 /* So long as it won't require any more insns to do so, it's
4518 desirable to emit a small constant (in bits 0...9) in the last
4519 insn. This way there is more chance that it can be combined with
4520 a later addressing insn to form a pre-indexed load or store
4521 operation. Consider:
4522
4523 *((volatile int *)0xe0000100) = 1;
4524 *((volatile int *)0xe0000110) = 2;
4525
4526 We want this to wind up as:
4527
4528 mov rA, #0xe0000000
4529 mov rB, #1
4530 str rB, [rA, #0x100]
4531 mov rB, #2
4532 str rB, [rA, #0x110]
4533
4534 rather than having to synthesize both large constants from scratch.
4535
4536 Therefore, we calculate how many insns would be required to emit
4537 the constant starting from `best_start', and also starting from
4538 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4539 yield a shorter sequence, we may as well use zero. */
4540 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4541 if (best_start != 0
4542 && ((HOST_WIDE_INT_1U << best_start) < val))
4543 {
4544 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4545 if (insns2 <= insns1)
4546 {
4547 *return_sequence = tmp_sequence;
4548 insns1 = insns2;
4549 }
4550 }
4551
4552 return insns1;
4553 }
4554
4555 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4556 static int
4557 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4558 struct four_ints *return_sequence, int i)
4559 {
4560 int remainder = val & 0xffffffff;
4561 int insns = 0;
4562
4563 /* Try and find a way of doing the job in either two or three
4564 instructions.
4565
4566 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4567 location. We start at position I. This may be the MSB, or
4568 optimal_immediate_sequence may have positioned it at the largest block
4569 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4570 wrapping around to the top of the word when we drop off the bottom.
4571 In the worst case this code should produce no more than four insns.
4572
4573 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4574 constants, shifted to any arbitrary location. We should always start
4575 at the MSB. */
4576 do
4577 {
4578 int end;
4579 unsigned int b1, b2, b3, b4;
4580 unsigned HOST_WIDE_INT result;
4581 int loc;
4582
4583 gcc_assert (insns < 4);
4584
4585 if (i <= 0)
4586 i += 32;
4587
4588 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4589 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4590 {
4591 loc = i;
4592 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4593 /* We can use addw/subw for the last 12 bits. */
4594 result = remainder;
4595 else
4596 {
4597 /* Use an 8-bit shifted/rotated immediate. */
4598 end = i - 8;
4599 if (end < 0)
4600 end += 32;
4601 result = remainder & ((0x0ff << end)
4602 | ((i < end) ? (0xff >> (32 - end))
4603 : 0));
4604 i -= 8;
4605 }
4606 }
4607 else
4608 {
4609 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4610 arbitrary shifts. */
4611 i -= TARGET_ARM ? 2 : 1;
4612 continue;
4613 }
4614
4615 /* Next, see if we can do a better job with a thumb2 replicated
4616 constant.
4617
4618 We do it this way around to catch the cases like 0x01F001E0 where
4619 two 8-bit immediates would work, but a replicated constant would
4620 make it worse.
4621
4622 TODO: 16-bit constants that don't clear all the bits, but still win.
4623 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4624 if (TARGET_THUMB2)
4625 {
4626 b1 = (remainder & 0xff000000) >> 24;
4627 b2 = (remainder & 0x00ff0000) >> 16;
4628 b3 = (remainder & 0x0000ff00) >> 8;
4629 b4 = remainder & 0xff;
4630
4631 if (loc > 24)
4632 {
4633 /* The 8-bit immediate already found clears b1 (and maybe b2),
4634 but must leave b3 and b4 alone. */
4635
4636 /* First try to find a 32-bit replicated constant that clears
4637 almost everything. We can assume that we can't do it in one,
4638 or else we wouldn't be here. */
4639 unsigned int tmp = b1 & b2 & b3 & b4;
4640 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4641 + (tmp << 24);
4642 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4643 + (tmp == b3) + (tmp == b4);
4644 if (tmp
4645 && (matching_bytes >= 3
4646 || (matching_bytes == 2
4647 && const_ok_for_op (remainder & ~tmp2, code))))
4648 {
4649 /* At least 3 of the bytes match, and the fourth has at
4650 least as many bits set, or two of the bytes match
4651 and it will only require one more insn to finish. */
4652 result = tmp2;
4653 i = tmp != b1 ? 32
4654 : tmp != b2 ? 24
4655 : tmp != b3 ? 16
4656 : 8;
4657 }
4658
4659 /* Second, try to find a 16-bit replicated constant that can
4660 leave three of the bytes clear. If b2 or b4 is already
4661 zero, then we can. If the 8-bit from above would not
4662 clear b2 anyway, then we still win. */
4663 else if (b1 == b3 && (!b2 || !b4
4664 || (remainder & 0x00ff0000 & ~result)))
4665 {
4666 result = remainder & 0xff00ff00;
4667 i = 24;
4668 }
4669 }
4670 else if (loc > 16)
4671 {
4672 /* The 8-bit immediate already found clears b2 (and maybe b3)
4673 and we don't get here unless b1 is already clear, but it will
4674 leave b4 unchanged. */
4675
4676 /* If we can clear b2 and b4 at once, then we win, since the
4677 8-bits couldn't possibly reach that far. */
4678 if (b2 == b4)
4679 {
4680 result = remainder & 0x00ff00ff;
4681 i = 16;
4682 }
4683 }
4684 }
4685
4686 return_sequence->i[insns++] = result;
4687 remainder &= ~result;
4688
4689 if (code == SET || code == MINUS)
4690 code = PLUS;
4691 }
4692 while (remainder);
4693
4694 return insns;
4695 }
4696
4697 /* Emit an instruction with the indicated PATTERN. If COND is
4698 non-NULL, conditionalize the execution of the instruction on COND
4699 being true. */
4700
4701 static void
4702 emit_constant_insn (rtx cond, rtx pattern)
4703 {
4704 if (cond)
4705 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4706 emit_insn (pattern);
4707 }
4708
4709 /* As above, but extra parameter GENERATE which, if clear, suppresses
4710 RTL generation. */
4711
4712 static int
4713 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4714 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4715 int subtargets, int generate)
4716 {
4717 int can_invert = 0;
4718 int can_negate = 0;
4719 int final_invert = 0;
4720 int i;
4721 int set_sign_bit_copies = 0;
4722 int clear_sign_bit_copies = 0;
4723 int clear_zero_bit_copies = 0;
4724 int set_zero_bit_copies = 0;
4725 int insns = 0, neg_insns, inv_insns;
4726 unsigned HOST_WIDE_INT temp1, temp2;
4727 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4728 struct four_ints *immediates;
4729 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4730
4731 /* Find out which operations are safe for a given CODE. Also do a quick
4732 check for degenerate cases; these can occur when DImode operations
4733 are split. */
4734 switch (code)
4735 {
4736 case SET:
4737 can_invert = 1;
4738 break;
4739
4740 case PLUS:
4741 can_negate = 1;
4742 break;
4743
4744 case IOR:
4745 if (remainder == 0xffffffff)
4746 {
4747 if (generate)
4748 emit_constant_insn (cond,
4749 gen_rtx_SET (target,
4750 GEN_INT (ARM_SIGN_EXTEND (val))));
4751 return 1;
4752 }
4753
4754 if (remainder == 0)
4755 {
4756 if (reload_completed && rtx_equal_p (target, source))
4757 return 0;
4758
4759 if (generate)
4760 emit_constant_insn (cond, gen_rtx_SET (target, source));
4761 return 1;
4762 }
4763 break;
4764
4765 case AND:
4766 if (remainder == 0)
4767 {
4768 if (generate)
4769 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4770 return 1;
4771 }
4772 if (remainder == 0xffffffff)
4773 {
4774 if (reload_completed && rtx_equal_p (target, source))
4775 return 0;
4776 if (generate)
4777 emit_constant_insn (cond, gen_rtx_SET (target, source));
4778 return 1;
4779 }
4780 can_invert = 1;
4781 break;
4782
4783 case XOR:
4784 if (remainder == 0)
4785 {
4786 if (reload_completed && rtx_equal_p (target, source))
4787 return 0;
4788 if (generate)
4789 emit_constant_insn (cond, gen_rtx_SET (target, source));
4790 return 1;
4791 }
4792
4793 if (remainder == 0xffffffff)
4794 {
4795 if (generate)
4796 emit_constant_insn (cond,
4797 gen_rtx_SET (target,
4798 gen_rtx_NOT (mode, source)));
4799 return 1;
4800 }
4801 final_invert = 1;
4802 break;
4803
4804 case MINUS:
4805 /* We treat MINUS as (val - source), since (source - val) is always
4806 passed as (source + (-val)). */
4807 if (remainder == 0)
4808 {
4809 if (generate)
4810 emit_constant_insn (cond,
4811 gen_rtx_SET (target,
4812 gen_rtx_NEG (mode, source)));
4813 return 1;
4814 }
4815 if (const_ok_for_arm (val))
4816 {
4817 if (generate)
4818 emit_constant_insn (cond,
4819 gen_rtx_SET (target,
4820 gen_rtx_MINUS (mode, GEN_INT (val),
4821 source)));
4822 return 1;
4823 }
4824
4825 break;
4826
4827 default:
4828 gcc_unreachable ();
4829 }
4830
4831 /* If we can do it in one insn get out quickly. */
4832 if (const_ok_for_op (val, code))
4833 {
4834 if (generate)
4835 emit_constant_insn (cond,
4836 gen_rtx_SET (target,
4837 (source
4838 ? gen_rtx_fmt_ee (code, mode, source,
4839 GEN_INT (val))
4840 : GEN_INT (val))));
4841 return 1;
4842 }
4843
4844 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4845 insn. */
4846 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4847 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4848 {
4849 if (generate)
4850 {
4851 if (mode == SImode && i == 16)
4852 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4853 smaller insn. */
4854 emit_constant_insn (cond,
4855 gen_zero_extendhisi2
4856 (target, gen_lowpart (HImode, source)));
4857 else
4858 /* Extz only supports SImode, but we can coerce the operands
4859 into that mode. */
4860 emit_constant_insn (cond,
4861 gen_extzv_t2 (gen_lowpart (SImode, target),
4862 gen_lowpart (SImode, source),
4863 GEN_INT (i), const0_rtx));
4864 }
4865
4866 return 1;
4867 }
4868
4869 /* Calculate a few attributes that may be useful for specific
4870 optimizations. */
4871 /* Count number of leading zeros. */
4872 for (i = 31; i >= 0; i--)
4873 {
4874 if ((remainder & (1 << i)) == 0)
4875 clear_sign_bit_copies++;
4876 else
4877 break;
4878 }
4879
4880 /* Count number of leading 1's. */
4881 for (i = 31; i >= 0; i--)
4882 {
4883 if ((remainder & (1 << i)) != 0)
4884 set_sign_bit_copies++;
4885 else
4886 break;
4887 }
4888
4889 /* Count number of trailing zeros. */
4890 for (i = 0; i <= 31; i++)
4891 {
4892 if ((remainder & (1 << i)) == 0)
4893 clear_zero_bit_copies++;
4894 else
4895 break;
4896 }
4897
4898 /* Count number of trailing 1's. */
4899 for (i = 0; i <= 31; i++)
4900 {
4901 if ((remainder & (1 << i)) != 0)
4902 set_zero_bit_copies++;
4903 else
4904 break;
4905 }
4906
4907 switch (code)
4908 {
4909 case SET:
4910 /* See if we can do this by sign_extending a constant that is known
4911 to be negative. This is a good way of doing it, since the shift
4912 may well merge into a subsequent insn. */
4913 if (set_sign_bit_copies > 1)
4914 {
4915 if (const_ok_for_arm
4916 (temp1 = ARM_SIGN_EXTEND (remainder
4917 << (set_sign_bit_copies - 1))))
4918 {
4919 if (generate)
4920 {
4921 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4922 emit_constant_insn (cond,
4923 gen_rtx_SET (new_src, GEN_INT (temp1)));
4924 emit_constant_insn (cond,
4925 gen_ashrsi3 (target, new_src,
4926 GEN_INT (set_sign_bit_copies - 1)));
4927 }
4928 return 2;
4929 }
4930 /* For an inverted constant, we will need to set the low bits,
4931 these will be shifted out of harm's way. */
4932 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4933 if (const_ok_for_arm (~temp1))
4934 {
4935 if (generate)
4936 {
4937 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4938 emit_constant_insn (cond,
4939 gen_rtx_SET (new_src, GEN_INT (temp1)));
4940 emit_constant_insn (cond,
4941 gen_ashrsi3 (target, new_src,
4942 GEN_INT (set_sign_bit_copies - 1)));
4943 }
4944 return 2;
4945 }
4946 }
4947
4948 /* See if we can calculate the value as the difference between two
4949 valid immediates. */
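 /* Illustrative example: remainder == 0x00fffff0 is built as
    0x01000000 - 0x10, i.e. a MOV of 0x01000000 followed by a subtraction
    of 0x10, both of which are valid immediates.  */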
4950 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4951 {
4952 int topshift = clear_sign_bit_copies & ~1;
4953
4954 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4955 & (0xff000000 >> topshift));
4956
4957 /* If temp1 is zero, then that means the 9 most significant
4958 bits of remainder were 1 and we've caused it to overflow.
4959 When topshift is 0 we don't need to do anything since we
4960 can borrow from 'bit 32'. */
4961 if (temp1 == 0 && topshift != 0)
4962 temp1 = 0x80000000 >> (topshift - 1);
4963
4964 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4965
4966 if (const_ok_for_arm (temp2))
4967 {
4968 if (generate)
4969 {
4970 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4971 emit_constant_insn (cond,
4972 gen_rtx_SET (new_src, GEN_INT (temp1)));
4973 emit_constant_insn (cond,
4974 gen_addsi3 (target, new_src,
4975 GEN_INT (-temp2)));
4976 }
4977
4978 return 2;
4979 }
4980 }
4981
4982 /* See if we can generate this by setting the bottom (or the top)
4983 16 bits, and then shifting these into the other half of the
4984 word. We only look for the simplest cases, to do more would cost
4985 too much. Be careful, however, not to generate this when the
4986 alternative would take fewer insns. */
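 /* Illustrative example: remainder == 0x01230123 is built by first
    constructing 0x0123 in a scratch register and then ORing it with
    itself shifted left by 16.  */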
4987 if (val & 0xffff0000)
4988 {
4989 temp1 = remainder & 0xffff0000;
4990 temp2 = remainder & 0x0000ffff;
4991
4992 /* Overlaps outside this range are best done using other methods. */
4993 for (i = 9; i < 24; i++)
4994 {
4995 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4996 && !const_ok_for_arm (temp2))
4997 {
4998 rtx new_src = (subtargets
4999 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5000 : target);
5001 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5002 source, subtargets, generate);
5003 source = new_src;
5004 if (generate)
5005 emit_constant_insn
5006 (cond,
5007 gen_rtx_SET
5008 (target,
5009 gen_rtx_IOR (mode,
5010 gen_rtx_ASHIFT (mode, source,
5011 GEN_INT (i)),
5012 source)));
5013 return insns + 1;
5014 }
5015 }
5016
5017 /* Don't duplicate cases already considered. */
5018 for (i = 17; i < 24; i++)
5019 {
5020 if (((temp1 | (temp1 >> i)) == remainder)
5021 && !const_ok_for_arm (temp1))
5022 {
5023 rtx new_src = (subtargets
5024 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5025 : target);
5026 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5027 source, subtargets, generate);
5028 source = new_src;
5029 if (generate)
5030 emit_constant_insn
5031 (cond,
5032 gen_rtx_SET (target,
5033 gen_rtx_IOR
5034 (mode,
5035 gen_rtx_LSHIFTRT (mode, source,
5036 GEN_INT (i)),
5037 source)));
5038 return insns + 1;
5039 }
5040 }
5041 }
5042 break;
5043
5044 case IOR:
5045 case XOR:
5046 /* If we have IOR or XOR, and the constant can be loaded in a
5047 single instruction, and we can find a temporary to put it in,
5048 then this can be done in two instructions instead of 3-4. */
5049 if (subtargets
5050 /* TARGET can't be NULL if SUBTARGETS is 0. */
5051 || (reload_completed && !reg_mentioned_p (target, source)))
5052 {
5053 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5054 {
5055 if (generate)
5056 {
5057 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5058
5059 emit_constant_insn (cond,
5060 gen_rtx_SET (sub, GEN_INT (val)));
5061 emit_constant_insn (cond,
5062 gen_rtx_SET (target,
5063 gen_rtx_fmt_ee (code, mode,
5064 source, sub)));
5065 }
5066 return 2;
5067 }
5068 }
5069
5070 if (code == XOR)
5071 break;
5072
5073 /* Convert.
5074 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
5075 and the remainder 0s for e.g. 0xfff00000)
5076 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5077
5078 This can be done in 2 instructions by using shifts with mov or mvn.
5079 e.g. for
5080 x = x | 0xfff00000;
5081 we generate.
5082 mvn r0, r0, asl #12
5083 mvn r0, r0, lsr #12 */
5084 if (set_sign_bit_copies > 8
5085 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5086 {
5087 if (generate)
5088 {
5089 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5090 rtx shift = GEN_INT (set_sign_bit_copies);
5091
5092 emit_constant_insn
5093 (cond,
5094 gen_rtx_SET (sub,
5095 gen_rtx_NOT (mode,
5096 gen_rtx_ASHIFT (mode,
5097 source,
5098 shift))));
5099 emit_constant_insn
5100 (cond,
5101 gen_rtx_SET (target,
5102 gen_rtx_NOT (mode,
5103 gen_rtx_LSHIFTRT (mode, sub,
5104 shift))));
5105 }
5106 return 2;
5107 }
5108
5109 /* Convert
5110 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5111 to
5112 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5113
5114 For example, r0 = r0 | 0xfff
5115 mvn r0, r0, lsr #12
5116 mvn r0, r0, asl #12
5117
5118 */
5119 if (set_zero_bit_copies > 8
5120 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5121 {
5122 if (generate)
5123 {
5124 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5125 rtx shift = GEN_INT (set_zero_bit_copies);
5126
5127 emit_constant_insn
5128 (cond,
5129 gen_rtx_SET (sub,
5130 gen_rtx_NOT (mode,
5131 gen_rtx_LSHIFTRT (mode,
5132 source,
5133 shift))));
5134 emit_constant_insn
5135 (cond,
5136 gen_rtx_SET (target,
5137 gen_rtx_NOT (mode,
5138 gen_rtx_ASHIFT (mode, sub,
5139 shift))));
5140 }
5141 return 2;
5142 }
5143
5144 /* This will never be reached for Thumb2 because orn is a valid
5145 instruction. This is for Thumb1 and the ARM 32 bit cases.
5146
5147 x = y | constant (such that ~constant is a valid constant)
5148 Transform this to
5149 x = ~(~y & ~constant).
5150 */
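 /* Illustrative sequence: x = y | C becomes
        mvn  tmp, y
        and  tmp, tmp, #~C
        mvn  x, tmp
    when ~C is a valid immediate and none of the cases above apply.  */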
5151 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5152 {
5153 if (generate)
5154 {
5155 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5156 emit_constant_insn (cond,
5157 gen_rtx_SET (sub,
5158 gen_rtx_NOT (mode, source)));
5159 source = sub;
5160 if (subtargets)
5161 sub = gen_reg_rtx (mode);
5162 emit_constant_insn (cond,
5163 gen_rtx_SET (sub,
5164 gen_rtx_AND (mode, source,
5165 GEN_INT (temp1))));
5166 emit_constant_insn (cond,
5167 gen_rtx_SET (target,
5168 gen_rtx_NOT (mode, sub)));
5169 }
5170 return 3;
5171 }
5172 break;
5173
5174 case AND:
5175 /* See if two shifts will do 2 or more insns' worth of work. */
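 /* Illustrative example: on cores without UXTH, x & 0x0000ffff has
    clear_sign_bit_copies == 16, so the top half is cleared with a left
    shift by 16 followed by a logical right shift by 16.  */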
5176 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5177 {
5178 HOST_WIDE_INT shift_mask = ((0xffffffff
5179 << (32 - clear_sign_bit_copies))
5180 & 0xffffffff);
5181
5182 if ((remainder | shift_mask) != 0xffffffff)
5183 {
5184 HOST_WIDE_INT new_val
5185 = ARM_SIGN_EXTEND (remainder | shift_mask);
5186
5187 if (generate)
5188 {
5189 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5190 insns = arm_gen_constant (AND, SImode, cond, new_val,
5191 new_src, source, subtargets, 1);
5192 source = new_src;
5193 }
5194 else
5195 {
5196 rtx targ = subtargets ? NULL_RTX : target;
5197 insns = arm_gen_constant (AND, mode, cond, new_val,
5198 targ, source, subtargets, 0);
5199 }
5200 }
5201
5202 if (generate)
5203 {
5204 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5205 rtx shift = GEN_INT (clear_sign_bit_copies);
5206
5207 emit_insn (gen_ashlsi3 (new_src, source, shift));
5208 emit_insn (gen_lshrsi3 (target, new_src, shift));
5209 }
5210
5211 return insns + 2;
5212 }
5213
5214 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5215 {
5216 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5217
5218 if ((remainder | shift_mask) != 0xffffffff)
5219 {
5220 HOST_WIDE_INT new_val
5221 = ARM_SIGN_EXTEND (remainder | shift_mask);
5222 if (generate)
5223 {
5224 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5225
5226 insns = arm_gen_constant (AND, mode, cond, new_val,
5227 new_src, source, subtargets, 1);
5228 source = new_src;
5229 }
5230 else
5231 {
5232 rtx targ = subtargets ? NULL_RTX : target;
5233
5234 insns = arm_gen_constant (AND, mode, cond, new_val,
5235 targ, source, subtargets, 0);
5236 }
5237 }
5238
5239 if (generate)
5240 {
5241 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5242 rtx shift = GEN_INT (clear_zero_bit_copies);
5243
5244 emit_insn (gen_lshrsi3 (new_src, source, shift));
5245 emit_insn (gen_ashlsi3 (target, new_src, shift));
5246 }
5247
5248 return insns + 2;
5249 }
5250
5251 break;
5252
5253 default:
5254 break;
5255 }
5256
5257 /* Calculate what the instruction sequences would be if we generated it
5258 normally, negated, or inverted. */
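 /* Illustrative example: AND with 0xfffefeff fits no single immediate in
    either form, but the inverted constant 0x00010100 splits into the two
    valid immediates 0x10000 and 0x100, so the sequence below becomes two
    BIC instructions.  */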
5259 if (code == AND)
5260 /* AND cannot be split into multiple insns, so invert and use BIC. */
5261 insns = 99;
5262 else
5263 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5264
5265 if (can_negate)
5266 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5267 &neg_immediates);
5268 else
5269 neg_insns = 99;
5270
5271 if (can_invert || final_invert)
5272 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5273 &inv_immediates);
5274 else
5275 inv_insns = 99;
5276
5277 immediates = &pos_immediates;
5278
5279 /* Is the negated immediate sequence more efficient? */
5280 if (neg_insns < insns && neg_insns <= inv_insns)
5281 {
5282 insns = neg_insns;
5283 immediates = &neg_immediates;
5284 }
5285 else
5286 can_negate = 0;
5287
5288 /* Is the inverted immediate sequence more efficient?
5289 We must allow for an extra NOT instruction for XOR operations, although
5290 there is some chance that the final 'mvn' will get optimized later. */
5291 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5292 {
5293 insns = inv_insns;
5294 immediates = &inv_immediates;
5295 }
5296 else
5297 {
5298 can_invert = 0;
5299 final_invert = 0;
5300 }
5301
5302 /* Now output the chosen sequence as instructions. */
5303 if (generate)
5304 {
5305 for (i = 0; i < insns; i++)
5306 {
5307 rtx new_src, temp1_rtx;
5308
5309 temp1 = immediates->i[i];
5310
5311 if (code == SET || code == MINUS)
5312 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5313 else if ((final_invert || i < (insns - 1)) && subtargets)
5314 new_src = gen_reg_rtx (mode);
5315 else
5316 new_src = target;
5317
5318 if (can_invert)
5319 temp1 = ~temp1;
5320 else if (can_negate)
5321 temp1 = -temp1;
5322
5323 temp1 = trunc_int_for_mode (temp1, mode);
5324 temp1_rtx = GEN_INT (temp1);
5325
5326 if (code == SET)
5327 ;
5328 else if (code == MINUS)
5329 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5330 else
5331 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5332
5333 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5334 source = new_src;
5335
5336 if (code == SET)
5337 {
5338 can_negate = can_invert;
5339 can_invert = 0;
5340 code = PLUS;
5341 }
5342 else if (code == MINUS)
5343 code = PLUS;
5344 }
5345 }
5346
5347 if (final_invert)
5348 {
5349 if (generate)
5350 emit_constant_insn (cond, gen_rtx_SET (target,
5351 gen_rtx_NOT (mode, source)));
5352 insns++;
5353 }
5354
5355 return insns;
5356 }
5357
5358 /* Return TRUE if op is a constant where both the low and top words are
5359 suitable for RSB/RSC instructions. This is never true for Thumb, since
5360 we do not have RSC in that case. */
5361 static bool
5362 arm_const_double_prefer_rsbs_rsc (rtx op)
5363 {
5364 /* Thumb lacks RSC, so we never prefer that sequence. */
5365 if (TARGET_THUMB || !CONST_INT_P (op))
5366 return false;
5367 HOST_WIDE_INT hi, lo;
5368 lo = UINTVAL (op) & 0xffffffffULL;
5369 hi = UINTVAL (op) >> 32;
5370 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5371 }
5372
5373 /* Canonicalize a comparison so that we are more likely to recognize it.
5374 This can be done for a few constant compares, where we can make the
5375 immediate value easier to load. */
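/* Illustrative example (SImode): (GT reg 0x1fff) uses a constant that is
   not a valid immediate in either sign, but adding one gives
   (GE reg 0x2000), and 0x2000 is a valid immediate, so the comparison is
   rewritten that way below.  */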
5376
5377 static void
5378 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5379 bool op0_preserve_value)
5380 {
5381 machine_mode mode;
5382 unsigned HOST_WIDE_INT i, maxval;
5383
5384 mode = GET_MODE (*op0);
5385 if (mode == VOIDmode)
5386 mode = GET_MODE (*op1);
5387
5388 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5389
5390 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5391 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5392 either reversed or (for constant OP1) adjusted to GE/LT.
5393 Similarly for GTU/LEU in Thumb mode. */
5394 if (mode == DImode)
5395 {
5396
5397 if (*code == GT || *code == LE
5398 || *code == GTU || *code == LEU)
5399 {
5400 /* Missing comparison. First try to use an available
5401 comparison. */
5402 if (CONST_INT_P (*op1))
5403 {
5404 i = INTVAL (*op1);
5405 switch (*code)
5406 {
5407 case GT:
5408 case LE:
5409 if (i != maxval)
5410 {
5411 /* Try to convert to GE/LT, unless that would be more
5412 expensive. */
5413 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5414 && arm_const_double_prefer_rsbs_rsc (*op1))
5415 return;
5416 *op1 = GEN_INT (i + 1);
5417 *code = *code == GT ? GE : LT;
5418 return;
5419 }
5420 break;
5421
5422 case GTU:
5423 case LEU:
5424 if (i != ~((unsigned HOST_WIDE_INT) 0))
5425 {
5426 /* Try to convert to GEU/LTU, unless that would
5427 be more expensive. */
5428 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5429 && arm_const_double_prefer_rsbs_rsc (*op1))
5430 return;
5431 *op1 = GEN_INT (i + 1);
5432 *code = *code == GTU ? GEU : LTU;
5433 return;
5434 }
5435 break;
5436
5437 default:
5438 gcc_unreachable ();
5439 }
5440 }
5441
5442 if (!op0_preserve_value)
5443 {
5444 std::swap (*op0, *op1);
5445 *code = (int)swap_condition ((enum rtx_code)*code);
5446 }
5447 }
5448 return;
5449 }
5450
5451 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5452 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5453 to facilitate possible combining with a cmp into 'ands'. */
5454 if (mode == SImode
5455 && GET_CODE (*op0) == ZERO_EXTEND
5456 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5457 && GET_MODE (XEXP (*op0, 0)) == QImode
5458 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5459 && subreg_lowpart_p (XEXP (*op0, 0))
5460 && *op1 == const0_rtx)
5461 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5462 GEN_INT (255));
5463
5464 /* Comparisons smaller than DImode. Only adjust comparisons against
5465 an out-of-range constant. */
5466 if (!CONST_INT_P (*op1)
5467 || const_ok_for_arm (INTVAL (*op1))
5468 || const_ok_for_arm (- INTVAL (*op1)))
5469 return;
5470
5471 i = INTVAL (*op1);
5472
5473 switch (*code)
5474 {
5475 case EQ:
5476 case NE:
5477 return;
5478
5479 case GT:
5480 case LE:
5481 if (i != maxval
5482 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5483 {
5484 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5485 *code = *code == GT ? GE : LT;
5486 return;
5487 }
5488 break;
5489
5490 case GE:
5491 case LT:
5492 if (i != ~maxval
5493 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5494 {
5495 *op1 = GEN_INT (i - 1);
5496 *code = *code == GE ? GT : LE;
5497 return;
5498 }
5499 break;
5500
5501 case GTU:
5502 case LEU:
5503 if (i != ~((unsigned HOST_WIDE_INT) 0)
5504 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5505 {
5506 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5507 *code = *code == GTU ? GEU : LTU;
5508 return;
5509 }
5510 break;
5511
5512 case GEU:
5513 case LTU:
5514 if (i != 0
5515 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5516 {
5517 *op1 = GEN_INT (i - 1);
5518 *code = *code == GEU ? GTU : LEU;
5519 return;
5520 }
5521 break;
5522
5523 default:
5524 gcc_unreachable ();
5525 }
5526 }
5527
5528
5529 /* Define how to find the value returned by a function. */
5530
5531 static rtx
5532 arm_function_value(const_tree type, const_tree func,
5533 bool outgoing ATTRIBUTE_UNUSED)
5534 {
5535 machine_mode mode;
5536 int unsignedp ATTRIBUTE_UNUSED;
5537 rtx r ATTRIBUTE_UNUSED;
5538
5539 mode = TYPE_MODE (type);
5540
5541 if (TARGET_AAPCS_BASED)
5542 return aapcs_allocate_return_reg (mode, type, func);
5543
5544 /* Promote integer types. */
5545 if (INTEGRAL_TYPE_P (type))
5546 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5547
5548 /* Promotes small structs returned in a register to full-word size
5549 for big-endian AAPCS. */
5550 if (arm_return_in_msb (type))
5551 {
5552 HOST_WIDE_INT size = int_size_in_bytes (type);
5553 if (size % UNITS_PER_WORD != 0)
5554 {
5555 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5556 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5557 }
5558 }
5559
5560 return arm_libcall_value_1 (mode);
5561 }
5562
5563 /* libcall hashtable helpers. */
5564
5565 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5566 {
5567 static inline hashval_t hash (const rtx_def *);
5568 static inline bool equal (const rtx_def *, const rtx_def *);
5569 static inline void remove (rtx_def *);
5570 };
5571
5572 inline bool
5573 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5574 {
5575 return rtx_equal_p (p1, p2);
5576 }
5577
5578 inline hashval_t
5579 libcall_hasher::hash (const rtx_def *p1)
5580 {
5581 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5582 }
5583
5584 typedef hash_table<libcall_hasher> libcall_table_type;
5585
5586 static void
5587 add_libcall (libcall_table_type *htab, rtx libcall)
5588 {
5589 *htab->find_slot (libcall, INSERT) = libcall;
5590 }
5591
5592 static bool
5593 arm_libcall_uses_aapcs_base (const_rtx libcall)
5594 {
5595 static bool init_done = false;
5596 static libcall_table_type *libcall_htab = NULL;
5597
5598 if (!init_done)
5599 {
5600 init_done = true;
5601
5602 libcall_htab = new libcall_table_type (31);
5603 add_libcall (libcall_htab,
5604 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5605 add_libcall (libcall_htab,
5606 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5607 add_libcall (libcall_htab,
5608 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5609 add_libcall (libcall_htab,
5610 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5611
5612 add_libcall (libcall_htab,
5613 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5614 add_libcall (libcall_htab,
5615 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5616 add_libcall (libcall_htab,
5617 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5618 add_libcall (libcall_htab,
5619 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5620
5621 add_libcall (libcall_htab,
5622 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5623 add_libcall (libcall_htab,
5624 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5625 add_libcall (libcall_htab,
5626 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5627 add_libcall (libcall_htab,
5628 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5629 add_libcall (libcall_htab,
5630 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5631 add_libcall (libcall_htab,
5632 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5633 add_libcall (libcall_htab,
5634 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5635 add_libcall (libcall_htab,
5636 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5637
5638 /* Values from double-precision helper functions are returned in core
5639 registers if the selected core only supports single-precision
5640 arithmetic, even if we are using the hard-float ABI. The same is
5641 true for single-precision helpers, but we will never be using the
5642 hard-float ABI on a CPU which doesn't support single-precision
5643 operations in hardware. */
5644 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5645 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5646 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5647 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5648 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5649 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5650 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5651 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5652 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5653 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5654 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5655 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5656 SFmode));
5657 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5658 DFmode));
5659 add_libcall (libcall_htab,
5660 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5661 }
5662
5663 return libcall && libcall_htab->find (libcall) != NULL;
5664 }
5665
5666 static rtx
5667 arm_libcall_value_1 (machine_mode mode)
5668 {
5669 if (TARGET_AAPCS_BASED)
5670 return aapcs_libcall_value (mode);
5671 else if (TARGET_IWMMXT_ABI
5672 && arm_vector_mode_supported_p (mode))
5673 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5674 else
5675 return gen_rtx_REG (mode, ARG_REGISTER (1));
5676 }
5677
5678 /* Define how to find the value returned by a library function
5679 assuming the value has mode MODE. */
5680
5681 static rtx
5682 arm_libcall_value (machine_mode mode, const_rtx libcall)
5683 {
5684 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5685 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5686 {
5687 /* The following libcalls return their result in integer registers,
5688 even though they return a floating point value. */
5689 if (arm_libcall_uses_aapcs_base (libcall))
5690 return gen_rtx_REG (mode, ARG_REGISTER (1));
5691
5692 }
5693
5694 return arm_libcall_value_1 (mode);
5695 }
5696
5697 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5698
5699 static bool
5700 arm_function_value_regno_p (const unsigned int regno)
5701 {
5702 if (regno == ARG_REGISTER (1)
5703 || (TARGET_32BIT
5704 && TARGET_AAPCS_BASED
5705 && TARGET_HARD_FLOAT
5706 && regno == FIRST_VFP_REGNUM)
5707 || (TARGET_IWMMXT_ABI
5708 && regno == FIRST_IWMMXT_REGNUM))
5709 return true;
5710
5711 return false;
5712 }
5713
5714 /* Determine the amount of memory needed to store the possible return
5715 registers of an untyped call. */
5716 int
5717 arm_apply_result_size (void)
5718 {
5719 int size = 16;
5720
5721 if (TARGET_32BIT)
5722 {
5723 if (TARGET_HARD_FLOAT_ABI)
5724 size += 32;
5725 if (TARGET_IWMMXT_ABI)
5726 size += 8;
5727 }
5728
5729 return size;
5730 }
5731
5732 /* Decide whether TYPE should be returned in memory (true)
5733 or in a register (false). FNTYPE is the type of the function making
5734 the call. */
5735 static bool
5736 arm_return_in_memory (const_tree type, const_tree fntype)
5737 {
5738 HOST_WIDE_INT size;
5739
5740 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5741
5742 if (TARGET_AAPCS_BASED)
5743 {
5744 /* Simple, non-aggregate types (i.e. not including vectors and
5745 complex) are always returned in a register (or registers).
5746 We don't care about which register here, so we can short-cut
5747 some of the detail. */
5748 if (!AGGREGATE_TYPE_P (type)
5749 && TREE_CODE (type) != VECTOR_TYPE
5750 && TREE_CODE (type) != COMPLEX_TYPE)
5751 return false;
5752
5753 /* Any return value that is no larger than one word can be
5754 returned in r0. */
5755 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5756 return false;
5757
5758 /* Check any available co-processors to see if they accept the
5759 type as a register candidate (VFP, for example, can return
5760 some aggregates in consecutive registers). These aren't
5761 available if the call is variadic. */
5762 if (aapcs_select_return_coproc (type, fntype) >= 0)
5763 return false;
5764
5765 /* Vector values should be returned using ARM registers, not
5766 memory (unless they're over 16 bytes, which will break since
5767 we only have four call-clobbered registers to play with). */
5768 if (TREE_CODE (type) == VECTOR_TYPE)
5769 return (size < 0 || size > (4 * UNITS_PER_WORD));
5770
5771 /* The rest go in memory. */
5772 return true;
5773 }
5774
5775 if (TREE_CODE (type) == VECTOR_TYPE)
5776 return (size < 0 || size > (4 * UNITS_PER_WORD));
5777
5778 if (!AGGREGATE_TYPE_P (type)
5779 && TREE_CODE (type) != VECTOR_TYPE)
5780 /* All simple types are returned in registers. */
5781 return false;
5782
5783 if (arm_abi != ARM_ABI_APCS)
5784 {
5785 /* ATPCS and later return aggregate types in memory only if they are
5786 larger than a word (or are variable size). */
5787 return (size < 0 || size > UNITS_PER_WORD);
5788 }
5789
5790 /* For the arm-wince targets we choose to be compatible with Microsoft's
5791 ARM and Thumb compilers, which always return aggregates in memory. */
5792 #ifndef ARM_WINCE
5793 /* All structures/unions bigger than one word are returned in memory.
5794 Also catch the case where int_size_in_bytes returns -1. In this case
5795 the aggregate is either huge or of variable size, and in either case
5796 we will want to return it via memory and not in a register. */
5797 if (size < 0 || size > UNITS_PER_WORD)
5798 return true;
5799
5800 if (TREE_CODE (type) == RECORD_TYPE)
5801 {
5802 tree field;
5803
5804 /* For a struct the APCS says that we only return in a register
5805 if the type is 'integer like' and every addressable element
5806 has an offset of zero. For practical purposes this means
5807 that the structure can have at most one non bit-field element
5808 and that this element must be the first one in the structure. */
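 /* Illustrative examples under these APCS rules: 'struct { int a; }'
    and 'struct { int a : 8; int b : 24; }' are returned in r0, while
    'struct { float f; }' is returned in memory ('struct { int a; int b; }'
    is already rejected by the size check above).  */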
5809
5810 /* Find the first field, ignoring non FIELD_DECL things which will
5811 have been created by C++. */
5812 for (field = TYPE_FIELDS (type);
5813 field && TREE_CODE (field) != FIELD_DECL;
5814 field = DECL_CHAIN (field))
5815 continue;
5816
5817 if (field == NULL)
5818 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5819
5820 /* Check that the first field is valid for returning in a register. */
5821
5822 /* ... Floats are not allowed */
5823 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5824 return true;
5825
5826 /* ... Aggregates that are not themselves valid for returning in
5827 a register are not allowed. */
5828 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5829 return true;
5830
5831 /* Now check the remaining fields, if any. Only bitfields are allowed,
5832 since they are not addressable. */
5833 for (field = DECL_CHAIN (field);
5834 field;
5835 field = DECL_CHAIN (field))
5836 {
5837 if (TREE_CODE (field) != FIELD_DECL)
5838 continue;
5839
5840 if (!DECL_BIT_FIELD_TYPE (field))
5841 return true;
5842 }
5843
5844 return false;
5845 }
5846
5847 if (TREE_CODE (type) == UNION_TYPE)
5848 {
5849 tree field;
5850
5851 /* Unions can be returned in registers if every element is
5852 integral, or can be returned in an integer register. */
5853 for (field = TYPE_FIELDS (type);
5854 field;
5855 field = DECL_CHAIN (field))
5856 {
5857 if (TREE_CODE (field) != FIELD_DECL)
5858 continue;
5859
5860 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5861 return true;
5862
5863 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5864 return true;
5865 }
5866
5867 return false;
5868 }
5869 #endif /* not ARM_WINCE */
5870
5871 /* Return all other types in memory. */
5872 return true;
5873 }
5874
5875 const struct pcs_attribute_arg
5876 {
5877 const char *arg;
5878 enum arm_pcs value;
5879 } pcs_attribute_args[] =
5880 {
5881 {"aapcs", ARM_PCS_AAPCS},
5882 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5883 #if 0
5884 /* We could recognize these, but changes would be needed elsewhere
5885 * to implement them. */
5886 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5887 {"atpcs", ARM_PCS_ATPCS},
5888 {"apcs", ARM_PCS_APCS},
5889 #endif
5890 {NULL, ARM_PCS_UNKNOWN}
5891 };
5892
5893 static enum arm_pcs
5894 arm_pcs_from_attribute (tree attr)
5895 {
5896 const struct pcs_attribute_arg *ptr;
5897 const char *arg;
5898
5899 /* Get the value of the argument. */
5900 if (TREE_VALUE (attr) == NULL_TREE
5901 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5902 return ARM_PCS_UNKNOWN;
5903
5904 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5905
5906 /* Check it against the list of known arguments. */
5907 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5908 if (streq (arg, ptr->arg))
5909 return ptr->value;
5910
5911 /* An unrecognized PCS variant. */
5912 return ARM_PCS_UNKNOWN;
5913 }
5914
5915 /* Get the PCS variant to use for this call. TYPE is the function's type
5916 specification, DECL is the specific declaration. DECL may be null if
5917 the call could be indirect or if this is a library call. */
5918 static enum arm_pcs
5919 arm_get_pcs_model (const_tree type, const_tree decl)
5920 {
5921 bool user_convention = false;
5922 enum arm_pcs user_pcs = arm_pcs_default;
5923 tree attr;
5924
5925 gcc_assert (type);
5926
5927 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5928 if (attr)
5929 {
5930 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5931 user_convention = true;
5932 }
5933
5934 if (TARGET_AAPCS_BASED)
5935 {
5936 /* Detect varargs functions. These always use the base rules
5937 (no argument is ever a candidate for a co-processor
5938 register). */
5939 bool base_rules = stdarg_p (type);
5940
5941 if (user_convention)
5942 {
5943 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5944 sorry ("non-AAPCS derived PCS variant");
5945 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5946 error ("variadic functions must use the base AAPCS variant");
5947 }
5948
5949 if (base_rules)
5950 return ARM_PCS_AAPCS;
5951 else if (user_convention)
5952 return user_pcs;
5953 else if (decl && flag_unit_at_a_time)
5954 {
5955 /* Local functions never leak outside this compilation unit,
5956 so we are free to use whatever conventions are
5957 appropriate. */
5958 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5959 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5960 if (i && i->local)
5961 return ARM_PCS_AAPCS_LOCAL;
5962 }
5963 }
5964 else if (user_convention && user_pcs != arm_pcs_default)
5965 sorry ("PCS variant");
5966
5967 /* For everything else we use the target's default. */
5968 return arm_pcs_default;
5969 }
5970
5971
5972 static void
5973 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5974 const_tree fntype ATTRIBUTE_UNUSED,
5975 rtx libcall ATTRIBUTE_UNUSED,
5976 const_tree fndecl ATTRIBUTE_UNUSED)
5977 {
5978 /* Record the unallocated VFP registers. */
5979 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5980 pcum->aapcs_vfp_reg_alloc = 0;
5981 }
5982
5983 /* Walk down the type tree of TYPE counting consecutive base elements.
5984 If *MODEP is VOIDmode, then set it to the first valid floating point
5985 type. If a non-floating point type is found, or if a floating point
5986 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5987 otherwise return the count in the sub-tree. */
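/* Illustrative example: for
       struct { double x; double y; };
   this walk returns 2 with *MODEP set to DFmode (a homogeneous aggregate
   of two doubles); mixing float and double members returns -1.  */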
5988 static int
5989 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5990 {
5991 machine_mode mode;
5992 HOST_WIDE_INT size;
5993
5994 switch (TREE_CODE (type))
5995 {
5996 case REAL_TYPE:
5997 mode = TYPE_MODE (type);
5998 if (mode != DFmode && mode != SFmode && mode != HFmode)
5999 return -1;
6000
6001 if (*modep == VOIDmode)
6002 *modep = mode;
6003
6004 if (*modep == mode)
6005 return 1;
6006
6007 break;
6008
6009 case COMPLEX_TYPE:
6010 mode = TYPE_MODE (TREE_TYPE (type));
6011 if (mode != DFmode && mode != SFmode)
6012 return -1;
6013
6014 if (*modep == VOIDmode)
6015 *modep = mode;
6016
6017 if (*modep == mode)
6018 return 2;
6019
6020 break;
6021
6022 case VECTOR_TYPE:
6023 /* Use V2SImode and V4SImode as representatives of all 64-bit
6024 and 128-bit vector types, whether or not those modes are
6025 supported with the present options. */
6026 size = int_size_in_bytes (type);
6027 switch (size)
6028 {
6029 case 8:
6030 mode = V2SImode;
6031 break;
6032 case 16:
6033 mode = V4SImode;
6034 break;
6035 default:
6036 return -1;
6037 }
6038
6039 if (*modep == VOIDmode)
6040 *modep = mode;
6041
6042 /* Vector modes are considered to be opaque: two vectors are
6043 equivalent for the purposes of being homogeneous aggregates
6044 if they are the same size. */
6045 if (*modep == mode)
6046 return 1;
6047
6048 break;
6049
6050 case ARRAY_TYPE:
6051 {
6052 int count;
6053 tree index = TYPE_DOMAIN (type);
6054
6055 /* Can't handle incomplete types nor sizes that are not
6056 fixed. */
6057 if (!COMPLETE_TYPE_P (type)
6058 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6059 return -1;
6060
6061 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6062 if (count == -1
6063 || !index
6064 || !TYPE_MAX_VALUE (index)
6065 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6066 || !TYPE_MIN_VALUE (index)
6067 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6068 || count < 0)
6069 return -1;
6070
6071 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6072 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6073
6074 /* There must be no padding. */
6075 if (wi::to_wide (TYPE_SIZE (type))
6076 != count * GET_MODE_BITSIZE (*modep))
6077 return -1;
6078
6079 return count;
6080 }
6081
6082 case RECORD_TYPE:
6083 {
6084 int count = 0;
6085 int sub_count;
6086 tree field;
6087
6088 /* Can't handle incomplete types nor sizes that are not
6089 fixed. */
6090 if (!COMPLETE_TYPE_P (type)
6091 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6092 return -1;
6093
6094 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6095 {
6096 if (TREE_CODE (field) != FIELD_DECL)
6097 continue;
6098
6099 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6100 if (sub_count < 0)
6101 return -1;
6102 count += sub_count;
6103 }
6104
6105 /* There must be no padding. */
6106 if (wi::to_wide (TYPE_SIZE (type))
6107 != count * GET_MODE_BITSIZE (*modep))
6108 return -1;
6109
6110 return count;
6111 }
6112
6113 case UNION_TYPE:
6114 case QUAL_UNION_TYPE:
6115 {
6116 /* These aren't very interesting except in a degenerate case. */
6117 int count = 0;
6118 int sub_count;
6119 tree field;
6120
6121 /* Can't handle incomplete types nor sizes that are not
6122 fixed. */
6123 if (!COMPLETE_TYPE_P (type)
6124 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6125 return -1;
6126
6127 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6128 {
6129 if (TREE_CODE (field) != FIELD_DECL)
6130 continue;
6131
6132 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6133 if (sub_count < 0)
6134 return -1;
6135 count = count > sub_count ? count : sub_count;
6136 }
6137
6138 /* There must be no padding. */
6139 if (wi::to_wide (TYPE_SIZE (type))
6140 != count * GET_MODE_BITSIZE (*modep))
6141 return -1;
6142
6143 return count;
6144 }
6145
6146 default:
6147 break;
6148 }
6149
6150 return -1;
6151 }
6152
6153 /* Return true if PCS_VARIANT should use VFP registers. */
6154 static bool
6155 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6156 {
6157 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6158 {
6159 static bool seen_thumb1_vfp = false;
6160
6161 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6162 {
6163 sorry ("Thumb-1 hard-float VFP ABI");
6164 /* sorry() is not immediately fatal, so only display this once. */
6165 seen_thumb1_vfp = true;
6166 }
6167
6168 return true;
6169 }
6170
6171 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6172 return false;
6173
6174 return (TARGET_32BIT && TARGET_HARD_FLOAT
6175 && (TARGET_VFP_DOUBLE || !is_double));
6176 }
6177
6178 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6179 suitable for passing or returning in VFP registers for the PCS
6180 variant selected. If it is, then *BASE_MODE is updated to contain
6181 a machine mode describing each element of the argument's type and
6182 *COUNT to hold the number of such elements. */
6183 static bool
6184 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6185 machine_mode mode, const_tree type,
6186 machine_mode *base_mode, int *count)
6187 {
6188 machine_mode new_mode = VOIDmode;
6189
6190 /* If we have the type information, prefer that to working things
6191 out from the mode. */
6192 if (type)
6193 {
6194 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6195
6196 if (ag_count > 0 && ag_count <= 4)
6197 *count = ag_count;
6198 else
6199 return false;
6200 }
6201 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6202 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6203 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6204 {
6205 *count = 1;
6206 new_mode = mode;
6207 }
6208 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6209 {
6210 *count = 2;
6211 new_mode = (mode == DCmode ? DFmode : SFmode);
6212 }
6213 else
6214 return false;
6215
6216
6217 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6218 return false;
6219
6220 *base_mode = new_mode;
6221
6222 if (TARGET_GENERAL_REGS_ONLY)
6223 error ("argument of type %qT not permitted with -mgeneral-regs-only",
6224 type);
6225
6226 return true;
6227 }
6228
6229 static bool
6230 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6231 machine_mode mode, const_tree type)
6232 {
6233 int count ATTRIBUTE_UNUSED;
6234 machine_mode ag_mode ATTRIBUTE_UNUSED;
6235
6236 if (!use_vfp_abi (pcs_variant, false))
6237 return false;
6238 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6239 &ag_mode, &count);
6240 }
6241
6242 static bool
6243 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6244 const_tree type)
6245 {
6246 if (!use_vfp_abi (pcum->pcs_variant, false))
6247 return false;
6248
6249 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6250 &pcum->aapcs_vfp_rmode,
6251 &pcum->aapcs_vfp_rcount);
6252 }
6253
6254 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6255 for the behaviour of this function. */
6256
6257 static bool
6258 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6259 const_tree type ATTRIBUTE_UNUSED)
6260 {
6261 int rmode_size
6262 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6263 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6264 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6265 int regno;
6266
6267 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6268 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6269 {
6270 pcum->aapcs_vfp_reg_alloc = mask << regno;
6271 if (mode == BLKmode
6272 || (mode == TImode && ! TARGET_NEON)
6273 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6274 {
6275 int i;
6276 int rcount = pcum->aapcs_vfp_rcount;
6277 int rshift = shift;
6278 machine_mode rmode = pcum->aapcs_vfp_rmode;
6279 rtx par;
6280 if (!TARGET_NEON)
6281 {
6282 /* Avoid using unsupported vector modes. */
6283 if (rmode == V2SImode)
6284 rmode = DImode;
6285 else if (rmode == V4SImode)
6286 {
6287 rmode = DImode;
6288 rcount *= 2;
6289 rshift /= 2;
6290 }
6291 }
6292 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6293 for (i = 0; i < rcount; i++)
6294 {
6295 rtx tmp = gen_rtx_REG (rmode,
6296 FIRST_VFP_REGNUM + regno + i * rshift);
6297 tmp = gen_rtx_EXPR_LIST
6298 (VOIDmode, tmp,
6299 GEN_INT (i * GET_MODE_SIZE (rmode)));
6300 XVECEXP (par, 0, i) = tmp;
6301 }
6302
6303 pcum->aapcs_reg = par;
6304 }
6305 else
6306 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6307 return true;
6308 }
6309 return false;
6310 }
6311
6312 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6313 comment there for the behaviour of this function. */
6314
6315 static rtx
6316 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6317 machine_mode mode,
6318 const_tree type ATTRIBUTE_UNUSED)
6319 {
6320 if (!use_vfp_abi (pcs_variant, false))
6321 return NULL;
6322
6323 if (mode == BLKmode
6324 || (GET_MODE_CLASS (mode) == MODE_INT
6325 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6326 && !TARGET_NEON))
6327 {
6328 int count;
6329 machine_mode ag_mode;
6330 int i;
6331 rtx par;
6332 int shift;
6333
6334 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6335 &ag_mode, &count);
6336
6337 if (!TARGET_NEON)
6338 {
6339 if (ag_mode == V2SImode)
6340 ag_mode = DImode;
6341 else if (ag_mode == V4SImode)
6342 {
6343 ag_mode = DImode;
6344 count *= 2;
6345 }
6346 }
6347 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6348 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6349 for (i = 0; i < count; i++)
6350 {
6351 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6352 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6353 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6354 XVECEXP (par, 0, i) = tmp;
6355 }
6356
6357 return par;
6358 }
6359
6360 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6361 }
6362
6363 static void
6364 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
6365 machine_mode mode ATTRIBUTE_UNUSED,
6366 const_tree type ATTRIBUTE_UNUSED)
6367 {
6368 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6369 pcum->aapcs_vfp_reg_alloc = 0;
6370 return;
6371 }
6372
6373 #define AAPCS_CP(X) \
6374 { \
6375 aapcs_ ## X ## _cum_init, \
6376 aapcs_ ## X ## _is_call_candidate, \
6377 aapcs_ ## X ## _allocate, \
6378 aapcs_ ## X ## _is_return_candidate, \
6379 aapcs_ ## X ## _allocate_return_reg, \
6380 aapcs_ ## X ## _advance \
6381 }
6382
6383 /* Table of co-processors that can be used to pass arguments in
6384 registers. Ideally no argument should be a candidate for more than
6385 one co-processor table entry, but the table is processed in order
6386 and stops after the first match. If that entry then fails to put
6387 the argument into a co-processor register, the argument will go on
6388 the stack. */
6389 static struct
6390 {
6391 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6392 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6393
6394 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6395 BLKmode) is a candidate for this co-processor's registers; this
6396 function should ignore any position-dependent state in
6397 CUMULATIVE_ARGS and only use call-type dependent information. */
6398 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6399
6400 /* Return true if the argument does get a co-processor register; it
6401 should set aapcs_reg to an RTX of the register allocated as is
6402 required for a return from FUNCTION_ARG. */
6403 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6404
6405 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6406 be returned in this co-processor's registers. */
6407 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6408
6409 /* Allocate and return an RTX element to hold the return type of a call. This
6410 routine must not fail and will only be called if is_return_candidate
6411 returned true with the same parameters. */
6412 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6413
6414 /* Finish processing this argument and prepare to start processing
6415 the next one. */
6416 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6417 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6418 {
6419 AAPCS_CP(vfp)
6420 };
6421
6422 #undef AAPCS_CP
6423
6424 static int
6425 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6426 const_tree type)
6427 {
6428 int i;
6429
6430 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6431 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6432 return i;
6433
6434 return -1;
6435 }
6436
6437 static int
6438 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6439 {
6440 /* We aren't passed a decl, so we can't check that a call is local.
6441 However, it isn't clear that that would be a win anyway, since it
6442 might limit some tail-calling opportunities. */
6443 enum arm_pcs pcs_variant;
6444
6445 if (fntype)
6446 {
6447 const_tree fndecl = NULL_TREE;
6448
6449 if (TREE_CODE (fntype) == FUNCTION_DECL)
6450 {
6451 fndecl = fntype;
6452 fntype = TREE_TYPE (fntype);
6453 }
6454
6455 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6456 }
6457 else
6458 pcs_variant = arm_pcs_default;
6459
6460 if (pcs_variant != ARM_PCS_AAPCS)
6461 {
6462 int i;
6463
6464 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6465 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6466 TYPE_MODE (type),
6467 type))
6468 return i;
6469 }
6470 return -1;
6471 }
6472
6473 static rtx
6474 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6475 const_tree fntype)
6476 {
6477 /* We aren't passed a decl, so we can't check that a call is local.
6478 However, it isn't clear that that would be a win anyway, since it
6479 might limit some tail-calling opportunities. */
6480 enum arm_pcs pcs_variant;
6481 int unsignedp ATTRIBUTE_UNUSED;
6482
6483 if (fntype)
6484 {
6485 const_tree fndecl = NULL_TREE;
6486
6487 if (TREE_CODE (fntype) == FUNCTION_DECL)
6488 {
6489 fndecl = fntype;
6490 fntype = TREE_TYPE (fntype);
6491 }
6492
6493 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6494 }
6495 else
6496 pcs_variant = arm_pcs_default;
6497
6498 /* Promote integer types. */
6499 if (type && INTEGRAL_TYPE_P (type))
6500 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6501
6502 if (pcs_variant != ARM_PCS_AAPCS)
6503 {
6504 int i;
6505
6506 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6507 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6508 type))
6509 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6510 mode, type);
6511 }
6512
6513 /* Promotes small structs returned in a register to full-word size
6514 for big-endian AAPCS. */
6515 if (type && arm_return_in_msb (type))
6516 {
6517 HOST_WIDE_INT size = int_size_in_bytes (type);
6518 if (size % UNITS_PER_WORD != 0)
6519 {
6520 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6521 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6522 }
6523 }
6524
6525 return gen_rtx_REG (mode, R0_REGNUM);
6526 }
6527
6528 static rtx
6529 aapcs_libcall_value (machine_mode mode)
6530 {
6531 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6532 && GET_MODE_SIZE (mode) <= 4)
6533 mode = SImode;
6534
6535 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6536 }
6537
6538 /* Lay out a function argument using the AAPCS rules. The rule
6539 numbers referred to here are those in the AAPCS. */
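/* Illustrative example of the rules below: when a 64-bit integer argument
   follows a single 'int' argument, rule C3 rounds the next core register
   number up from 1 to 2 and rule C4 then assigns the pair r2/r3, leaving
   r1 unused.  */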
6540 static void
6541 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6542 const_tree type, bool named)
6543 {
6544 int nregs, nregs2;
6545 int ncrn;
6546
6547 /* We only need to do this once per argument. */
6548 if (pcum->aapcs_arg_processed)
6549 return;
6550
6551 pcum->aapcs_arg_processed = true;
6552
6553 /* Special case: if named is false then we are handling an incoming
6554 anonymous argument which is on the stack. */
6555 if (!named)
6556 return;
6557
6558 /* Is this a potential co-processor register candidate? */
6559 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6560 {
6561 int slot = aapcs_select_call_coproc (pcum, mode, type);
6562 pcum->aapcs_cprc_slot = slot;
6563
6564 /* We don't have to apply any of the rules from part B of the
6565 preparation phase, these are handled elsewhere in the
6566 compiler. */
6567
6568 if (slot >= 0)
6569 {
6570 /* A Co-processor register candidate goes either in its own
6571 class of registers or on the stack. */
6572 if (!pcum->aapcs_cprc_failed[slot])
6573 {
6574 /* C1.cp - Try to allocate the argument to co-processor
6575 registers. */
6576 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6577 return;
6578
6579 /* C2.cp - Put the argument on the stack and note that we
6580 can't assign any more candidates in this slot. We also
6581 need to note that we have allocated stack space, so that
6582 we won't later try to split a non-cprc candidate between
6583 core registers and the stack. */
6584 pcum->aapcs_cprc_failed[slot] = true;
6585 pcum->can_split = false;
6586 }
6587
6588 /* We didn't get a register, so this argument goes on the
6589 stack. */
6590 gcc_assert (pcum->can_split == false);
6591 return;
6592 }
6593 }
6594
6595 /* C3 - For double-word aligned arguments, round the NCRN up to the
6596 next even number. */
6597 ncrn = pcum->aapcs_ncrn;
6598 if (ncrn & 1)
6599 {
6600 int res = arm_needs_doubleword_align (mode, type);
6601 /* Only warn during RTL expansion of call stmts, otherwise we would
6602 warn e.g. during gimplification even on functions that will be
6603 always inlined, and we'd warn multiple times. Don't warn when
6604 called in expand_function_start either, as we warn instead in
6605 arm_function_arg_boundary in that case. */
6606 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6607 inform (input_location, "parameter passing for argument of type "
6608 "%qT changed in GCC 7.1", type);
6609 else if (res > 0)
6610 ncrn++;
6611 }
6612
6613 nregs = ARM_NUM_REGS2 (mode, type);
6614
6615 /* Sigh, this test should really assert that nregs > 0, but a GCC
6616 extension allows empty structs and then gives them empty size; it
6617 then allows such a structure to be passed by value. For some of
6618 the code below we have to pretend that such an argument has
6619 non-zero size so that we 'locate' it correctly either in
6620 registers or on the stack. */
6621 gcc_assert (nregs >= 0);
6622
6623 nregs2 = nregs ? nregs : 1;
6624
6625 /* C4 - Argument fits entirely in core registers. */
6626 if (ncrn + nregs2 <= NUM_ARG_REGS)
6627 {
6628 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6629 pcum->aapcs_next_ncrn = ncrn + nregs;
6630 return;
6631 }
6632
6633 /* C5 - Some core registers left and there are no arguments already
6634 on the stack: split this argument between the remaining core
6635 registers and the stack. */
6636 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6637 {
6638 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6639 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6640 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6641 return;
6642 }
6643
6644 /* C6 - NCRN is set to 4. */
6645 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6646
6647 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6648 return;
6649 }
6650
6651 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6652 for a call to a function whose data type is FNTYPE.
6653 For a library call, FNTYPE is NULL. */
6654 void
6655 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6656 rtx libname,
6657 tree fndecl ATTRIBUTE_UNUSED)
6658 {
6659 /* Work out which PCS variant applies to this call. */
6660 if (fntype)
6661 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6662 else
6663 pcum->pcs_variant = arm_pcs_default;
6664
6665 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6666 {
6667 if (arm_libcall_uses_aapcs_base (libname))
6668 pcum->pcs_variant = ARM_PCS_AAPCS;
6669
6670 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6671 pcum->aapcs_reg = NULL_RTX;
6672 pcum->aapcs_partial = 0;
6673 pcum->aapcs_arg_processed = false;
6674 pcum->aapcs_cprc_slot = -1;
6675 pcum->can_split = true;
6676
6677 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6678 {
6679 int i;
6680
6681 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6682 {
6683 pcum->aapcs_cprc_failed[i] = false;
6684 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6685 }
6686 }
6687 return;
6688 }
6689
6690 /* Legacy ABIs */
6691
6692 /* On the ARM, the offset starts at 0. */
6693 pcum->nregs = 0;
6694 pcum->iwmmxt_nregs = 0;
6695 pcum->can_split = true;
6696
6697 /* Varargs vectors are treated the same as long long.
6698 named_count avoids having to change the way arm handles 'named'. */
6699 pcum->named_count = 0;
6700 pcum->nargs = 0;
6701
6702 if (TARGET_REALLY_IWMMXT && fntype)
6703 {
6704 tree fn_arg;
6705
6706 for (fn_arg = TYPE_ARG_TYPES (fntype);
6707 fn_arg;
6708 fn_arg = TREE_CHAIN (fn_arg))
6709 pcum->named_count += 1;
6710
6711 if (! pcum->named_count)
6712 pcum->named_count = INT_MAX;
6713 }
6714 }
6715
6716 /* Return 2 if double word alignment is required for argument passing,
6717 but wasn't required before the fix for PR88469.
6718 Return 1 if double word alignment is required for argument passing.
6719 Return -1 if double word alignment used to be required for argument
6720 passing before PR77728 ABI fix, but is not required anymore.
6721 Return 0 if double word alignment is not required and wasn't required
6722 before either. */
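/* Illustrative examples: 'long long', or a struct with a 'long long'
   member, yields 1; a struct whose only over-aligned member is not a
   FIELD_DECL (the pre-PR77728 behaviour) yields -1 so -Wpsabi can warn;
   plain 'int' yields 0.  */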
6723 static int
6724 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6725 {
6726 if (!type)
6727 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6728
6729 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6730 if (!AGGREGATE_TYPE_P (type))
6731 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6732
6733 /* Array types: Use member alignment of element type. */
6734 if (TREE_CODE (type) == ARRAY_TYPE)
6735 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6736
6737 int ret = 0;
6738 int ret2 = 0;
6739 /* Record/aggregate types: Use greatest member alignment of any member. */
6740 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6741 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6742 {
6743 if (TREE_CODE (field) == FIELD_DECL)
6744 return 1;
6745 else
6746 /* Before PR77728 fix, we were incorrectly considering also
6747 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6748 Make sure we can warn about that with -Wpsabi. */
6749 ret = -1;
6750 }
6751 else if (TREE_CODE (field) == FIELD_DECL
6752 && DECL_BIT_FIELD_TYPE (field)
6753 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
6754 ret2 = 1;
6755
6756 if (ret2)
6757 return 2;
6758
6759 return ret;
6760 }
6761
6762
6763 /* Determine where to put an argument to a function.
6764 Value is zero to push the argument on the stack,
6765 or a hard register in which to store the argument.
6766
6767 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6768 the preceding args and about the function being called.
6769 ARG is a description of the argument.
6770
6771 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6772 other arguments are passed on the stack. If (NAMED == 0) (which happens
6773 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6774 defined), say it is passed in the stack (function_prologue will
6775 indeed make it pass in the stack if necessary). */
6776
6777 static rtx
6778 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
6779 {
6780 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6781 int nregs;
6782
6783 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6784 a call insn (op3 of a call_value insn). */
6785 if (arg.end_marker_p ())
6786 return const0_rtx;
6787
6788 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6789 {
6790 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6791 return pcum->aapcs_reg;
6792 }
6793
6794 /* Varargs vectors are treated the same as long long.
6795 named_count avoids having to change the way arm handles 'named'. */
6796 if (TARGET_IWMMXT_ABI
6797 && arm_vector_mode_supported_p (arg.mode)
6798 && pcum->named_count > pcum->nargs + 1)
6799 {
6800 if (pcum->iwmmxt_nregs <= 9)
6801 return gen_rtx_REG (arg.mode,
6802 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6803 else
6804 {
6805 pcum->can_split = false;
6806 return NULL_RTX;
6807 }
6808 }
6809
6810 /* Put doubleword aligned quantities in even register pairs. */
6811 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6812 {
6813 int res = arm_needs_doubleword_align (arg.mode, arg.type);
6814 if (res < 0 && warn_psabi)
6815 inform (input_location, "parameter passing for argument of type "
6816 "%qT changed in GCC 7.1", arg.type);
6817 else if (res > 0)
6818 {
6819 pcum->nregs++;
6820 if (res > 1 && warn_psabi)
6821 inform (input_location, "parameter passing for argument of type "
6822 "%qT changed in GCC 9.1", arg.type);
6823 }
6824 }
6825
6826 /* Only allow splitting an arg between regs and memory if all preceding
6827 args were allocated to regs. For args passed by reference we only count
6828 the reference pointer. */
6829 if (pcum->can_split)
6830 nregs = 1;
6831 else
6832 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
6833
6834 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
6835 return NULL_RTX;
6836
6837 return gen_rtx_REG (arg.mode, pcum->nregs);
6838 }
6839
6840 static unsigned int
6841 arm_function_arg_boundary (machine_mode mode, const_tree type)
6842 {
6843 if (!ARM_DOUBLEWORD_ALIGN)
6844 return PARM_BOUNDARY;
6845
6846 int res = arm_needs_doubleword_align (mode, type);
6847 if (res < 0 && warn_psabi)
6848 inform (input_location, "parameter passing for argument of type %qT "
6849 "changed in GCC 7.1", type);
6850 if (res > 1 && warn_psabi)
6851 inform (input_location, "parameter passing for argument of type "
6852 "%qT changed in GCC 9.1", type);
6853
6854 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6855 }
6856
6857 static int
6858 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
6859 {
6860 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6861 int nregs = pcum->nregs;
6862
6863 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6864 {
6865 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6866 return pcum->aapcs_partial;
6867 }
6868
6869 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
6870 return 0;
6871
6872 if (NUM_ARG_REGS > nregs
6873 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
6874 && pcum->can_split)
6875 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6876
6877 return 0;
6878 }
6879
6880 /* Update the data in PCUM to advance over argument ARG. */
6881
6882 static void
6883 arm_function_arg_advance (cumulative_args_t pcum_v,
6884 const function_arg_info &arg)
6885 {
6886 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6887
6888 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6889 {
6890 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6891
6892 if (pcum->aapcs_cprc_slot >= 0)
6893 {
6894 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
6895 arg.type);
6896 pcum->aapcs_cprc_slot = -1;
6897 }
6898
6899 /* Generic stuff. */
6900 pcum->aapcs_arg_processed = false;
6901 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6902 pcum->aapcs_reg = NULL_RTX;
6903 pcum->aapcs_partial = 0;
6904 }
6905 else
6906 {
6907 pcum->nargs += 1;
6908 if (arm_vector_mode_supported_p (arg.mode)
6909 && pcum->named_count > pcum->nargs
6910 && TARGET_IWMMXT_ABI)
6911 pcum->iwmmxt_nregs += 1;
6912 else
6913 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
6914 }
6915 }
6916
6917 /* Variable sized types are passed by reference. This is a GCC
6918 extension to the ARM ABI. */
6919
6920 static bool
6921 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6922 {
6923 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
6924 }
6925 \f
6926 /* Encode the current state of the #pragma [no_]long_calls. */
6927 typedef enum
6928 {
6929 OFF, /* No #pragma [no_]long_calls is in effect. */
6930 LONG, /* #pragma long_calls is in effect. */
6931 SHORT /* #pragma no_long_calls is in effect. */
6932 } arm_pragma_enum;
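/* For illustration only -- a user-level sketch (not compiled here) of the
   pragmas this enumeration tracks:

     #pragma long_calls
     void far_away (void);    (calls to this use a long-call sequence)
     #pragma no_long_calls
     void nearby (void);      (calls to this may use a plain BL)
     #pragma long_calls_off

   The per-function __attribute__ ((long_call)) and
   __attribute__ ((short_call)) attributes request the same behaviour.  */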
6933
6934 static arm_pragma_enum arm_pragma_long_calls = OFF;
6935
6936 void
6937 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6938 {
6939 arm_pragma_long_calls = LONG;
6940 }
6941
6942 void
6943 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6944 {
6945 arm_pragma_long_calls = SHORT;
6946 }
6947
6948 void
6949 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6950 {
6951 arm_pragma_long_calls = OFF;
6952 }
6953 \f
6954 /* Handle an attribute requiring a FUNCTION_DECL;
6955 arguments as in struct attribute_spec.handler. */
6956 static tree
6957 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6958 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6959 {
6960 if (TREE_CODE (*node) != FUNCTION_DECL)
6961 {
6962 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6963 name);
6964 *no_add_attrs = true;
6965 }
6966
6967 return NULL_TREE;
6968 }
6969
6970 /* Handle an "interrupt" or "isr" attribute;
6971 arguments as in struct attribute_spec.handler. */
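/* For illustration only -- a sketch of user code (not compiled here):

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   The optional string argument is validated by arm_isr_value; an
   unrecognized value causes the attribute to be ignored with a warning.
   "isr" is accepted as a synonym for "interrupt".  */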
6972 static tree
6973 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6974 bool *no_add_attrs)
6975 {
6976 if (DECL_P (*node))
6977 {
6978 if (TREE_CODE (*node) != FUNCTION_DECL)
6979 {
6980 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6981 name);
6982 *no_add_attrs = true;
6983 }
6984 /* FIXME: the argument if any is checked for type attributes;
6985 should it be checked for decl ones? */
6986 }
6987 else
6988 {
6989 if (TREE_CODE (*node) == FUNCTION_TYPE
6990 || TREE_CODE (*node) == METHOD_TYPE)
6991 {
6992 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6993 {
6994 warning (OPT_Wattributes, "%qE attribute ignored",
6995 name);
6996 *no_add_attrs = true;
6997 }
6998 }
6999 else if (TREE_CODE (*node) == POINTER_TYPE
7000 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7001 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7002 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7003 {
7004 *node = build_variant_type_copy (*node);
7005 TREE_TYPE (*node) = build_type_attribute_variant
7006 (TREE_TYPE (*node),
7007 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7008 *no_add_attrs = true;
7009 }
7010 else
7011 {
7012 /* Possibly pass this attribute on from the type to a decl. */
7013 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7014 | (int) ATTR_FLAG_FUNCTION_NEXT
7015 | (int) ATTR_FLAG_ARRAY_NEXT))
7016 {
7017 *no_add_attrs = true;
7018 return tree_cons (name, args, NULL_TREE);
7019 }
7020 else
7021 {
7022 warning (OPT_Wattributes, "%qE attribute ignored",
7023 name);
7024 }
7025 }
7026 }
7027
7028 return NULL_TREE;
7029 }
7030
7031 /* Handle a "pcs" attribute; arguments as in struct
7032 attribute_spec.handler. */
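/* For illustration only -- a sketch of user code (not compiled here):

     double __attribute__ ((pcs ("aapcs"))) f (double);

   requests the base (soft-float argument passing) procedure call standard
   for F.  Values not recognized by arm_pcs_from_attribute are ignored
   with a warning.  */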
7033 static tree
7034 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7035 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7036 {
7037 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7038 {
7039 warning (OPT_Wattributes, "%qE attribute ignored", name);
7040 *no_add_attrs = true;
7041 }
7042 return NULL_TREE;
7043 }
7044
7045 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7046 /* Handle the "notshared" attribute. This attribute is another way of
7047 requesting hidden visibility. ARM's compiler supports
7048 "__declspec(notshared)"; we support the same thing via an
7049 attribute. */
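/* For illustration only -- a rough sketch of user code (not compiled
   here), applying the attribute to a class type:

     class __attribute__ ((notshared)) Widget { virtual void f (); };

   which requests hidden visibility for the class's type information,
   roughly matching __declspec(notshared) in the ARM compiler.  */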
7050
7051 static tree
7052 arm_handle_notshared_attribute (tree *node,
7053 tree name ATTRIBUTE_UNUSED,
7054 tree args ATTRIBUTE_UNUSED,
7055 int flags ATTRIBUTE_UNUSED,
7056 bool *no_add_attrs)
7057 {
7058 tree decl = TYPE_NAME (*node);
7059
7060 if (decl)
7061 {
7062 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7063 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7064 *no_add_attrs = false;
7065 }
7066 return NULL_TREE;
7067 }
7068 #endif
7069
7070 /* This function returns true if a function with declaration FNDECL and type
7071    FNTYPE uses the stack to pass arguments or return values and false
7072 otherwise. This is used for functions with the attributes
7073 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7074 diagnostic messages if the stack is used. NAME is the name of the attribute
7075 used. */
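/* For illustration only -- a sketch (not compiled here) of a declaration
   this check rejects under the AAPCS base variant:

     int __attribute__ ((cmse_nonsecure_entry)) f (int, int, int, int, int);

   The fifth argument does not fit in r0-r3 and would have to be passed on
   the stack, so the attribute is refused with an error.  */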
7076
7077 static bool
7078 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7079 {
7080 function_args_iterator args_iter;
7081 CUMULATIVE_ARGS args_so_far_v;
7082 cumulative_args_t args_so_far;
7083 bool first_param = true;
7084 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7085
7086 /* Error out if any argument is passed on the stack. */
7087 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7088 args_so_far = pack_cumulative_args (&args_so_far_v);
7089 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7090 {
7091 rtx arg_rtx;
7092
7093 prev_arg_type = arg_type;
7094 if (VOID_TYPE_P (arg_type))
7095 continue;
7096
7097 function_arg_info arg (arg_type, /*named=*/true);
7098 if (!first_param)
7099 /* ??? We should advance after processing the argument and pass
7100 the argument we're advancing past. */
7101 arm_function_arg_advance (args_so_far, arg);
7102 arg_rtx = arm_function_arg (args_so_far, arg);
7103 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7104 {
7105 error ("%qE attribute not available to functions with arguments "
7106 "passed on the stack", name);
7107 return true;
7108 }
7109 first_param = false;
7110 }
7111
7112 /* Error out for variadic functions since we cannot control how many
7113    arguments will be passed and thus the stack could be used.  stdarg_p () is
7114    not used for the checking, to avoid walking the arguments twice.  */
7115 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7116 {
7117 error ("%qE attribute not available to functions with variable number "
7118 "of arguments", name);
7119 return true;
7120 }
7121
7122 /* Error out if return value is passed on the stack. */
7123 ret_type = TREE_TYPE (fntype);
7124 if (arm_return_in_memory (ret_type, fntype))
7125 {
7126 error ("%qE attribute not available to functions that return value on "
7127 "the stack", name);
7128 return true;
7129 }
7130 return false;
7131 }
7132
7133 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7134 function will check whether the attribute is allowed here and will add the
7135 attribute to the function declaration tree or otherwise issue a warning. */
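/* For illustration only -- a sketch of user code (not compiled here),
   built with -mcmse:

     int __attribute__ ((cmse_nonsecure_entry)) get_key (void);

   Without -mcmse, or on a function with static linkage, the attribute is
   dropped with a warning instead.  */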
7136
7137 static tree
7138 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7139 tree /* args */,
7140 int /* flags */,
7141 bool *no_add_attrs)
7142 {
7143 tree fndecl;
7144
7145 if (!use_cmse)
7146 {
7147 *no_add_attrs = true;
7148 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7149 "option.", name);
7150 return NULL_TREE;
7151 }
7152
7153 /* Ignore attribute for function types. */
7154 if (TREE_CODE (*node) != FUNCTION_DECL)
7155 {
7156 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7157 name);
7158 *no_add_attrs = true;
7159 return NULL_TREE;
7160 }
7161
7162 fndecl = *node;
7163
7164 /* Warn for static linkage functions. */
7165 if (!TREE_PUBLIC (fndecl))
7166 {
7167 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7168 "with static linkage", name);
7169 *no_add_attrs = true;
7170 return NULL_TREE;
7171 }
7172
7173 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7174 TREE_TYPE (fndecl));
7175 return NULL_TREE;
7176 }
7177
7178
7179 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7180 function will check whether the attribute is allowed here and will add the
7181 attribute to the function type tree or otherwise issue a diagnostic. The
7182 reason we check this at declaration time is to only allow the use of the
7183 attribute with declarations of function pointers and not function
7184    declarations.  This function checks that NODE is of the expected type and
7185    issues diagnostics using NAME otherwise.  If it is not of the expected type
7186 *NO_ADD_ATTRS will be set to true. */
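/* For illustration only -- a sketch of user code (not compiled here):

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (void);
     ns_fn_t *callback;

   attaches the attribute to the function type that CALLBACK points to,
   whereas placing it directly on an ordinary function declaration is
   rejected with a warning.  */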
7187
7188 static tree
7189 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7190 tree /* args */,
7191 int /* flags */,
7192 bool *no_add_attrs)
7193 {
7194 tree decl = NULL_TREE, fntype = NULL_TREE;
7195 tree type;
7196
7197 if (!use_cmse)
7198 {
7199 *no_add_attrs = true;
7200 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7201 "option.", name);
7202 return NULL_TREE;
7203 }
7204
7205 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7206 {
7207 decl = *node;
7208 fntype = TREE_TYPE (decl);
7209 }
7210
7211 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7212 fntype = TREE_TYPE (fntype);
7213
7214 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7215 {
7216 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7217 "function pointer", name);
7218 *no_add_attrs = true;
7219 return NULL_TREE;
7220 }
7221
7222 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7223
7224 if (*no_add_attrs)
7225 return NULL_TREE;
7226
7227 /* Prevent trees being shared among function types with and without
7228 cmse_nonsecure_call attribute. */
7229 type = TREE_TYPE (decl);
7230
7231 type = build_distinct_type_copy (type);
7232 TREE_TYPE (decl) = type;
7233 fntype = type;
7234
7235 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7236 {
7237 type = fntype;
7238 fntype = TREE_TYPE (fntype);
7239 fntype = build_distinct_type_copy (fntype);
7240 TREE_TYPE (type) = fntype;
7241 }
7242
7243 /* Construct a type attribute and add it to the function type. */
7244 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7245 TYPE_ATTRIBUTES (fntype));
7246 TYPE_ATTRIBUTES (fntype) = attrs;
7247 return NULL_TREE;
7248 }
7249
7250 /* Return 0 if the attributes for two types are incompatible, 1 if they
7251 are compatible, and 2 if they are nearly compatible (which causes a
7252 warning to be generated). */
7253 static int
7254 arm_comp_type_attributes (const_tree type1, const_tree type2)
7255 {
7256 int l1, l2, s1, s2;
7257
7258 /* Check for mismatch of non-default calling convention. */
7259 if (TREE_CODE (type1) != FUNCTION_TYPE)
7260 return 1;
7261
7262 /* Check for mismatched call attributes. */
7263 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7264 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7265 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7266 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7267
7268 /* Only bother to check if an attribute is defined. */
7269 if (l1 | l2 | s1 | s2)
7270 {
7271 /* If one type has an attribute, the other must have the same attribute. */
7272 if ((l1 != l2) || (s1 != s2))
7273 return 0;
7274
7275 /* Disallow mixed attributes. */
7276 if ((l1 & s2) || (l2 & s1))
7277 return 0;
7278 }
7279
7280 /* Check for mismatched ISR attribute. */
7281 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7282 if (! l1)
7283 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7284 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7285 if (! l2)
7286     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7287 if (l1 != l2)
7288 return 0;
7289
7290 l1 = lookup_attribute ("cmse_nonsecure_call",
7291 TYPE_ATTRIBUTES (type1)) != NULL;
7292 l2 = lookup_attribute ("cmse_nonsecure_call",
7293 TYPE_ATTRIBUTES (type2)) != NULL;
7294
7295 if (l1 != l2)
7296 return 0;
7297
7298 return 1;
7299 }
7300
7301 /* Assigns default attributes to newly defined type. This is used to
7302 set short_call/long_call attributes for function types of
7303 functions defined inside corresponding #pragma scopes. */
7304 static void
7305 arm_set_default_type_attributes (tree type)
7306 {
7307   /* Add __attribute__ ((long_call)) to all functions when inside
7308      #pragma long_calls, or __attribute__ ((short_call)) when inside
7309      #pragma no_long_calls.  */
7310 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7311 {
7312 tree type_attr_list, attr_name;
7313 type_attr_list = TYPE_ATTRIBUTES (type);
7314
7315 if (arm_pragma_long_calls == LONG)
7316 attr_name = get_identifier ("long_call");
7317 else if (arm_pragma_long_calls == SHORT)
7318 attr_name = get_identifier ("short_call");
7319 else
7320 return;
7321
7322 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7323 TYPE_ATTRIBUTES (type) = type_attr_list;
7324 }
7325 }
7326 \f
7327 /* Return true if DECL is known to be linked into section SECTION. */
7328
7329 static bool
7330 arm_function_in_section_p (tree decl, section *section)
7331 {
7332 /* We can only be certain about the prevailing symbol definition. */
7333 if (!decl_binds_to_current_def_p (decl))
7334 return false;
7335
7336 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7337 if (!DECL_SECTION_NAME (decl))
7338 {
7339 /* Make sure that we will not create a unique section for DECL. */
7340 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7341 return false;
7342 }
7343
7344 return function_section (decl) == section;
7345 }
7346
7347 /* Return nonzero if a 32-bit "long_call" should be generated for
7348 a call from the current function to DECL. We generate a long_call
7349 if the function:
7350
7351         a.  has an __attribute__ ((long_call))
7352 or b. is within the scope of a #pragma long_calls
7353 or c. the -mlong-calls command line switch has been specified
7354
7355 However we do not generate a long call if the function:
7356
7357 d. has an __attribute__ ((short_call))
7358 or e. is inside the scope of a #pragma no_long_calls
7359 or f. is defined in the same section as the current function. */
7360
7361 bool
7362 arm_is_long_call_p (tree decl)
7363 {
7364 tree attrs;
7365
7366 if (!decl)
7367 return TARGET_LONG_CALLS;
7368
7369 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7370 if (lookup_attribute ("short_call", attrs))
7371 return false;
7372
7373 /* For "f", be conservative, and only cater for cases in which the
7374 whole of the current function is placed in the same section. */
7375 if (!flag_reorder_blocks_and_partition
7376 && TREE_CODE (decl) == FUNCTION_DECL
7377 && arm_function_in_section_p (decl, current_function_section ()))
7378 return false;
7379
7380 if (lookup_attribute ("long_call", attrs))
7381 return true;
7382
7383 return TARGET_LONG_CALLS;
7384 }
7385
7386 /* Return nonzero if it is ok to make a tail-call to DECL. */
7387 static bool
7388 arm_function_ok_for_sibcall (tree decl, tree exp)
7389 {
7390 unsigned long func_type;
7391
7392 if (cfun->machine->sibcall_blocked)
7393 return false;
7394
7395 if (TARGET_FDPIC)
7396 {
7397 /* In FDPIC, never tailcall something for which we have no decl:
7398 the target function could be in a different module, requiring
7399 a different FDPIC register value. */
7400 if (decl == NULL)
7401 return false;
7402 }
7403
7404 /* Never tailcall something if we are generating code for Thumb-1. */
7405 if (TARGET_THUMB1)
7406 return false;
7407
7408 /* The PIC register is live on entry to VxWorks PLT entries, so we
7409 must make the call before restoring the PIC register. */
7410 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7411 return false;
7412
7413 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7414 may be used both as target of the call and base register for restoring
7415      the VFP registers.  */
7416 if (TARGET_APCS_FRAME && TARGET_ARM
7417 && TARGET_HARD_FLOAT
7418 && decl && arm_is_long_call_p (decl))
7419 return false;
7420
7421 /* If we are interworking and the function is not declared static
7422 then we can't tail-call it unless we know that it exists in this
7423 compilation unit (since it might be a Thumb routine). */
7424 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7425 && !TREE_ASM_WRITTEN (decl))
7426 return false;
7427
7428 func_type = arm_current_func_type ();
7429 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7430 if (IS_INTERRUPT (func_type))
7431 return false;
7432
7433 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7434 generated for entry functions themselves. */
7435 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7436 return false;
7437
7438 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7439 this would complicate matters for later code generation. */
7440 if (TREE_CODE (exp) == CALL_EXPR)
7441 {
7442 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7443 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7444 return false;
7445 }
7446
7447 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7448 {
7449 /* Check that the return value locations are the same. For
7450 example that we aren't returning a value from the sibling in
7451 a VFP register but then need to transfer it to a core
7452 register. */
7453 rtx a, b;
7454 tree decl_or_type = decl;
7455
7456 /* If it is an indirect function pointer, get the function type. */
7457 if (!decl)
7458 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7459
7460 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7461 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7462 cfun->decl, false);
7463 if (!rtx_equal_p (a, b))
7464 return false;
7465 }
7466
7467 /* Never tailcall if function may be called with a misaligned SP. */
7468 if (IS_STACKALIGN (func_type))
7469 return false;
7470
7471 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7472 references should become a NOP. Don't convert such calls into
7473 sibling calls. */
7474 if (TARGET_AAPCS_BASED
7475 && arm_abi == ARM_ABI_AAPCS
7476 && decl
7477 && DECL_WEAK (decl))
7478 return false;
7479
7480 /* We cannot do a tailcall for an indirect call by descriptor if all the
7481 argument registers are used because the only register left to load the
7482 address is IP and it will already contain the static chain. */
7483 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7484 {
7485 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7486 CUMULATIVE_ARGS cum;
7487 cumulative_args_t cum_v;
7488
7489 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7490 cum_v = pack_cumulative_args (&cum);
7491
7492 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7493 {
7494 tree type = TREE_VALUE (t);
7495 if (!VOID_TYPE_P (type))
7496 {
7497 function_arg_info arg (type, /*named=*/true);
7498 arm_function_arg_advance (cum_v, arg);
7499 }
7500 }
7501
7502 function_arg_info arg (integer_type_node, /*named=*/true);
7503 if (!arm_function_arg (cum_v, arg))
7504 return false;
7505 }
7506
7507 /* Everything else is ok. */
7508 return true;
7509 }
7510
7511 \f
7512 /* Addressing mode support functions. */
7513
7514 /* Return nonzero if X is a legitimate immediate operand when compiling
7515 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7516 int
7517 legitimate_pic_operand_p (rtx x)
7518 {
7519 if (GET_CODE (x) == SYMBOL_REF
7520 || (GET_CODE (x) == CONST
7521 && GET_CODE (XEXP (x, 0)) == PLUS
7522 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7523 return 0;
7524
7525 return 1;
7526 }
7527
7528 /* Record that the current function needs a PIC register. If PIC_REG is null,
7529 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7530    both cases cfun->machine->pic_reg is initialized if we have not already done
7531    so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
7532    the PIC register is reloaded in the current position of the instruction stream
7533    regardless of whether it was loaded before.  Otherwise, it is only loaded if
7534    it has not been loaded already (crtl->uses_pic_offset_table is null).  Note that
7535 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7536 is only supported iff COMPUTE_NOW is false. */
7537
7538 static void
7539 require_pic_register (rtx pic_reg, bool compute_now)
7540 {
7541 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7542
7543 /* A lot of the logic here is made obscure by the fact that this
7544 routine gets called as part of the rtx cost estimation process.
7545 We don't want those calls to affect any assumptions about the real
7546 function; and further, we can't call entry_of_function() until we
7547 start the real expansion process. */
7548 if (!crtl->uses_pic_offset_table || compute_now)
7549 {
7550 gcc_assert (can_create_pseudo_p ()
7551 || (pic_reg != NULL_RTX
7552 && REG_P (pic_reg)
7553 && GET_MODE (pic_reg) == Pmode));
7554 if (arm_pic_register != INVALID_REGNUM
7555 && !compute_now
7556 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7557 {
7558 if (!cfun->machine->pic_reg)
7559 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7560
7561 /* Play games to avoid marking the function as needing pic
7562 if we are being called as part of the cost-estimation
7563 process. */
7564 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7565 crtl->uses_pic_offset_table = 1;
7566 }
7567 else
7568 {
7569 rtx_insn *seq, *insn;
7570
7571 if (pic_reg == NULL_RTX)
7572 pic_reg = gen_reg_rtx (Pmode);
7573 if (!cfun->machine->pic_reg)
7574 cfun->machine->pic_reg = pic_reg;
7575
7576 /* Play games to avoid marking the function as needing pic
7577 if we are being called as part of the cost-estimation
7578 process. */
7579 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7580 {
7581 crtl->uses_pic_offset_table = 1;
7582 start_sequence ();
7583
7584 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7585 && arm_pic_register > LAST_LO_REGNUM
7586 && !compute_now)
7587 emit_move_insn (cfun->machine->pic_reg,
7588 gen_rtx_REG (Pmode, arm_pic_register));
7589 else
7590 arm_load_pic_register (0UL, pic_reg);
7591
7592 seq = get_insns ();
7593 end_sequence ();
7594
7595 for (insn = seq; insn; insn = NEXT_INSN (insn))
7596 if (INSN_P (insn))
7597 INSN_LOCATION (insn) = prologue_location;
7598
7599 /* We can be called during expansion of PHI nodes, where
7600 we can't yet emit instructions directly in the final
7601 insn stream. Queue the insns on the entry edge, they will
7602 be committed after everything else is expanded. */
7603 if (currently_expanding_to_rtl)
7604 insert_insn_on_edge (seq,
7605 single_succ_edge
7606 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7607 else
7608 emit_insn (seq);
7609 }
7610 }
7611 }
7612 }
7613
7614 /* Generate insns to calculate the address of ORIG in pic mode. */
7615 static rtx_insn *
7616 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
7617 {
7618 rtx pat;
7619 rtx mem;
7620
7621 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7622
7623 /* Make the MEM as close to a constant as possible. */
7624 mem = SET_SRC (pat);
7625 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7626 MEM_READONLY_P (mem) = 1;
7627 MEM_NOTRAP_P (mem) = 1;
7628
7629 return emit_insn (pat);
7630 }
7631
7632 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
7633 created to hold the result of the load. If not NULL, PIC_REG indicates
7634 which register to use as PIC register, otherwise it is decided by register
7635 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
7636    location in the instruction stream, regardless of whether it was loaded
7637 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7638 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7639
7640 Returns the register REG into which the PIC load is performed. */
7641
7642 rtx
7643 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7644 bool compute_now)
7645 {
7646 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7647
7648 if (GET_CODE (orig) == SYMBOL_REF
7649 || GET_CODE (orig) == LABEL_REF)
7650 {
7651 if (reg == 0)
7652 {
7653 gcc_assert (can_create_pseudo_p ());
7654 reg = gen_reg_rtx (Pmode);
7655 }
7656
7657 /* VxWorks does not impose a fixed gap between segments; the run-time
7658 gap can be different from the object-file gap. We therefore can't
7659 use GOTOFF unless we are absolutely sure that the symbol is in the
7660 same segment as the GOT. Unfortunately, the flexibility of linker
7661 scripts means that we can't be sure of that in general, so assume
7662 that GOTOFF is never valid on VxWorks. */
7663 /* References to weak symbols cannot be resolved locally: they
7664 may be overridden by a non-weak definition at link time. */
7665 rtx_insn *insn;
7666 if ((GET_CODE (orig) == LABEL_REF
7667 || (GET_CODE (orig) == SYMBOL_REF
7668 && SYMBOL_REF_LOCAL_P (orig)
7669 && (SYMBOL_REF_DECL (orig)
7670 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
7671 && (!SYMBOL_REF_FUNCTION_P (orig)
7672 || arm_fdpic_local_funcdesc_p (orig))))
7673 && NEED_GOT_RELOC
7674 && arm_pic_data_is_text_relative)
7675 insn = arm_pic_static_addr (orig, reg);
7676 else
7677 {
7678 /* If this function doesn't have a pic register, create one now. */
7679 require_pic_register (pic_reg, compute_now);
7680
7681 if (pic_reg == NULL_RTX)
7682 pic_reg = cfun->machine->pic_reg;
7683
7684 insn = calculate_pic_address_constant (reg, pic_reg, orig);
7685 }
7686
7687 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7688 by loop. */
7689 set_unique_reg_note (insn, REG_EQUAL, orig);
7690
7691 return reg;
7692 }
7693 else if (GET_CODE (orig) == CONST)
7694 {
7695 rtx base, offset;
7696
7697 if (GET_CODE (XEXP (orig, 0)) == PLUS
7698 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7699 return orig;
7700
7701 /* Handle the case where we have: const (UNSPEC_TLS). */
7702 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7703 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7704 return orig;
7705
7706 /* Handle the case where we have:
7707 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7708 CONST_INT. */
7709 if (GET_CODE (XEXP (orig, 0)) == PLUS
7710 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7711 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7712 {
7713 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7714 return orig;
7715 }
7716
7717 if (reg == 0)
7718 {
7719 gcc_assert (can_create_pseudo_p ());
7720 reg = gen_reg_rtx (Pmode);
7721 }
7722
7723 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7724
7725 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
7726 pic_reg, compute_now);
7727 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7728 base == reg ? 0 : reg, pic_reg,
7729 compute_now);
7730
7731 if (CONST_INT_P (offset))
7732 {
7733 /* The base register doesn't really matter, we only want to
7734 test the index for the appropriate mode. */
7735 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7736 {
7737 gcc_assert (can_create_pseudo_p ());
7738 offset = force_reg (Pmode, offset);
7739 }
7740
7741 if (CONST_INT_P (offset))
7742 return plus_constant (Pmode, base, INTVAL (offset));
7743 }
7744
7745 if (GET_MODE_SIZE (mode) > 4
7746 && (GET_MODE_CLASS (mode) == MODE_INT
7747 || TARGET_SOFT_FLOAT))
7748 {
7749 emit_insn (gen_addsi3 (reg, base, offset));
7750 return reg;
7751 }
7752
7753 return gen_rtx_PLUS (Pmode, base, offset);
7754 }
7755
7756 return orig;
7757 }
7758
7759
7760 /* Whether a register is callee saved or not. This is necessary because high
7761 registers are marked as caller saved when optimizing for size on Thumb-1
7762 targets despite being callee saved in order to avoid using them. */
7763 #define callee_saved_reg_p(reg) \
7764 (!call_used_or_fixed_reg_p (reg) \
7765 || (TARGET_THUMB1 && optimize_size \
7766 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
7767
7768 /* Return a mask for the call-clobbered low registers that are unused
7769 at the end of the prologue. */
7770 static unsigned long
7771 thumb1_prologue_unused_call_clobbered_lo_regs (void)
7772 {
7773 unsigned long mask = 0;
7774 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7775
7776 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7777 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
7778 mask |= 1 << (reg - FIRST_LO_REGNUM);
7779 return mask;
7780 }
7781
7782 /* Similarly for the start of the epilogue. */
7783 static unsigned long
7784 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
7785 {
7786 unsigned long mask = 0;
7787 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
7788
7789 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7790 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
7791 mask |= 1 << (reg - FIRST_LO_REGNUM);
7792 return mask;
7793 }
7794
7795 /* Find a spare register to use during the prologue of a function.  */
7796
7797 static int
7798 thumb_find_work_register (unsigned long pushed_regs_mask)
7799 {
7800 int reg;
7801
7802 unsigned long unused_regs
7803 = thumb1_prologue_unused_call_clobbered_lo_regs ();
7804
7805 /* Check the argument registers first as these are call-used. The
7806 register allocation order means that sometimes r3 might be used
7807 but earlier argument registers might not, so check them all. */
7808 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
7809 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
7810 return reg;
7811
7812 /* Otherwise look for a call-saved register that is going to be pushed. */
7813 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7814 if (pushed_regs_mask & (1 << reg))
7815 return reg;
7816
7817 if (TARGET_THUMB2)
7818 {
7819 /* Thumb-2 can use high regs. */
7820 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7821 if (pushed_regs_mask & (1 << reg))
7822 return reg;
7823 }
7824 /* Something went wrong - thumb_compute_save_reg_mask()
7825 should have arranged for a suitable register to be pushed. */
7826 gcc_unreachable ();
7827 }
7828
7829 static GTY(()) int pic_labelno;
7830
7831 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7832 low register. */
7833
7834 void
7835 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
7836 {
7837 rtx l1, labelno, pic_tmp, pic_rtx;
7838
7839 if (crtl->uses_pic_offset_table == 0
7840 || TARGET_SINGLE_PIC_BASE
7841 || TARGET_FDPIC)
7842 return;
7843
7844 gcc_assert (flag_pic);
7845
7846 if (pic_reg == NULL_RTX)
7847 pic_reg = cfun->machine->pic_reg;
7848 if (TARGET_VXWORKS_RTP)
7849 {
7850 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7851 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7852 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7853
7854 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7855
7856 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7857 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7858 }
7859 else
7860 {
7861 /* We use an UNSPEC rather than a LABEL_REF because this label
7862 never appears in the code stream. */
7863
7864 labelno = GEN_INT (pic_labelno++);
7865 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7866 l1 = gen_rtx_CONST (VOIDmode, l1);
7867
7868 /* On the ARM the PC register contains 'dot + 8' at the time of the
7869 addition, on the Thumb it is 'dot + 4'. */
7870 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7871 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7872 UNSPEC_GOTSYM_OFF);
7873 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7874
7875 if (TARGET_32BIT)
7876 {
7877 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7878 }
7879 else /* TARGET_THUMB1 */
7880 {
7881 if (arm_pic_register != INVALID_REGNUM
7882 && REGNO (pic_reg) > LAST_LO_REGNUM)
7883 {
7884 /* We will have pushed the pic register, so we should always be
7885 able to find a work register. */
7886 pic_tmp = gen_rtx_REG (SImode,
7887 thumb_find_work_register (saved_regs));
7888 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7889 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7890 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7891 }
7892 else if (arm_pic_register != INVALID_REGNUM
7893 && arm_pic_register > LAST_LO_REGNUM
7894 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7895 {
7896 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7897 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7898 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7899 }
7900 else
7901 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7902 }
7903 }
7904
7905 /* Need to emit this whether or not we obey regdecls,
7906 since setjmp/longjmp can cause life info to screw up. */
7907 emit_use (pic_reg);
7908 }
7909
7910 /* Try to determine whether an object, referenced via ORIG, will be
7911 placed in the text or data segment. This is used in FDPIC mode, to
7912 decide which relocations to use when accessing ORIG. *IS_READONLY
7913 is set to true if ORIG is a read-only location, false otherwise.
7914 Return true if we could determine the location of ORIG, false
7915 otherwise. *IS_READONLY is valid only when we return true. */
7916 static bool
7917 arm_is_segment_info_known (rtx orig, bool *is_readonly)
7918 {
7919 *is_readonly = false;
7920
7921 if (GET_CODE (orig) == LABEL_REF)
7922 {
7923 *is_readonly = true;
7924 return true;
7925 }
7926
7927 if (SYMBOL_REF_P (orig))
7928 {
7929 if (CONSTANT_POOL_ADDRESS_P (orig))
7930 {
7931 *is_readonly = true;
7932 return true;
7933 }
7934 if (SYMBOL_REF_LOCAL_P (orig)
7935 && !SYMBOL_REF_EXTERNAL_P (orig)
7936 && SYMBOL_REF_DECL (orig)
7937 && (!DECL_P (SYMBOL_REF_DECL (orig))
7938 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
7939 {
7940 tree decl = SYMBOL_REF_DECL (orig);
7941 tree init = (TREE_CODE (decl) == VAR_DECL)
7942 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
7943 ? decl : 0;
7944 int reloc = 0;
7945 bool named_section, readonly;
7946
7947 if (init && init != error_mark_node)
7948 reloc = compute_reloc_for_constant (init);
7949
7950 named_section = TREE_CODE (decl) == VAR_DECL
7951 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
7952 readonly = decl_readonly_section (decl, reloc);
7953
7954 /* We don't know where the link script will put a named
7955 section, so return false in such a case. */
7956 if (named_section)
7957 return false;
7958
7959 *is_readonly = readonly;
7960 return true;
7961 }
7962
7963 /* We don't know. */
7964 return false;
7965 }
7966
7967 gcc_unreachable ();
7968 }
7969
7970 /* Generate code to load the address of a static var when flag_pic is set. */
7971 static rtx_insn *
7972 arm_pic_static_addr (rtx orig, rtx reg)
7973 {
7974 rtx l1, labelno, offset_rtx;
7975 rtx_insn *insn;
7976
7977 gcc_assert (flag_pic);
7978
7979 bool is_readonly = false;
7980 bool info_known = false;
7981
7982 if (TARGET_FDPIC
7983 && SYMBOL_REF_P (orig)
7984 && !SYMBOL_REF_FUNCTION_P (orig))
7985 info_known = arm_is_segment_info_known (orig, &is_readonly);
7986
7987 if (TARGET_FDPIC
7988 && SYMBOL_REF_P (orig)
7989 && !SYMBOL_REF_FUNCTION_P (orig)
7990 && !info_known)
7991 {
7992       /* We don't know where orig is stored, so we have to be
7993 pessimistic and use a GOT relocation. */
7994 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
7995
7996 insn = calculate_pic_address_constant (reg, pic_reg, orig);
7997 }
7998 else if (TARGET_FDPIC
7999 && SYMBOL_REF_P (orig)
8000 && (SYMBOL_REF_FUNCTION_P (orig)
8001 || !is_readonly))
8002 {
8003 /* We use the GOTOFF relocation. */
8004 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8005
8006 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8007 emit_insn (gen_movsi (reg, l1));
8008 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8009 }
8010 else
8011 {
8012 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8013 PC-relative access. */
8014 /* We use an UNSPEC rather than a LABEL_REF because this label
8015 never appears in the code stream. */
8016 labelno = GEN_INT (pic_labelno++);
8017 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8018 l1 = gen_rtx_CONST (VOIDmode, l1);
8019
8020 /* On the ARM the PC register contains 'dot + 8' at the time of the
8021 addition, on the Thumb it is 'dot + 4'. */
8022 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8023 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8024 UNSPEC_SYMBOL_OFFSET);
8025 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8026
8027 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8028 labelno));
8029 }
8030
8031 return insn;
8032 }
8033
8034 /* Return nonzero if X is valid as an ARM state addressing register. */
8035 static int
8036 arm_address_register_rtx_p (rtx x, int strict_p)
8037 {
8038 int regno;
8039
8040 if (!REG_P (x))
8041 return 0;
8042
8043 regno = REGNO (x);
8044
8045 if (strict_p)
8046 return ARM_REGNO_OK_FOR_BASE_P (regno);
8047
8048 return (regno <= LAST_ARM_REGNUM
8049 || regno >= FIRST_PSEUDO_REGISTER
8050 || regno == FRAME_POINTER_REGNUM
8051 || regno == ARG_POINTER_REGNUM);
8052 }
8053
8054 /* Return TRUE if this rtx is the difference of a symbol and a label,
8055 and will reduce to a PC-relative relocation in the object file.
8056 Expressions like this can be left alone when generating PIC, rather
8057 than forced through the GOT. */
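/* For illustration only (the names are hypothetical): an expression of
   the form
     (minus (symbol_ref "sym") (label_ref L))
   is such a difference and can be resolved with a PC-relative relocation
   rather than going through the GOT.  */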
8058 static int
8059 pcrel_constant_p (rtx x)
8060 {
8061 if (GET_CODE (x) == MINUS)
8062 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8063
8064 return FALSE;
8065 }
8066
8067 /* Return true if X will surely end up in an index register after next
8068 splitting pass. */
8069 static bool
8070 will_be_in_index_register (const_rtx x)
8071 {
8072 /* arm.md: calculate_pic_address will split this into a register. */
8073 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8074 }
8075
8076 /* Return nonzero if X is a valid ARM state address operand. */
8077 int
8078 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8079 int strict_p)
8080 {
8081 bool use_ldrd;
8082 enum rtx_code code = GET_CODE (x);
8083
8084 if (arm_address_register_rtx_p (x, strict_p))
8085 return 1;
8086
8087 use_ldrd = (TARGET_LDRD
8088 && (mode == DImode || mode == DFmode));
8089
8090 if (code == POST_INC || code == PRE_DEC
8091 || ((code == PRE_INC || code == POST_DEC)
8092 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8093 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8094
8095 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8096 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8097 && GET_CODE (XEXP (x, 1)) == PLUS
8098 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8099 {
8100 rtx addend = XEXP (XEXP (x, 1), 1);
8101
8102 /* Don't allow ldrd post increment by register because it's hard
8103 to fixup invalid register choices. */
8104 if (use_ldrd
8105 && GET_CODE (x) == POST_MODIFY
8106 && REG_P (addend))
8107 return 0;
8108
8109 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8110 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8111 }
8112
8113 /* After reload constants split into minipools will have addresses
8114 from a LABEL_REF. */
8115 else if (reload_completed
8116 && (code == LABEL_REF
8117 || (code == CONST
8118 && GET_CODE (XEXP (x, 0)) == PLUS
8119 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8120 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8121 return 1;
8122
8123 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8124 return 0;
8125
8126 else if (code == PLUS)
8127 {
8128 rtx xop0 = XEXP (x, 0);
8129 rtx xop1 = XEXP (x, 1);
8130
8131 return ((arm_address_register_rtx_p (xop0, strict_p)
8132 && ((CONST_INT_P (xop1)
8133 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8134 || (!strict_p && will_be_in_index_register (xop1))))
8135 || (arm_address_register_rtx_p (xop1, strict_p)
8136 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8137 }
8138
8139 #if 0
8140 /* Reload currently can't handle MINUS, so disable this for now */
8141 else if (GET_CODE (x) == MINUS)
8142 {
8143 rtx xop0 = XEXP (x, 0);
8144 rtx xop1 = XEXP (x, 1);
8145
8146 return (arm_address_register_rtx_p (xop0, strict_p)
8147 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8148 }
8149 #endif
8150
8151 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8152 && code == SYMBOL_REF
8153 && CONSTANT_POOL_ADDRESS_P (x)
8154 && ! (flag_pic
8155 && symbol_mentioned_p (get_pool_constant (x))
8156 && ! pcrel_constant_p (get_pool_constant (x))))
8157 return 1;
8158
8159 return 0;
8160 }
8161
8162 /* Return true if we can avoid creating a constant pool entry for x. */
8163 static bool
8164 can_avoid_literal_pool_for_label_p (rtx x)
8165 {
8166   /* Normally we can assign constant values to target registers without
8167      the help of the constant pool.  But there are cases where we have to use
8168      the constant pool, such as:
8169      1) assigning a label to a register.
8170      2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8171
8172 Constant pool access in format:
8173 (set (reg r0) (mem (symbol_ref (".LC0"))))
8174 will cause the use of literal pool (later in function arm_reorg).
8175 So here we mark such format as an invalid format, then the compiler
8176 will adjust it into:
8177 (set (reg r0) (symbol_ref (".LC0")))
8178 (set (reg r0) (mem (reg r0))).
8179 No extra register is required, and (mem (reg r0)) won't cause the use
8180 of literal pools. */
8181 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
8182 && CONSTANT_POOL_ADDRESS_P (x))
8183 return 1;
8184 return 0;
8185 }
8186
8187
8188 /* Return nonzero if X is a valid Thumb-2 address operand. */
8189 static int
8190 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8191 {
8192 bool use_ldrd;
8193 enum rtx_code code = GET_CODE (x);
8194
8195 if (arm_address_register_rtx_p (x, strict_p))
8196 return 1;
8197
8198 use_ldrd = (TARGET_LDRD
8199 && (mode == DImode || mode == DFmode));
8200
8201 if (code == POST_INC || code == PRE_DEC
8202 || ((code == PRE_INC || code == POST_DEC)
8203 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8204 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8205
8206 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8207 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8208 && GET_CODE (XEXP (x, 1)) == PLUS
8209 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8210 {
8211 /* Thumb-2 only has autoincrement by constant. */
8212 rtx addend = XEXP (XEXP (x, 1), 1);
8213 HOST_WIDE_INT offset;
8214
8215 if (!CONST_INT_P (addend))
8216 return 0;
8217
8218 offset = INTVAL(addend);
8219 if (GET_MODE_SIZE (mode) <= 4)
8220 return (offset > -256 && offset < 256);
8221
8222 return (use_ldrd && offset > -1024 && offset < 1024
8223 && (offset & 3) == 0);
8224 }
8225
8226 /* After reload constants split into minipools will have addresses
8227 from a LABEL_REF. */
8228 else if (reload_completed
8229 && (code == LABEL_REF
8230 || (code == CONST
8231 && GET_CODE (XEXP (x, 0)) == PLUS
8232 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8233 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8234 return 1;
8235
8236 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8237 return 0;
8238
8239 else if (code == PLUS)
8240 {
8241 rtx xop0 = XEXP (x, 0);
8242 rtx xop1 = XEXP (x, 1);
8243
8244 return ((arm_address_register_rtx_p (xop0, strict_p)
8245 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8246 || (!strict_p && will_be_in_index_register (xop1))))
8247 || (arm_address_register_rtx_p (xop1, strict_p)
8248 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8249 }
8250
8251 else if (can_avoid_literal_pool_for_label_p (x))
8252 return 0;
8253
8254 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8255 && code == SYMBOL_REF
8256 && CONSTANT_POOL_ADDRESS_P (x)
8257 && ! (flag_pic
8258 && symbol_mentioned_p (get_pool_constant (x))
8259 && ! pcrel_constant_p (get_pool_constant (x))))
8260 return 1;
8261
8262 return 0;
8263 }
8264
8265 /* Return nonzero if INDEX is valid for an address index operand in
8266 ARM state. */
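/* For illustration only -- examples of index operands accepted below for
   a word-sized access in ARM state: a constant offset such as
   [r0, #4092], a register index such as [r0, r1], or a scaled register
   such as [r0, r1, lsl #2].  Narrower ranges apply to coprocessor, Neon
   and DImode/DFmode accesses.  */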
8267 static int
8268 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8269 int strict_p)
8270 {
8271 HOST_WIDE_INT range;
8272 enum rtx_code code = GET_CODE (index);
8273
8274 /* Standard coprocessor addressing modes. */
8275 if (TARGET_HARD_FLOAT
8276 && (mode == SFmode || mode == DFmode))
8277 return (code == CONST_INT && INTVAL (index) < 1024
8278 && INTVAL (index) > -1024
8279 && (INTVAL (index) & 3) == 0);
8280
8281 /* For quad modes, we restrict the constant offset to be slightly less
8282 than what the instruction format permits. We do this because for
8283 quad mode moves, we will actually decompose them into two separate
8284 double-mode reads or writes. INDEX must therefore be a valid
8285 (double-mode) offset and so should INDEX+8. */
8286 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8287 return (code == CONST_INT
8288 && INTVAL (index) < 1016
8289 && INTVAL (index) > -1024
8290 && (INTVAL (index) & 3) == 0);
8291
8292 /* We have no such constraint on double mode offsets, so we permit the
8293 full range of the instruction format. */
8294 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8295 return (code == CONST_INT
8296 && INTVAL (index) < 1024
8297 && INTVAL (index) > -1024
8298 && (INTVAL (index) & 3) == 0);
8299
8300 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8301 return (code == CONST_INT
8302 && INTVAL (index) < 1024
8303 && INTVAL (index) > -1024
8304 && (INTVAL (index) & 3) == 0);
8305
8306 if (arm_address_register_rtx_p (index, strict_p)
8307 && (GET_MODE_SIZE (mode) <= 4))
8308 return 1;
8309
8310 if (mode == DImode || mode == DFmode)
8311 {
8312 if (code == CONST_INT)
8313 {
8314 HOST_WIDE_INT val = INTVAL (index);
8315
8316 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8317 If vldr is selected it uses arm_coproc_mem_operand. */
8318 if (TARGET_LDRD)
8319 return val > -256 && val < 256;
8320 else
8321 return val > -4096 && val < 4092;
8322 }
8323
8324 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8325 }
8326
8327 if (GET_MODE_SIZE (mode) <= 4
8328 && ! (arm_arch4
8329 && (mode == HImode
8330 || mode == HFmode
8331 || (mode == QImode && outer == SIGN_EXTEND))))
8332 {
8333 if (code == MULT)
8334 {
8335 rtx xiop0 = XEXP (index, 0);
8336 rtx xiop1 = XEXP (index, 1);
8337
8338 return ((arm_address_register_rtx_p (xiop0, strict_p)
8339 && power_of_two_operand (xiop1, SImode))
8340 || (arm_address_register_rtx_p (xiop1, strict_p)
8341 && power_of_two_operand (xiop0, SImode)));
8342 }
8343 else if (code == LSHIFTRT || code == ASHIFTRT
8344 || code == ASHIFT || code == ROTATERT)
8345 {
8346 rtx op = XEXP (index, 1);
8347
8348 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8349 && CONST_INT_P (op)
8350 && INTVAL (op) > 0
8351 && INTVAL (op) <= 31);
8352 }
8353 }
8354
8355 /* For ARM v4 we may be doing a sign-extend operation during the
8356 load. */
8357 if (arm_arch4)
8358 {
8359 if (mode == HImode
8360 || mode == HFmode
8361 || (outer == SIGN_EXTEND && mode == QImode))
8362 range = 256;
8363 else
8364 range = 4096;
8365 }
8366 else
8367 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8368
8369 return (code == CONST_INT
8370 && INTVAL (index) < range
8371 && INTVAL (index) > -range);
8372 }
8373
8374 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8375 index operand. i.e. 1, 2, 4 or 8. */
8376 static bool
8377 thumb2_index_mul_operand (rtx op)
8378 {
8379 HOST_WIDE_INT val;
8380
8381 if (!CONST_INT_P (op))
8382 return false;
8383
8384 val = INTVAL(op);
8385 return (val == 1 || val == 2 || val == 4 || val == 8);
8386 }
8387
8388 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8389 static int
8390 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8391 {
8392 enum rtx_code code = GET_CODE (index);
8393
8394 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8395 /* Standard coprocessor addressing modes. */
8396 if (TARGET_HARD_FLOAT
8397 && (mode == SFmode || mode == DFmode))
8398 return (code == CONST_INT && INTVAL (index) < 1024
8399            /* Thumb-2 allows only > -256 index range for its core register
8400 load/stores. Since we allow SF/DF in core registers, we have
8401 to use the intersection between -256~4096 (core) and -1024~1024
8402 (coprocessor). */
8403 && INTVAL (index) > -256
8404 && (INTVAL (index) & 3) == 0);
8405
8406 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8407 {
8408 /* For DImode assume values will usually live in core regs
8409 and only allow LDRD addressing modes. */
8410 if (!TARGET_LDRD || mode != DImode)
8411 return (code == CONST_INT
8412 && INTVAL (index) < 1024
8413 && INTVAL (index) > -1024
8414 && (INTVAL (index) & 3) == 0);
8415 }
8416
8417 /* For quad modes, we restrict the constant offset to be slightly less
8418 than what the instruction format permits. We do this because for
8419 quad mode moves, we will actually decompose them into two separate
8420 double-mode reads or writes. INDEX must therefore be a valid
8421 (double-mode) offset and so should INDEX+8. */
8422 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8423 return (code == CONST_INT
8424 && INTVAL (index) < 1016
8425 && INTVAL (index) > -1024
8426 && (INTVAL (index) & 3) == 0);
8427
8428 /* We have no such constraint on double mode offsets, so we permit the
8429 full range of the instruction format. */
8430 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8431 return (code == CONST_INT
8432 && INTVAL (index) < 1024
8433 && INTVAL (index) > -1024
8434 && (INTVAL (index) & 3) == 0);
8435
8436 if (arm_address_register_rtx_p (index, strict_p)
8437 && (GET_MODE_SIZE (mode) <= 4))
8438 return 1;
8439
8440 if (mode == DImode || mode == DFmode)
8441 {
8442 if (code == CONST_INT)
8443 {
8444 HOST_WIDE_INT val = INTVAL (index);
8445 /* Thumb-2 ldrd only has reg+const addressing modes.
8446 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8447 If vldr is selected it uses arm_coproc_mem_operand. */
8448 if (TARGET_LDRD)
8449 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8450 else
8451 return IN_RANGE (val, -255, 4095 - 4);
8452 }
8453 else
8454 return 0;
8455 }
8456
8457 if (code == MULT)
8458 {
8459 rtx xiop0 = XEXP (index, 0);
8460 rtx xiop1 = XEXP (index, 1);
8461
8462 return ((arm_address_register_rtx_p (xiop0, strict_p)
8463 && thumb2_index_mul_operand (xiop1))
8464 || (arm_address_register_rtx_p (xiop1, strict_p)
8465 && thumb2_index_mul_operand (xiop0)));
8466 }
8467 else if (code == ASHIFT)
8468 {
8469 rtx op = XEXP (index, 1);
8470
8471 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8472 && CONST_INT_P (op)
8473 && INTVAL (op) > 0
8474 && INTVAL (op) <= 3);
8475 }
8476
8477 return (code == CONST_INT
8478 && INTVAL (index) < 4096
8479 && INTVAL (index) > -256);
8480 }
8481
8482 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8483 static int
8484 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8485 {
8486 int regno;
8487
8488 if (!REG_P (x))
8489 return 0;
8490
8491 regno = REGNO (x);
8492
8493 if (strict_p)
8494 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8495
8496 return (regno <= LAST_LO_REGNUM
8497 || regno > LAST_VIRTUAL_REGISTER
8498 || regno == FRAME_POINTER_REGNUM
8499 || (GET_MODE_SIZE (mode) >= 4
8500 && (regno == STACK_POINTER_REGNUM
8501 || regno >= FIRST_PSEUDO_REGISTER
8502 || x == hard_frame_pointer_rtx
8503 || x == arg_pointer_rtx)));
8504 }
8505
8506 /* Return nonzero if x is a legitimate index register. This is the case
8507 for any base register that can access a QImode object. */
8508 inline static int
8509 thumb1_index_register_rtx_p (rtx x, int strict_p)
8510 {
8511 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8512 }
8513
8514 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8515
8516 The AP may be eliminated to either the SP or the FP, so we use the
8517 least common denominator, e.g. SImode, and offsets from 0 to 64.
8518
8519 ??? Verify whether the above is the right approach.
8520
8521 ??? Also, the FP may be eliminated to the SP, so perhaps that
8522 needs special handling also.
8523
8524 ??? Look at how the mips16 port solves this problem. It probably uses
8525 better ways to solve some of these problems.
8526
8527 Although it is not incorrect, we don't accept QImode and HImode
8528 addresses based on the frame pointer or arg pointer until the
8529 reload pass starts. This is so that eliminating such addresses
8530 into stack based ones won't produce impossible code. */
8531 int
8532 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8533 {
8534 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8535 return 0;
8536
8537 /* ??? Not clear if this is right. Experiment. */
8538 if (GET_MODE_SIZE (mode) < 4
8539 && !(reload_in_progress || reload_completed)
8540 && (reg_mentioned_p (frame_pointer_rtx, x)
8541 || reg_mentioned_p (arg_pointer_rtx, x)
8542 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8543 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8544 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8545 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8546 return 0;
8547
8548 /* Accept any base register. SP only in SImode or larger. */
8549 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8550 return 1;
8551
8552 /* This is PC relative data before arm_reorg runs. */
8553 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8554 && GET_CODE (x) == SYMBOL_REF
8555 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8556 return 1;
8557
8558 /* This is PC relative data after arm_reorg runs. */
8559 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8560 && reload_completed
8561 && (GET_CODE (x) == LABEL_REF
8562 || (GET_CODE (x) == CONST
8563 && GET_CODE (XEXP (x, 0)) == PLUS
8564 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8565 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8566 return 1;
8567
8568 /* Post-inc indexing only supported for SImode and larger. */
8569 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8570 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8571 return 1;
8572
8573 else if (GET_CODE (x) == PLUS)
8574 {
8575 /* REG+REG address can be any two index registers. */
8576 /* We disallow FRAME+REG addressing since we know that FRAME
8577 will be replaced with STACK, and SP relative addressing only
8578 permits SP+OFFSET. */
8579 if (GET_MODE_SIZE (mode) <= 4
8580 && XEXP (x, 0) != frame_pointer_rtx
8581 && XEXP (x, 1) != frame_pointer_rtx
8582 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8583 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8584 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8585 return 1;
8586
8587 /* REG+const has 5-7 bit offset for non-SP registers. */
8588 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8589 || XEXP (x, 0) == arg_pointer_rtx)
8590 && CONST_INT_P (XEXP (x, 1))
8591 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8592 return 1;
8593
8594 /* REG+const has 10-bit offset for SP, but only SImode and
8595 larger are supported. */
8596 /* ??? Should probably check for DI/DFmode overflow here
8597 just like GO_IF_LEGITIMATE_OFFSET does. */
8598 else if (REG_P (XEXP (x, 0))
8599 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8600 && GET_MODE_SIZE (mode) >= 4
8601 && CONST_INT_P (XEXP (x, 1))
8602 && INTVAL (XEXP (x, 1)) >= 0
8603 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8604 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8605 return 1;
8606
8607 else if (REG_P (XEXP (x, 0))
8608 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8609 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8610 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8611 && REGNO (XEXP (x, 0))
8612 <= LAST_VIRTUAL_POINTER_REGISTER))
8613 && GET_MODE_SIZE (mode) >= 4
8614 && CONST_INT_P (XEXP (x, 1))
8615 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8616 return 1;
8617 }
8618
8619 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8620 && GET_MODE_SIZE (mode) == 4
8621 && GET_CODE (x) == SYMBOL_REF
8622 && CONSTANT_POOL_ADDRESS_P (x)
8623 && ! (flag_pic
8624 && symbol_mentioned_p (get_pool_constant (x))
8625 && ! pcrel_constant_p (get_pool_constant (x))))
8626 return 1;
8627
8628 return 0;
8629 }
8630
8631 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8632 instruction of mode MODE. */
8633 int
8634 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8635 {
8636 switch (GET_MODE_SIZE (mode))
8637 {
8638 case 1:
8639 return val >= 0 && val < 32;
8640
8641 case 2:
8642 return val >= 0 && val < 64 && (val & 1) == 0;
8643
8644 default:
8645 return (val >= 0
8646 && (val + GET_MODE_SIZE (mode)) <= 128
8647 && (val & 3) == 0);
8648 }
8649 }
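/* For illustration (a sketch of the ranges accepted above; the exact
   limits come from the Thumb-1 load/store immediate encodings):
     QImode:  ldrb r0, [r1, #31]    accepted;  #32 rejected.
     HImode:  ldrh r0, [r1, #62]    accepted;  #63 and #64 rejected.
     SImode:  ldr  r0, [r1, #124]   accepted;  #126 and #128 rejected.  */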
8650
8651 bool
8652 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8653 {
8654 if (TARGET_ARM)
8655 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8656 else if (TARGET_THUMB2)
8657 return thumb2_legitimate_address_p (mode, x, strict_p);
8658 else /* if (TARGET_THUMB1) */
8659 return thumb1_legitimate_address_p (mode, x, strict_p);
8660 }
8661
8662 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8663
8664 Given an rtx X being reloaded into a reg required to be
8665 in class CLASS, return the class of reg to actually use.
8666 In general this is just CLASS, but for the Thumb core registers and
8667 immediate constants we prefer a LO_REGS class or a subset. */
8668
8669 static reg_class_t
8670 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8671 {
8672 if (TARGET_32BIT)
8673 return rclass;
8674 else
8675 {
8676 if (rclass == GENERAL_REGS)
8677 return LO_REGS;
8678 else
8679 return rclass;
8680 }
8681 }
8682
8683 /* Build the SYMBOL_REF for __tls_get_addr. */
8684
8685 static GTY(()) rtx tls_get_addr_libfunc;
8686
8687 static rtx
8688 get_tls_get_addr (void)
8689 {
8690 if (!tls_get_addr_libfunc)
8691 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8692 return tls_get_addr_libfunc;
8693 }
8694
8695 rtx
8696 arm_load_tp (rtx target)
8697 {
8698 if (!target)
8699 target = gen_reg_rtx (SImode);
8700
8701 if (TARGET_HARD_TP)
8702 {
8703 /* Can return in any reg. */
8704 emit_insn (gen_load_tp_hard (target));
8705 }
8706 else
8707 {
8708 /* Always returned in r0. Immediately copy the result into a pseudo,
8709 otherwise other uses of r0 (e.g. setting up function arguments) may
8710 clobber the value. */
8711
8712 rtx tmp;
8713
8714 if (TARGET_FDPIC)
8715 {
8716 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8717 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
8718
8719 emit_insn (gen_load_tp_soft_fdpic ());
8720
8721 /* Restore r9. */
8722 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
8723 }
8724 else
8725 emit_insn (gen_load_tp_soft ());
8726
8727 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8728 emit_move_insn (target, tmp);
8729 }
8730 return target;
8731 }
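/* Illustrative only: on typical targets the patterns used above expand to
   either a hardware thread-pointer read
       mrc p15, 0, <target>, c13, c0, 3
   or, for the software model, a call to __aeabi_read_tp whose result is
   defined to come back in r0, which is why the copy out of r0 is emitted
   immediately.  */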
8732
8733 static rtx
8734 load_tls_operand (rtx x, rtx reg)
8735 {
8736 rtx tmp;
8737
8738 if (reg == NULL_RTX)
8739 reg = gen_reg_rtx (SImode);
8740
8741 tmp = gen_rtx_CONST (SImode, x);
8742
8743 emit_move_insn (reg, tmp);
8744
8745 return reg;
8746 }
8747
8748 static rtx_insn *
8749 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8750 {
8751 rtx label, labelno = NULL_RTX, sum;
8752
8753 gcc_assert (reloc != TLS_DESCSEQ);
8754 start_sequence ();
8755
8756 if (TARGET_FDPIC)
8757 {
8758 sum = gen_rtx_UNSPEC (Pmode,
8759 gen_rtvec (2, x, GEN_INT (reloc)),
8760 UNSPEC_TLS);
8761 }
8762 else
8763 {
8764 labelno = GEN_INT (pic_labelno++);
8765 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8766 label = gen_rtx_CONST (VOIDmode, label);
8767
8768 sum = gen_rtx_UNSPEC (Pmode,
8769 gen_rtvec (4, x, GEN_INT (reloc), label,
8770 GEN_INT (TARGET_ARM ? 8 : 4)),
8771 UNSPEC_TLS);
8772 }
8773 reg = load_tls_operand (sum, reg);
8774
8775 if (TARGET_FDPIC)
8776 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
8777 else if (TARGET_ARM)
8778 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8779 else
8780 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8781
8782 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8783 LCT_PURE, /* LCT_CONST? */
8784 Pmode, reg, Pmode);
8785
8786 rtx_insn *insns = get_insns ();
8787 end_sequence ();
8788
8789 return insns;
8790 }
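/* A sketch of the sequence built above for the non-FDPIC, ARM-state case
   (the exact literal and relocation syntax are illustrative):
       ldr     r0, <pool entry for X's TLS offset>
   .LPICn:
       add     r0, pc, r0          @ pc reads as .LPICn + 8 in ARM state
       bl      __tls_get_addr
   The pc bias of 8 (4 for Thumb) is the GEN_INT argument passed to the
   UNSPEC_TLS above.  */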
8791
8792 static rtx
8793 arm_tls_descseq_addr (rtx x, rtx reg)
8794 {
8795 rtx labelno = GEN_INT (pic_labelno++);
8796 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8797 rtx sum = gen_rtx_UNSPEC (Pmode,
8798 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8799 gen_rtx_CONST (VOIDmode, label),
8800 GEN_INT (!TARGET_ARM)),
8801 UNSPEC_TLS);
8802 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8803
8804 emit_insn (gen_tlscall (x, labelno));
8805 if (!reg)
8806 reg = gen_reg_rtx (SImode);
8807 else
8808 gcc_assert (REGNO (reg) != R0_REGNUM);
8809
8810 emit_move_insn (reg, reg0);
8811
8812 return reg;
8813 }
8814
8815
8816 rtx
8817 legitimize_tls_address (rtx x, rtx reg)
8818 {
8819 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8820 rtx_insn *insns;
8821 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8822
8823 switch (model)
8824 {
8825 case TLS_MODEL_GLOBAL_DYNAMIC:
8826 if (TARGET_GNU2_TLS)
8827 {
8828 gcc_assert (!TARGET_FDPIC);
8829
8830 reg = arm_tls_descseq_addr (x, reg);
8831
8832 tp = arm_load_tp (NULL_RTX);
8833
8834 dest = gen_rtx_PLUS (Pmode, tp, reg);
8835 }
8836 else
8837 {
8838 /* Original scheme */
8839 if (TARGET_FDPIC)
8840 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
8841 else
8842 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8843 dest = gen_reg_rtx (Pmode);
8844 emit_libcall_block (insns, dest, ret, x);
8845 }
8846 return dest;
8847
8848 case TLS_MODEL_LOCAL_DYNAMIC:
8849 if (TARGET_GNU2_TLS)
8850 {
8851 gcc_assert (!TARGET_FDPIC);
8852
8853 reg = arm_tls_descseq_addr (x, reg);
8854
8855 tp = arm_load_tp (NULL_RTX);
8856
8857 dest = gen_rtx_PLUS (Pmode, tp, reg);
8858 }
8859 else
8860 {
8861 if (TARGET_FDPIC)
8862 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
8863 else
8864 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8865
8866 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8867 share the LDM result with other LD model accesses. */
8868 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8869 UNSPEC_TLS);
8870 dest = gen_reg_rtx (Pmode);
8871 emit_libcall_block (insns, dest, ret, eqv);
8872
8873 /* Load the addend. */
8874 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8875 GEN_INT (TLS_LDO32)),
8876 UNSPEC_TLS);
8877 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8878 dest = gen_rtx_PLUS (Pmode, dest, addend);
8879 }
8880 return dest;
8881
8882 case TLS_MODEL_INITIAL_EXEC:
8883 if (TARGET_FDPIC)
8884 {
8885 sum = gen_rtx_UNSPEC (Pmode,
8886 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
8887 UNSPEC_TLS);
8888 reg = load_tls_operand (sum, reg);
8889 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
8890 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
8891 }
8892 else
8893 {
8894 labelno = GEN_INT (pic_labelno++);
8895 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8896 label = gen_rtx_CONST (VOIDmode, label);
8897 sum = gen_rtx_UNSPEC (Pmode,
8898 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8899 GEN_INT (TARGET_ARM ? 8 : 4)),
8900 UNSPEC_TLS);
8901 reg = load_tls_operand (sum, reg);
8902
8903 if (TARGET_ARM)
8904 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8905 else if (TARGET_THUMB2)
8906 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8907 else
8908 {
8909 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8910 emit_move_insn (reg, gen_const_mem (SImode, reg));
8911 }
8912 }
8913
8914 tp = arm_load_tp (NULL_RTX);
8915
8916 return gen_rtx_PLUS (Pmode, tp, reg);
8917
8918 case TLS_MODEL_LOCAL_EXEC:
8919 tp = arm_load_tp (NULL_RTX);
8920
8921 reg = gen_rtx_UNSPEC (Pmode,
8922 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8923 UNSPEC_TLS);
8924 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8925
8926 return gen_rtx_PLUS (Pmode, tp, reg);
8927
8928 default:
8929 abort ();
8930 }
8931 }
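/* Illustrative only: a trivial TLS access such as

     __thread int counter;
     int next_id (void) { return ++counter; }

   ends up in one of the cases above.  Which model is used depends on
   -fpic/-fPIC, symbol visibility and -mtls-dialect: a non-PIC executable
   will normally take the local-exec path, a shared library referencing a
   preemptible symbol uses global-dynamic, and -mtls-dialect=gnu2 selects
   the descriptor-based (TARGET_GNU2_TLS) paths.  */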
8932
8933 /* Try machine-dependent ways of modifying an illegitimate address
8934 to be legitimate. If we find one, return the new, valid address. */
8935 rtx
8936 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8937 {
8938 if (arm_tls_referenced_p (x))
8939 {
8940 rtx addend = NULL;
8941
8942 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8943 {
8944 addend = XEXP (XEXP (x, 0), 1);
8945 x = XEXP (XEXP (x, 0), 0);
8946 }
8947
8948 if (GET_CODE (x) != SYMBOL_REF)
8949 return x;
8950
8951 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8952
8953 x = legitimize_tls_address (x, NULL_RTX);
8954
8955 if (addend)
8956 {
8957 x = gen_rtx_PLUS (SImode, x, addend);
8958 orig_x = x;
8959 }
8960 else
8961 return x;
8962 }
8963
8964 if (TARGET_THUMB1)
8965 return thumb_legitimize_address (x, orig_x, mode);
8966
8967 if (GET_CODE (x) == PLUS)
8968 {
8969 rtx xop0 = XEXP (x, 0);
8970 rtx xop1 = XEXP (x, 1);
8971
8972 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8973 xop0 = force_reg (SImode, xop0);
8974
8975 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8976 && !symbol_mentioned_p (xop1))
8977 xop1 = force_reg (SImode, xop1);
8978
8979 if (ARM_BASE_REGISTER_RTX_P (xop0)
8980 && CONST_INT_P (xop1))
8981 {
8982 HOST_WIDE_INT n, low_n;
8983 rtx base_reg, val;
8984 n = INTVAL (xop1);
8985
8986 /* VFP addressing modes actually allow greater offsets, but for
8987 now we just stick with the lowest common denominator. */
8988 if (mode == DImode || mode == DFmode)
8989 {
8990 low_n = n & 0x0f;
8991 n &= ~0x0f;
8992 if (low_n > 4)
8993 {
8994 n += 16;
8995 low_n -= 16;
8996 }
8997 }
8998 else
8999 {
9000 low_n = ((mode) == TImode ? 0
9001 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9002 n -= low_n;
9003 }
9004
9005 base_reg = gen_reg_rtx (SImode);
9006 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9007 emit_move_insn (base_reg, val);
9008 x = plus_constant (Pmode, base_reg, low_n);
9009 }
9010 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9011 x = gen_rtx_PLUS (SImode, xop0, xop1);
9012 }
9013
9014 /* XXX We don't allow MINUS any more -- see comment in
9015 arm_legitimate_address_outer_p (). */
9016 else if (GET_CODE (x) == MINUS)
9017 {
9018 rtx xop0 = XEXP (x, 0);
9019 rtx xop1 = XEXP (x, 1);
9020
9021 if (CONSTANT_P (xop0))
9022 xop0 = force_reg (SImode, xop0);
9023
9024 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9025 xop1 = force_reg (SImode, xop1);
9026
9027 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9028 x = gen_rtx_MINUS (SImode, xop0, xop1);
9029 }
9030
9031 /* Make sure to take full advantage of the pre-indexed addressing mode
9032 with absolute addresses, which often allows the base register to be
9033 factorized across multiple adjacent memory references, and might
9034 even allow the minipool to be avoided entirely. */
9035 else if (CONST_INT_P (x) && optimize > 0)
9036 {
9037 unsigned int bits;
9038 HOST_WIDE_INT mask, base, index;
9039 rtx base_reg;
9040
9041 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
9042 use an 8-bit index. So let's use a 12-bit index for SImode only and
9043 hope that arm_gen_constant will enable ldrb to use more bits. */
9044 bits = (mode == SImode) ? 12 : 8;
9045 mask = (1 << bits) - 1;
9046 base = INTVAL (x) & ~mask;
9047 index = INTVAL (x) & mask;
9048 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
9049 {
9050 /* It'll most probably be more efficient to generate the base
9051 with more bits set and use a negative index instead. */
9052 base |= mask;
9053 index -= mask;
9054 }
9055 base_reg = force_reg (SImode, GEN_INT (base));
9056 x = plus_constant (Pmode, base_reg, index);
9057 }
9058
9059 if (flag_pic)
9060 {
9061 /* We need to find and carefully transform any SYMBOL and LABEL
9062 references; so go back to the original address expression. */
9063 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9064 false /*compute_now*/);
9065
9066 if (new_x != orig_x)
9067 x = new_x;
9068 }
9069
9070 return x;
9071 }
9072
9073
9074 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9075 to be legitimate. If we find one, return the new, valid address. */
9076 rtx
9077 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9078 {
9079 if (GET_CODE (x) == PLUS
9080 && CONST_INT_P (XEXP (x, 1))
9081 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9082 || INTVAL (XEXP (x, 1)) < 0))
9083 {
9084 rtx xop0 = XEXP (x, 0);
9085 rtx xop1 = XEXP (x, 1);
9086 HOST_WIDE_INT offset = INTVAL (xop1);
9087
9088 /* Try and fold the offset into a biasing of the base register and
9089 then offsetting that. Don't do this when optimizing for space
9090 since it can cause too many CSEs. */
9091 if (optimize_size && offset >= 0
9092 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9093 {
9094 HOST_WIDE_INT delta;
9095
9096 if (offset >= 256)
9097 delta = offset - (256 - GET_MODE_SIZE (mode));
9098 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9099 delta = 31 * GET_MODE_SIZE (mode);
9100 else
9101 delta = offset & (~31 * GET_MODE_SIZE (mode));
9102
9103 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9104 NULL_RTX);
9105 x = plus_constant (Pmode, xop0, delta);
9106 }
9107 else if (offset < 0 && offset > -256)
9108 /* Small negative offsets are best done with a subtract before the
9109 dereference, since forcing these into a register normally takes two
9110 instructions. */
9111 x = force_operand (x, NULL_RTX);
9112 else
9113 {
9114 /* For the remaining cases, force the constant into a register. */
9115 xop1 = force_reg (SImode, xop1);
9116 x = gen_rtx_PLUS (SImode, xop0, xop1);
9117 }
9118 }
9119 else if (GET_CODE (x) == PLUS
9120 && s_register_operand (XEXP (x, 1), SImode)
9121 && !s_register_operand (XEXP (x, 0), SImode))
9122 {
9123 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9124
9125 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9126 }
9127
9128 if (flag_pic)
9129 {
9130 /* We need to find and carefully transform any SYMBOL and LABEL
9131 references; so go back to the original address expression. */
9132 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9133 false /*compute_now*/);
9134
9135 if (new_x != orig_x)
9136 x = new_x;
9137 }
9138
9139 return x;
9140 }
9141
9142 /* Return TRUE if X contains any TLS symbol references. */
9143
9144 bool
9145 arm_tls_referenced_p (rtx x)
9146 {
9147 if (! TARGET_HAVE_TLS)
9148 return false;
9149
9150 subrtx_iterator::array_type array;
9151 FOR_EACH_SUBRTX (iter, array, x, ALL)
9152 {
9153 const_rtx x = *iter;
9154 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
9155 {
9156 /* ARM currently does not provide relocations to encode TLS variables
9157 into AArch32 instructions, only data, so there is currently no way
9158 to implement these if a literal pool is disabled. */
9159 if (arm_disable_literal_pool)
9160 sorry ("accessing thread-local storage is not currently supported "
9161 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9162
9163 return true;
9164 }
9165
9166 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9167 TLS offsets, not real symbol references. */
9168 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9169 iter.skip_subrtxes ();
9170 }
9171 return false;
9172 }
9173
9174 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9175
9176 On the ARM, allow any integer (invalid ones are removed later by insn
9177 patterns), nice doubles and symbol_refs which refer to the function's
9178 constant pool XXX.
9179
9180 When generating pic allow anything. */
9181
9182 static bool
9183 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9184 {
9185 return flag_pic || !label_mentioned_p (x);
9186 }
9187
9188 static bool
9189 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9190 {
9191 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9192 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9193 for ARMv8-M Baseline or later the result is valid. */
9194 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9195 x = XEXP (x, 0);
9196
9197 return (CONST_INT_P (x)
9198 || CONST_DOUBLE_P (x)
9199 || CONSTANT_ADDRESS_P (x)
9200 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
9201 || flag_pic);
9202 }
9203
9204 static bool
9205 arm_legitimate_constant_p (machine_mode mode, rtx x)
9206 {
9207 return (!arm_cannot_force_const_mem (mode, x)
9208 && (TARGET_32BIT
9209 ? arm_legitimate_constant_p_1 (mode, x)
9210 : thumb_legitimate_constant_p (mode, x)));
9211 }
9212
9213 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9214
9215 static bool
9216 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9217 {
9218 rtx base, offset;
9219 split_const (x, &base, &offset);
9220
9221 if (SYMBOL_REF_P (base))
9222 {
9223 /* Function symbols cannot have an offset due to the Thumb bit. */
9224 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9225 && INTVAL (offset) != 0)
9226 return true;
9227
9228 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9229 && !offset_within_block_p (base, INTVAL (offset)))
9230 return true;
9231 }
9232 return arm_tls_referenced_p (x);
9233 }
9234 \f
9235 #define REG_OR_SUBREG_REG(X) \
9236 (REG_P (X) \
9237 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
9238
9239 #define REG_OR_SUBREG_RTX(X) \
9240 (REG_P (X) ? (X) : SUBREG_REG (X))
9241
9242 static inline int
9243 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9244 {
9245 machine_mode mode = GET_MODE (x);
9246 int total, words;
9247
9248 switch (code)
9249 {
9250 case ASHIFT:
9251 case ASHIFTRT:
9252 case LSHIFTRT:
9253 case ROTATERT:
9254 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9255
9256 case PLUS:
9257 case MINUS:
9258 case COMPARE:
9259 case NEG:
9260 case NOT:
9261 return COSTS_N_INSNS (1);
9262
9263 case MULT:
9264 if (arm_arch6m && arm_m_profile_small_mul)
9265 return COSTS_N_INSNS (32);
9266
9267 if (CONST_INT_P (XEXP (x, 1)))
9268 {
9269 int cycles = 0;
9270 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9271
9272 while (i)
9273 {
9274 i >>= 2;
9275 cycles++;
9276 }
9277 return COSTS_N_INSNS (2) + cycles;
9278 }
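/* For example (illustrative): a constant multiplier of 0x100 makes the
   loop above iterate five times, giving COSTS_N_INSNS (2) + 5. */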
9279 return COSTS_N_INSNS (1) + 16;
9280
9281 case SET:
9282 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9283 the mode. */
9284 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9285 return (COSTS_N_INSNS (words)
9286 + 4 * ((MEM_P (SET_SRC (x)))
9287 + MEM_P (SET_DEST (x))));
9288
9289 case CONST_INT:
9290 if (outer == SET)
9291 {
9292 if (UINTVAL (x) < 256
9293 /* 16-bit constant. */
9294 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9295 return 0;
9296 if (thumb_shiftable_const (INTVAL (x)))
9297 return COSTS_N_INSNS (2);
9298 return COSTS_N_INSNS (3);
9299 }
9300 else if ((outer == PLUS || outer == COMPARE)
9301 && INTVAL (x) < 256 && INTVAL (x) > -256)
9302 return 0;
9303 else if ((outer == IOR || outer == XOR || outer == AND)
9304 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9305 return COSTS_N_INSNS (1);
9306 else if (outer == AND)
9307 {
9308 int i;
9309 /* This duplicates the tests in the andsi3 expander. */
9310 for (i = 9; i <= 31; i++)
9311 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9312 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9313 return COSTS_N_INSNS (2);
9314 }
9315 else if (outer == ASHIFT || outer == ASHIFTRT
9316 || outer == LSHIFTRT)
9317 return 0;
9318 return COSTS_N_INSNS (2);
9319
9320 case CONST:
9321 case CONST_DOUBLE:
9322 case LABEL_REF:
9323 case SYMBOL_REF:
9324 return COSTS_N_INSNS (3);
9325
9326 case UDIV:
9327 case UMOD:
9328 case DIV:
9329 case MOD:
9330 return 100;
9331
9332 case TRUNCATE:
9333 return 99;
9334
9335 case AND:
9336 case XOR:
9337 case IOR:
9338 /* XXX guess. */
9339 return 8;
9340
9341 case MEM:
9342 /* XXX another guess. */
9343 /* Memory costs quite a lot for the first word, but subsequent words
9344 load at the equivalent of a single insn each. */
9345 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9346 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9347 ? 4 : 0));
9348
9349 case IF_THEN_ELSE:
9350 /* XXX a guess. */
9351 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9352 return 14;
9353 return 2;
9354
9355 case SIGN_EXTEND:
9356 case ZERO_EXTEND:
9357 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9358 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9359
9360 if (mode == SImode)
9361 return total;
9362
9363 if (arm_arch6)
9364 return total + COSTS_N_INSNS (1);
9365
9366 /* Assume a two-shift sequence. Increase the cost slightly so
9367 we prefer actual shifts over an extend operation. */
9368 return total + 1 + COSTS_N_INSNS (2);
9369
9370 default:
9371 return 99;
9372 }
9373 }
9374
9375 /* Estimates the size cost of thumb1 instructions.
9376 For now most of the code is copied from thumb1_rtx_costs. We need more
9377 fine-grained tuning when we have more related test cases. */
9378 static inline int
9379 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9380 {
9381 machine_mode mode = GET_MODE (x);
9382 int words, cost;
9383
9384 switch (code)
9385 {
9386 case ASHIFT:
9387 case ASHIFTRT:
9388 case LSHIFTRT:
9389 case ROTATERT:
9390 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9391
9392 case PLUS:
9393 case MINUS:
9394 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9395 defined by RTL expansion, especially for the expansion of
9396 multiplication. */
9397 if ((GET_CODE (XEXP (x, 0)) == MULT
9398 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9399 || (GET_CODE (XEXP (x, 1)) == MULT
9400 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9401 return COSTS_N_INSNS (2);
9402 /* Fall through. */
9403 case COMPARE:
9404 case NEG:
9405 case NOT:
9406 return COSTS_N_INSNS (1);
9407
9408 case MULT:
9409 if (CONST_INT_P (XEXP (x, 1)))
9410 {
9411 /* The Thumb-1 mul instruction can't operate on a constant; we must load
9412 it into a register first. */
9413 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9414 /* For targets that have a very small, high-latency multiply
9415 unit, we prefer to synthesize the mult with up to 5 instructions,
9416 giving a good balance between size and performance. */
9417 if (arm_arch6m && arm_m_profile_small_mul)
9418 return COSTS_N_INSNS (5);
9419 else
9420 return COSTS_N_INSNS (1) + const_size;
9421 }
9422 return COSTS_N_INSNS (1);
9423
9424 case SET:
9425 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9426 the mode. */
9427 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9428 cost = COSTS_N_INSNS (words);
9429 if (satisfies_constraint_J (SET_SRC (x))
9430 || satisfies_constraint_K (SET_SRC (x))
9431 /* Too big an immediate for a 2-byte mov, using MOVT. */
9432 || (CONST_INT_P (SET_SRC (x))
9433 && UINTVAL (SET_SRC (x)) >= 256
9434 && TARGET_HAVE_MOVT
9435 && satisfies_constraint_j (SET_SRC (x)))
9436 /* thumb1_movdi_insn. */
9437 || ((words > 1) && MEM_P (SET_SRC (x))))
9438 cost += COSTS_N_INSNS (1);
9439 return cost;
9440
9441 case CONST_INT:
9442 if (outer == SET)
9443 {
9444 if (UINTVAL (x) < 256)
9445 return COSTS_N_INSNS (1);
9446 /* movw is 4 bytes long. */
9447 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9448 return COSTS_N_INSNS (2);
9449 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9450 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9451 return COSTS_N_INSNS (2);
9452 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9453 if (thumb_shiftable_const (INTVAL (x)))
9454 return COSTS_N_INSNS (2);
9455 return COSTS_N_INSNS (3);
9456 }
9457 else if ((outer == PLUS || outer == COMPARE)
9458 && INTVAL (x) < 256 && INTVAL (x) > -256)
9459 return 0;
9460 else if ((outer == IOR || outer == XOR || outer == AND)
9461 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9462 return COSTS_N_INSNS (1);
9463 else if (outer == AND)
9464 {
9465 int i;
9466 /* This duplicates the tests in the andsi3 expander. */
9467 for (i = 9; i <= 31; i++)
9468 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9469 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9470 return COSTS_N_INSNS (2);
9471 }
9472 else if (outer == ASHIFT || outer == ASHIFTRT
9473 || outer == LSHIFTRT)
9474 return 0;
9475 return COSTS_N_INSNS (2);
9476
9477 case CONST:
9478 case CONST_DOUBLE:
9479 case LABEL_REF:
9480 case SYMBOL_REF:
9481 return COSTS_N_INSNS (3);
9482
9483 case UDIV:
9484 case UMOD:
9485 case DIV:
9486 case MOD:
9487 return 100;
9488
9489 case TRUNCATE:
9490 return 99;
9491
9492 case AND:
9493 case XOR:
9494 case IOR:
9495 return COSTS_N_INSNS (1);
9496
9497 case MEM:
9498 return (COSTS_N_INSNS (1)
9499 + COSTS_N_INSNS (1)
9500 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9501 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9502 ? COSTS_N_INSNS (1) : 0));
9503
9504 case IF_THEN_ELSE:
9505 /* XXX a guess. */
9506 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9507 return 14;
9508 return 2;
9509
9510 case ZERO_EXTEND:
9511 /* XXX still guessing. */
9512 switch (GET_MODE (XEXP (x, 0)))
9513 {
9514 case E_QImode:
9515 return (1 + (mode == DImode ? 4 : 0)
9516 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9517
9518 case E_HImode:
9519 return (4 + (mode == DImode ? 4 : 0)
9520 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9521
9522 case E_SImode:
9523 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9524
9525 default:
9526 return 99;
9527 }
9528
9529 default:
9530 return 99;
9531 }
9532 }
9533
9534 /* Helper function for arm_rtx_costs. If one operand of OP, a
9535 PLUS, is the carry flag, then return the other operand. If
9536 neither is the carry, return OP unchanged. */
9537 static rtx
9538 strip_carry_operation (rtx op)
9539 {
9540 gcc_assert (GET_CODE (op) == PLUS);
9541 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
9542 return XEXP (op, 1);
9543 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
9544 return XEXP (op, 0);
9545 return op;
9546 }
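/* For example (schematic): given (plus:SI (reg:SI r1) (<carry>)),
   where <carry> stands for anything matched by arm_carry_operation,
   this returns (reg:SI r1), so the caller can cost the true source
   operand of an ADC on its own.  */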
9547
9548 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9549 operand, then return the operand that is being shifted. If the shift
9550 is not by a constant, then set SHIFT_REG to point to the operand.
9551 Return NULL if OP is not a shifter operand. */
9552 static rtx
9553 shifter_op_p (rtx op, rtx *shift_reg)
9554 {
9555 enum rtx_code code = GET_CODE (op);
9556
9557 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9558 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9559 return XEXP (op, 0);
9560 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9561 return XEXP (op, 0);
9562 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9563 || code == ASHIFTRT)
9564 {
9565 if (!CONST_INT_P (XEXP (op, 1)))
9566 *shift_reg = XEXP (op, 1);
9567 return XEXP (op, 0);
9568 }
9569
9570 return NULL;
9571 }
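/* For example (illustrative): for the rtx
     (plus:SI (mult:SI (reg:SI a) (const_int 4)) (reg:SI b))
   shifter_op_p on the MULT returns (reg:SI a) with a constant shift
   amount, so the whole PLUS can be costed as a single shift-and-add
   such as
     add rd, rb, ra, lsl #2.  */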
9572
9573 static bool
9574 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9575 {
9576 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9577 rtx_code code = GET_CODE (x);
9578 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9579
9580 switch (XINT (x, 1))
9581 {
9582 case UNSPEC_UNALIGNED_LOAD:
9583 /* We can only do unaligned loads into the integer unit, and we can't
9584 use LDM or LDRD. */
9585 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9586 if (speed_p)
9587 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9588 + extra_cost->ldst.load_unaligned);
9589
9590 #ifdef NOT_YET
9591 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9592 ADDR_SPACE_GENERIC, speed_p);
9593 #endif
9594 return true;
9595
9596 case UNSPEC_UNALIGNED_STORE:
9597 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9598 if (speed_p)
9599 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9600 + extra_cost->ldst.store_unaligned);
9601
9602 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9603 #ifdef NOT_YET
9604 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9605 ADDR_SPACE_GENERIC, speed_p);
9606 #endif
9607 return true;
9608
9609 case UNSPEC_VRINTZ:
9610 case UNSPEC_VRINTP:
9611 case UNSPEC_VRINTM:
9612 case UNSPEC_VRINTR:
9613 case UNSPEC_VRINTX:
9614 case UNSPEC_VRINTA:
9615 if (speed_p)
9616 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9617
9618 return true;
9619 default:
9620 *cost = COSTS_N_INSNS (2);
9621 break;
9622 }
9623 return true;
9624 }
9625
9626 /* Cost of a libcall. We assume one insn per argument, an amount for the
9627 call (one insn for -Os) and then one for processing the result. */
9628 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
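/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing
   for speed and COSTS_N_INSNS (4) when optimizing for size.  */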
9629
9630 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9631 do \
9632 { \
9633 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9634 if (shift_op != NULL \
9635 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9636 { \
9637 if (shift_reg) \
9638 { \
9639 if (speed_p) \
9640 *cost += extra_cost->alu.arith_shift_reg; \
9641 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9642 ASHIFT, 1, speed_p); \
9643 } \
9644 else if (speed_p) \
9645 *cost += extra_cost->alu.arith_shift; \
9646 \
9647 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9648 ASHIFT, 0, speed_p) \
9649 + rtx_cost (XEXP (x, 1 - IDX), \
9650 GET_MODE (shift_op), \
9651 OP, 1, speed_p)); \
9652 return true; \
9653 } \
9654 } \
9655 while (0)
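/* For example (illustrative): given
     (plus:HI (ashift:HI (reg:HI a) (const_int 2)) (reg:HI b))
   the macro above recognizes the left shift and costs the whole
   expression as one arith-with-shift operation (roughly
     add rd, rb, ra, lsl #2
   on the widened values) instead of recursing into the shift
   separately.  */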
9656
9657 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9658 considering the costs of the addressing mode and memory access
9659 separately. */
9660 static bool
9661 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9662 int *cost, bool speed_p)
9663 {
9664 machine_mode mode = GET_MODE (x);
9665
9666 *cost = COSTS_N_INSNS (1);
9667
9668 if (flag_pic
9669 && GET_CODE (XEXP (x, 0)) == PLUS
9670 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9671 /* This will be split into two instructions. Add the cost of the
9672 additional instruction here. The cost of the memory access is computed
9673 below. See arm.md:calculate_pic_address. */
9674 *cost += COSTS_N_INSNS (1);
9675
9676 /* Calculate cost of the addressing mode. */
9677 if (speed_p)
9678 {
9679 arm_addr_mode_op op_type;
9680 switch (GET_CODE (XEXP (x, 0)))
9681 {
9682 default:
9683 case REG:
9684 op_type = AMO_DEFAULT;
9685 break;
9686 case MINUS:
9687 /* MINUS does not appear in RTL, but the architecture supports it,
9688 so handle this case defensively. */
9689 /* fall through */
9690 case PLUS:
9691 op_type = AMO_NO_WB;
9692 break;
9693 case PRE_INC:
9694 case PRE_DEC:
9695 case POST_INC:
9696 case POST_DEC:
9697 case PRE_MODIFY:
9698 case POST_MODIFY:
9699 op_type = AMO_WB;
9700 break;
9701 }
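/* Illustrative mapping of the classification above:
   [r1] -> AMO_DEFAULT; [r1, #8] or [r1, r2] -> AMO_NO_WB;
   [r1, #8]! or [r1], #8 -> AMO_WB (writeback forms).  */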
9702
9703 if (VECTOR_MODE_P (mode))
9704 *cost += current_tune->addr_mode_costs->vector[op_type];
9705 else if (FLOAT_MODE_P (mode))
9706 *cost += current_tune->addr_mode_costs->fp[op_type];
9707 else
9708 *cost += current_tune->addr_mode_costs->integer[op_type];
9709 }
9710
9711 /* Calculate cost of memory access. */
9712 if (speed_p)
9713 {
9714 if (FLOAT_MODE_P (mode))
9715 {
9716 if (GET_MODE_SIZE (mode) == 8)
9717 *cost += extra_cost->ldst.loadd;
9718 else
9719 *cost += extra_cost->ldst.loadf;
9720 }
9721 else if (VECTOR_MODE_P (mode))
9722 *cost += extra_cost->ldst.loadv;
9723 else
9724 {
9725 /* Integer modes */
9726 if (GET_MODE_SIZE (mode) == 8)
9727 *cost += extra_cost->ldst.ldrd;
9728 else
9729 *cost += extra_cost->ldst.load;
9730 }
9731 }
9732
9733 return true;
9734 }
9735
9736 /* RTX costs. Make an estimate of the cost of executing the operation
9737 X, which is contained within an operation with code OUTER_CODE.
9738 SPEED_P indicates whether the cost desired is the performance cost,
9739 or the size cost. The estimate is stored in COST and the return
9740 value is TRUE if the cost calculation is final, or FALSE if the
9741 caller should recurse through the operands of X to add additional
9742 costs.
9743
9744 We currently make no attempt to model the size savings of Thumb-2
9745 16-bit instructions. At the normal points in compilation where
9746 this code is called we have no measure of whether the condition
9747 flags are live or not, and thus no realistic way to determine what
9748 the size will eventually be. */
9749 static bool
9750 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9751 const struct cpu_cost_table *extra_cost,
9752 int *cost, bool speed_p)
9753 {
9754 machine_mode mode = GET_MODE (x);
9755
9756 *cost = COSTS_N_INSNS (1);
9757
9758 if (TARGET_THUMB1)
9759 {
9760 if (speed_p)
9761 *cost = thumb1_rtx_costs (x, code, outer_code);
9762 else
9763 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9764 return true;
9765 }
9766
9767 switch (code)
9768 {
9769 case SET:
9770 *cost = 0;
9771 /* SET RTXs don't have a mode so we get it from the destination. */
9772 mode = GET_MODE (SET_DEST (x));
9773
9774 if (REG_P (SET_SRC (x))
9775 && REG_P (SET_DEST (x)))
9776 {
9777 /* Assume that most copies can be done with a single insn,
9778 unless we don't have HW FP, in which case everything
9779 larger than word mode will require two insns. */
9780 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9781 && GET_MODE_SIZE (mode) > 4)
9782 || mode == DImode)
9783 ? 2 : 1);
9784 /* Conditional register moves can be encoded
9785 in 16 bits in Thumb mode. */
9786 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9787 *cost >>= 1;
9788
9789 return true;
9790 }
9791
9792 if (CONST_INT_P (SET_SRC (x)))
9793 {
9794 /* Handle CONST_INT here, since the value doesn't have a mode
9795 and we would otherwise be unable to work out the true cost. */
9796 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9797 0, speed_p);
9798 outer_code = SET;
9799 /* Slightly lower the cost of setting a core reg to a constant.
9800 This helps break up chains and allows for better scheduling. */
9801 if (REG_P (SET_DEST (x))
9802 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9803 *cost -= 1;
9804 x = SET_SRC (x);
9805 /* Immediate moves with an immediate in the range [0, 255] can be
9806 encoded in 16 bits in Thumb mode. */
9807 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9808 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9809 *cost >>= 1;
9810 goto const_int_cost;
9811 }
9812
9813 return false;
9814
9815 case MEM:
9816 return arm_mem_costs (x, extra_cost, cost, speed_p);
9817
9818 case PARALLEL:
9819 {
9820 /* Calculations of LDM costs are complex. We assume an initial cost
9821 (ldm_1st) which will load the number of registers mentioned in
9822 ldm_regs_per_insn_1st registers; then each additional
9823 ldm_regs_per_insn_subsequent registers cost one more insn. The
9824 formula for N regs is thus:
9825
9826 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9827 + ldm_regs_per_insn_subsequent - 1)
9828 / ldm_regs_per_insn_subsequent).
9829
9830 Additional costs may also be added for addressing. A similar
9831 formula is used for STM. */
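/* For example, assuming ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2 (the real values come from the
   per-CPU cost tables), an 8-register LDM is charged the initial LDM
   amount plus COSTS_N_INSNS ((MAX (8 - 2, 0) + 2 - 1) / 2)
   == COSTS_N_INSNS (3).  */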
9832
9833 bool is_ldm = load_multiple_operation (x, SImode);
9834 bool is_stm = store_multiple_operation (x, SImode);
9835
9836 if (is_ldm || is_stm)
9837 {
9838 if (speed_p)
9839 {
9840 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9841 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9842 ? extra_cost->ldst.ldm_regs_per_insn_1st
9843 : extra_cost->ldst.stm_regs_per_insn_1st;
9844 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9845 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9846 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9847
9848 *cost += regs_per_insn_1st
9849 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9850 + regs_per_insn_sub - 1)
9851 / regs_per_insn_sub);
9852 return true;
9853 }
9854
9855 }
9856 return false;
9857 }
9858 case DIV:
9859 case UDIV:
9860 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9861 && (mode == SFmode || !TARGET_VFP_SINGLE))
9862 *cost += COSTS_N_INSNS (speed_p
9863 ? extra_cost->fp[mode != SFmode].div : 0);
9864 else if (mode == SImode && TARGET_IDIV)
9865 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9866 else
9867 *cost = LIBCALL_COST (2);
9868
9869 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9870 are possible, udiv is preferred. */
9871 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9872 return false; /* All arguments must be in registers. */
9873
9874 case MOD:
9875 /* MOD by a power of 2 can be expanded as:
9876 rsbs r1, r0, #0
9877 and r0, r0, #(n - 1)
9878 and r1, r1, #(n - 1)
9879 rsbpl r0, r1, #0. */
9880 if (CONST_INT_P (XEXP (x, 1))
9881 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9882 && mode == SImode)
9883 {
9884 *cost += COSTS_N_INSNS (3);
9885
9886 if (speed_p)
9887 *cost += 2 * extra_cost->alu.logical
9888 + extra_cost->alu.arith;
9889 return true;
9890 }
9891
9892 /* Fall-through. */
9893 case UMOD:
9894 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9895 are possible, udiv is preferred. */
9896 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9897 return false; /* All arguments must be in registers. */
9898
9899 case ROTATE:
9900 if (mode == SImode && REG_P (XEXP (x, 1)))
9901 {
9902 *cost += (COSTS_N_INSNS (1)
9903 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9904 if (speed_p)
9905 *cost += extra_cost->alu.shift_reg;
9906 return true;
9907 }
9908 /* Fall through */
9909 case ROTATERT:
9910 case ASHIFT:
9911 case LSHIFTRT:
9912 case ASHIFTRT:
9913 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9914 {
9915 *cost += (COSTS_N_INSNS (2)
9916 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9917 if (speed_p)
9918 *cost += 2 * extra_cost->alu.shift;
9919 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9920 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9921 *cost += 1;
9922 return true;
9923 }
9924 else if (mode == SImode)
9925 {
9926 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9927 /* Slightly disparage register shifts at -Os, but not by much. */
9928 if (!CONST_INT_P (XEXP (x, 1)))
9929 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9930 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9931 return true;
9932 }
9933 else if (GET_MODE_CLASS (mode) == MODE_INT
9934 && GET_MODE_SIZE (mode) < 4)
9935 {
9936 if (code == ASHIFT)
9937 {
9938 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9939 /* Slightly disparage register shifts at -Os, but not by
9940 much. */
9941 if (!CONST_INT_P (XEXP (x, 1)))
9942 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9943 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9944 }
9945 else if (code == LSHIFTRT || code == ASHIFTRT)
9946 {
9947 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9948 {
9949 /* Can use SBFX/UBFX. */
9950 if (speed_p)
9951 *cost += extra_cost->alu.bfx;
9952 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9953 }
9954 else
9955 {
9956 *cost += COSTS_N_INSNS (1);
9957 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9958 if (speed_p)
9959 {
9960 if (CONST_INT_P (XEXP (x, 1)))
9961 *cost += 2 * extra_cost->alu.shift;
9962 else
9963 *cost += (extra_cost->alu.shift
9964 + extra_cost->alu.shift_reg);
9965 }
9966 else
9967 /* Slightly disparage register shifts. */
9968 *cost += !CONST_INT_P (XEXP (x, 1));
9969 }
9970 }
9971 else /* Rotates. */
9972 {
9973 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9974 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9975 if (speed_p)
9976 {
9977 if (CONST_INT_P (XEXP (x, 1)))
9978 *cost += (2 * extra_cost->alu.shift
9979 + extra_cost->alu.log_shift);
9980 else
9981 *cost += (extra_cost->alu.shift
9982 + extra_cost->alu.shift_reg
9983 + extra_cost->alu.log_shift_reg);
9984 }
9985 }
9986 return true;
9987 }
9988
9989 *cost = LIBCALL_COST (2);
9990 return false;
9991
9992 case BSWAP:
9993 if (arm_arch6)
9994 {
9995 if (mode == SImode)
9996 {
9997 if (speed_p)
9998 *cost += extra_cost->alu.rev;
9999
10000 return false;
10001 }
10002 }
10003 else
10004 {
10005 /* No rev instruction available. Look at arm_legacy_rev
10006 and thumb_legacy_rev for the form of RTL used then. */
10007 if (TARGET_THUMB)
10008 {
10009 *cost += COSTS_N_INSNS (9);
10010
10011 if (speed_p)
10012 {
10013 *cost += 6 * extra_cost->alu.shift;
10014 *cost += 3 * extra_cost->alu.logical;
10015 }
10016 }
10017 else
10018 {
10019 *cost += COSTS_N_INSNS (4);
10020
10021 if (speed_p)
10022 {
10023 *cost += 2 * extra_cost->alu.shift;
10024 *cost += extra_cost->alu.arith_shift;
10025 *cost += 2 * extra_cost->alu.logical;
10026 }
10027 }
10028 return true;
10029 }
10030 return false;
10031
10032 case MINUS:
10033 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10034 && (mode == SFmode || !TARGET_VFP_SINGLE))
10035 {
10036 if (GET_CODE (XEXP (x, 0)) == MULT
10037 || GET_CODE (XEXP (x, 1)) == MULT)
10038 {
10039 rtx mul_op0, mul_op1, sub_op;
10040
10041 if (speed_p)
10042 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10043
10044 if (GET_CODE (XEXP (x, 0)) == MULT)
10045 {
10046 mul_op0 = XEXP (XEXP (x, 0), 0);
10047 mul_op1 = XEXP (XEXP (x, 0), 1);
10048 sub_op = XEXP (x, 1);
10049 }
10050 else
10051 {
10052 mul_op0 = XEXP (XEXP (x, 1), 0);
10053 mul_op1 = XEXP (XEXP (x, 1), 1);
10054 sub_op = XEXP (x, 0);
10055 }
10056
10057 /* The first operand of the multiply may be optionally
10058 negated. */
10059 if (GET_CODE (mul_op0) == NEG)
10060 mul_op0 = XEXP (mul_op0, 0);
10061
10062 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10063 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10064 + rtx_cost (sub_op, mode, code, 0, speed_p));
10065
10066 return true;
10067 }
10068
10069 if (speed_p)
10070 *cost += extra_cost->fp[mode != SFmode].addsub;
10071 return false;
10072 }
10073
10074 if (mode == SImode)
10075 {
10076 rtx shift_by_reg = NULL;
10077 rtx shift_op;
10078 rtx non_shift_op;
10079 rtx op0 = XEXP (x, 0);
10080 rtx op1 = XEXP (x, 1);
10081
10082 /* Factor out any borrow operation. There's more than one way
10083 of expressing this; try to recognize them all. */
10084 if (GET_CODE (op0) == MINUS)
10085 {
10086 if (arm_borrow_operation (op1, SImode))
10087 {
10088 op1 = XEXP (op0, 1);
10089 op0 = XEXP (op0, 0);
10090 }
10091 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10092 op0 = XEXP (op0, 0);
10093 }
10094 else if (GET_CODE (op1) == PLUS
10095 && arm_borrow_operation (XEXP (op1, 0), SImode))
10096 op1 = XEXP (op1, 0);
10097 else if (GET_CODE (op0) == NEG
10098 && arm_borrow_operation (op1, SImode))
10099 {
10100 /* Negate with carry-in. For Thumb2 this is done with
10101 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10102 RSC instruction that exists in Arm mode. */
10103 if (speed_p)
10104 *cost += (TARGET_THUMB2
10105 ? extra_cost->alu.arith_shift
10106 : extra_cost->alu.arith);
10107 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10108 return true;
10109 }
10110
10111 shift_op = shifter_op_p (op0, &shift_by_reg);
10112 if (shift_op == NULL)
10113 {
10114 shift_op = shifter_op_p (op1, &shift_by_reg);
10115 non_shift_op = op0;
10116 }
10117 else
10118 non_shift_op = op1;
10119
10120 if (shift_op != NULL)
10121 {
10122 if (shift_by_reg != NULL)
10123 {
10124 if (speed_p)
10125 *cost += extra_cost->alu.arith_shift_reg;
10126 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10127 }
10128 else if (speed_p)
10129 *cost += extra_cost->alu.arith_shift;
10130
10131 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10132 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10133 return true;
10134 }
10135
10136 if (arm_arch_thumb2
10137 && GET_CODE (XEXP (x, 1)) == MULT)
10138 {
10139 /* MLS. */
10140 if (speed_p)
10141 *cost += extra_cost->mult[0].add;
10142 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10143 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10144 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10145 return true;
10146 }
10147
10148 if (CONST_INT_P (op0))
10149 {
10150 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10151 INTVAL (op0), NULL_RTX,
10152 NULL_RTX, 1, 0);
10153 *cost = COSTS_N_INSNS (insns);
10154 if (speed_p)
10155 *cost += insns * extra_cost->alu.arith;
10156 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10157 return true;
10158 }
10159 else if (speed_p)
10160 *cost += extra_cost->alu.arith;
10161
10162 /* Don't recurse as we don't want to cost any borrow that
10163 we've stripped. */
10164 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10165 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10166 return true;
10167 }
10168
10169 if (GET_MODE_CLASS (mode) == MODE_INT
10170 && GET_MODE_SIZE (mode) < 4)
10171 {
10172 rtx shift_op, shift_reg;
10173 shift_reg = NULL;
10174
10175 /* We check both sides of the MINUS for shifter operands since,
10176 unlike PLUS, it's not commutative. */
10177
10178 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10179 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10180
10181 /* Slightly disparage, as we might need to widen the result. */
10182 *cost += 1;
10183 if (speed_p)
10184 *cost += extra_cost->alu.arith;
10185
10186 if (CONST_INT_P (XEXP (x, 0)))
10187 {
10188 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10189 return true;
10190 }
10191
10192 return false;
10193 }
10194
10195 if (mode == DImode)
10196 {
10197 *cost += COSTS_N_INSNS (1);
10198
10199 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10200 {
10201 rtx op1 = XEXP (x, 1);
10202
10203 if (speed_p)
10204 *cost += 2 * extra_cost->alu.arith;
10205
10206 if (GET_CODE (op1) == ZERO_EXTEND)
10207 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10208 0, speed_p);
10209 else
10210 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10211 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10212 0, speed_p);
10213 return true;
10214 }
10215 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10216 {
10217 if (speed_p)
10218 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10219 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10220 0, speed_p)
10221 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10222 return true;
10223 }
10224 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10225 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10226 {
10227 if (speed_p)
10228 *cost += (extra_cost->alu.arith
10229 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10230 ? extra_cost->alu.arith
10231 : extra_cost->alu.arith_shift));
10232 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10233 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10234 GET_CODE (XEXP (x, 1)), 0, speed_p));
10235 return true;
10236 }
10237
10238 if (speed_p)
10239 *cost += 2 * extra_cost->alu.arith;
10240 return false;
10241 }
10242
10243 /* Vector mode? */
10244
10245 *cost = LIBCALL_COST (2);
10246 return false;
10247
10248 case PLUS:
10249 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10250 && (mode == SFmode || !TARGET_VFP_SINGLE))
10251 {
10252 if (GET_CODE (XEXP (x, 0)) == MULT)
10253 {
10254 rtx mul_op0, mul_op1, add_op;
10255
10256 if (speed_p)
10257 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10258
10259 mul_op0 = XEXP (XEXP (x, 0), 0);
10260 mul_op1 = XEXP (XEXP (x, 0), 1);
10261 add_op = XEXP (x, 1);
10262
10263 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10264 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10265 + rtx_cost (add_op, mode, code, 0, speed_p));
10266
10267 return true;
10268 }
10269
10270 if (speed_p)
10271 *cost += extra_cost->fp[mode != SFmode].addsub;
10272 return false;
10273 }
10274 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10275 {
10276 *cost = LIBCALL_COST (2);
10277 return false;
10278 }
10279
10280 /* Narrow modes can be synthesized in SImode, but the range
10281 of useful sub-operations is limited. Check for shift operations
10282 on one of the operands. Only left shifts can be used in the
10283 narrow modes. */
10284 if (GET_MODE_CLASS (mode) == MODE_INT
10285 && GET_MODE_SIZE (mode) < 4)
10286 {
10287 rtx shift_op, shift_reg;
10288 shift_reg = NULL;
10289
10290 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10291
10292 if (CONST_INT_P (XEXP (x, 1)))
10293 {
10294 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10295 INTVAL (XEXP (x, 1)), NULL_RTX,
10296 NULL_RTX, 1, 0);
10297 *cost = COSTS_N_INSNS (insns);
10298 if (speed_p)
10299 *cost += insns * extra_cost->alu.arith;
10300 /* Slightly penalize a narrow operation as the result may
10301 need widening. */
10302 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10303 return true;
10304 }
10305
10306 /* Slightly penalize a narrow operation as the result may
10307 need widening. */
10308 *cost += 1;
10309 if (speed_p)
10310 *cost += extra_cost->alu.arith;
10311
10312 return false;
10313 }
10314
10315 if (mode == SImode)
10316 {
10317 rtx shift_op, shift_reg;
10318
10319 if (TARGET_INT_SIMD
10320 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10321 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10322 {
10323 /* UXTA[BH] or SXTA[BH]. */
10324 if (speed_p)
10325 *cost += extra_cost->alu.extend_arith;
10326 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10327 0, speed_p)
10328 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10329 return true;
10330 }
10331
10332 rtx op0 = XEXP (x, 0);
10333 rtx op1 = XEXP (x, 1);
10334
10335 /* Handle a side effect of adding in the carry to an addition. */
10336 if (GET_CODE (op0) == PLUS
10337 && arm_carry_operation (op1, mode))
10338 {
10339 op1 = XEXP (op0, 1);
10340 op0 = XEXP (op0, 0);
10341 }
10342 else if (GET_CODE (op1) == PLUS
10343 && arm_carry_operation (op0, mode))
10344 {
10345 op0 = XEXP (op1, 0);
10346 op1 = XEXP (op1, 1);
10347 }
10348 else if (GET_CODE (op0) == PLUS)
10349 {
10350 op0 = strip_carry_operation (op0);
10351 if (swap_commutative_operands_p (op0, op1))
10352 std::swap (op0, op1);
10353 }
10354
10355 if (arm_carry_operation (op0, mode))
10356 {
10357 /* Adding the carry to a register is a canonicalization of
10358 adding 0 to the register plus the carry. */
10359 if (speed_p)
10360 *cost += extra_cost->alu.arith;
10361 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10362 return true;
10363 }
10364
10365 shift_reg = NULL;
10366 shift_op = shifter_op_p (op0, &shift_reg);
10367 if (shift_op != NULL)
10368 {
10369 if (shift_reg)
10370 {
10371 if (speed_p)
10372 *cost += extra_cost->alu.arith_shift_reg;
10373 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10374 }
10375 else if (speed_p)
10376 *cost += extra_cost->alu.arith_shift;
10377
10378 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10379 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10380 return true;
10381 }
10382
10383 if (GET_CODE (op0) == MULT)
10384 {
10385 rtx mul_op = op0;
10386
10387 if (TARGET_DSP_MULTIPLY
10388 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10389 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10390 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10391 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10392 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10393 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10394 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10395 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10396 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10397 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10398 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10399 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10400 == 16))))))
10401 {
10402 /* SMLA[BT][BT]. */
10403 if (speed_p)
10404 *cost += extra_cost->mult[0].extend_add;
10405 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10406 SIGN_EXTEND, 0, speed_p)
10407 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10408 SIGN_EXTEND, 0, speed_p)
10409 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10410 return true;
10411 }
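/* The SMLA[BT][BT] case above covers the 16x16+32 multiply-accumulate
   forms, e.g. (illustrative) smlabb rd, rn, rm, ra computes
   ra + (sign-extended rn[15:0]) * (sign-extended rm[15:0]).  */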
10412
10413 if (speed_p)
10414 *cost += extra_cost->mult[0].add;
10415 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10416 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10417 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10418 return true;
10419 }
10420
10421 if (CONST_INT_P (op1))
10422 {
10423 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10424 INTVAL (op1), NULL_RTX,
10425 NULL_RTX, 1, 0);
10426 *cost = COSTS_N_INSNS (insns);
10427 if (speed_p)
10428 *cost += insns * extra_cost->alu.arith;
10429 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10430 return true;
10431 }
10432
10433 if (speed_p)
10434 *cost += extra_cost->alu.arith;
10435
10436 /* Don't recurse here because we want to test the operands
10437 without any carry operation. */
10438 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10439 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10440 return true;
10441 }
10442
10443 if (mode == DImode)
10444 {
10445 if (GET_CODE (XEXP (x, 0)) == MULT
10446 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10447 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10448 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10449 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10450 {
10451 if (speed_p)
10452 *cost += extra_cost->mult[1].extend_add;
10453 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10454 ZERO_EXTEND, 0, speed_p)
10455 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10456 ZERO_EXTEND, 0, speed_p)
10457 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10458 return true;
10459 }
10460
10461 *cost += COSTS_N_INSNS (1);
10462
10463 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10464 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10465 {
10466 if (speed_p)
10467 *cost += (extra_cost->alu.arith
10468 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10469 ? extra_cost->alu.arith
10470 : extra_cost->alu.arith_shift));
10471
10472 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10473 0, speed_p)
10474 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10475 return true;
10476 }
10477
10478 if (speed_p)
10479 *cost += 2 * extra_cost->alu.arith;
10480 return false;
10481 }
10482
10483 /* Vector mode? */
10484 *cost = LIBCALL_COST (2);
10485 return false;
10486 case IOR:
10487 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10488 {
10489 if (speed_p)
10490 *cost += extra_cost->alu.rev;
10491
10492 return true;
10493 }
10494 /* Fall through. */
10495 case AND: case XOR:
10496 if (mode == SImode)
10497 {
10498 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10499 rtx op0 = XEXP (x, 0);
10500 rtx shift_op, shift_reg;
10501
10502 if (subcode == NOT
10503 && (code == AND
10504 || (code == IOR && TARGET_THUMB2)))
10505 op0 = XEXP (op0, 0);
10506
10507 shift_reg = NULL;
10508 shift_op = shifter_op_p (op0, &shift_reg);
10509 if (shift_op != NULL)
10510 {
10511 if (shift_reg)
10512 {
10513 if (speed_p)
10514 *cost += extra_cost->alu.log_shift_reg;
10515 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10516 }
10517 else if (speed_p)
10518 *cost += extra_cost->alu.log_shift;
10519
10520 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10521 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10522 return true;
10523 }
10524
10525 if (CONST_INT_P (XEXP (x, 1)))
10526 {
10527 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10528 INTVAL (XEXP (x, 1)), NULL_RTX,
10529 NULL_RTX, 1, 0);
10530
10531 *cost = COSTS_N_INSNS (insns);
10532 if (speed_p)
10533 *cost += insns * extra_cost->alu.logical;
10534 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10535 return true;
10536 }
10537
10538 if (speed_p)
10539 *cost += extra_cost->alu.logical;
10540 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10541 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10542 return true;
10543 }
10544
10545 if (mode == DImode)
10546 {
10547 rtx op0 = XEXP (x, 0);
10548 enum rtx_code subcode = GET_CODE (op0);
10549
10550 *cost += COSTS_N_INSNS (1);
10551
10552 if (subcode == NOT
10553 && (code == AND
10554 || (code == IOR && TARGET_THUMB2)))
10555 op0 = XEXP (op0, 0);
10556
10557 if (GET_CODE (op0) == ZERO_EXTEND)
10558 {
10559 if (speed_p)
10560 *cost += 2 * extra_cost->alu.logical;
10561
10562 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10563 0, speed_p)
10564 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10565 return true;
10566 }
10567 else if (GET_CODE (op0) == SIGN_EXTEND)
10568 {
10569 if (speed_p)
10570 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10571
10572 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10573 0, speed_p)
10574 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10575 return true;
10576 }
10577
10578 if (speed_p)
10579 *cost += 2 * extra_cost->alu.logical;
10580
10581 return true;
10582 }
10583 /* Vector mode? */
10584
10585 *cost = LIBCALL_COST (2);
10586 return false;
10587
10588 case MULT:
10589 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10590 && (mode == SFmode || !TARGET_VFP_SINGLE))
10591 {
10592 rtx op0 = XEXP (x, 0);
10593
10594 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10595 op0 = XEXP (op0, 0);
10596
10597 if (speed_p)
10598 *cost += extra_cost->fp[mode != SFmode].mult;
10599
10600 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10601 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10602 return true;
10603 }
10604 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10605 {
10606 *cost = LIBCALL_COST (2);
10607 return false;
10608 }
10609
10610 if (mode == SImode)
10611 {
10612 if (TARGET_DSP_MULTIPLY
10613 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10614 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10615 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10616 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10617 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10618 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10619 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10620 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10621 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10622 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10623 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10624 && (INTVAL (XEXP (XEXP (x, 1), 1))
10625 == 16))))))
10626 {
10627 /* SMUL[TB][TB]. */
10628 if (speed_p)
10629 *cost += extra_cost->mult[0].extend;
10630 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10631 SIGN_EXTEND, 0, speed_p);
10632 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10633 SIGN_EXTEND, 1, speed_p);
10634 return true;
10635 }
10636 if (speed_p)
10637 *cost += extra_cost->mult[0].simple;
10638 return false;
10639 }
10640
10641 if (mode == DImode)
10642 {
10643 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10644 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10645 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10646 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10647 {
10648 if (speed_p)
10649 *cost += extra_cost->mult[1].extend;
10650 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10651 ZERO_EXTEND, 0, speed_p)
10652 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10653 ZERO_EXTEND, 0, speed_p));
10654 return true;
10655 }
10656
10657 *cost = LIBCALL_COST (2);
10658 return false;
10659 }
10660
10661 /* Vector mode? */
10662 *cost = LIBCALL_COST (2);
10663 return false;
10664
10665 case NEG:
10666 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10667 && (mode == SFmode || !TARGET_VFP_SINGLE))
10668 {
10669 if (GET_CODE (XEXP (x, 0)) == MULT)
10670 {
10671 /* VNMUL. */
10672 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10673 return true;
10674 }
10675
10676 if (speed_p)
10677 *cost += extra_cost->fp[mode != SFmode].neg;
10678
10679 return false;
10680 }
10681 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10682 {
10683 *cost = LIBCALL_COST (1);
10684 return false;
10685 }
10686
10687 if (mode == SImode)
10688 {
10689 if (GET_CODE (XEXP (x, 0)) == ABS)
10690 {
10691 *cost += COSTS_N_INSNS (1);
10692 /* Assume the non-flag-changing variant. */
10693 if (speed_p)
10694 *cost += (extra_cost->alu.log_shift
10695 + extra_cost->alu.arith_shift);
10696 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10697 return true;
10698 }
10699
10700 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10701 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10702 {
10703 *cost += COSTS_N_INSNS (1);
10704 /* No extra cost for MOV imm and MVN imm. */
10705 /* If the comparison op is using the flags, there's no further
10706 cost, otherwise we need to add the cost of the comparison. */
10707 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10708 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10709 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10710 {
10711 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10712 *cost += (COSTS_N_INSNS (1)
10713 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10714 0, speed_p)
10715 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10716 1, speed_p));
10717 if (speed_p)
10718 *cost += extra_cost->alu.arith;
10719 }
10720 return true;
10721 }
10722
10723 if (speed_p)
10724 *cost += extra_cost->alu.arith;
10725 return false;
10726 }
10727
10728 if (GET_MODE_CLASS (mode) == MODE_INT
10729 && GET_MODE_SIZE (mode) < 4)
10730 {
10731 /* Slightly disparage, as we might need an extend operation. */
10732 *cost += 1;
10733 if (speed_p)
10734 *cost += extra_cost->alu.arith;
10735 return false;
10736 }
10737
10738 if (mode == DImode)
10739 {
10740 *cost += COSTS_N_INSNS (1);
10741 if (speed_p)
10742 *cost += 2 * extra_cost->alu.arith;
10743 return false;
10744 }
10745
10746 /* Vector mode? */
10747 *cost = LIBCALL_COST (1);
10748 return false;
10749
10750 case NOT:
10751 if (mode == SImode)
10752 {
10753 rtx shift_op;
10754 rtx shift_reg = NULL;
10755
10756 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10757
10758 if (shift_op)
10759 {
10760 if (shift_reg != NULL)
10761 {
10762 if (speed_p)
10763 *cost += extra_cost->alu.log_shift_reg;
10764 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10765 }
10766 else if (speed_p)
10767 *cost += extra_cost->alu.log_shift;
10768 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10769 return true;
10770 }
10771
10772 if (speed_p)
10773 *cost += extra_cost->alu.logical;
10774 return false;
10775 }
10776 if (mode == DImode)
10777 {
10778 *cost += COSTS_N_INSNS (1);
10779 return false;
10780 }
10781
10782 /* Vector mode? */
10783
10784 *cost += LIBCALL_COST (1);
10785 return false;
10786
10787 case IF_THEN_ELSE:
10788 {
10789 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10790 {
10791 *cost += COSTS_N_INSNS (3);
10792 return true;
10793 }
10794 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10795 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10796
10797 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10798 /* Assume that if one arm of the if_then_else is a register,
10799 it will be tied with the result and eliminate the
10800 conditional insn. */
10801 if (REG_P (XEXP (x, 1)))
10802 *cost += op2cost;
10803 else if (REG_P (XEXP (x, 2)))
10804 *cost += op1cost;
10805 else
10806 {
10807 if (speed_p)
10808 {
10809 if (extra_cost->alu.non_exec_costs_exec)
10810 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10811 else
10812 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10813 }
10814 else
10815 *cost += op1cost + op2cost;
10816 }
10817 }
10818 return true;
10819
10820 case COMPARE:
10821 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10822 *cost = 0;
10823 else
10824 {
10825 machine_mode op0mode;
10826 /* We'll mostly assume that the cost of a compare is the cost of the
10827 LHS. However, there are some notable exceptions. */
10828
10829 /* Floating point compares are never done as side-effects. */
10830 op0mode = GET_MODE (XEXP (x, 0));
10831 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10832 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10833 {
10834 if (speed_p)
10835 *cost += extra_cost->fp[op0mode != SFmode].compare;
10836
10837 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10838 {
10839 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10840 return true;
10841 }
10842
10843 return false;
10844 }
10845 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10846 {
10847 *cost = LIBCALL_COST (2);
10848 return false;
10849 }
10850
10851 /* DImode compares normally take two insns. */
10852 if (op0mode == DImode)
10853 {
10854 *cost += COSTS_N_INSNS (1);
10855 if (speed_p)
10856 *cost += 2 * extra_cost->alu.arith;
10857 return false;
10858 }
10859
10860 if (op0mode == SImode)
10861 {
10862 rtx shift_op;
10863 rtx shift_reg;
10864
10865 if (XEXP (x, 1) == const0_rtx
10866 && !(REG_P (XEXP (x, 0))
10867 || (GET_CODE (XEXP (x, 0)) == SUBREG
10868 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10869 {
10870 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10871
10872 /* Multiply operations that set the flags are often
10873 significantly more expensive. */
10874 if (speed_p
10875 && GET_CODE (XEXP (x, 0)) == MULT
10876 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10877 *cost += extra_cost->mult[0].flag_setting;
10878
10879 if (speed_p
10880 && GET_CODE (XEXP (x, 0)) == PLUS
10881 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10882 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10883 0), 1), mode))
10884 *cost += extra_cost->mult[0].flag_setting;
10885 return true;
10886 }
10887
10888 shift_reg = NULL;
10889 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10890 if (shift_op != NULL)
10891 {
10892 if (shift_reg != NULL)
10893 {
10894 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10895 1, speed_p);
10896 if (speed_p)
10897 *cost += extra_cost->alu.arith_shift_reg;
10898 }
10899 else if (speed_p)
10900 *cost += extra_cost->alu.arith_shift;
10901 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10902 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10903 return true;
10904 }
10905
10906 if (speed_p)
10907 *cost += extra_cost->alu.arith;
10908 if (CONST_INT_P (XEXP (x, 1))
10909 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10910 {
10911 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10912 return true;
10913 }
10914 return false;
10915 }
10916
10917 /* Vector mode? */
10918
10919 *cost = LIBCALL_COST (2);
10920 return false;
10921 }
10922 return true;
10923
10924 case EQ:
10925 case NE:
10926 case LT:
10927 case LE:
10928 case GT:
10929 case GE:
10930 case LTU:
10931 case LEU:
10932 case GEU:
10933 case GTU:
10934 case ORDERED:
10935 case UNORDERED:
10936 case UNEQ:
10937 case UNLE:
10938 case UNLT:
10939 case UNGE:
10940 case UNGT:
10941 case LTGT:
10942 if (outer_code == SET)
10943 {
10944 /* Is it a store-flag operation? */
10945 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10946 && XEXP (x, 1) == const0_rtx)
10947 {
10948 /* Thumb also needs an IT insn. */
10949 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10950 return true;
10951 }
10952 if (XEXP (x, 1) == const0_rtx)
10953 {
10954 switch (code)
10955 {
10956 case LT:
10957 /* LSR Rd, Rn, #31. */
10958 if (speed_p)
10959 *cost += extra_cost->alu.shift;
10960 break;
10961
10962 case EQ:
10963 /* RSBS T1, Rn, #0
10964 ADC Rd, Rn, T1. */
10965
10966 case NE:
10967 /* SUBS T1, Rn, #1
10968 SBC Rd, Rn, T1. */
10969 *cost += COSTS_N_INSNS (1);
10970 break;
10971
10972 case LE:
10973 /* RSBS T1, Rn, Rn, LSR #31
10974 ADC Rd, Rn, T1. */
10975 *cost += COSTS_N_INSNS (1);
10976 if (speed_p)
10977 *cost += extra_cost->alu.arith_shift;
10978 break;
10979
10980 case GT:
10981 /* RSB Rd, Rn, Rn, ASR #1
10982 LSR Rd, Rd, #31. */
10983 *cost += COSTS_N_INSNS (1);
10984 if (speed_p)
10985 *cost += (extra_cost->alu.arith_shift
10986 + extra_cost->alu.shift);
10987 break;
10988
10989 case GE:
10990 /* ASR Rd, Rn, #31
10991 ADD Rd, Rn, #1. */
10992 *cost += COSTS_N_INSNS (1);
10993 if (speed_p)
10994 *cost += extra_cost->alu.shift;
10995 break;
10996
10997 default:
10998 /* Remaining cases are either meaningless or would take
10999 three insns anyway. */
11000 *cost = COSTS_N_INSNS (3);
11001 break;
11002 }
11003 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11004 return true;
11005 }
11006 else
11007 {
11008 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11009 if (CONST_INT_P (XEXP (x, 1))
11010 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11011 {
11012 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11013 return true;
11014 }
11015
11016 return false;
11017 }
11018 }
11019 /* Not directly inside a set. If it involves the condition code
11020 register it must be the condition for a branch, cond_exec or
11021 I_T_E operation. Since the comparison is performed elsewhere
11022 this is just the control part which has no additional
11023 cost. */
11024 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11025 && XEXP (x, 1) == const0_rtx)
11026 {
11027 *cost = 0;
11028 return true;
11029 }
11030 return false;
11031
11032 case ABS:
11033 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11034 && (mode == SFmode || !TARGET_VFP_SINGLE))
11035 {
11036 if (speed_p)
11037 *cost += extra_cost->fp[mode != SFmode].neg;
11038
11039 return false;
11040 }
11041 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11042 {
11043 *cost = LIBCALL_COST (1);
11044 return false;
11045 }
11046
11047 if (mode == SImode)
11048 {
11049 if (speed_p)
11050 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11051 return false;
11052 }
11053 /* Vector mode? */
11054 *cost = LIBCALL_COST (1);
11055 return false;
11056
11057 case SIGN_EXTEND:
11058 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11059 && MEM_P (XEXP (x, 0)))
11060 {
11061 if (mode == DImode)
11062 *cost += COSTS_N_INSNS (1);
11063
11064 if (!speed_p)
11065 return true;
11066
11067 if (GET_MODE (XEXP (x, 0)) == SImode)
11068 *cost += extra_cost->ldst.load;
11069 else
11070 *cost += extra_cost->ldst.load_sign_extend;
11071
11072 if (mode == DImode)
11073 *cost += extra_cost->alu.shift;
11074
11075 return true;
11076 }
11077
11078 /* Widening from less than 32 bits requires an extend operation. */
11079 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11080 {
11081 /* We have SXTB/SXTH. */
11082 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11083 if (speed_p)
11084 *cost += extra_cost->alu.extend;
11085 }
11086 else if (GET_MODE (XEXP (x, 0)) != SImode)
11087 {
11088 /* Needs two shifts. */
11089 *cost += COSTS_N_INSNS (1);
11090 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11091 if (speed_p)
11092 *cost += 2 * extra_cost->alu.shift;
11093 }
11094
11095 /* Widening beyond 32 bits requires one more insn. */
11096 if (mode == DImode)
11097 {
11098 *cost += COSTS_N_INSNS (1);
11099 if (speed_p)
11100 *cost += extra_cost->alu.shift;
11101 }
11102
11103 return true;
11104
11105 case ZERO_EXTEND:
11106 if ((arm_arch4
11107 || GET_MODE (XEXP (x, 0)) == SImode
11108 || GET_MODE (XEXP (x, 0)) == QImode)
11109 && MEM_P (XEXP (x, 0)))
11110 {
11111 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11112
11113 if (mode == DImode)
11114 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11115
11116 return true;
11117 }
11118
11119 /* Widening from less than 32 bits requires an extend operation. */
11120 if (GET_MODE (XEXP (x, 0)) == QImode)
11121 {
11122 /* UXTB can be a shorter instruction in Thumb2, but it might
11123 be slower than the AND Rd, Rn, #255 alternative. When
11124 optimizing for speed it should never be slower to use
11125 AND, and we don't really model 16-bit vs 32-bit insns
11126 here. */
11127 if (speed_p)
11128 *cost += extra_cost->alu.logical;
11129 }
11130 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11131 {
11132 /* We have UXTB/UXTH. */
11133 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11134 if (speed_p)
11135 *cost += extra_cost->alu.extend;
11136 }
11137 else if (GET_MODE (XEXP (x, 0)) != SImode)
11138 {
11139 /* Needs two shifts. It's marginally preferable to use
11140 shifts rather than two BIC instructions as the second
11141 shift may merge with a subsequent insn as a shifter
11142 op. */
11143 *cost = COSTS_N_INSNS (2);
11144 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11145 if (speed_p)
11146 *cost += 2 * extra_cost->alu.shift;
11147 }
11148
11149 /* Widening beyond 32 bits requires one more insn. */
11150 if (mode == DImode)
11151 {
11152 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11153 }
11154
11155 return true;
11156
11157 case CONST_INT:
11158 *cost = 0;
11159 /* CONST_INT has no mode, so we cannot tell for sure how many
11160 insns are really going to be needed. The best we can do is
11161 look at the value passed. If it fits in SImode, then assume
11162 that's the mode it will be used for. Otherwise assume it
11163 will be used in DImode. */
11164 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11165 mode = SImode;
11166 else
11167 mode = DImode;
11168
11169 /* Avoid blowing up in arm_gen_constant (). */
11170 if (!(outer_code == PLUS
11171 || outer_code == AND
11172 || outer_code == IOR
11173 || outer_code == XOR
11174 || outer_code == MINUS))
11175 outer_code = SET;
11176
11177 const_int_cost:
11178 if (mode == SImode)
11179 {
11180 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11181 INTVAL (x), NULL, NULL,
11182 0, 0));
11183 /* Extra costs? */
11184 }
11185 else
11186 {
11187 *cost += COSTS_N_INSNS (arm_gen_constant
11188 (outer_code, SImode, NULL,
11189 trunc_int_for_mode (INTVAL (x), SImode),
11190 NULL, NULL, 0, 0)
11191 + arm_gen_constant (outer_code, SImode, NULL,
11192 INTVAL (x) >> 32, NULL,
11193 NULL, 0, 0));
11194 /* Extra costs? */
11195 }
11196
11197 return true;
11198
11199 case CONST:
11200 case LABEL_REF:
11201 case SYMBOL_REF:
11202 if (speed_p)
11203 {
11204 if (arm_arch_thumb2 && !flag_pic)
11205 *cost += COSTS_N_INSNS (1);
11206 else
11207 *cost += extra_cost->ldst.load;
11208 }
11209 else
11210 *cost += COSTS_N_INSNS (1);
11211
11212 if (flag_pic)
11213 {
11214 *cost += COSTS_N_INSNS (1);
11215 if (speed_p)
11216 *cost += extra_cost->alu.arith;
11217 }
11218
11219 return true;
11220
11221 case CONST_FIXED:
11222 *cost = COSTS_N_INSNS (4);
11223 /* Fixme. */
11224 return true;
11225
11226 case CONST_DOUBLE:
11227 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11228 && (mode == SFmode || !TARGET_VFP_SINGLE))
11229 {
11230 if (vfp3_const_double_rtx (x))
11231 {
11232 if (speed_p)
11233 *cost += extra_cost->fp[mode == DFmode].fpconst;
11234 return true;
11235 }
11236
11237 if (speed_p)
11238 {
11239 if (mode == DFmode)
11240 *cost += extra_cost->ldst.loadd;
11241 else
11242 *cost += extra_cost->ldst.loadf;
11243 }
11244 else
11245 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11246
11247 return true;
11248 }
11249 *cost = COSTS_N_INSNS (4);
11250 return true;
11251
11252 case CONST_VECTOR:
11253 /* Fixme. */
11254 if (TARGET_NEON
11255 && TARGET_HARD_FLOAT
11256 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11257 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
11258 *cost = COSTS_N_INSNS (1);
11259 else
11260 *cost = COSTS_N_INSNS (4);
11261 return true;
11262
11263 case HIGH:
11264 case LO_SUM:
11265 /* When optimizing for size, we prefer constant pool entries to
11266 MOVW/MOVT pairs, so bump the cost of these slightly. */
11267 if (!speed_p)
11268 *cost += 1;
11269 return true;
11270
11271 case CLZ:
11272 if (speed_p)
11273 *cost += extra_cost->alu.clz;
11274 return false;
11275
11276 case SMIN:
11277 if (XEXP (x, 1) == const0_rtx)
11278 {
11279 if (speed_p)
11280 *cost += extra_cost->alu.log_shift;
11281 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11282 return true;
11283 }
11284 /* Fall through. */
11285 case SMAX:
11286 case UMIN:
11287 case UMAX:
11288 *cost += COSTS_N_INSNS (1);
11289 return false;
11290
11291 case TRUNCATE:
11292 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11293 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11294 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11295 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11296 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11297 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11298 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11299 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11300 == ZERO_EXTEND))))
11301 {
11302 if (speed_p)
11303 *cost += extra_cost->mult[1].extend;
11304 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11305 ZERO_EXTEND, 0, speed_p)
11306 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11307 ZERO_EXTEND, 0, speed_p));
11308 return true;
11309 }
11310 *cost = LIBCALL_COST (1);
11311 return false;
11312
11313 case UNSPEC_VOLATILE:
11314 case UNSPEC:
11315 return arm_unspec_cost (x, outer_code, speed_p, cost);
11316
11317 case PC:
11318 /* Reading the PC is like reading any other register. Writing it
11319 is more expensive, but we take that into account elsewhere. */
11320 *cost = 0;
11321 return true;
11322
11323 case ZERO_EXTRACT:
11324 /* TODO: Simple zero_extract of bottom bits using AND. */
11325 /* Fall through. */
11326 case SIGN_EXTRACT:
11327 if (arm_arch6
11328 && mode == SImode
11329 && CONST_INT_P (XEXP (x, 1))
11330 && CONST_INT_P (XEXP (x, 2)))
11331 {
11332 if (speed_p)
11333 *cost += extra_cost->alu.bfx;
11334 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11335 return true;
11336 }
11337 /* Without UBFX/SBFX, need to resort to shift operations. */
11338 *cost += COSTS_N_INSNS (1);
11339 if (speed_p)
11340 *cost += 2 * extra_cost->alu.shift;
11341 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11342 return true;
11343
11344 case FLOAT_EXTEND:
11345 if (TARGET_HARD_FLOAT)
11346 {
11347 if (speed_p)
11348 *cost += extra_cost->fp[mode == DFmode].widen;
11349 if (!TARGET_VFP5
11350 && GET_MODE (XEXP (x, 0)) == HFmode)
11351 {
11352 /* Pre v8, widening HF->DF is a two-step process, first
11353 widening to SFmode. */
11354 *cost += COSTS_N_INSNS (1);
11355 if (speed_p)
11356 *cost += extra_cost->fp[0].widen;
11357 }
11358 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11359 return true;
11360 }
11361
11362 *cost = LIBCALL_COST (1);
11363 return false;
11364
11365 case FLOAT_TRUNCATE:
11366 if (TARGET_HARD_FLOAT)
11367 {
11368 if (speed_p)
11369 *cost += extra_cost->fp[mode == DFmode].narrow;
11370 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11371 return true;
11372 /* Vector modes? */
11373 }
11374 *cost = LIBCALL_COST (1);
11375 return false;
11376
11377 case FMA:
11378 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11379 {
11380 rtx op0 = XEXP (x, 0);
11381 rtx op1 = XEXP (x, 1);
11382 rtx op2 = XEXP (x, 2);
11383
11384
11385 /* vfms or vfnma. */
11386 if (GET_CODE (op0) == NEG)
11387 op0 = XEXP (op0, 0);
11388
11389 /* vfnms or vfnma. */
11390 if (GET_CODE (op2) == NEG)
11391 op2 = XEXP (op2, 0);
11392
11393 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11394 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11395 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11396
11397 if (speed_p)
11398 *cost += extra_cost->fp[mode == DFmode].fma;
11399
11400 return true;
11401 }
11402
11403 *cost = LIBCALL_COST (3);
11404 return false;
11405
11406 case FIX:
11407 case UNSIGNED_FIX:
11408 if (TARGET_HARD_FLOAT)
11409 {
11410 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11411 a vcvt fixed-point conversion. */
11412 if (code == FIX && mode == SImode
11413 && GET_CODE (XEXP (x, 0)) == FIX
11414 && GET_MODE (XEXP (x, 0)) == SFmode
11415 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11416 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11417 > 0)
11418 {
11419 if (speed_p)
11420 *cost += extra_cost->fp[0].toint;
11421
11422 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11423 code, 0, speed_p);
11424 return true;
11425 }
11426
11427 if (GET_MODE_CLASS (mode) == MODE_INT)
11428 {
11429 mode = GET_MODE (XEXP (x, 0));
11430 if (speed_p)
11431 *cost += extra_cost->fp[mode == DFmode].toint;
11432 /* Strip off the 'cost' of rounding towards zero. */
11433 if (GET_CODE (XEXP (x, 0)) == FIX)
11434 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11435 0, speed_p);
11436 else
11437 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11438 /* ??? Increase the cost to deal with transferring from
11439 FP -> CORE registers? */
11440 return true;
11441 }
11442 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11443 && TARGET_VFP5)
11444 {
11445 if (speed_p)
11446 *cost += extra_cost->fp[mode == DFmode].roundint;
11447 return false;
11448 }
11449 /* Vector costs? */
11450 }
11451 *cost = LIBCALL_COST (1);
11452 return false;
11453
11454 case FLOAT:
11455 case UNSIGNED_FLOAT:
11456 if (TARGET_HARD_FLOAT)
11457 {
11458 /* ??? Increase the cost to deal with transferring from CORE
11459 -> FP registers? */
11460 if (speed_p)
11461 *cost += extra_cost->fp[mode == DFmode].fromint;
11462 return false;
11463 }
11464 *cost = LIBCALL_COST (1);
11465 return false;
11466
11467 case CALL:
11468 return true;
11469
11470 case ASM_OPERANDS:
11471 {
11472 /* Just a guess. Guess number of instructions in the asm
11473 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11474 though (see PR60663). */
11475 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11476 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11477
11478 *cost = COSTS_N_INSNS (asm_length + num_operands);
11479 return true;
11480 }
11481 default:
11482 if (mode != VOIDmode)
11483 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11484 else
11485 *cost = COSTS_N_INSNS (4); /* Who knows? */
11486 return false;
11487 }
11488 }
11489
11490 #undef HANDLE_NARROW_SHIFT_ARITH
11491
11492 /* RTX costs entry point. */
11493
11494 static bool
11495 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11496 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11497 {
11498 bool result;
11499 int code = GET_CODE (x);
11500 gcc_assert (current_tune->insn_extra_cost);
11501
11502 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11503 (enum rtx_code) outer_code,
11504 current_tune->insn_extra_cost,
11505 total, speed);
11506
11507 if (dump_file && arm_verbose_cost)
11508 {
11509 print_rtl_single (dump_file, x);
11510 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11511 *total, result ? "final" : "partial");
11512 }
11513 return result;
11514 }
11515
11516 static int
11517 arm_insn_cost (rtx_insn *insn, bool speed)
11518 {
11519 int cost;
11520
11521 /* Don't cost a simple reg-reg move at a full insn cost: such moves
11522 will likely disappear during register allocation. */
11523 if (!reload_completed
11524 && GET_CODE (PATTERN (insn)) == SET
11525 && REG_P (SET_DEST (PATTERN (insn)))
11526 && REG_P (SET_SRC (PATTERN (insn))))
11527 return 2;
11528 cost = pattern_cost (PATTERN (insn), speed);
11529 /* If the cost is zero, then it's likely a complex insn. We don't want the
11530 cost of these to be less than something we know about. */
11531 return cost ? cost : COSTS_N_INSNS (2);
11532 }
11533
11534 /* All address computations that can be done are free, but rtx cost returns
11535 the same for practically all of them. So we weight the different types
11536 of address here in the order (most pref first):
11537 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
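/* As a purely illustrative reading of the weights returned below: an
   auto-increment address (PRE/POST_INC/DEC) costs 0, reg+constant
   (e.g. [sp, #8]) costs 2, reg plus a shifted or multiplied index costs 3,
   plain reg+reg costs 4, a bare REG costs 6, and a SYMBOL_REF, LABEL_REF
   or nested MEM costs 10. */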
11538 static inline int
11539 arm_arm_address_cost (rtx x)
11540 {
11541 enum rtx_code c = GET_CODE (x);
11542
11543 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11544 return 0;
11545 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11546 return 10;
11547
11548 if (c == PLUS)
11549 {
11550 if (CONST_INT_P (XEXP (x, 1)))
11551 return 2;
11552
11553 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11554 return 3;
11555
11556 return 4;
11557 }
11558
11559 return 6;
11560 }
11561
11562 static inline int
11563 arm_thumb_address_cost (rtx x)
11564 {
11565 enum rtx_code c = GET_CODE (x);
11566
11567 if (c == REG)
11568 return 1;
11569 if (c == PLUS
11570 && REG_P (XEXP (x, 0))
11571 && CONST_INT_P (XEXP (x, 1)))
11572 return 1;
11573
11574 return 2;
11575 }
11576
11577 static int
11578 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11579 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11580 {
11581 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11582 }
11583
11584 /* Adjust cost hook for XScale. */
11585 static bool
11586 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11587 int * cost)
11588 {
11589 /* Some true dependencies can have a higher cost depending
11590 on precisely how certain input operands are used. */
11591 if (dep_type == 0
11592 && recog_memoized (insn) >= 0
11593 && recog_memoized (dep) >= 0)
11594 {
11595 int shift_opnum = get_attr_shift (insn);
11596 enum attr_type attr_type = get_attr_type (dep);
11597
11598 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11599 operand for INSN. If we have a shifted input operand and the
11600 instruction we depend on is another ALU instruction, then we may
11601 have to account for an additional stall. */
11602 if (shift_opnum != 0
11603 && (attr_type == TYPE_ALU_SHIFT_IMM
11604 || attr_type == TYPE_ALUS_SHIFT_IMM
11605 || attr_type == TYPE_LOGIC_SHIFT_IMM
11606 || attr_type == TYPE_LOGICS_SHIFT_IMM
11607 || attr_type == TYPE_ALU_SHIFT_REG
11608 || attr_type == TYPE_ALUS_SHIFT_REG
11609 || attr_type == TYPE_LOGIC_SHIFT_REG
11610 || attr_type == TYPE_LOGICS_SHIFT_REG
11611 || attr_type == TYPE_MOV_SHIFT
11612 || attr_type == TYPE_MVN_SHIFT
11613 || attr_type == TYPE_MOV_SHIFT_REG
11614 || attr_type == TYPE_MVN_SHIFT_REG))
11615 {
11616 rtx shifted_operand;
11617 int opno;
11618
11619 /* Get the shifted operand. */
11620 extract_insn (insn);
11621 shifted_operand = recog_data.operand[shift_opnum];
11622
11623 /* Iterate over all the operands in DEP. If we write an operand
11624 that overlaps with SHIFTED_OPERAND, then we have to increase the
11625 cost of this dependency. */
11626 extract_insn (dep);
11627 preprocess_constraints (dep);
11628 for (opno = 0; opno < recog_data.n_operands; opno++)
11629 {
11630 /* We can ignore strict inputs. */
11631 if (recog_data.operand_type[opno] == OP_IN)
11632 continue;
11633
11634 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11635 shifted_operand))
11636 {
11637 *cost = 2;
11638 return false;
11639 }
11640 }
11641 }
11642 }
11643 return true;
11644 }
11645
11646 /* Adjust cost hook for Cortex A9. */
11647 static bool
11648 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11649 int * cost)
11650 {
11651 switch (dep_type)
11652 {
11653 case REG_DEP_ANTI:
11654 *cost = 0;
11655 return false;
11656
11657 case REG_DEP_TRUE:
11658 case REG_DEP_OUTPUT:
11659 if (recog_memoized (insn) >= 0
11660 && recog_memoized (dep) >= 0)
11661 {
11662 if (GET_CODE (PATTERN (insn)) == SET)
11663 {
11664 if (GET_MODE_CLASS
11665 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11666 || GET_MODE_CLASS
11667 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11668 {
11669 enum attr_type attr_type_insn = get_attr_type (insn);
11670 enum attr_type attr_type_dep = get_attr_type (dep);
11671
11672 /* By default all dependencies of the form
11673 s0 = s0 <op> s1
11674 s0 = s0 <op> s2
11675 have an extra latency of 1 cycle because
11676 of the input and output dependency in this
11677 case. However this gets modeled as a true
11678 dependency and hence all these checks. */
11679 if (REG_P (SET_DEST (PATTERN (insn)))
11680 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11681 {
11682 /* FMACS is a special case where the dependent
11683 instruction can be issued 3 cycles before
11684 the normal latency in case of an output
11685 dependency. */
11686 if ((attr_type_insn == TYPE_FMACS
11687 || attr_type_insn == TYPE_FMACD)
11688 && (attr_type_dep == TYPE_FMACS
11689 || attr_type_dep == TYPE_FMACD))
11690 {
11691 if (dep_type == REG_DEP_OUTPUT)
11692 *cost = insn_default_latency (dep) - 3;
11693 else
11694 *cost = insn_default_latency (dep);
11695 return false;
11696 }
11697 else
11698 {
11699 if (dep_type == REG_DEP_OUTPUT)
11700 *cost = insn_default_latency (dep) + 1;
11701 else
11702 *cost = insn_default_latency (dep);
11703 }
11704 return false;
11705 }
11706 }
11707 }
11708 }
11709 break;
11710
11711 default:
11712 gcc_unreachable ();
11713 }
11714
11715 return true;
11716 }
11717
11718 /* Adjust cost hook for FA726TE. */
11719 static bool
11720 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11721 int * cost)
11722 {
11723 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11724 has a penalty of 3. */
11725 if (dep_type == REG_DEP_TRUE
11726 && recog_memoized (insn) >= 0
11727 && recog_memoized (dep) >= 0
11728 && get_attr_conds (dep) == CONDS_SET)
11729 {
11730 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11731 if (get_attr_conds (insn) == CONDS_USE
11732 && get_attr_type (insn) != TYPE_BRANCH)
11733 {
11734 *cost = 3;
11735 return false;
11736 }
11737
11738 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11739 || get_attr_conds (insn) == CONDS_USE)
11740 {
11741 *cost = 0;
11742 return false;
11743 }
11744 }
11745
11746 return true;
11747 }
11748
11749 /* Implement TARGET_REGISTER_MOVE_COST.
11750
11751 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11752 it is typically more expensive than a single memory access. We set
11753 the cost to less than two memory accesses so that floating
11754 point to integer conversion does not go through memory. */
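/* As an illustration (not a hard rule): with the 32-bit memory move cost of
   10 returned by arm_memory_move_cost below, the VFP<->core cost of 15 used
   here falls between one and two memory accesses, which is what the comment
   above is aiming for. */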
11755
11756 int
11757 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11758 reg_class_t from, reg_class_t to)
11759 {
11760 if (TARGET_32BIT)
11761 {
11762 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11763 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11764 return 15;
11765 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11766 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11767 return 4;
11768 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11769 return 20;
11770 else
11771 return 2;
11772 }
11773 else
11774 {
11775 if (from == HI_REGS || to == HI_REGS)
11776 return 4;
11777 else
11778 return 2;
11779 }
11780 }
11781
11782 /* Implement TARGET_MEMORY_MOVE_COST. */
11783
11784 int
11785 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11786 bool in ATTRIBUTE_UNUSED)
11787 {
11788 if (TARGET_32BIT)
11789 return 10;
11790 else
11791 {
11792 if (GET_MODE_SIZE (mode) < 4)
11793 return 8;
11794 else
11795 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11796 }
11797 }
11798
11799 /* Vectorizer cost model implementation. */
11800
11801 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11802 static int
11803 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11804 tree vectype,
11805 int misalign ATTRIBUTE_UNUSED)
11806 {
11807 unsigned elements;
11808
11809 switch (type_of_cost)
11810 {
11811 case scalar_stmt:
11812 return current_tune->vec_costs->scalar_stmt_cost;
11813
11814 case scalar_load:
11815 return current_tune->vec_costs->scalar_load_cost;
11816
11817 case scalar_store:
11818 return current_tune->vec_costs->scalar_store_cost;
11819
11820 case vector_stmt:
11821 return current_tune->vec_costs->vec_stmt_cost;
11822
11823 case vector_load:
11824 return current_tune->vec_costs->vec_align_load_cost;
11825
11826 case vector_store:
11827 return current_tune->vec_costs->vec_store_cost;
11828
11829 case vec_to_scalar:
11830 return current_tune->vec_costs->vec_to_scalar_cost;
11831
11832 case scalar_to_vec:
11833 return current_tune->vec_costs->scalar_to_vec_cost;
11834
11835 case unaligned_load:
11836 case vector_gather_load:
11837 return current_tune->vec_costs->vec_unalign_load_cost;
11838
11839 case unaligned_store:
11840 case vector_scatter_store:
11841 return current_tune->vec_costs->vec_unalign_store_cost;
11842
11843 case cond_branch_taken:
11844 return current_tune->vec_costs->cond_taken_branch_cost;
11845
11846 case cond_branch_not_taken:
11847 return current_tune->vec_costs->cond_not_taken_branch_cost;
11848
11849 case vec_perm:
11850 case vec_promote_demote:
11851 return current_tune->vec_costs->vec_stmt_cost;
11852
11853 case vec_construct:
11854 elements = TYPE_VECTOR_SUBPARTS (vectype);
11855 return elements / 2 + 1;
11856
11857 default:
11858 gcc_unreachable ();
11859 }
11860 }
11861
11862 /* Implement targetm.vectorize.add_stmt_cost. */
11863
11864 static unsigned
11865 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11866 struct _stmt_vec_info *stmt_info, int misalign,
11867 enum vect_cost_model_location where)
11868 {
11869 unsigned *cost = (unsigned *) data;
11870 unsigned retval = 0;
11871
11872 if (flag_vect_cost_model)
11873 {
11874 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11875 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11876
11877 /* Statements in an inner loop relative to the loop being
11878 vectorized are weighted more heavily. The value here is
11879 arbitrary and could potentially be improved with analysis. */
11880 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11881 count *= 50; /* FIXME. */
11882
11883 retval = (unsigned) (count * stmt_cost);
11884 cost[where] += retval;
11885 }
11886
11887 return retval;
11888 }
11889
11890 /* Return true if and only if this insn can dual-issue only as older. */
11891 static bool
11892 cortexa7_older_only (rtx_insn *insn)
11893 {
11894 if (recog_memoized (insn) < 0)
11895 return false;
11896
11897 switch (get_attr_type (insn))
11898 {
11899 case TYPE_ALU_DSP_REG:
11900 case TYPE_ALU_SREG:
11901 case TYPE_ALUS_SREG:
11902 case TYPE_LOGIC_REG:
11903 case TYPE_LOGICS_REG:
11904 case TYPE_ADC_REG:
11905 case TYPE_ADCS_REG:
11906 case TYPE_ADR:
11907 case TYPE_BFM:
11908 case TYPE_REV:
11909 case TYPE_MVN_REG:
11910 case TYPE_SHIFT_IMM:
11911 case TYPE_SHIFT_REG:
11912 case TYPE_LOAD_BYTE:
11913 case TYPE_LOAD_4:
11914 case TYPE_STORE_4:
11915 case TYPE_FFARITHS:
11916 case TYPE_FADDS:
11917 case TYPE_FFARITHD:
11918 case TYPE_FADDD:
11919 case TYPE_FMOV:
11920 case TYPE_F_CVT:
11921 case TYPE_FCMPS:
11922 case TYPE_FCMPD:
11923 case TYPE_FCONSTS:
11924 case TYPE_FCONSTD:
11925 case TYPE_FMULS:
11926 case TYPE_FMACS:
11927 case TYPE_FMULD:
11928 case TYPE_FMACD:
11929 case TYPE_FDIVS:
11930 case TYPE_FDIVD:
11931 case TYPE_F_MRC:
11932 case TYPE_F_MRRC:
11933 case TYPE_F_FLAG:
11934 case TYPE_F_LOADS:
11935 case TYPE_F_STORES:
11936 return true;
11937 default:
11938 return false;
11939 }
11940 }
11941
11942 /* Return true if and only if this insn can dual-issue as younger. */
11943 static bool
11944 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11945 {
11946 if (recog_memoized (insn) < 0)
11947 {
11948 if (verbose > 5)
11949 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11950 return false;
11951 }
11952
11953 switch (get_attr_type (insn))
11954 {
11955 case TYPE_ALU_IMM:
11956 case TYPE_ALUS_IMM:
11957 case TYPE_LOGIC_IMM:
11958 case TYPE_LOGICS_IMM:
11959 case TYPE_EXTEND:
11960 case TYPE_MVN_IMM:
11961 case TYPE_MOV_IMM:
11962 case TYPE_MOV_REG:
11963 case TYPE_MOV_SHIFT:
11964 case TYPE_MOV_SHIFT_REG:
11965 case TYPE_BRANCH:
11966 case TYPE_CALL:
11967 return true;
11968 default:
11969 return false;
11970 }
11971 }
11972
11973
11974 /* Look for an instruction that can dual issue only as an older
11975 instruction, and move it in front of any instructions that can
11976 dual-issue as younger, while preserving the relative order of all
11977 other instructions in the ready list. This is a heuristic to help
11978 dual-issue in later cycles, by postponing issue of more flexible
11979 instructions. This heuristic may affect dual issue opportunities
11980 in the current cycle. */
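/* Purely as an illustration: if the ready list holds, from head to tail,
   { Y2, Y1, A } where Y1 and Y2 can dual-issue as younger and A only as
   older, the code below finds A and rotates it to the head, giving
   { A, Y2, Y1 } while keeping the relative order of Y1 and Y2. */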
11981 static void
11982 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11983 int *n_readyp, int clock)
11984 {
11985 int i;
11986 int first_older_only = -1, first_younger = -1;
11987
11988 if (verbose > 5)
11989 fprintf (file,
11990 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11991 clock,
11992 *n_readyp);
11993
11994 /* Traverse the ready list from the head (the instruction to issue
11995 first), looking for the first instruction that can issue as
11996 younger and the first instruction that can dual-issue only as
11997 older. */
11998 for (i = *n_readyp - 1; i >= 0; i--)
11999 {
12000 rtx_insn *insn = ready[i];
12001 if (cortexa7_older_only (insn))
12002 {
12003 first_older_only = i;
12004 if (verbose > 5)
12005 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12006 break;
12007 }
12008 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12009 first_younger = i;
12010 }
12011
12012 /* Nothing to reorder because either no younger insn was found or an insn
12013 that can dual-issue only as older appears before any insn that
12014 can dual-issue as younger. */
12015 if (first_younger == -1)
12016 {
12017 if (verbose > 5)
12018 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12019 return;
12020 }
12021
12022 /* Nothing to reorder because no older-only insn in the ready list. */
12023 if (first_older_only == -1)
12024 {
12025 if (verbose > 5)
12026 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12027 return;
12028 }
12029
12030 /* Move first_older_only insn before first_younger. */
12031 if (verbose > 5)
12032 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12033 INSN_UID (ready[first_older_only]),
12034 INSN_UID (ready[first_younger]));
12035 rtx_insn *first_older_only_insn = ready[first_older_only];
12036 for (i = first_older_only; i < first_younger; i++)
12037 {
12038 ready[i] = ready[i+1];
12039 }
12040
12041 ready[i] = first_older_only_insn;
12042 return;
12043 }
12044
12045 /* Implement TARGET_SCHED_REORDER. */
12046 static int
12047 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12048 int clock)
12049 {
12050 switch (arm_tune)
12051 {
12052 case TARGET_CPU_cortexa7:
12053 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12054 break;
12055 default:
12056 /* Do nothing for other cores. */
12057 break;
12058 }
12059
12060 return arm_issue_rate ();
12061 }
12062
12063 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12064 It corrects the value of COST based on the relationship between
12065 INSN and DEP through the dependence LINK. It returns the new
12066 value. There is a per-core adjust_cost hook to adjust scheduler costs
12067 and the per-core hook can choose to completely override the generic
12068 adjust_cost function. Only put bits of code into arm_adjust_cost that
12069 are common across all cores. */
12070 static int
12071 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12072 unsigned int)
12073 {
12074 rtx i_pat, d_pat;
12075
12076 /* When generating Thumb-1 code, we want to place flag-setting operations
12077 close to a conditional branch which depends on them, so that we can
12078 omit the comparison. */
12079 if (TARGET_THUMB1
12080 && dep_type == 0
12081 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12082 && recog_memoized (dep) >= 0
12083 && get_attr_conds (dep) == CONDS_SET)
12084 return 0;
12085
12086 if (current_tune->sched_adjust_cost != NULL)
12087 {
12088 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12089 return cost;
12090 }
12091
12092 /* XXX Is this strictly true? */
12093 if (dep_type == REG_DEP_ANTI
12094 || dep_type == REG_DEP_OUTPUT)
12095 return 0;
12096
12097 /* Call insns don't incur a stall, even if they follow a load. */
12098 if (dep_type == 0
12099 && CALL_P (insn))
12100 return 1;
12101
12102 if ((i_pat = single_set (insn)) != NULL
12103 && MEM_P (SET_SRC (i_pat))
12104 && (d_pat = single_set (dep)) != NULL
12105 && MEM_P (SET_DEST (d_pat)))
12106 {
12107 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12108 /* This is a load after a store, there is no conflict if the load reads
12109 from a cached area. Assume that loads from the stack, and from the
12110 constant pool are cached, and that others will miss. This is a
12111 hack. */
12112
12113 if ((GET_CODE (src_mem) == SYMBOL_REF
12114 && CONSTANT_POOL_ADDRESS_P (src_mem))
12115 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12116 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12117 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12118 return 1;
12119 }
12120
12121 return cost;
12122 }
12123
12124 int
12125 arm_max_conditional_execute (void)
12126 {
12127 return max_insns_skipped;
12128 }
12129
12130 static int
12131 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12132 {
12133 if (TARGET_32BIT)
12134 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12135 else
12136 return (optimize > 0) ? 2 : 0;
12137 }
12138
12139 static int
12140 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12141 {
12142 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12143 }
12144
12145 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12146 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12147 sequences of non-executed instructions in IT blocks probably take the same
12148 amount of time as executed instructions (and the IT instruction itself takes
12149 space in icache). This function was experimentally determined to give good
12150 results on a popular embedded benchmark. */
12151
12152 static int
12153 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12154 {
12155 return (TARGET_32BIT && speed_p) ? 1
12156 : arm_default_branch_cost (speed_p, predictable_p);
12157 }
12158
12159 static int
12160 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12161 {
12162 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12163 }
12164
12165 static bool fp_consts_inited = false;
12166
12167 static REAL_VALUE_TYPE value_fp0;
12168
12169 static void
12170 init_fp_table (void)
12171 {
12172 REAL_VALUE_TYPE r;
12173
12174 r = REAL_VALUE_ATOF ("0", DFmode);
12175 value_fp0 = r;
12176 fp_consts_inited = true;
12177 }
12178
12179 /* Return TRUE if rtx X is a valid immediate FP constant. */
12180 int
12181 arm_const_double_rtx (rtx x)
12182 {
12183 const REAL_VALUE_TYPE *r;
12184
12185 if (!fp_consts_inited)
12186 init_fp_table ();
12187
12188 r = CONST_DOUBLE_REAL_VALUE (x);
12189 if (REAL_VALUE_MINUS_ZERO (*r))
12190 return 0;
12191
12192 if (real_equal (r, &value_fp0))
12193 return 1;
12194
12195 return 0;
12196 }
12197
12198 /* VFPv3 has a fairly wide range of representable immediates, formed from
12199 "quarter-precision" floating-point values. These can be evaluated using this
12200 formula (with ^ for exponentiation):
12201
12202 -1^s * n * 2^-r
12203
12204 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12205 16 <= n <= 31 and 0 <= r <= 7.
12206
12207 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12208
12209 - A (most-significant) is the sign bit.
12210 - BCD are the exponent (encoded as r XOR 3).
12211 - EFGH are the mantissa (encoded as n - 16).
12212 */
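/* Worked examples of the encoding above (illustrative only):
   1.0 = 16 * 2^-4, so s = 0, n = 16, r = 4, giving ABCDEFGH
   = 0 111 0000 = 0x70; 0.5 = 16 * 2^-5 encodes as 0 110 0000 = 0x60;
   and -31.0 = -1 * 31 * 2^-0 encodes as 1 011 1111 = 0xbf. */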
12213
12214 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12215 fconst[sd] instruction, or -1 if X isn't suitable. */
12216 static int
12217 vfp3_const_double_index (rtx x)
12218 {
12219 REAL_VALUE_TYPE r, m;
12220 int sign, exponent;
12221 unsigned HOST_WIDE_INT mantissa, mant_hi;
12222 unsigned HOST_WIDE_INT mask;
12223 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12224 bool fail;
12225
12226 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12227 return -1;
12228
12229 r = *CONST_DOUBLE_REAL_VALUE (x);
12230
12231 /* We can't represent these things, so detect them first. */
12232 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12233 return -1;
12234
12235 /* Extract sign, exponent and mantissa. */
12236 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12237 r = real_value_abs (&r);
12238 exponent = REAL_EXP (&r);
12239 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12240 highest (sign) bit, with a fixed binary point at bit point_pos.
12241 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12242 bits for the mantissa, this may fail (low bits would be lost). */
12243 real_ldexp (&m, &r, point_pos - exponent);
12244 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12245 mantissa = w.elt (0);
12246 mant_hi = w.elt (1);
12247
12248 /* If there are bits set in the low part of the mantissa, we can't
12249 represent this value. */
12250 if (mantissa != 0)
12251 return -1;
12252
12253 /* Now make it so that mantissa contains the most-significant bits, and move
12254 the point_pos to indicate that the least-significant bits have been
12255 discarded. */
12256 point_pos -= HOST_BITS_PER_WIDE_INT;
12257 mantissa = mant_hi;
12258
12259 /* We can permit four significant bits of mantissa only, plus a high bit
12260 which is always 1. */
12261 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12262 if ((mantissa & mask) != 0)
12263 return -1;
12264
12265 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12266 mantissa >>= point_pos - 5;
12267
12268 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12269 floating-point immediate zero with Neon using an integer-zero load, but
12270 that case is handled elsewhere.) */
12271 if (mantissa == 0)
12272 return -1;
12273
12274 gcc_assert (mantissa >= 16 && mantissa <= 31);
12275
12276 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12277 normalized significands are in the range [1, 2). (Our mantissa is shifted
12278 left 4 places at this point relative to normalized IEEE754 values). GCC
12279 internally uses [0.5, 1) (see real.c), so the exponent returned from
12280 REAL_EXP must be altered. */
12281 exponent = 5 - exponent;
12282
12283 if (exponent < 0 || exponent > 7)
12284 return -1;
12285
12286 /* Sign, mantissa and exponent are now in the correct form to plug into the
12287 formula described in the comment above. */
12288 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12289 }
12290
12291 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12292 int
12293 vfp3_const_double_rtx (rtx x)
12294 {
12295 if (!TARGET_VFP3)
12296 return 0;
12297
12298 return vfp3_const_double_index (x) != -1;
12299 }
12300
12301 /* Recognize immediates which can be used in various Neon instructions. Legal
12302 immediates are described by the following table (for VMVN variants, the
12303 bitwise inverse of the constant shown is recognized. In either case, VMOV
12304 is output and the correct instruction to use for a given constant is chosen
12305 by the assembler). The constant shown is replicated across all elements of
12306 the destination vector.
12307
12308 insn elems variant constant (binary)
12309 ---- ----- ------- -----------------
12310 vmov i32 0 00000000 00000000 00000000 abcdefgh
12311 vmov i32 1 00000000 00000000 abcdefgh 00000000
12312 vmov i32 2 00000000 abcdefgh 00000000 00000000
12313 vmov i32 3 abcdefgh 00000000 00000000 00000000
12314 vmov i16 4 00000000 abcdefgh
12315 vmov i16 5 abcdefgh 00000000
12316 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12317 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12318 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12319 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12320 vmvn i16 10 00000000 abcdefgh
12321 vmvn i16 11 abcdefgh 00000000
12322 vmov i32 12 00000000 00000000 abcdefgh 11111111
12323 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12324 vmov i32 14 00000000 abcdefgh 11111111 11111111
12325 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12326 vmov i8 16 abcdefgh
12327 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12328 eeeeeeee ffffffff gggggggg hhhhhhhh
12329 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12330 vmov f32 19 00000000 00000000 00000000 00000000
12331
12332 For case 18, B = !b. Representable values are exactly those accepted by
12333 vfp3_const_double_index, but are output as floating-point numbers rather
12334 than indices.
12335
12336 For case 19, we will change it to vmov.i32 when assembling.
12337
12338 Variants 0-5 (inclusive) may also be used as immediates for the second
12339 operand of VORR/VBIC instructions.
12340
12341 The INVERSE argument causes the bitwise inverse of the given operand to be
12342 recognized instead (used for recognizing legal immediates for the VAND/VORN
12343 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12344 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12345 output, rather than the real insns vbic/vorr).
12346
12347 INVERSE makes no difference to the recognition of float vectors.
12348
12349 The return value is the variant of immediate as shown in the above table, or
12350 -1 if the given value doesn't match any of the listed patterns.
12351 */
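/* An illustrative example of the matching below: with INVERSE clear, a
   V4SImode vector whose every element is 0x4a splats to the bytes
   { 0x4a, 0, 0, 0, 0x4a, 0, 0, 0, ... }, which satisfies the variant 0
   test, so the function returns 0 with *ELEMENTWIDTH set to 32 and
   *MODCONST set to GEN_INT (0x4a). */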
12352 static int
12353 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12354 rtx *modconst, int *elementwidth)
12355 {
12356 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12357 matches = 1; \
12358 for (i = 0; i < idx; i += (STRIDE)) \
12359 if (!(TEST)) \
12360 matches = 0; \
12361 if (matches) \
12362 { \
12363 immtype = (CLASS); \
12364 elsize = (ELSIZE); \
12365 break; \
12366 }
12367
12368 unsigned int i, elsize = 0, idx = 0, n_elts;
12369 unsigned int innersize;
12370 unsigned char bytes[16] = {};
12371 int immtype = -1, matches;
12372 unsigned int invmask = inverse ? 0xff : 0;
12373 bool vector = GET_CODE (op) == CONST_VECTOR;
12374
12375 if (vector)
12376 n_elts = CONST_VECTOR_NUNITS (op);
12377 else
12378 {
12379 n_elts = 1;
12380 gcc_assert (mode != VOIDmode);
12381 }
12382
12383 innersize = GET_MODE_UNIT_SIZE (mode);
12384
12385 /* Vectors of float constants. */
12386 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12387 {
12388 rtx el0 = CONST_VECTOR_ELT (op, 0);
12389
12390 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12391 return -1;
12392
12393 /* FP16 vectors cannot be represented. */
12394 if (GET_MODE_INNER (mode) == HFmode)
12395 return -1;
12396
12397 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12398 are distinct in this context. */
12399 if (!const_vec_duplicate_p (op))
12400 return -1;
12401
12402 if (modconst)
12403 *modconst = CONST_VECTOR_ELT (op, 0);
12404
12405 if (elementwidth)
12406 *elementwidth = 0;
12407
12408 if (el0 == CONST0_RTX (GET_MODE (el0)))
12409 return 19;
12410 else
12411 return 18;
12412 }
12413
12414 /* The tricks done in the code below apply for little-endian vector layout.
12415 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12416 FIXME: Implement logic for big-endian vectors. */
12417 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12418 return -1;
12419
12420 /* Splat vector constant out into a byte vector. */
12421 for (i = 0; i < n_elts; i++)
12422 {
12423 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12424 unsigned HOST_WIDE_INT elpart;
12425
12426 gcc_assert (CONST_INT_P (el));
12427 elpart = INTVAL (el);
12428
12429 for (unsigned int byte = 0; byte < innersize; byte++)
12430 {
12431 bytes[idx++] = (elpart & 0xff) ^ invmask;
12432 elpart >>= BITS_PER_UNIT;
12433 }
12434 }
12435
12436 /* Sanity check. */
12437 gcc_assert (idx == GET_MODE_SIZE (mode));
12438
12439 do
12440 {
12441 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12442 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12443
12444 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12445 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12446
12447 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12448 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12449
12450 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12451 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12452
12453 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12454
12455 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12456
12457 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12458 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12459
12460 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12461 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12462
12463 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12464 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12465
12466 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12467 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12468
12469 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12470
12471 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12472
12473 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12474 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12475
12476 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12477 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12478
12479 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12480 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12481
12482 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12483 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12484
12485 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12486
12487 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12488 && bytes[i] == bytes[(i + 8) % idx]);
12489 }
12490 while (0);
12491
12492 if (immtype == -1)
12493 return -1;
12494
12495 if (elementwidth)
12496 *elementwidth = elsize;
12497
12498 if (modconst)
12499 {
12500 unsigned HOST_WIDE_INT imm = 0;
12501
12502 /* Un-invert bytes of recognized vector, if necessary. */
12503 if (invmask != 0)
12504 for (i = 0; i < idx; i++)
12505 bytes[i] ^= invmask;
12506
12507 if (immtype == 17)
12508 {
12509 /* FIXME: Broken on 32-bit H_W_I hosts. */
12510 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12511
12512 for (i = 0; i < 8; i++)
12513 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12514 << (i * BITS_PER_UNIT);
12515
12516 *modconst = GEN_INT (imm);
12517 }
12518 else
12519 {
12520 unsigned HOST_WIDE_INT imm = 0;
12521
12522 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12523 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12524
12525 *modconst = GEN_INT (imm);
12526 }
12527 }
12528
12529 return immtype;
12530 #undef CHECK
12531 }
12532
12533 /* Return TRUE if rtx OP is legal for use as either a Neon VMOV (or, implicitly,
12534 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12535 float elements), and a modified constant (whatever should be output for a
12536 VMOV) in *MODCONST. */
12537
12538 int
12539 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12540 rtx *modconst, int *elementwidth)
12541 {
12542 rtx tmpconst;
12543 int tmpwidth;
12544 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12545
12546 if (retval == -1)
12547 return 0;
12548
12549 if (modconst)
12550 *modconst = tmpconst;
12551
12552 if (elementwidth)
12553 *elementwidth = tmpwidth;
12554
12555 return 1;
12556 }
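/* Illustrative usage sketch (not part of the original code): a caller that
   wants to emit a VMOV immediate only when it is legal might do

     rtx mod;
     int width;
     if (neon_immediate_valid_for_move (op, V4SImode, &mod, &width))
       ... output "vmov.i<width>" with MOD as the immediate operand ...

   where OP is assumed to be a CONST_VECTOR in V4SImode.  */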
12557
12558 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction. If
12559 the immediate is valid, write a constant suitable for using as an operand
12560 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12561 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12562
12563 int
12564 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12565 rtx *modconst, int *elementwidth)
12566 {
12567 rtx tmpconst;
12568 int tmpwidth;
12569 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12570
12571 if (retval < 0 || retval > 5)
12572 return 0;
12573
12574 if (modconst)
12575 *modconst = tmpconst;
12576
12577 if (elementwidth)
12578 *elementwidth = tmpwidth;
12579
12580 return 1;
12581 }
12582
12583 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12584 the immediate is valid, write a constant suitable for using as an operand
12585 to VSHR/VSHL to *MODCONST and the corresponding element width to
12586 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
12587 which have different immediate ranges.
12588
12589 int
12590 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12591 rtx *modconst, int *elementwidth,
12592 bool isleftshift)
12593 {
12594 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12595 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12596 unsigned HOST_WIDE_INT last_elt = 0;
12597 unsigned HOST_WIDE_INT maxshift;
12598
12599 /* All elements of the vector constant must be the same; extract that value. */
12600 for (i = 0; i < n_elts; i++)
12601 {
12602 rtx el = CONST_VECTOR_ELT (op, i);
12603 unsigned HOST_WIDE_INT elpart;
12604
12605 if (CONST_INT_P (el))
12606 elpart = INTVAL (el);
12607 else if (CONST_DOUBLE_P (el))
12608 return 0;
12609 else
12610 gcc_unreachable ();
12611
12612 if (i != 0 && elpart != last_elt)
12613 return 0;
12614
12615 last_elt = elpart;
12616 }
12617
12618 /* Shift less than element size. */
12619 maxshift = innersize * 8;
12620
12621 if (isleftshift)
12622 {
12623 /* Left shift immediate value can be from 0 to <size>-1. */
12624 if (last_elt >= maxshift)
12625 return 0;
12626 }
12627 else
12628 {
12629 /* Right shift immediate value can be from 1 to <size>. */
12630 if (last_elt == 0 || last_elt > maxshift)
12631 return 0;
12632 }
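/* Worked example (added for illustration): for V8QImode the element size is
   one byte, so maxshift is 8; a VSHL immediate must then be in the range
   0-7, while a VSHR immediate must be in the range 1-8.  */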
12633
12634 if (elementwidth)
12635 *elementwidth = innersize * 8;
12636
12637 if (modconst)
12638 *modconst = CONST_VECTOR_ELT (op, 0);
12639
12640 return 1;
12641 }
12642
12643 /* Return a string suitable for output of Neon immediate logic operation
12644 MNEM. */
12645
12646 char *
12647 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12648 int inverse, int quad)
12649 {
12650 int width, is_valid;
12651 static char templ[40];
12652
12653 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12654
12655 gcc_assert (is_valid != 0);
12656
12657 if (quad)
12658 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12659 else
12660 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12661
12662 return templ;
12663 }
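/* Worked example (added for illustration): a call such as
   neon_output_logic_immediate ("vbic", &op, V4SImode, 0, 1) returns the
   template "vbic.i32\t%q0, %2" for an immediate whose element width comes
   out as 32; the gcc_assert above fires if the immediate is not valid at
   all.  */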
12664
12665 /* Return a string suitable for output of Neon immediate shift operation
12666 (VSHR or VSHL) MNEM. */
12667
12668 char *
12669 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12670 machine_mode mode, int quad,
12671 bool isleftshift)
12672 {
12673 int width, is_valid;
12674 static char templ[40];
12675
12676 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12677 gcc_assert (is_valid != 0);
12678
12679 if (quad)
12680 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12681 else
12682 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12683
12684 return templ;
12685 }
12686
12687 /* Output a sequence of pairwise operations to implement a reduction.
12688 NOTE: We do "too much work" here, because pairwise operations work on two
12689 registers-worth of operands in one go. It does not seem possible to exploit
12690 those extra calculations to do the full operation in fewer steps.
12691 Although all vector elements of the result but the first are ignored, we
12692 actually calculate the same result in each of the elements. An alternative
12693 such as initially loading a vector with zero to use as each of the second
12694 operands would use up an additional register and take an extra instruction,
12695 for no particular gain. */
12696
12697 void
12698 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12699 rtx (*reduc) (rtx, rtx, rtx))
12700 {
12701 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12702 rtx tmpsum = op1;
12703
12704 for (i = parts / 2; i >= 1; i /= 2)
12705 {
12706 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12707 emit_insn (reduc (dest, tmpsum, tmpsum));
12708 tmpsum = dest;
12709 }
12710 }
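/* Worked example (added for illustration): for a four-element mode such as
   V4HImode, PARTS is 4, so the loop above emits two pairwise operations:
   one with i = 2 into a fresh scratch register, and a final one with i = 1
   whose destination is OP0.  */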
12711
12712 /* If VALS is a vector constant that can be loaded into a register
12713 using VDUP, generate instructions to do so and return an RTX to
12714 assign to the register. Otherwise return NULL_RTX. */
12715
12716 static rtx
12717 neon_vdup_constant (rtx vals)
12718 {
12719 machine_mode mode = GET_MODE (vals);
12720 machine_mode inner_mode = GET_MODE_INNER (mode);
12721 rtx x;
12722
12723 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12724 return NULL_RTX;
12725
12726 if (!const_vec_duplicate_p (vals, &x))
12727 /* The elements are not all the same. We could handle repeating
12728 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12729 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12730 vdup.i16). */
12731 return NULL_RTX;
12732
12733 /* We can load this constant by using VDUP and a constant in a
12734 single ARM register. This will be cheaper than a vector
12735 load. */
12736
12737 x = copy_to_mode_reg (inner_mode, x);
12738 return gen_vec_duplicate (mode, x);
12739 }
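/* Worked example (added for illustration): a V4HI constant such as
   { 5, 5, 5, 5 } is a duplicate of a 2-byte element, so the code above moves
   5 into a core register and returns a (vec_duplicate:V4HI ...) of that
   register, which the NEON move patterns can emit as a vdup.16.  */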
12740
12741 /* Generate code to load VALS, which is a PARALLEL containing only
12742 constants (for vec_init) or CONST_VECTOR, efficiently into a
12743 register. Returns an RTX to copy into the register, or NULL_RTX
12744 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12745
12746 rtx
12747 neon_make_constant (rtx vals)
12748 {
12749 machine_mode mode = GET_MODE (vals);
12750 rtx target;
12751 rtx const_vec = NULL_RTX;
12752 int n_elts = GET_MODE_NUNITS (mode);
12753 int n_const = 0;
12754 int i;
12755
12756 if (GET_CODE (vals) == CONST_VECTOR)
12757 const_vec = vals;
12758 else if (GET_CODE (vals) == PARALLEL)
12759 {
12760 /* A CONST_VECTOR must contain only CONST_INTs and
12761 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12762 Only store valid constants in a CONST_VECTOR. */
12763 for (i = 0; i < n_elts; ++i)
12764 {
12765 rtx x = XVECEXP (vals, 0, i);
12766 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12767 n_const++;
12768 }
12769 if (n_const == n_elts)
12770 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12771 }
12772 else
12773 gcc_unreachable ();
12774
12775 if (const_vec != NULL
12776 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12777 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12778 return const_vec;
12779 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12780 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12781 pipeline cycle; creating the constant takes one or two ARM
12782 pipeline cycles. */
12783 return target;
12784 else if (const_vec != NULL_RTX)
12785 /* Load from constant pool. On Cortex-A8 this takes two cycles
12786 (for either double or quad vectors). We cannot take advantage
12787 of single-cycle VLD1 because we need a PC-relative addressing
12788 mode. */
12789 return const_vec;
12790 else
12791 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12792 We cannot construct an initializer. */
12793 return NULL_RTX;
12794 }
12795
12796 /* Initialize vector TARGET to VALS. */
12797
12798 void
12799 neon_expand_vector_init (rtx target, rtx vals)
12800 {
12801 machine_mode mode = GET_MODE (target);
12802 machine_mode inner_mode = GET_MODE_INNER (mode);
12803 int n_elts = GET_MODE_NUNITS (mode);
12804 int n_var = 0, one_var = -1;
12805 bool all_same = true;
12806 rtx x, mem;
12807 int i;
12808
12809 for (i = 0; i < n_elts; ++i)
12810 {
12811 x = XVECEXP (vals, 0, i);
12812 if (!CONSTANT_P (x))
12813 ++n_var, one_var = i;
12814
12815 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12816 all_same = false;
12817 }
12818
12819 if (n_var == 0)
12820 {
12821 rtx constant = neon_make_constant (vals);
12822 if (constant != NULL_RTX)
12823 {
12824 emit_move_insn (target, constant);
12825 return;
12826 }
12827 }
12828
12829 /* Splat a single non-constant element if we can. */
12830 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12831 {
12832 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12833 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12834 return;
12835 }
12836
12837 /* One field is non-constant. Load constant then overwrite varying
12838 field. This is more efficient than using the stack. */
12839 if (n_var == 1)
12840 {
12841 rtx copy = copy_rtx (vals);
12842 rtx merge_mask = GEN_INT (1 << one_var);
12843
12844 /* Load constant part of vector, substitute neighboring value for
12845 varying element. */
12846 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12847 neon_expand_vector_init (target, copy);
12848
12849 /* Insert variable. */
12850 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12851 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
12852 return;
12853 }
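/* Worked example (added for illustration): for a V4SI initializer
   { x, 1, 2, 3 } where only X is non-constant, ONE_VAR is 0; the code above
   first initializes TARGET from the constant vector { 1, 1, 2, 3 } (element
   0 borrows its neighbour's value) and then inserts X into lane 0 using the
   vec_set pattern with merge mask 1 << 0.  */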
12854
12855 /* Construct the vector in memory one field at a time
12856 and load the whole vector. */
12857 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12858 for (i = 0; i < n_elts; i++)
12859 emit_move_insn (adjust_address_nv (mem, inner_mode,
12860 i * GET_MODE_SIZE (inner_mode)),
12861 XVECEXP (vals, 0, i));
12862 emit_move_insn (target, mem);
12863 }
12864
12865 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Issue an
12866 error if it doesn't, using DESC to describe the operand. EXP indicates the
12867 source location, which includes the inlining history for intrinsics. */
12868
12869 static void
12870 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12871 const_tree exp, const char *desc)
12872 {
12873 HOST_WIDE_INT lane;
12874
12875 gcc_assert (CONST_INT_P (operand));
12876
12877 lane = INTVAL (operand);
12878
12879 if (lane < low || lane >= high)
12880 {
12881 if (exp)
12882 error ("%K%s %wd out of range %wd - %wd",
12883 exp, desc, lane, low, high - 1);
12884 else
12885 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12886 }
12887 }
12888
12889 /* Bounds-check lanes. */
12890
12891 void
12892 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12893 const_tree exp)
12894 {
12895 bounds_check (operand, low, high, exp, "lane");
12896 }
12897
12898 /* Bounds-check constants. */
12899
12900 void
12901 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12902 {
12903 bounds_check (operand, low, high, NULL_TREE, "constant");
12904 }
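/* Illustrative usage sketch (not part of the original code): an intrinsic
   expander validating a lane number for a four-element vector would call

     neon_lane_bounds (operands[3], 0, 4, exp);

   which accepts lanes 0-3 and reports "lane N out of range 0 - 3" otherwise.
   The operand index 3 is chosen purely for the example.  */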
12905
12906 HOST_WIDE_INT
12907 neon_element_bits (machine_mode mode)
12908 {
12909 return GET_MODE_UNIT_BITSIZE (mode);
12910 }
12911
12912 \f
12913 /* Predicates for `match_operand' and `match_operator'. */
12914
12915 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12916 WB is true if full writeback address modes are allowed and is false
12917 if limited writeback address modes (POST_INC and PRE_DEC) are
12918 allowed. */
12919
12920 int
12921 arm_coproc_mem_operand (rtx op, bool wb)
12922 {
12923 rtx ind;
12924
12925 /* Reject eliminable registers. */
12926 if (! (reload_in_progress || reload_completed || lra_in_progress)
12927 && ( reg_mentioned_p (frame_pointer_rtx, op)
12928 || reg_mentioned_p (arg_pointer_rtx, op)
12929 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12930 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12931 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12932 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12933 return FALSE;
12934
12935 /* Constants are converted into offsets from labels. */
12936 if (!MEM_P (op))
12937 return FALSE;
12938
12939 ind = XEXP (op, 0);
12940
12941 if (reload_completed
12942 && (GET_CODE (ind) == LABEL_REF
12943 || (GET_CODE (ind) == CONST
12944 && GET_CODE (XEXP (ind, 0)) == PLUS
12945 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12946 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12947 return TRUE;
12948
12949 /* Match: (mem (reg)). */
12950 if (REG_P (ind))
12951 return arm_address_register_rtx_p (ind, 0);
12952
12953 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12954 acceptable in any case (subject to verification by
12955 arm_address_register_rtx_p). We need WB to be true to accept
12956 PRE_INC and POST_DEC. */
12957 if (GET_CODE (ind) == POST_INC
12958 || GET_CODE (ind) == PRE_DEC
12959 || (wb
12960 && (GET_CODE (ind) == PRE_INC
12961 || GET_CODE (ind) == POST_DEC)))
12962 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12963
12964 if (wb
12965 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12966 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12967 && GET_CODE (XEXP (ind, 1)) == PLUS
12968 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12969 ind = XEXP (ind, 1);
12970
12971 /* Match:
12972 (plus (reg)
12973 (const)). */
12974 if (GET_CODE (ind) == PLUS
12975 && REG_P (XEXP (ind, 0))
12976 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12977 && CONST_INT_P (XEXP (ind, 1))
12978 && INTVAL (XEXP (ind, 1)) > -1024
12979 && INTVAL (XEXP (ind, 1)) < 1024
12980 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12981 return TRUE;
12982
12983 return FALSE;
12984 }
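/* Worked examples (added for illustration): with WB false this accepts
   [r4], [r4, #-1020] ... [r4, #1020] (the offset must be a multiple of
   four), plus the POST_INC and PRE_DEC forms; PRE_INC and POST_DEC are
   accepted only when WB is true.  Register r4 is chosen purely for the
   example.  */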
12985
12986 /* Return TRUE if OP is a memory operand which we can load or store a vector
12987 to/from. TYPE is one of the following values:
12988 0 - Vector load/store (vldr)
12989 1 - Core registers (ldm)
12990 2 - Element/structure loads (vld1)
12991 */
12992 int
12993 neon_vector_mem_operand (rtx op, int type, bool strict)
12994 {
12995 rtx ind;
12996
12997 /* Reject eliminable registers. */
12998 if (strict && ! (reload_in_progress || reload_completed)
12999 && (reg_mentioned_p (frame_pointer_rtx, op)
13000 || reg_mentioned_p (arg_pointer_rtx, op)
13001 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13002 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13003 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13004 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13005 return FALSE;
13006
13007 /* Constants are converted into offsets from labels. */
13008 if (!MEM_P (op))
13009 return FALSE;
13010
13011 ind = XEXP (op, 0);
13012
13013 if (reload_completed
13014 && (GET_CODE (ind) == LABEL_REF
13015 || (GET_CODE (ind) == CONST
13016 && GET_CODE (XEXP (ind, 0)) == PLUS
13017 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13018 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13019 return TRUE;
13020
13021 /* Match: (mem (reg)). */
13022 if (REG_P (ind))
13023 return arm_address_register_rtx_p (ind, 0);
13024
13025 /* Allow post-increment with Neon registers. */
13026 if ((type != 1 && GET_CODE (ind) == POST_INC)
13027 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13028 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13029
13030 /* Allow post-increment by register for VLDn.  */
13031 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13032 && GET_CODE (XEXP (ind, 1)) == PLUS
13033 && REG_P (XEXP (XEXP (ind, 1), 1)))
13034 return true;
13035
13036 /* Match:
13037 (plus (reg)
13038 (const)). */
13039 if (type == 0
13040 && GET_CODE (ind) == PLUS
13041 && REG_P (XEXP (ind, 0))
13042 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13043 && CONST_INT_P (XEXP (ind, 1))
13044 && INTVAL (XEXP (ind, 1)) > -1024
13045 /* For quad modes, we restrict the constant offset to be slightly less
13046 than what the instruction format permits. We have no such constraint
13047 on double mode offsets. (This must match arm_legitimate_index_p.) */
13048 && (INTVAL (XEXP (ind, 1))
13049 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13050 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13051 return TRUE;
13052
13053 return FALSE;
13054 }
13055
13056 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13057 type. */
13058 int
13059 neon_struct_mem_operand (rtx op)
13060 {
13061 rtx ind;
13062
13063 /* Reject eliminable registers. */
13064 if (! (reload_in_progress || reload_completed)
13065 && ( reg_mentioned_p (frame_pointer_rtx, op)
13066 || reg_mentioned_p (arg_pointer_rtx, op)
13067 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13068 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13069 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13070 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13071 return FALSE;
13072
13073 /* Constants are converted into offsets from labels. */
13074 if (!MEM_P (op))
13075 return FALSE;
13076
13077 ind = XEXP (op, 0);
13078
13079 if (reload_completed
13080 && (GET_CODE (ind) == LABEL_REF
13081 || (GET_CODE (ind) == CONST
13082 && GET_CODE (XEXP (ind, 0)) == PLUS
13083 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13084 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13085 return TRUE;
13086
13087 /* Match: (mem (reg)). */
13088 if (REG_P (ind))
13089 return arm_address_register_rtx_p (ind, 0);
13090
13091 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13092 if (GET_CODE (ind) == POST_INC
13093 || GET_CODE (ind) == PRE_DEC)
13094 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13095
13096 return FALSE;
13097 }
13098
13099 /* Prepare the operands for the VCMLA by lane instruction so that the right
13100 register number is selected. This instruction is special in that it always
13101 requires a D register; however, there is a choice to be made between Dn[0],
13102 Dn[1], D(n+1)[0] and D(n+1)[1], depending on the mode of the registers.
13103
13104 The VCMLA by lane instruction always selects two values. For instance, given
13105 D0 and a V2SF operand, the only valid index is 0, as the values in S0 and S1
13106 will be used by the instruction. Given V4SF, however, indices 0 and 1 are
13107 both valid, as they select D0[0] and D1[0] respectively.
13108
13109 This function centralizes that information based on OPERANDS: OPERANDS[3]
13110 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13111 updated to contain the right index. */
13112
13113 rtx *
13114 neon_vcmla_lane_prepare_operands (rtx *operands)
13115 {
13116 int lane = INTVAL (operands[4]);
13117 machine_mode constmode = SImode;
13118 machine_mode mode = GET_MODE (operands[3]);
13119 int regno = REGNO (operands[3]);
13120 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13121 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13122 {
13123 operands[3] = gen_int_mode (regno + 1, constmode);
13124 operands[4]
13125 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13126 }
13127 else
13128 {
13129 operands[3] = gen_int_mode (regno, constmode);
13130 operands[4] = gen_int_mode (lane, constmode);
13131 }
13132 return operands;
13133 }
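/* Worked example (added for illustration): suppose OPERANDS[3] is a V4SF
   register whose computed index REGNO above is 2 (its low half is d2) and
   the requested lane in OPERANDS[4] is 1.  GET_MODE_NUNITS (V4SF) / 4 is 1,
   so the code rewrites OPERANDS[3] to the constant 3 (selecting d3) and
   OPERANDS[4] to lane 0.  */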
13134
13135
13136 /* Return true if X is a register that will be eliminated later on. */
13137 int
13138 arm_eliminable_register (rtx x)
13139 {
13140 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13141 || REGNO (x) == ARG_POINTER_REGNUM
13142 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13143 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13144 }
13145
13146 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13147 coprocessor registers; otherwise return NO_REGS. */
13148
13149 enum reg_class
13150 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13151 {
13152 if (mode == HFmode)
13153 {
13154 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13155 return GENERAL_REGS;
13156 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13157 return NO_REGS;
13158 return GENERAL_REGS;
13159 }
13160
13161 /* The neon move patterns handle all legitimate vector and struct
13162 addresses. */
13163 if (TARGET_NEON
13164 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13165 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13166 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13167 || VALID_NEON_STRUCT_MODE (mode)))
13168 return NO_REGS;
13169
13170 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13171 return NO_REGS;
13172
13173 return GENERAL_REGS;
13174 }
13175
13176 /* Values which must be returned in the most-significant end of the return
13177 register. */
13178
13179 static bool
13180 arm_return_in_msb (const_tree valtype)
13181 {
13182 return (TARGET_AAPCS_BASED
13183 && BYTES_BIG_ENDIAN
13184 && (AGGREGATE_TYPE_P (valtype)
13185 || TREE_CODE (valtype) == COMPLEX_TYPE
13186 || FIXED_POINT_TYPE_P (valtype)));
13187 }
13188
13189 /* Return TRUE if X references a SYMBOL_REF. */
13190 int
13191 symbol_mentioned_p (rtx x)
13192 {
13193 const char * fmt;
13194 int i;
13195
13196 if (GET_CODE (x) == SYMBOL_REF)
13197 return 1;
13198
13199 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13200 are constant offsets, not symbols. */
13201 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13202 return 0;
13203
13204 fmt = GET_RTX_FORMAT (GET_CODE (x));
13205
13206 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13207 {
13208 if (fmt[i] == 'E')
13209 {
13210 int j;
13211
13212 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13213 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13214 return 1;
13215 }
13216 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13217 return 1;
13218 }
13219
13220 return 0;
13221 }
13222
13223 /* Return TRUE if X references a LABEL_REF. */
13224 int
13225 label_mentioned_p (rtx x)
13226 {
13227 const char * fmt;
13228 int i;
13229
13230 if (GET_CODE (x) == LABEL_REF)
13231 return 1;
13232
13233 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13234 instruction, but they are constant offsets, not symbols. */
13235 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13236 return 0;
13237
13238 fmt = GET_RTX_FORMAT (GET_CODE (x));
13239 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13240 {
13241 if (fmt[i] == 'E')
13242 {
13243 int j;
13244
13245 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13246 if (label_mentioned_p (XVECEXP (x, i, j)))
13247 return 1;
13248 }
13249 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13250 return 1;
13251 }
13252
13253 return 0;
13254 }
13255
13256 int
13257 tls_mentioned_p (rtx x)
13258 {
13259 switch (GET_CODE (x))
13260 {
13261 case CONST:
13262 return tls_mentioned_p (XEXP (x, 0));
13263
13264 case UNSPEC:
13265 if (XINT (x, 1) == UNSPEC_TLS)
13266 return 1;
13267
13268 /* Fall through. */
13269 default:
13270 return 0;
13271 }
13272 }
13273
13274 /* Must not copy any rtx that uses a pc-relative address.
13275 Also, disallow copying of load-exclusive instructions that
13276 may appear after splitting of compare-and-swap-style operations
13277 so as to prevent those loops from being transformed away from their
13278 canonical forms (see PR 69904). */
13279
13280 static bool
13281 arm_cannot_copy_insn_p (rtx_insn *insn)
13282 {
13283 /* The tls call insn cannot be copied, as it is paired with a data
13284 word. */
13285 if (recog_memoized (insn) == CODE_FOR_tlscall)
13286 return true;
13287
13288 subrtx_iterator::array_type array;
13289 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13290 {
13291 const_rtx x = *iter;
13292 if (GET_CODE (x) == UNSPEC
13293 && (XINT (x, 1) == UNSPEC_PIC_BASE
13294 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13295 return true;
13296 }
13297
13298 rtx set = single_set (insn);
13299 if (set)
13300 {
13301 rtx src = SET_SRC (set);
13302 if (GET_CODE (src) == ZERO_EXTEND)
13303 src = XEXP (src, 0);
13304
13305 /* Catch the load-exclusive and load-acquire operations. */
13306 if (GET_CODE (src) == UNSPEC_VOLATILE
13307 && (XINT (src, 1) == VUNSPEC_LL
13308 || XINT (src, 1) == VUNSPEC_LAX))
13309 return true;
13310 }
13311 return false;
13312 }
13313
13314 enum rtx_code
13315 minmax_code (rtx x)
13316 {
13317 enum rtx_code code = GET_CODE (x);
13318
13319 switch (code)
13320 {
13321 case SMAX:
13322 return GE;
13323 case SMIN:
13324 return LE;
13325 case UMIN:
13326 return LEU;
13327 case UMAX:
13328 return GEU;
13329 default:
13330 gcc_unreachable ();
13331 }
13332 }
13333
13334 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13335
13336 bool
13337 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13338 int *mask, bool *signed_sat)
13339 {
13340 /* The high bound must be a power of two minus one. */
13341 int log = exact_log2 (INTVAL (hi_bound) + 1);
13342 if (log == -1)
13343 return false;
13344
13345 /* The low bound is either zero (for usat) or one less than the
13346 negation of the high bound (for ssat). */
13347 if (INTVAL (lo_bound) == 0)
13348 {
13349 if (mask)
13350 *mask = log;
13351 if (signed_sat)
13352 *signed_sat = false;
13353
13354 return true;
13355 }
13356
13357 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13358 {
13359 if (mask)
13360 *mask = log + 1;
13361 if (signed_sat)
13362 *signed_sat = true;
13363
13364 return true;
13365 }
13366
13367 return false;
13368 }
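/* Worked examples (added for illustration): bounds 0 and 255 give
   exact_log2 (256) == 8 with a zero low bound, so *MASK is 8 and the match
   is unsigned (usat #8); bounds -128 and 127 give exact_log2 (128) == 7 with
   the low bound equal to -127 - 1, so *MASK is 8 and the match is signed
   (ssat #8).  */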
13369
13370 /* Return 1 if memory locations are adjacent. */
13371 int
13372 adjacent_mem_locations (rtx a, rtx b)
13373 {
13374 /* We don't guarantee to preserve the order of these memory refs. */
13375 if (volatile_refs_p (a) || volatile_refs_p (b))
13376 return 0;
13377
13378 if ((REG_P (XEXP (a, 0))
13379 || (GET_CODE (XEXP (a, 0)) == PLUS
13380 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13381 && (REG_P (XEXP (b, 0))
13382 || (GET_CODE (XEXP (b, 0)) == PLUS
13383 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13384 {
13385 HOST_WIDE_INT val0 = 0, val1 = 0;
13386 rtx reg0, reg1;
13387 int val_diff;
13388
13389 if (GET_CODE (XEXP (a, 0)) == PLUS)
13390 {
13391 reg0 = XEXP (XEXP (a, 0), 0);
13392 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13393 }
13394 else
13395 reg0 = XEXP (a, 0);
13396
13397 if (GET_CODE (XEXP (b, 0)) == PLUS)
13398 {
13399 reg1 = XEXP (XEXP (b, 0), 0);
13400 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13401 }
13402 else
13403 reg1 = XEXP (b, 0);
13404
13405 /* Don't accept any offset that will require multiple
13406 instructions to handle, since this would cause the
13407 arith_adjacentmem pattern to output an overlong sequence. */
13408 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13409 return 0;
13410
13411 /* Don't allow an eliminable register: register elimination can make
13412 the offset too large. */
13413 if (arm_eliminable_register (reg0))
13414 return 0;
13415
13416 val_diff = val1 - val0;
13417
13418 if (arm_ld_sched)
13419 {
13420 /* If the target has load delay slots, then there's no benefit
13421 to using an ldm instruction unless the offset is zero and
13422 we are optimizing for size. */
13423 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13424 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13425 && (val_diff == 4 || val_diff == -4));
13426 }
13427
13428 return ((REGNO (reg0) == REGNO (reg1))
13429 && (val_diff == 4 || val_diff == -4));
13430 }
13431
13432 return 0;
13433 }
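/* Worked example (added for illustration): [r5, #8] and [r5, #12] are
   adjacent (same base register, val_diff == 4), as are [r5, #12] and
   [r5, #8] (val_diff == -4); [r5, #8] and [r6, #12] are not, since the bases
   differ.  On cores with load scheduling the stricter optimize_size test
   above applies instead.  Register names are chosen purely for the
   example.  */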
13434
13435 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13436 for load operations, false for store operations. CONSECUTIVE is true
13437 if the register numbers in the operation must be consecutive in the register
13438 bank. RETURN_PC is true if the value is to be loaded into the PC.
13439 The pattern we are trying to match for load is:
13440 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13441 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13442 :
13443 :
13444 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13445 ]
13446 where
13447 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13448 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13449 3. If consecutive is TRUE, then for kth register being loaded,
13450 REGNO (R_dk) = REGNO (R_d0) + k.
13451 The pattern for store is similar. */
13452 bool
13453 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13454 bool consecutive, bool return_pc)
13455 {
13456 HOST_WIDE_INT count = XVECLEN (op, 0);
13457 rtx reg, mem, addr;
13458 unsigned regno;
13459 unsigned first_regno;
13460 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13461 rtx elt;
13462 bool addr_reg_in_reglist = false;
13463 bool update = false;
13464 int reg_increment;
13465 int offset_adj;
13466 int regs_per_val;
13467
13468 /* If not in SImode, then registers must be consecutive
13469 (e.g., VLDM instructions for DFmode). */
13470 gcc_assert ((mode == SImode) || consecutive);
13471 /* Setting return_pc for stores is illegal. */
13472 gcc_assert (!return_pc || load);
13473
13474 /* Set up the increments and the regs per val based on the mode. */
13475 reg_increment = GET_MODE_SIZE (mode);
13476 regs_per_val = reg_increment / 4;
13477 offset_adj = return_pc ? 1 : 0;
13478
13479 if (count <= 1
13480 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13481 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13482 return false;
13483
13484 /* Check if this is a write-back. */
13485 elt = XVECEXP (op, 0, offset_adj);
13486 if (GET_CODE (SET_SRC (elt)) == PLUS)
13487 {
13488 i++;
13489 base = 1;
13490 update = true;
13491
13492 /* The offset adjustment must be the number of registers being
13493 popped times the size of a single register. */
13494 if (!REG_P (SET_DEST (elt))
13495 || !REG_P (XEXP (SET_SRC (elt), 0))
13496 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13497 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13498 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13499 ((count - 1 - offset_adj) * reg_increment))
13500 return false;
13501 }
13502
13503 i = i + offset_adj;
13504 base = base + offset_adj;
13505 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13506 success depends on the type: VLDM can do just one reg,
13507 LDM must do at least two. */
13508 if ((count <= i) && (mode == SImode))
13509 return false;
13510
13511 elt = XVECEXP (op, 0, i - 1);
13512 if (GET_CODE (elt) != SET)
13513 return false;
13514
13515 if (load)
13516 {
13517 reg = SET_DEST (elt);
13518 mem = SET_SRC (elt);
13519 }
13520 else
13521 {
13522 reg = SET_SRC (elt);
13523 mem = SET_DEST (elt);
13524 }
13525
13526 if (!REG_P (reg) || !MEM_P (mem))
13527 return false;
13528
13529 regno = REGNO (reg);
13530 first_regno = regno;
13531 addr = XEXP (mem, 0);
13532 if (GET_CODE (addr) == PLUS)
13533 {
13534 if (!CONST_INT_P (XEXP (addr, 1)))
13535 return false;
13536
13537 offset = INTVAL (XEXP (addr, 1));
13538 addr = XEXP (addr, 0);
13539 }
13540
13541 if (!REG_P (addr))
13542 return false;
13543
13544 /* Don't allow SP to be loaded unless it is also the base register. It
13545 guarantees that SP is reset correctly when an LDM instruction
13546 is interrupted. Otherwise, we might end up with a corrupt stack. */
13547 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13548 return false;
13549
13550 if (regno == REGNO (addr))
13551 addr_reg_in_reglist = true;
13552
13553 for (; i < count; i++)
13554 {
13555 elt = XVECEXP (op, 0, i);
13556 if (GET_CODE (elt) != SET)
13557 return false;
13558
13559 if (load)
13560 {
13561 reg = SET_DEST (elt);
13562 mem = SET_SRC (elt);
13563 }
13564 else
13565 {
13566 reg = SET_SRC (elt);
13567 mem = SET_DEST (elt);
13568 }
13569
13570 if (!REG_P (reg)
13571 || GET_MODE (reg) != mode
13572 || REGNO (reg) <= regno
13573 || (consecutive
13574 && (REGNO (reg) !=
13575 (unsigned int) (first_regno + regs_per_val * (i - base))))
13576 /* Don't allow SP to be loaded unless it is also the base register. It
13577 guarantees that SP is reset correctly when an LDM instruction
13578 is interrupted. Otherwise, we might end up with a corrupt stack. */
13579 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13580 || !MEM_P (mem)
13581 || GET_MODE (mem) != mode
13582 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13583 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13584 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13585 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13586 offset + (i - base) * reg_increment))
13587 && (!REG_P (XEXP (mem, 0))
13588 || offset + (i - base) * reg_increment != 0)))
13589 return false;
13590
13591 regno = REGNO (reg);
13592 if (regno == REGNO (addr))
13593 addr_reg_in_reglist = true;
13594 }
13595
13596 if (load)
13597 {
13598 if (update && addr_reg_in_reglist)
13599 return false;
13600
13601 /* For Thumb-1, the address register is always modified - either by write-back
13602 or by explicit load. If the pattern does not describe an update,
13603 then the address register must be in the list of loaded registers. */
13604 if (TARGET_THUMB1)
13605 return update || addr_reg_in_reglist;
13606 }
13607
13608 return true;
13609 }
13610
13611 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13612 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13613 instruction. ADD_OFFSET is nonzero if the base address register needs
13614 to be modified with an add instruction before we can use it. */
13615
13616 static bool
13617 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13618 int nops, HOST_WIDE_INT add_offset)
13619 {
13620 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13621 if the offset isn't small enough. The reason 2 ldrs are faster
13622 is because these ARMs are able to do more than one cache access
13623 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13624 whilst the ARM8 has a double bandwidth cache. This means that
13625 these cores can do both an instruction fetch and a data fetch in
13626 a single cycle, so the trick of calculating the address into a
13627 scratch register (one of the result regs) and then doing a load
13628 multiple actually becomes slower (and no smaller in code size).
13629 That is the transformation
13630
13631 ldr rd1, [rbase + offset]
13632 ldr rd2, [rbase + offset + 4]
13633
13634 to
13635
13636 add rd1, rbase, offset
13637 ldmia rd1, {rd1, rd2}
13638
13639 produces worse code -- '3 cycles + any stalls on rd2' instead of
13640 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13641 access per cycle, the first sequence could never complete in less
13642 than 6 cycles, whereas the ldm sequence would only take 5 and
13643 would make better use of sequential accesses if not hitting the
13644 cache.
13645
13646 We cheat here and test 'arm_ld_sched' which we currently know to
13647 only be true for the ARM8, ARM9 and StrongARM. If this ever
13648 changes, then the test below needs to be reworked. */
13649 if (nops == 2 && arm_ld_sched && add_offset != 0)
13650 return false;
13651
13652 /* XScale has load-store double instructions, but they have stricter
13653 alignment requirements than load-store multiple, so we cannot
13654 use them.
13655
13656 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13657 the pipeline until completion.
13658
13659 NREGS CYCLES
13660 1 3
13661 2 4
13662 3 5
13663 4 6
13664
13665 An ldr instruction takes 1-3 cycles, but does not block the
13666 pipeline.
13667
13668 NREGS CYCLES
13669 1 1-3
13670 2 2-6
13671 3 3-9
13672 4 4-12
13673
13674 Best case ldr will always win. However, the more ldr instructions
13675 we issue, the less likely we are to be able to schedule them well.
13676 Using ldr instructions also increases code size.
13677
13678 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13679 for counts of 3 or 4 regs. */
13680 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13681 return false;
13682 return true;
13683 }
13684
13685 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13686 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13687 an array ORDER that describes the order in which to access the offsets
13688 so that they form an ascending sequence. In this sequence, each
13689 offset must be larger by exactly 4 than the previous one. ORDER[0]
13690 must have been filled in with the lowest offset by the caller.
13691 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13692 we use to verify that ORDER produces an ascending order of registers.
13693 Return true if it was possible to construct such an order, false if
13694 not. */
13695
13696 static bool
13697 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13698 int *unsorted_regs)
13699 {
13700 int i;
13701 for (i = 1; i < nops; i++)
13702 {
13703 int j;
13704
13705 order[i] = order[i - 1];
13706 for (j = 0; j < nops; j++)
13707 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13708 {
13709 /* We must find exactly one offset that is higher than the
13710 previous one by 4. */
13711 if (order[i] != order[i - 1])
13712 return false;
13713 order[i] = j;
13714 }
13715 if (order[i] == order[i - 1])
13716 return false;
13717 /* The register numbers must be ascending. */
13718 if (unsorted_regs != NULL
13719 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13720 return false;
13721 }
13722 return true;
13723 }
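/* Worked example (added for illustration): with UNSORTED_OFFSETS
   { 8, 0, 4, 12 } the caller pre-fills ORDER[0] = 1 (the index of offset 0);
   the loop above then finds the unique offsets 4, 8 and 12 in turn, giving
   ORDER = { 1, 2, 0, 3 }.  An offset sequence with a gap, e.g.
   { 0, 4, 12, 16 }, fails because no element equals 4 + 4.  */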
13724
13725 /* Used to determine in a peephole whether a sequence of load
13726 instructions can be changed into a load-multiple instruction.
13727 NOPS is the number of separate load instructions we are examining. The
13728 first NOPS entries in OPERANDS are the destination registers, the
13729 next NOPS entries are memory operands. If this function is
13730 successful, *BASE is set to the common base register of the memory
13731 accesses; *LOAD_OFFSET is set to the first memory location's offset
13732 from that base register.
13733 REGS is an array filled in with the destination register numbers.
13734 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13735 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13736 the sequence of registers in REGS matches the loads from ascending memory
13737 locations, and the function verifies that the register numbers are
13738 themselves ascending. If CHECK_REGS is false, the register numbers
13739 are stored in the order they are found in the operands. */
13740 static int
13741 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13742 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13743 {
13744 int unsorted_regs[MAX_LDM_STM_OPS];
13745 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13746 int order[MAX_LDM_STM_OPS];
13747 int base_reg = -1;
13748 int i, ldm_case;
13749
13750 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13751 easily extended if required. */
13752 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13753
13754 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13755
13756 /* Loop over the operands and check that the memory references are
13757 suitable (i.e. immediate offsets from the same base register). At
13758 the same time, extract the target register, and the memory
13759 offsets. */
13760 for (i = 0; i < nops; i++)
13761 {
13762 rtx reg;
13763 rtx offset;
13764
13765 /* Convert a subreg of a mem into the mem itself. */
13766 if (GET_CODE (operands[nops + i]) == SUBREG)
13767 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13768
13769 gcc_assert (MEM_P (operands[nops + i]));
13770
13771 /* Don't reorder volatile memory references; it doesn't seem worth
13772 looking for the case where the order is ok anyway. */
13773 if (MEM_VOLATILE_P (operands[nops + i]))
13774 return 0;
13775
13776 offset = const0_rtx;
13777
13778 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13779 || (GET_CODE (reg) == SUBREG
13780 && REG_P (reg = SUBREG_REG (reg))))
13781 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13782 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13783 || (GET_CODE (reg) == SUBREG
13784 && REG_P (reg = SUBREG_REG (reg))))
13785 && (CONST_INT_P (offset
13786 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13787 {
13788 if (i == 0)
13789 {
13790 base_reg = REGNO (reg);
13791 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13792 return 0;
13793 }
13794 else if (base_reg != (int) REGNO (reg))
13795 /* Not addressed from the same base register. */
13796 return 0;
13797
13798 unsorted_regs[i] = (REG_P (operands[i])
13799 ? REGNO (operands[i])
13800 : REGNO (SUBREG_REG (operands[i])));
13801
13802 /* If it isn't an integer register, or if it overwrites the
13803 base register but isn't the last insn in the list, then
13804 we can't do this. */
13805 if (unsorted_regs[i] < 0
13806 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13807 || unsorted_regs[i] > 14
13808 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13809 return 0;
13810
13811 /* Don't allow SP to be loaded unless it is also the base
13812 register. It guarantees that SP is reset correctly when
13813 an LDM instruction is interrupted. Otherwise, we might
13814 end up with a corrupt stack. */
13815 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13816 return 0;
13817
13818 unsorted_offsets[i] = INTVAL (offset);
13819 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13820 order[0] = i;
13821 }
13822 else
13823 /* Not a suitable memory address. */
13824 return 0;
13825 }
13826
13827 /* All the useful information has now been extracted from the
13828 operands into unsorted_regs and unsorted_offsets; additionally,
13829 order[0] has been set to the lowest offset in the list. Sort
13830 the offsets into order, verifying that they are adjacent, and
13831 check that the register numbers are ascending. */
13832 if (!compute_offset_order (nops, unsorted_offsets, order,
13833 check_regs ? unsorted_regs : NULL))
13834 return 0;
13835
13836 if (saved_order)
13837 memcpy (saved_order, order, sizeof order);
13838
13839 if (base)
13840 {
13841 *base = base_reg;
13842
13843 for (i = 0; i < nops; i++)
13844 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13845
13846 *load_offset = unsorted_offsets[order[0]];
13847 }
13848
13849 if (unsorted_offsets[order[0]] == 0)
13850 ldm_case = 1; /* ldmia */
13851 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13852 ldm_case = 2; /* ldmib */
13853 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13854 ldm_case = 3; /* ldmda */
13855 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13856 ldm_case = 4; /* ldmdb */
13857 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13858 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13859 ldm_case = 5;
13860 else
13861 return 0;
13862
13863 if (!multiple_operation_profitable_p (false, nops,
13864 ldm_case == 5
13865 ? unsorted_offsets[order[0]] : 0))
13866 return 0;
13867
13868 return ldm_case;
13869 }
13870
13871 /* Used to determine in a peephole whether a sequence of store instructions can
13872 be changed into a store-multiple instruction.
13873 NOPS is the number of separate store instructions we are examining.
13874 NOPS_TOTAL is the total number of instructions recognized by the peephole
13875 pattern.
13876 The first NOPS entries in OPERANDS are the source registers, the next
13877 NOPS entries are memory operands. If this function is successful, *BASE is
13878 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13879 to the first memory location's offset from that base register. REGS is an
13880 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13881 likewise filled with the corresponding rtx's.
13882 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13883 numbers to an ascending order of stores.
13884 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13885 from ascending memory locations, and the function verifies that the register
13886 numbers are themselves ascending. If CHECK_REGS is false, the register
13887 numbers are stored in the order they are found in the operands. */
13888 static int
13889 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13890 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13891 HOST_WIDE_INT *load_offset, bool check_regs)
13892 {
13893 int unsorted_regs[MAX_LDM_STM_OPS];
13894 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13895 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13896 int order[MAX_LDM_STM_OPS];
13897 int base_reg = -1;
13898 rtx base_reg_rtx = NULL;
13899 int i, stm_case;
13900
13901 /* Write back of base register is currently only supported for Thumb 1. */
13902 int base_writeback = TARGET_THUMB1;
13903
13904 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13905 easily extended if required. */
13906 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13907
13908 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13909
13910 /* Loop over the operands and check that the memory references are
13911 suitable (i.e. immediate offsets from the same base register). At
13912 the same time, extract the target register, and the memory
13913 offsets. */
13914 for (i = 0; i < nops; i++)
13915 {
13916 rtx reg;
13917 rtx offset;
13918
13919 /* Convert a subreg of a mem into the mem itself. */
13920 if (GET_CODE (operands[nops + i]) == SUBREG)
13921 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13922
13923 gcc_assert (MEM_P (operands[nops + i]));
13924
13925 /* Don't reorder volatile memory references; it doesn't seem worth
13926 looking for the case where the order is ok anyway. */
13927 if (MEM_VOLATILE_P (operands[nops + i]))
13928 return 0;
13929
13930 offset = const0_rtx;
13931
13932 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13933 || (GET_CODE (reg) == SUBREG
13934 && REG_P (reg = SUBREG_REG (reg))))
13935 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13936 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13937 || (GET_CODE (reg) == SUBREG
13938 && REG_P (reg = SUBREG_REG (reg))))
13939 && (CONST_INT_P (offset
13940 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13941 {
13942 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13943 ? operands[i] : SUBREG_REG (operands[i]));
13944 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13945
13946 if (i == 0)
13947 {
13948 base_reg = REGNO (reg);
13949 base_reg_rtx = reg;
13950 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13951 return 0;
13952 }
13953 else if (base_reg != (int) REGNO (reg))
13954 /* Not addressed from the same base register. */
13955 return 0;
13956
13957 /* If it isn't an integer register, then we can't do this. */
13958 if (unsorted_regs[i] < 0
13959 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13960 /* The effects are unpredictable if the base register is
13961 both updated and stored. */
13962 || (base_writeback && unsorted_regs[i] == base_reg)
13963 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13964 || unsorted_regs[i] > 14)
13965 return 0;
13966
13967 unsorted_offsets[i] = INTVAL (offset);
13968 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13969 order[0] = i;
13970 }
13971 else
13972 /* Not a suitable memory address. */
13973 return 0;
13974 }
13975
13976 /* All the useful information has now been extracted from the
13977 operands into unsorted_regs and unsorted_offsets; additionally,
13978 order[0] has been set to the lowest offset in the list. Sort
13979 the offsets into order, verifying that they are adjacent, and
13980 check that the register numbers are ascending. */
13981 if (!compute_offset_order (nops, unsorted_offsets, order,
13982 check_regs ? unsorted_regs : NULL))
13983 return 0;
13984
13985 if (saved_order)
13986 memcpy (saved_order, order, sizeof order);
13987
13988 if (base)
13989 {
13990 *base = base_reg;
13991
13992 for (i = 0; i < nops; i++)
13993 {
13994 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13995 if (reg_rtxs)
13996 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13997 }
13998
13999 *load_offset = unsorted_offsets[order[0]];
14000 }
14001
14002 if (TARGET_THUMB1
14003 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14004 return 0;
14005
14006 if (unsorted_offsets[order[0]] == 0)
14007 stm_case = 1; /* stmia */
14008 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14009 stm_case = 2; /* stmib */
14010 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14011 stm_case = 3; /* stmda */
14012 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14013 stm_case = 4; /* stmdb */
14014 else
14015 return 0;
14016
14017 if (!multiple_operation_profitable_p (false, nops, 0))
14018 return 0;
14019
14020 return stm_case;
14021 }
14022 \f
14023 /* Routines for use in generating RTL. */
14024
14025 /* Generate a load-multiple instruction. COUNT is the number of loads in
14026 the instruction; REGS and MEMS are arrays containing the operands.
14027 BASEREG is the base register to be used in addressing the memory operands.
14028 WBACK_OFFSET is nonzero if the instruction should update the base
14029 register. */
14030
14031 static rtx
14032 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14033 HOST_WIDE_INT wback_offset)
14034 {
14035 int i = 0, j;
14036 rtx result;
14037
14038 if (!multiple_operation_profitable_p (false, count, 0))
14039 {
14040 rtx seq;
14041
14042 start_sequence ();
14043
14044 for (i = 0; i < count; i++)
14045 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14046
14047 if (wback_offset != 0)
14048 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14049
14050 seq = get_insns ();
14051 end_sequence ();
14052
14053 return seq;
14054 }
14055
14056 result = gen_rtx_PARALLEL (VOIDmode,
14057 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14058 if (wback_offset != 0)
14059 {
14060 XVECEXP (result, 0, 0)
14061 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14062 i = 1;
14063 count++;
14064 }
14065
14066 for (j = 0; i < count; i++, j++)
14067 XVECEXP (result, 0, i)
14068 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14069
14070 return result;
14071 }
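/* Illustrative sketch (not part of the original code) of the PARALLEL built
   above for COUNT == 2, REGS == { 4, 5 } and a writeback offset of 8,
   assuming MEMS[0] and MEMS[1] address BASEREG and BASEREG + 4:

     (parallel [(set (reg:SI basereg) (plus:SI (reg:SI basereg) (const_int 8)))
                (set (reg:SI 4) (mem:SI (reg:SI basereg)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI basereg) (const_int 4))))])

   which is the shape the load-multiple patterns in ldmstm.md match.  */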
14072
14073 /* Generate a store-multiple instruction. COUNT is the number of stores in
14074 the instruction; REGS and MEMS are arrays containing the operands.
14075 BASEREG is the base register to be used in addressing the memory operands.
14076 WBACK_OFFSET is nonzero if the instruction should update the base
14077 register. */
14078
14079 static rtx
14080 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14081 HOST_WIDE_INT wback_offset)
14082 {
14083 int i = 0, j;
14084 rtx result;
14085
14086 if (GET_CODE (basereg) == PLUS)
14087 basereg = XEXP (basereg, 0);
14088
14089 if (!multiple_operation_profitable_p (false, count, 0))
14090 {
14091 rtx seq;
14092
14093 start_sequence ();
14094
14095 for (i = 0; i < count; i++)
14096 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14097
14098 if (wback_offset != 0)
14099 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14100
14101 seq = get_insns ();
14102 end_sequence ();
14103
14104 return seq;
14105 }
14106
14107 result = gen_rtx_PARALLEL (VOIDmode,
14108 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14109 if (wback_offset != 0)
14110 {
14111 XVECEXP (result, 0, 0)
14112 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14113 i = 1;
14114 count++;
14115 }
14116
14117 for (j = 0; i < count; i++, j++)
14118 XVECEXP (result, 0, i)
14119 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14120
14121 return result;
14122 }
14123
14124 /* Generate either a load-multiple or a store-multiple instruction. This
14125 function can be used in situations where we can start with a single MEM
14126 rtx and adjust its address upwards.
14127 COUNT is the number of operations in the instruction, not counting a
14128 possible update of the base register. REGS is an array containing the
14129 register operands.
14130 BASEREG is the base register to be used in addressing the memory operands,
14131 which are constructed from BASEMEM.
14132 WRITE_BACK specifies whether the generated instruction should include an
14133 update of the base register.
14134 OFFSETP is used to pass an offset to and from this function; this offset
14135 is not used when constructing the address (instead BASEMEM should have an
14136 appropriate offset in its address); it is used only for setting
14137 MEM_OFFSET. It is updated only if WRITE_BACK is true.  */
14138
14139 static rtx
14140 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14141 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14142 {
14143 rtx mems[MAX_LDM_STM_OPS];
14144 HOST_WIDE_INT offset = *offsetp;
14145 int i;
14146
14147 gcc_assert (count <= MAX_LDM_STM_OPS);
14148
14149 if (GET_CODE (basereg) == PLUS)
14150 basereg = XEXP (basereg, 0);
14151
14152 for (i = 0; i < count; i++)
14153 {
14154 rtx addr = plus_constant (Pmode, basereg, i * 4);
14155 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14156 offset += 4;
14157 }
14158
14159 if (write_back)
14160 *offsetp = offset;
14161
14162 if (is_load)
14163 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14164 write_back ? 4 * count : 0);
14165 else
14166 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14167 write_back ? 4 * count : 0);
14168 }
14169
14170 rtx
14171 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14172 rtx basemem, HOST_WIDE_INT *offsetp)
14173 {
14174 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14175 offsetp);
14176 }
14177
14178 rtx
14179 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14180 rtx basemem, HOST_WIDE_INT *offsetp)
14181 {
14182 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14183 offsetp);
14184 }
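/* Illustrative usage sketch (not part of the original code): to load r4-r7
   from four consecutive words starting at BASEMEM while tracking the offset,
   a caller might write

     int regs[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, basereg, FALSE, basemem, &off));

   BASEREG and BASEMEM are assumed to be set up by the caller; OFF is only
   advanced (to 16) when write_back is TRUE.  */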
14185
14186 /* Called from a peephole2 expander to turn a sequence of loads into an
14187 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14188 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14189 is true if we can reorder the registers because they are used commutatively
14190 subsequently.
14191 Returns true iff we could generate a new instruction. */
14192
14193 bool
14194 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14195 {
14196 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14197 rtx mems[MAX_LDM_STM_OPS];
14198 int i, j, base_reg;
14199 rtx base_reg_rtx;
14200 HOST_WIDE_INT offset;
14201 int write_back = FALSE;
14202 int ldm_case;
14203 rtx addr;
14204
14205 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14206 &base_reg, &offset, !sort_regs);
14207
14208 if (ldm_case == 0)
14209 return false;
14210
14211 if (sort_regs)
14212 for (i = 0; i < nops - 1; i++)
14213 for (j = i + 1; j < nops; j++)
14214 if (regs[i] > regs[j])
14215 {
14216 int t = regs[i];
14217 regs[i] = regs[j];
14218 regs[j] = t;
14219 }
14220 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14221
14222 if (TARGET_THUMB1)
14223 {
14224 gcc_assert (ldm_case == 1 || ldm_case == 5);
14225
14226 /* Thumb-1 ldm uses writeback except if the base is loaded. */
14227 write_back = true;
14228 for (i = 0; i < nops; i++)
14229 if (base_reg == regs[i])
14230 write_back = false;
14231
14232 /* Ensure the base is dead if it is updated. */
14233 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14234 return false;
14235 }
14236
14237 if (ldm_case == 5)
14238 {
14239 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14240 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14241 offset = 0;
14242 base_reg_rtx = newbase;
14243 }
14244
14245 for (i = 0; i < nops; i++)
14246 {
14247 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14248 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14249 SImode, addr, 0);
14250 }
14251 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14252 write_back ? offset + i * 4 : 0));
14253 return true;
14254 }
14255
14256 /* Called from a peephole2 expander to turn a sequence of stores into an
14257 STM instruction. OPERANDS are the operands found by the peephole matcher;
14258 NOPS indicates how many separate stores we are trying to combine.
14259 Returns true iff we could generate a new instruction. */
14260
14261 bool
14262 gen_stm_seq (rtx *operands, int nops)
14263 {
14264 int i;
14265 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14266 rtx mems[MAX_LDM_STM_OPS];
14267 int base_reg;
14268 rtx base_reg_rtx;
14269 HOST_WIDE_INT offset;
14270 int write_back = FALSE;
14271 int stm_case;
14272 rtx addr;
14273 bool base_reg_dies;
14274
14275 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14276 mem_order, &base_reg, &offset, true);
14277
14278 if (stm_case == 0)
14279 return false;
14280
14281 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14282
14283 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14284 if (TARGET_THUMB1)
14285 {
14286 gcc_assert (base_reg_dies);
14287 write_back = TRUE;
14288 }
14289
14290 if (stm_case == 5)
14291 {
14292 gcc_assert (base_reg_dies);
14293 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14294 offset = 0;
14295 }
14296
14297 addr = plus_constant (Pmode, base_reg_rtx, offset);
14298
14299 for (i = 0; i < nops; i++)
14300 {
14301 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14302 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14303 SImode, addr, 0);
14304 }
14305 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14306 write_back ? offset + i * 4 : 0));
14307 return true;
14308 }
14309
14310 /* Called from a peephole2 expander to turn a sequence of stores that are
14311 preceded by constant loads into an STM instruction. OPERANDS are the
14312 operands found by the peephole matcher; NOPS indicates how many
14313 separate stores we are trying to combine; there are 2 * NOPS
14314 instructions in the peephole.
14315 Returns true iff we could generate a new instruction. */
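
/* For instance (illustrative only), two stores of the constants 1 and 2 to
   consecutive words could become something like:

	mov	r4, #1
	mov	r5, #2
	stmia	r0, {r4, r5}

   provided suitable registers are free over the whole peephole window.  */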
14316
14317 bool
14318 gen_const_stm_seq (rtx *operands, int nops)
14319 {
14320 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14321 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14322 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14323 rtx mems[MAX_LDM_STM_OPS];
14324 int base_reg;
14325 rtx base_reg_rtx;
14326 HOST_WIDE_INT offset;
14327 int write_back = FALSE;
14328 int stm_case;
14329 rtx addr;
14330 bool base_reg_dies;
14331 int i, j;
14332 HARD_REG_SET allocated;
14333
14334 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14335 mem_order, &base_reg, &offset, false);
14336
14337 if (stm_case == 0)
14338 return false;
14339
14340 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14341
14342 /* If the same register is used more than once, try to find a free
14343 register. */
14344 CLEAR_HARD_REG_SET (allocated);
14345 for (i = 0; i < nops; i++)
14346 {
14347 for (j = i + 1; j < nops; j++)
14348 if (regs[i] == regs[j])
14349 {
14350 rtx t = peep2_find_free_register (0, nops * 2,
14351 TARGET_THUMB1 ? "l" : "r",
14352 SImode, &allocated);
14353 if (t == NULL_RTX)
14354 return false;
14355 reg_rtxs[i] = t;
14356 regs[i] = REGNO (t);
14357 }
14358 }
14359
14360 /* Compute an ordering that maps the register numbers to an ascending
14361 sequence. */
14362 reg_order[0] = 0;
14363 for (i = 0; i < nops; i++)
14364 if (regs[i] < regs[reg_order[0]])
14365 reg_order[0] = i;
14366
14367 for (i = 1; i < nops; i++)
14368 {
14369 int this_order = reg_order[i - 1];
14370 for (j = 0; j < nops; j++)
14371 if (regs[j] > regs[reg_order[i - 1]]
14372 && (this_order == reg_order[i - 1]
14373 || regs[j] < regs[this_order]))
14374 this_order = j;
14375 reg_order[i] = this_order;
14376 }
14377
14378 /* Ensure that registers that must be live after the instruction end
14379 up with the correct value. */
14380 for (i = 0; i < nops; i++)
14381 {
14382 int this_order = reg_order[i];
14383 if ((this_order != mem_order[i]
14384 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14385 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14386 return false;
14387 }
14388
14389 /* Load the constants. */
14390 for (i = 0; i < nops; i++)
14391 {
14392 rtx op = operands[2 * nops + mem_order[i]];
14393 sorted_regs[i] = regs[reg_order[i]];
14394 emit_move_insn (reg_rtxs[reg_order[i]], op);
14395 }
14396
14397 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14398
14399 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14400 if (TARGET_THUMB1)
14401 {
14402 gcc_assert (base_reg_dies);
14403 write_back = TRUE;
14404 }
14405
14406 if (stm_case == 5)
14407 {
14408 gcc_assert (base_reg_dies);
14409 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14410 offset = 0;
14411 }
14412
14413 addr = plus_constant (Pmode, base_reg_rtx, offset);
14414
14415 for (i = 0; i < nops; i++)
14416 {
14417 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14418 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14419 SImode, addr, 0);
14420 }
14421 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14422 write_back ? offset + i * 4 : 0));
14423 return true;
14424 }
14425
14426 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14427 unaligned copies on processors which support unaligned semantics for those
14428 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14429 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14430 An interleave factor of 1 (the minimum) will perform no interleaving.
14431 Load/store multiple are used for aligned addresses where possible. */
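
/* A rough sketch of the output for an interleave factor of 2 with an
   unaligned source and an aligned destination (assuming the core supports
   unaligned ldr/str):

	ldr	r0, [rS]		@ unaligned load
	ldr	r1, [rS, #4]		@ unaligned load
	stmia	rD!, {r0, r1}		@ aligned store-multiple

   repeated per 8-byte chunk, where rS and rD stand for the registers
   holding the source and destination addresses, with a halfword/byte tail
   as needed.  */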
14432
14433 static void
14434 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14435 HOST_WIDE_INT length,
14436 unsigned int interleave_factor)
14437 {
14438 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14439 int *regnos = XALLOCAVEC (int, interleave_factor);
14440 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14441 HOST_WIDE_INT i, j;
14442 HOST_WIDE_INT remaining = length, words;
14443 rtx halfword_tmp = NULL, byte_tmp = NULL;
14444 rtx dst, src;
14445 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14446 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14447 HOST_WIDE_INT srcoffset, dstoffset;
14448 HOST_WIDE_INT src_autoinc, dst_autoinc;
14449 rtx mem, addr;
14450
14451 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14452
14453 /* Use hard registers if we have aligned source or destination so we can use
14454 load/store multiple with contiguous registers. */
14455 if (dst_aligned || src_aligned)
14456 for (i = 0; i < interleave_factor; i++)
14457 regs[i] = gen_rtx_REG (SImode, i);
14458 else
14459 for (i = 0; i < interleave_factor; i++)
14460 regs[i] = gen_reg_rtx (SImode);
14461
14462 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14463 src = copy_addr_to_reg (XEXP (srcbase, 0));
14464
14465 srcoffset = dstoffset = 0;
14466
14467 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14468 For copying the last bytes we want to subtract this offset again. */
14469 src_autoinc = dst_autoinc = 0;
14470
14471 for (i = 0; i < interleave_factor; i++)
14472 regnos[i] = i;
14473
14474 /* Copy BLOCK_SIZE_BYTES chunks. */
14475
14476 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14477 {
14478 /* Load words. */
14479 if (src_aligned && interleave_factor > 1)
14480 {
14481 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14482 TRUE, srcbase, &srcoffset));
14483 src_autoinc += UNITS_PER_WORD * interleave_factor;
14484 }
14485 else
14486 {
14487 for (j = 0; j < interleave_factor; j++)
14488 {
14489 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14490 - src_autoinc));
14491 mem = adjust_automodify_address (srcbase, SImode, addr,
14492 srcoffset + j * UNITS_PER_WORD);
14493 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14494 }
14495 srcoffset += block_size_bytes;
14496 }
14497
14498 /* Store words. */
14499 if (dst_aligned && interleave_factor > 1)
14500 {
14501 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14502 TRUE, dstbase, &dstoffset));
14503 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14504 }
14505 else
14506 {
14507 for (j = 0; j < interleave_factor; j++)
14508 {
14509 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14510 - dst_autoinc));
14511 mem = adjust_automodify_address (dstbase, SImode, addr,
14512 dstoffset + j * UNITS_PER_WORD);
14513 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14514 }
14515 dstoffset += block_size_bytes;
14516 }
14517
14518 remaining -= block_size_bytes;
14519 }
14520
14521 /* Copy any whole words left (note these aren't interleaved with any
14522 subsequent halfword/byte load/stores in the interests of simplicity). */
14523
14524 words = remaining / UNITS_PER_WORD;
14525
14526 gcc_assert (words < interleave_factor);
14527
14528 if (src_aligned && words > 1)
14529 {
14530 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14531 &srcoffset));
14532 src_autoinc += UNITS_PER_WORD * words;
14533 }
14534 else
14535 {
14536 for (j = 0; j < words; j++)
14537 {
14538 addr = plus_constant (Pmode, src,
14539 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14540 mem = adjust_automodify_address (srcbase, SImode, addr,
14541 srcoffset + j * UNITS_PER_WORD);
14542 if (src_aligned)
14543 emit_move_insn (regs[j], mem);
14544 else
14545 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14546 }
14547 srcoffset += words * UNITS_PER_WORD;
14548 }
14549
14550 if (dst_aligned && words > 1)
14551 {
14552 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14553 &dstoffset));
14554 dst_autoinc += words * UNITS_PER_WORD;
14555 }
14556 else
14557 {
14558 for (j = 0; j < words; j++)
14559 {
14560 addr = plus_constant (Pmode, dst,
14561 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14562 mem = adjust_automodify_address (dstbase, SImode, addr,
14563 dstoffset + j * UNITS_PER_WORD);
14564 if (dst_aligned)
14565 emit_move_insn (mem, regs[j]);
14566 else
14567 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14568 }
14569 dstoffset += words * UNITS_PER_WORD;
14570 }
14571
14572 remaining -= words * UNITS_PER_WORD;
14573
14574 gcc_assert (remaining < 4);
14575
14576 /* Copy a halfword if necessary. */
14577
14578 if (remaining >= 2)
14579 {
14580 halfword_tmp = gen_reg_rtx (SImode);
14581
14582 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14583 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14584 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14585
14586 /* Either write out immediately, or delay until we've loaded the last
14587 byte, depending on interleave factor. */
14588 if (interleave_factor == 1)
14589 {
14590 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14591 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14592 emit_insn (gen_unaligned_storehi (mem,
14593 gen_lowpart (HImode, halfword_tmp)));
14594 halfword_tmp = NULL;
14595 dstoffset += 2;
14596 }
14597
14598 remaining -= 2;
14599 srcoffset += 2;
14600 }
14601
14602 gcc_assert (remaining < 2);
14603
14604 /* Copy last byte. */
14605
14606 if ((remaining & 1) != 0)
14607 {
14608 byte_tmp = gen_reg_rtx (SImode);
14609
14610 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14611 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14612 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14613
14614 if (interleave_factor == 1)
14615 {
14616 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14617 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14618 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14619 byte_tmp = NULL;
14620 dstoffset++;
14621 }
14622
14623 remaining--;
14624 srcoffset++;
14625 }
14626
14627 /* Store last halfword if we haven't done so already. */
14628
14629 if (halfword_tmp)
14630 {
14631 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14632 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14633 emit_insn (gen_unaligned_storehi (mem,
14634 gen_lowpart (HImode, halfword_tmp)));
14635 dstoffset += 2;
14636 }
14637
14638 /* Likewise for last byte. */
14639
14640 if (byte_tmp)
14641 {
14642 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14643 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14644 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14645 dstoffset++;
14646 }
14647
14648 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14649 }
14650
14651 /* From mips_adjust_block_mem:
14652
14653 Helper function for doing a loop-based block operation on memory
14654 reference MEM. Each iteration of the loop will operate on LENGTH
14655 bytes of MEM.
14656
14657 Create a new base register for use within the loop and point it to
14658 the start of MEM. Create a new memory reference that uses this
14659 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14660
14661 static void
14662 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14663 rtx *loop_mem)
14664 {
14665 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14666
14667 /* Although the new mem does not refer to a known location,
14668 it does keep up to LENGTH bytes of alignment. */
14669 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14670 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14671 }
14672
14673 /* From mips_block_move_loop:
14674
14675 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14676 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14677 the memory regions do not overlap. */
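
/* In outline (illustrative pseudo-code for the RTL emitted below):

     final_src = src_reg + (length - length % bytes_per_iter);
   loop:
     copy bytes_per_iter bytes from *src_reg to *dest_reg;
     src_reg += bytes_per_iter;  dest_reg += bytes_per_iter;
     if (src_reg != final_src) goto loop;
     copy the remaining length % bytes_per_iter bytes;  */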
14678
14679 static void
14680 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14681 unsigned int interleave_factor,
14682 HOST_WIDE_INT bytes_per_iter)
14683 {
14684 rtx src_reg, dest_reg, final_src, test;
14685 HOST_WIDE_INT leftover;
14686
14687 leftover = length % bytes_per_iter;
14688 length -= leftover;
14689
14690 /* Create registers and memory references for use within the loop. */
14691 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14692 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14693
14694 /* Calculate the value that SRC_REG should have after the last iteration of
14695 the loop. */
14696 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14697 0, 0, OPTAB_WIDEN);
14698
14699 /* Emit the start of the loop. */
14700 rtx_code_label *label = gen_label_rtx ();
14701 emit_label (label);
14702
14703 /* Emit the loop body. */
14704 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14705 interleave_factor);
14706
14707 /* Move on to the next block. */
14708 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14709 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14710
14711 /* Emit the loop condition. */
14712 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14713 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14714
14715 /* Mop up any left-over bytes. */
14716 if (leftover)
14717 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14718 }
14719
14720 /* Emit a block move when either the source or destination is unaligned (not
14721 aligned to a four-byte boundary). This may need further tuning depending on
14722 core type, optimize_size setting, etc. */
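
/* Summarizing the heuristics below: when optimizing for size we use an
   interleave factor of 2 (8 bytes per loop iteration) if either side is
   word-aligned and 1 (4 bytes) otherwise, looping only for copies larger
   than 12 bytes; at other optimization levels we use an interleave factor
   of 4 (16 bytes per iteration) and loop for copies larger than 32 bytes.  */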
14723
14724 static int
14725 arm_cpymemqi_unaligned (rtx *operands)
14726 {
14727 HOST_WIDE_INT length = INTVAL (operands[2]);
14728
14729 if (optimize_size)
14730 {
14731 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14732 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14733 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14734 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14735 or dst_aligned though: allow more interleaving in those cases since the
14736 resulting code can be smaller. */
14737 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14738 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14739
14740 if (length > 12)
14741 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14742 interleave_factor, bytes_per_iter);
14743 else
14744 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14745 interleave_factor);
14746 }
14747 else
14748 {
14749 /* Note that the loop created by arm_block_move_unaligned_loop may be
14750 subject to loop unrolling, which makes tuning this condition a little
14751 redundant. */
14752 if (length > 32)
14753 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14754 else
14755 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14756 }
14757
14758 return 1;
14759 }
14760
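/* Expand a cpymemqi pattern using load/store multiple where possible.
   OPERANDS[0] and OPERANDS[1] are the destination and source block MEMs,
   OPERANDS[2] is the (constant) length in bytes and OPERANDS[3] the
   alignment.  Returns 1 on success, 0 if the caller should fall back
   (typically to a library call).  */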
14761 int
14762 arm_gen_cpymemqi (rtx *operands)
14763 {
14764 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14765 HOST_WIDE_INT srcoffset, dstoffset;
14766 rtx src, dst, srcbase, dstbase;
14767 rtx part_bytes_reg = NULL;
14768 rtx mem;
14769
14770 if (!CONST_INT_P (operands[2])
14771 || !CONST_INT_P (operands[3])
14772 || INTVAL (operands[2]) > 64)
14773 return 0;
14774
14775 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14776 return arm_cpymemqi_unaligned (operands);
14777
14778 if (INTVAL (operands[3]) & 3)
14779 return 0;
14780
14781 dstbase = operands[0];
14782 srcbase = operands[1];
14783
14784 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14785 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14786
14787 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14788 out_words_to_go = INTVAL (operands[2]) / 4;
14789 last_bytes = INTVAL (operands[2]) & 3;
14790 dstoffset = srcoffset = 0;
14791
14792 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14793 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14794
14795 while (in_words_to_go >= 2)
14796 {
14797 if (in_words_to_go > 4)
14798 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14799 TRUE, srcbase, &srcoffset));
14800 else
14801 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14802 src, FALSE, srcbase,
14803 &srcoffset));
14804
14805 if (out_words_to_go)
14806 {
14807 if (out_words_to_go > 4)
14808 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14809 TRUE, dstbase, &dstoffset));
14810 else if (out_words_to_go != 1)
14811 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14812 out_words_to_go, dst,
14813 (last_bytes == 0
14814 ? FALSE : TRUE),
14815 dstbase, &dstoffset));
14816 else
14817 {
14818 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14819 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14820 if (last_bytes != 0)
14821 {
14822 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14823 dstoffset += 4;
14824 }
14825 }
14826 }
14827
14828 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14829 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14830 }
14831
14832 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14833 if (out_words_to_go)
14834 {
14835 rtx sreg;
14836
14837 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14838 sreg = copy_to_reg (mem);
14839
14840 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14841 emit_move_insn (mem, sreg);
14842 in_words_to_go--;
14843
14844 gcc_assert (!in_words_to_go); /* Sanity check */
14845 }
14846
14847 if (in_words_to_go)
14848 {
14849 gcc_assert (in_words_to_go > 0);
14850
14851 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14852 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14853 }
14854
14855 gcc_assert (!last_bytes || part_bytes_reg);
14856
14857 if (BYTES_BIG_ENDIAN && last_bytes)
14858 {
14859 rtx tmp = gen_reg_rtx (SImode);
14860
14861 /* The bytes we want are in the top end of the word. */
14862 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14863 GEN_INT (8 * (4 - last_bytes))));
14864 part_bytes_reg = tmp;
14865
14866 while (last_bytes)
14867 {
14868 mem = adjust_automodify_address (dstbase, QImode,
14869 plus_constant (Pmode, dst,
14870 last_bytes - 1),
14871 dstoffset + last_bytes - 1);
14872 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14873
14874 if (--last_bytes)
14875 {
14876 tmp = gen_reg_rtx (SImode);
14877 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14878 part_bytes_reg = tmp;
14879 }
14880 }
14881
14882 }
14883 else
14884 {
14885 if (last_bytes > 1)
14886 {
14887 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14888 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14889 last_bytes -= 2;
14890 if (last_bytes)
14891 {
14892 rtx tmp = gen_reg_rtx (SImode);
14893 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14894 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14895 part_bytes_reg = tmp;
14896 dstoffset += 2;
14897 }
14898 }
14899
14900 if (last_bytes)
14901 {
14902 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14903 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14904 }
14905 }
14906
14907 return 1;
14908 }
14909
14910 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
14911 by mode size. */
14912 inline static rtx
14913 next_consecutive_mem (rtx mem)
14914 {
14915 machine_mode mode = GET_MODE (mem);
14916 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14917 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14918
14919 return adjust_automodify_address (mem, mode, addr, offset);
14920 }
14921
14922 /* Copy using LDRD/STRD instructions whenever possible.
14923 Returns true upon success. */
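
/* Illustrative only: with both buffers doubleword-aligned, the bulk of the
   copy becomes a sequence of register-pair moves such as

	ldrd	r4, r5, [r1]
	strd	r4, r5, [r0]

   per 8 bytes, falling back to pairs of unaligned ldr/str (or to
   arm_gen_cpymemqi) when the alignment does not allow LDRD/STRD.  */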
14924 bool
14925 gen_cpymem_ldrd_strd (rtx *operands)
14926 {
14927 unsigned HOST_WIDE_INT len;
14928 HOST_WIDE_INT align;
14929 rtx src, dst, base;
14930 rtx reg0;
14931 bool src_aligned, dst_aligned;
14932 bool src_volatile, dst_volatile;
14933
14934 gcc_assert (CONST_INT_P (operands[2]));
14935 gcc_assert (CONST_INT_P (operands[3]));
14936
14937 len = UINTVAL (operands[2]);
14938 if (len > 64)
14939 return false;
14940
14941 /* Maximum alignment we can assume for both src and dst buffers. */
14942 align = INTVAL (operands[3]);
14943
14944 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14945 return false;
14946
14947 /* Place src and dst addresses in registers
14948 and update the corresponding mem rtx. */
14949 dst = operands[0];
14950 dst_volatile = MEM_VOLATILE_P (dst);
14951 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14952 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14953 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14954
14955 src = operands[1];
14956 src_volatile = MEM_VOLATILE_P (src);
14957 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14958 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14959 src = adjust_automodify_address (src, VOIDmode, base, 0);
14960
14961 if (!unaligned_access && !(src_aligned && dst_aligned))
14962 return false;
14963
14964 if (src_volatile || dst_volatile)
14965 return false;
14966
14967 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14968 if (!(dst_aligned || src_aligned))
14969 return arm_gen_cpymemqi (operands);
14970
14971 /* If either the src or dst is unaligned we'll be accessing it as pairs
14972 of unaligned SImode accesses. Otherwise we can generate DImode
14973 ldrd/strd instructions. */
14974 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14975 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14976
14977 while (len >= 8)
14978 {
14979 len -= 8;
14980 reg0 = gen_reg_rtx (DImode);
14981 rtx low_reg = NULL_RTX;
14982 rtx hi_reg = NULL_RTX;
14983
14984 if (!src_aligned || !dst_aligned)
14985 {
14986 low_reg = gen_lowpart (SImode, reg0);
14987 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14988 }
14989 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
14990 emit_move_insn (reg0, src);
14991 else if (src_aligned)
14992 emit_insn (gen_unaligned_loaddi (reg0, src));
14993 else
14994 {
14995 emit_insn (gen_unaligned_loadsi (low_reg, src));
14996 src = next_consecutive_mem (src);
14997 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14998 }
14999
15000 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15001 emit_move_insn (dst, reg0);
15002 else if (dst_aligned)
15003 emit_insn (gen_unaligned_storedi (dst, reg0));
15004 else
15005 {
15006 emit_insn (gen_unaligned_storesi (dst, low_reg));
15007 dst = next_consecutive_mem (dst);
15008 emit_insn (gen_unaligned_storesi (dst, hi_reg));
15009 }
15010
15011 src = next_consecutive_mem (src);
15012 dst = next_consecutive_mem (dst);
15013 }
15014
15015 gcc_assert (len < 8);
15016 if (len >= 4)
15017 {
15018 /* At least a word but less than a double-word to copy. Copy a word. */
15019 reg0 = gen_reg_rtx (SImode);
15020 src = adjust_address (src, SImode, 0);
15021 dst = adjust_address (dst, SImode, 0);
15022 if (src_aligned)
15023 emit_move_insn (reg0, src);
15024 else
15025 emit_insn (gen_unaligned_loadsi (reg0, src));
15026
15027 if (dst_aligned)
15028 emit_move_insn (dst, reg0);
15029 else
15030 emit_insn (gen_unaligned_storesi (dst, reg0));
15031
15032 src = next_consecutive_mem (src);
15033 dst = next_consecutive_mem (dst);
15034 len -= 4;
15035 }
15036
15037 if (len == 0)
15038 return true;
15039
15040 /* Copy the remaining bytes. */
15041 if (len >= 2)
15042 {
15043 dst = adjust_address (dst, HImode, 0);
15044 src = adjust_address (src, HImode, 0);
15045 reg0 = gen_reg_rtx (SImode);
15046 if (src_aligned)
15047 emit_insn (gen_zero_extendhisi2 (reg0, src));
15048 else
15049 emit_insn (gen_unaligned_loadhiu (reg0, src));
15050
15051 if (dst_aligned)
15052 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
15053 else
15054 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15055
15056 src = next_consecutive_mem (src);
15057 dst = next_consecutive_mem (dst);
15058 if (len == 2)
15059 return true;
15060 }
15061
15062 dst = adjust_address (dst, QImode, 0);
15063 src = adjust_address (src, QImode, 0);
15064 reg0 = gen_reg_rtx (QImode);
15065 emit_move_insn (reg0, src);
15066 emit_move_insn (dst, reg0);
15067 return true;
15068 }
15069
15070 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15071 into their component 32-bit subregs. OP2 may be an immediate
15072 constant and we want to simplify it in that case. */
15073 void
15074 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15075 rtx *lo_op2, rtx *hi_op2)
15076 {
15077 *lo_op1 = gen_lowpart (SImode, op1);
15078 *hi_op1 = gen_highpart (SImode, op1);
15079 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15080 subreg_lowpart_offset (SImode, DImode));
15081 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15082 subreg_highpart_offset (SImode, DImode));
15083 }
15084
15085 /* Select a dominance comparison mode if possible for a test of the general
15086 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15087 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15088 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15089 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15090 In all cases OP will be either EQ or NE, but we don't need to know which
15091 here. If we are unable to support a dominance comparison we return
15092 CC mode. This will then fail to match for the RTL expressions that
15093 generate this call. */
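
/* For example (illustrative), a test such as (x == 1 || x == 2) can use a
   dominance mode so that both comparisons share one flag-setting sequence:

	cmp	r0, #1
	cmpne	r0, #2
	beq	<taken>

   instead of computing each comparison result separately.  */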
15094 machine_mode
15095 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15096 {
15097 enum rtx_code cond1, cond2;
15098 int swapped = 0;
15099
15100 /* Currently we will probably get the wrong result if the individual
15101 comparisons are not simple. This also ensures that it is safe to
15102 reverse a comparison if necessary. */
15103 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15104 != CCmode)
15105 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15106 != CCmode))
15107 return CCmode;
15108
15109 /* The if_then_else variant of this tests the second condition if the
15110 first passes, but is true if the first fails. Reverse the first
15111 condition to get a true "inclusive-or" expression. */
15112 if (cond_or == DOM_CC_NX_OR_Y)
15113 cond1 = reverse_condition (cond1);
15114
15115 /* If the comparisons are not equal, and one doesn't dominate the other,
15116 then we can't do this. */
15117 if (cond1 != cond2
15118 && !comparison_dominates_p (cond1, cond2)
15119 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15120 return CCmode;
15121
15122 if (swapped)
15123 std::swap (cond1, cond2);
15124
15125 switch (cond1)
15126 {
15127 case EQ:
15128 if (cond_or == DOM_CC_X_AND_Y)
15129 return CC_DEQmode;
15130
15131 switch (cond2)
15132 {
15133 case EQ: return CC_DEQmode;
15134 case LE: return CC_DLEmode;
15135 case LEU: return CC_DLEUmode;
15136 case GE: return CC_DGEmode;
15137 case GEU: return CC_DGEUmode;
15138 default: gcc_unreachable ();
15139 }
15140
15141 case LT:
15142 if (cond_or == DOM_CC_X_AND_Y)
15143 return CC_DLTmode;
15144
15145 switch (cond2)
15146 {
15147 case LT:
15148 return CC_DLTmode;
15149 case LE:
15150 return CC_DLEmode;
15151 case NE:
15152 return CC_DNEmode;
15153 default:
15154 gcc_unreachable ();
15155 }
15156
15157 case GT:
15158 if (cond_or == DOM_CC_X_AND_Y)
15159 return CC_DGTmode;
15160
15161 switch (cond2)
15162 {
15163 case GT:
15164 return CC_DGTmode;
15165 case GE:
15166 return CC_DGEmode;
15167 case NE:
15168 return CC_DNEmode;
15169 default:
15170 gcc_unreachable ();
15171 }
15172
15173 case LTU:
15174 if (cond_or == DOM_CC_X_AND_Y)
15175 return CC_DLTUmode;
15176
15177 switch (cond2)
15178 {
15179 case LTU:
15180 return CC_DLTUmode;
15181 case LEU:
15182 return CC_DLEUmode;
15183 case NE:
15184 return CC_DNEmode;
15185 default:
15186 gcc_unreachable ();
15187 }
15188
15189 case GTU:
15190 if (cond_or == DOM_CC_X_AND_Y)
15191 return CC_DGTUmode;
15192
15193 switch (cond2)
15194 {
15195 case GTU:
15196 return CC_DGTUmode;
15197 case GEU:
15198 return CC_DGEUmode;
15199 case NE:
15200 return CC_DNEmode;
15201 default:
15202 gcc_unreachable ();
15203 }
15204
15205 /* The remaining cases only occur when both comparisons are the
15206 same. */
15207 case NE:
15208 gcc_assert (cond1 == cond2);
15209 return CC_DNEmode;
15210
15211 case LE:
15212 gcc_assert (cond1 == cond2);
15213 return CC_DLEmode;
15214
15215 case GE:
15216 gcc_assert (cond1 == cond2);
15217 return CC_DGEmode;
15218
15219 case LEU:
15220 gcc_assert (cond1 == cond2);
15221 return CC_DLEUmode;
15222
15223 case GEU:
15224 gcc_assert (cond1 == cond2);
15225 return CC_DGEUmode;
15226
15227 default:
15228 gcc_unreachable ();
15229 }
15230 }
15231
15232 machine_mode
15233 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15234 {
15235 /* All floating point compares return CCFP if it is an equality
15236 comparison, and CCFPE otherwise. */
15237 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15238 {
15239 switch (op)
15240 {
15241 case EQ:
15242 case NE:
15243 case UNORDERED:
15244 case ORDERED:
15245 case UNLT:
15246 case UNLE:
15247 case UNGT:
15248 case UNGE:
15249 case UNEQ:
15250 case LTGT:
15251 return CCFPmode;
15252
15253 case LT:
15254 case LE:
15255 case GT:
15256 case GE:
15257 return CCFPEmode;
15258
15259 default:
15260 gcc_unreachable ();
15261 }
15262 }
15263
15264 /* A compare with a shifted operand. Because of canonicalization, the
15265 comparison will have to be swapped when we emit the assembler. */
15266 if (GET_MODE (y) == SImode
15267 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15268 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15269 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15270 || GET_CODE (x) == ROTATERT))
15271 return CC_SWPmode;
15272
15273 /* A widened compare of the sum of a value plus a carry against a
15274 constant. This is a representation of RSC. We want to swap the
15275 result of the comparison at output. Not valid if the Z bit is
15276 needed. */
15277 if (GET_MODE (x) == DImode
15278 && GET_CODE (x) == PLUS
15279 && arm_borrow_operation (XEXP (x, 1), DImode)
15280 && CONST_INT_P (y)
15281 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15282 && (op == LE || op == GT))
15283 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
15284 && (op == LEU || op == GTU))))
15285 return CC_SWPmode;
15286
15287 /* If X is a constant we want to use CC_RSBmode. This is
15288 non-canonical, but arm_gen_compare_reg uses this to generate the
15289 correct canonical form. */
15290 if (GET_MODE (y) == SImode
15291 && (REG_P (y) || GET_CODE (y) == SUBREG)
15292 && CONST_INT_P (x))
15293 return CC_RSBmode;
15294
15295 /* This operation is performed swapped, but since we only rely on the Z
15296 flag we don't need an additional mode. */
15297 if (GET_MODE (y) == SImode
15298 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15299 && GET_CODE (x) == NEG
15300 && (op == EQ || op == NE))
15301 return CC_Zmode;
15302
15303 /* This is a special case that is used by combine to allow a
15304 comparison of a shifted byte load to be split into a zero-extend
15305 followed by a comparison of the shifted integer (only valid for
15306 equalities and unsigned inequalities). */
15307 if (GET_MODE (x) == SImode
15308 && GET_CODE (x) == ASHIFT
15309 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15310 && GET_CODE (XEXP (x, 0)) == SUBREG
15311 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15312 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15313 && (op == EQ || op == NE
15314 || op == GEU || op == GTU || op == LTU || op == LEU)
15315 && CONST_INT_P (y))
15316 return CC_Zmode;
15317
15318 /* A construct for a conditional compare: if the false arm contains
15319 0, then both conditions must be true; otherwise either condition
15320 must be true. Not all conditions are possible, so CCmode is
15321 returned if it can't be done. */
15322 if (GET_CODE (x) == IF_THEN_ELSE
15323 && (XEXP (x, 2) == const0_rtx
15324 || XEXP (x, 2) == const1_rtx)
15325 && COMPARISON_P (XEXP (x, 0))
15326 && COMPARISON_P (XEXP (x, 1)))
15327 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15328 INTVAL (XEXP (x, 2)));
15329
15330 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15331 if (GET_CODE (x) == AND
15332 && (op == EQ || op == NE)
15333 && COMPARISON_P (XEXP (x, 0))
15334 && COMPARISON_P (XEXP (x, 1)))
15335 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15336 DOM_CC_X_AND_Y);
15337
15338 if (GET_CODE (x) == IOR
15339 && (op == EQ || op == NE)
15340 && COMPARISON_P (XEXP (x, 0))
15341 && COMPARISON_P (XEXP (x, 1)))
15342 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15343 DOM_CC_X_OR_Y);
15344
15345 /* An operation (on Thumb) where we want to test for a single bit.
15346 This is done by shifting that bit up into the top bit of a
15347 scratch register; we can then branch on the sign bit. */
15348 if (TARGET_THUMB1
15349 && GET_MODE (x) == SImode
15350 && (op == EQ || op == NE)
15351 && GET_CODE (x) == ZERO_EXTRACT
15352 && XEXP (x, 1) == const1_rtx)
15353 return CC_Nmode;
15354
15355 /* For an operation that sets the condition codes as a side-effect, the
15356 V flag is not set correctly, so we can only use comparisons where
15357 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15358 instead.) */
15359 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15360 if (GET_MODE (x) == SImode
15361 && y == const0_rtx
15362 && (op == EQ || op == NE || op == LT || op == GE)
15363 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15364 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15365 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15366 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15367 || GET_CODE (x) == LSHIFTRT
15368 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15369 || GET_CODE (x) == ROTATERT
15370 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15371 return CC_NOOVmode;
15372
15373 /* A comparison of ~reg with a const is really a special
15374 canonicalization of compare (~const, reg), which is a reverse
15375 subtract operation. We may not get here if CONST is 0, but that
15376 doesn't matter because ~0 isn't a valid immediate for RSB. */
15377 if (GET_MODE (x) == SImode
15378 && GET_CODE (x) == NOT
15379 && CONST_INT_P (y))
15380 return CC_RSBmode;
15381
15382 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15383 return CC_Zmode;
15384
15385 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15386 && GET_CODE (x) == PLUS
15387 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15388 return CC_Cmode;
15389
15390 if (GET_MODE (x) == DImode
15391 && GET_CODE (x) == PLUS
15392 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
15393 && CONST_INT_P (y)
15394 && UINTVAL (y) == 0x800000000
15395 && (op == GEU || op == LTU))
15396 return CC_ADCmode;
15397
15398 if (GET_MODE (x) == DImode
15399 && (op == GE || op == LT)
15400 && GET_CODE (x) == SIGN_EXTEND
15401 && ((GET_CODE (y) == PLUS
15402 && arm_borrow_operation (XEXP (y, 0), DImode))
15403 || arm_borrow_operation (y, DImode)))
15404 return CC_NVmode;
15405
15406 if (GET_MODE (x) == DImode
15407 && (op == GEU || op == LTU)
15408 && GET_CODE (x) == ZERO_EXTEND
15409 && ((GET_CODE (y) == PLUS
15410 && arm_borrow_operation (XEXP (y, 0), DImode))
15411 || arm_borrow_operation (y, DImode)))
15412 return CC_Bmode;
15413
15414 if (GET_MODE (x) == DImode
15415 && (op == EQ || op == NE)
15416 && GET_CODE (x) == PLUS
15417 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15418 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
15419 && GET_CODE (y) == SIGN_EXTEND
15420 && GET_CODE (XEXP (y, 0)) == PLUS)
15421 return CC_Vmode;
15422
15423 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15424 return GET_MODE (x);
15425
15426 return CCmode;
15427 }
15428
15429 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
15430 the sequence of instructions needed to generate a suitable condition
15431 code register. Return the CC register result. */
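
/* Illustrative examples of the kind of sequences built below (the exact
   output depends on the operands and on later passes); register names are
   schematic:

     x == 0:	orrs	tmp, x_lo, x_hi		@ Z set iff x == 0
     x == y:	cmp	x_lo, y_lo
		cmpeq	x_hi, y_hi		@ Z set iff x == y
     x < y:	cmp	x_lo, y_lo
		sbcs	tmp, x_hi, y_hi		@ signed result in N and V  */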
15432 static rtx
15433 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
15434 {
15435 machine_mode mode;
15436 rtx cc_reg;
15437
15438 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
15439 gcc_assert (TARGET_32BIT);
15440 gcc_assert (!CONST_INT_P (x));
15441
15442 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
15443 subreg_lowpart_offset (SImode, DImode));
15444 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
15445 subreg_highpart_offset (SImode, DImode));
15446 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
15447 subreg_lowpart_offset (SImode, DImode));
15448 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
15449 subreg_highpart_offset (SImode, DImode));
15450 switch (code)
15451 {
15452 case EQ:
15453 case NE:
15454 {
15455 if (y_lo == const0_rtx || y_hi == const0_rtx)
15456 {
15457 if (y_lo != const0_rtx)
15458 {
15459 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
15460
15461 gcc_assert (y_hi == const0_rtx);
15462 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
15463 if (!arm_add_operand (y_lo, SImode))
15464 y_lo = force_reg (SImode, y_lo);
15465 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
15466 x_lo = scratch2;
15467 }
15468 else if (y_hi != const0_rtx)
15469 {
15470 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
15471
15472 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
15473 if (!arm_add_operand (y_hi, SImode))
15474 y_hi = force_reg (SImode, y_hi);
15475 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
15476 x_hi = scratch2;
15477 }
15478
15479 if (!scratch)
15480 {
15481 gcc_assert (!reload_completed);
15482 scratch = gen_rtx_SCRATCH (SImode);
15483 }
15484
15485 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15486 cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
15487
15488 rtx set
15489 = gen_rtx_SET (cc_reg,
15490 gen_rtx_COMPARE (CC_NOOVmode,
15491 gen_rtx_IOR (SImode, x_lo, x_hi),
15492 const0_rtx));
15493 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
15494 clobber)));
15495 return cc_reg;
15496 }
15497
15498 if (!arm_add_operand (y_lo, SImode))
15499 y_lo = force_reg (SImode, y_lo);
15500
15501 if (!arm_add_operand (y_hi, SImode))
15502 y_hi = force_reg (SImode, y_hi);
15503
15504 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
15505 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
15506 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
15507 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
15508 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15509
15510 emit_insn (gen_rtx_SET (cc_reg,
15511 gen_rtx_COMPARE (VOIDmode, conjunction,
15512 const0_rtx)));
15513 return cc_reg;
15514 }
15515
15516 case LT:
15517 case GE:
15518 {
15519 if (y_lo == const0_rtx)
15520 {
15521 /* If the low word of y is 0, then this is simply a normal
15522 compare of the upper words. */
15523 if (!arm_add_operand (y_hi, SImode))
15524 y_hi = force_reg (SImode, y_hi);
15525
15526 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
15527 }
15528
15529 if (!arm_add_operand (y_lo, SImode))
15530 y_lo = force_reg (SImode, y_lo);
15531
15532 rtx cmp1
15533 = gen_rtx_LTU (DImode,
15534 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
15535 const0_rtx);
15536
15537 if (!scratch)
15538 scratch = gen_rtx_SCRATCH (SImode);
15539
15540 if (!arm_not_operand (y_hi, SImode))
15541 y_hi = force_reg (SImode, y_hi);
15542
15543 rtx_insn *insn;
15544 if (y_hi == const0_rtx)
15545 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
15546 cmp1));
15547 else if (CONST_INT_P (y_hi))
15548 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
15549 y_hi, cmp1));
15550 else
15551 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
15552 cmp1));
15553 return SET_DEST (single_set (insn));
15554 }
15555
15556 case LE:
15557 case GT:
15558 {
15559 /* During expansion, we only expect to get here if y is a
15560 constant that we want to handle; otherwise we should have
15561 swapped the operands already. */
15562 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
15563
15564 if (!const_ok_for_arm (INTVAL (y_lo)))
15565 y_lo = force_reg (SImode, y_lo);
15566
15567 /* Perform a reverse subtract and compare. */
15568 rtx cmp1
15569 = gen_rtx_LTU (DImode,
15570 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
15571 const0_rtx);
15572 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
15573 x_hi, cmp1));
15574 return SET_DEST (single_set (insn));
15575 }
15576
15577 case LTU:
15578 case GEU:
15579 {
15580 if (y_lo == const0_rtx)
15581 {
15582 /* If the low word of y is 0, then this is simply a normal
15583 compare of the upper words. */
15584 if (!arm_add_operand (y_hi, SImode))
15585 y_hi = force_reg (SImode, y_hi);
15586
15587 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
15588 }
15589
15590 if (!arm_add_operand (y_lo, SImode))
15591 y_lo = force_reg (SImode, y_lo);
15592
15593 rtx cmp1
15594 = gen_rtx_LTU (DImode,
15595 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
15596 const0_rtx);
15597
15598 if (!scratch)
15599 scratch = gen_rtx_SCRATCH (SImode);
15600 if (!arm_not_operand (y_hi, SImode))
15601 y_hi = force_reg (SImode, y_hi);
15602
15603 rtx_insn *insn;
15604 if (y_hi == const0_rtx)
15605 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
15606 cmp1));
15607 else if (CONST_INT_P (y_hi))
15608 {
15609 /* Constant is viewed as unsigned when zero-extended. */
15610 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
15611 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
15612 y_hi, cmp1));
15613 }
15614 else
15615 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
15616 cmp1));
15617 return SET_DEST (single_set (insn));
15618 }
15619
15620 case LEU:
15621 case GTU:
15622 {
15623 /* During expansion, we only expect to get here if y is a
15624 constant that we want to handle; otherwise we should have
15625 swapped the operands already. */
15626 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
15627
15628 if (!const_ok_for_arm (INTVAL (y_lo)))
15629 y_lo = force_reg (SImode, y_lo);
15630
15631 /* Perform a reverse subtract and compare. */
15632 rtx cmp1
15633 = gen_rtx_LTU (DImode,
15634 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
15635 const0_rtx);
15636 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
15637 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
15638 x_hi, cmp1));
15639 return SET_DEST (single_set (insn));
15640 }
15641
15642 default:
15643 gcc_unreachable ();
15644 }
15645 }
15646
15647 /* X and Y are two things to compare using CODE. Emit the compare insn and
15648 return the rtx for the CC register in the proper mode. */
15649 rtx
15650 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
15651 {
15652 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15653 return arm_gen_dicompare_reg (code, x, y, scratch);
15654
15655 machine_mode mode = SELECT_CC_MODE (code, x, y);
15656 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15657 if (mode == CC_RSBmode)
15658 {
15659 if (!scratch)
15660 scratch = gen_rtx_SCRATCH (SImode);
15661 emit_insn (gen_rsb_imm_compare_scratch (scratch,
15662 GEN_INT (~UINTVAL (x)), y));
15663 }
15664 else
15665 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15666
15667 return cc_reg;
15668 }
15669
15670 /* Generate a sequence of insns that will generate the correct return
15671 address mask depending on the physical architecture that the program
15672 is running on. */
15673 rtx
15674 arm_gen_return_addr_mask (void)
15675 {
15676 rtx reg = gen_reg_rtx (Pmode);
15677
15678 emit_insn (gen_return_addr_mask (reg));
15679 return reg;
15680 }
15681
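/* Handle loading a half-word from memory during reload by synthesizing it as
   two byte loads that are then merged with a shift and an IOR.  OPERANDS[0]
   is the destination, OPERANDS[1] the half-word memory reference (possibly a
   spilled pseudo) and OPERANDS[2] a DImode scratch register pair.  */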
15682 void
15683 arm_reload_in_hi (rtx *operands)
15684 {
15685 rtx ref = operands[1];
15686 rtx base, scratch;
15687 HOST_WIDE_INT offset = 0;
15688
15689 if (GET_CODE (ref) == SUBREG)
15690 {
15691 offset = SUBREG_BYTE (ref);
15692 ref = SUBREG_REG (ref);
15693 }
15694
15695 if (REG_P (ref))
15696 {
15697 /* We have a pseudo which has been spilt onto the stack; there
15698 are two cases here: the first where there is a simple
15699 stack-slot replacement and a second where the stack-slot is
15700 out of range, or is used as a subreg. */
15701 if (reg_equiv_mem (REGNO (ref)))
15702 {
15703 ref = reg_equiv_mem (REGNO (ref));
15704 base = find_replacement (&XEXP (ref, 0));
15705 }
15706 else
15707 /* The slot is out of range, or was dressed up in a SUBREG. */
15708 base = reg_equiv_address (REGNO (ref));
15709
15710 /* PR 62554: If there is no equivalent memory location then just move
15711 the value as an SImode register move. This happens when the target
15712 architecture variant does not have an HImode register move. */
15713 if (base == NULL)
15714 {
15715 gcc_assert (REG_P (operands[0]));
15716 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15717 gen_rtx_SUBREG (SImode, ref, 0)));
15718 return;
15719 }
15720 }
15721 else
15722 base = find_replacement (&XEXP (ref, 0));
15723
15724 /* Handle the case where the address is too complex to be offset by 1. */
15725 if (GET_CODE (base) == MINUS
15726 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15727 {
15728 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15729
15730 emit_set_insn (base_plus, base);
15731 base = base_plus;
15732 }
15733 else if (GET_CODE (base) == PLUS)
15734 {
15735 /* The addend must be CONST_INT, or we would have dealt with it above. */
15736 HOST_WIDE_INT hi, lo;
15737
15738 offset += INTVAL (XEXP (base, 1));
15739 base = XEXP (base, 0);
15740
15741 /* Rework the address into a legal sequence of insns. */
15742 /* Valid range for lo is -4095 -> 4095 */
15743 lo = (offset >= 0
15744 ? (offset & 0xfff)
15745 : -((-offset) & 0xfff));
15746
15747 /* Corner case, if lo is the max offset then we would be out of range
15748 once we have added the additional 1 below, so bump the msb into the
15749 pre-loading insn(s). */
15750 if (lo == 4095)
15751 lo &= 0x7ff;
15752
15753 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15754 ^ (HOST_WIDE_INT) 0x80000000)
15755 - (HOST_WIDE_INT) 0x80000000);
15756
15757 gcc_assert (hi + lo == offset);
15758
15759 if (hi != 0)
15760 {
15761 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15762
15763 /* Get the base address; addsi3 knows how to handle constants
15764 that require more than one insn. */
15765 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15766 base = base_plus;
15767 offset = lo;
15768 }
15769 }
15770
15771 /* Operands[2] may overlap operands[0] (though it won't overlap
15772 operands[1]), that's why we asked for a DImode reg -- so we can
15773 use the bit that does not overlap. */
15774 if (REGNO (operands[2]) == REGNO (operands[0]))
15775 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15776 else
15777 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15778
15779 emit_insn (gen_zero_extendqisi2 (scratch,
15780 gen_rtx_MEM (QImode,
15781 plus_constant (Pmode, base,
15782 offset))));
15783 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15784 gen_rtx_MEM (QImode,
15785 plus_constant (Pmode, base,
15786 offset + 1))));
15787 if (!BYTES_BIG_ENDIAN)
15788 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15789 gen_rtx_IOR (SImode,
15790 gen_rtx_ASHIFT
15791 (SImode,
15792 gen_rtx_SUBREG (SImode, operands[0], 0),
15793 GEN_INT (8)),
15794 scratch));
15795 else
15796 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15797 gen_rtx_IOR (SImode,
15798 gen_rtx_ASHIFT (SImode, scratch,
15799 GEN_INT (8)),
15800 gen_rtx_SUBREG (SImode, operands[0], 0)));
15801 }
15802
15803 /* Handle storing a half-word to memory during reload by synthesizing it as
15804 byte stores. Take care not to clobber the input values until after we
15805 have moved them somewhere safe. This code assumes that if the DImode
15806 scratch in operands[2] overlaps either the input value or output address
15807 in some way, then that value must die in this insn (we absolutely need
15808 two scratch registers for some corner cases). */
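
/* For little-endian (illustrative only, with schematic register names), the
   store comes out roughly as:

	strb	outval, [base, #offset]		@ low byte
	lsr	scratch, outval, #8
	strb	scratch, [base, #offset + 1]	@ high byte

   with the byte order reversed for big-endian targets.  */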
15809 void
15810 arm_reload_out_hi (rtx *operands)
15811 {
15812 rtx ref = operands[0];
15813 rtx outval = operands[1];
15814 rtx base, scratch;
15815 HOST_WIDE_INT offset = 0;
15816
15817 if (GET_CODE (ref) == SUBREG)
15818 {
15819 offset = SUBREG_BYTE (ref);
15820 ref = SUBREG_REG (ref);
15821 }
15822
15823 if (REG_P (ref))
15824 {
15825 /* We have a pseudo which has been spilt onto the stack; there
15826 are two cases here: the first where there is a simple
15827 stack-slot replacement and a second where the stack-slot is
15828 out of range, or is used as a subreg. */
15829 if (reg_equiv_mem (REGNO (ref)))
15830 {
15831 ref = reg_equiv_mem (REGNO (ref));
15832 base = find_replacement (&XEXP (ref, 0));
15833 }
15834 else
15835 /* The slot is out of range, or was dressed up in a SUBREG. */
15836 base = reg_equiv_address (REGNO (ref));
15837
15838 /* PR 62254: If there is no equivalent memory location then just move
15839 the value as an SImode register move. This happens when the target
15840 architecture variant does not have an HImode register move. */
15841 if (base == NULL)
15842 {
15843 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15844
15845 if (REG_P (outval))
15846 {
15847 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15848 gen_rtx_SUBREG (SImode, outval, 0)));
15849 }
15850 else /* SUBREG_P (outval) */
15851 {
15852 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15853 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15854 SUBREG_REG (outval)));
15855 else
15856 /* FIXME: Handle other cases ? */
15857 gcc_unreachable ();
15858 }
15859 return;
15860 }
15861 }
15862 else
15863 base = find_replacement (&XEXP (ref, 0));
15864
15865 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15866
15867 /* Handle the case where the address is too complex to be offset by 1. */
15868 if (GET_CODE (base) == MINUS
15869 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15870 {
15871 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15872
15873 /* Be careful not to destroy OUTVAL. */
15874 if (reg_overlap_mentioned_p (base_plus, outval))
15875 {
15876 /* Updating base_plus might destroy outval, see if we can
15877 swap the scratch and base_plus. */
15878 if (!reg_overlap_mentioned_p (scratch, outval))
15879 std::swap (scratch, base_plus);
15880 else
15881 {
15882 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15883
15884 /* Be conservative and copy OUTVAL into the scratch now,
15885 this should only be necessary if outval is a subreg
15886 of something larger than a word. */
15887 /* XXX Might this clobber base? I can't see how it can,
15888 since scratch is known to overlap with OUTVAL, and
15889 must be wider than a word. */
15890 emit_insn (gen_movhi (scratch_hi, outval));
15891 outval = scratch_hi;
15892 }
15893 }
15894
15895 emit_set_insn (base_plus, base);
15896 base = base_plus;
15897 }
15898 else if (GET_CODE (base) == PLUS)
15899 {
15900 /* The addend must be CONST_INT, or we would have dealt with it above. */
15901 HOST_WIDE_INT hi, lo;
15902
15903 offset += INTVAL (XEXP (base, 1));
15904 base = XEXP (base, 0);
15905
15906 /* Rework the address into a legal sequence of insns. */
15907 /* Valid range for lo is -4095 -> 4095 */
15908 lo = (offset >= 0
15909 ? (offset & 0xfff)
15910 : -((-offset) & 0xfff));
15911
15912 /* Corner case, if lo is the max offset then we would be out of range
15913 once we have added the additional 1 below, so bump the msb into the
15914 pre-loading insn(s). */
15915 if (lo == 4095)
15916 lo &= 0x7ff;
15917
15918 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15919 ^ (HOST_WIDE_INT) 0x80000000)
15920 - (HOST_WIDE_INT) 0x80000000);
15921
15922 gcc_assert (hi + lo == offset);
15923
15924 if (hi != 0)
15925 {
15926 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15927
15928 /* Be careful not to destroy OUTVAL. */
15929 if (reg_overlap_mentioned_p (base_plus, outval))
15930 {
15931 /* Updating base_plus might destroy outval, see if we
15932 can swap the scratch and base_plus. */
15933 if (!reg_overlap_mentioned_p (scratch, outval))
15934 std::swap (scratch, base_plus);
15935 else
15936 {
15937 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15938
15939 /* Be conservative and copy outval into scratch now,
15940 this should only be necessary if outval is a
15941 subreg of something larger than a word. */
15942 /* XXX Might this clobber base? I can't see how it
15943 can, since scratch is known to overlap with
15944 outval. */
15945 emit_insn (gen_movhi (scratch_hi, outval));
15946 outval = scratch_hi;
15947 }
15948 }
15949
15950 /* Get the base address; addsi3 knows how to handle constants
15951 that require more than one insn. */
15952 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15953 base = base_plus;
15954 offset = lo;
15955 }
15956 }
15957
15958 if (BYTES_BIG_ENDIAN)
15959 {
15960 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15961 plus_constant (Pmode, base,
15962 offset + 1)),
15963 gen_lowpart (QImode, outval)));
15964 emit_insn (gen_lshrsi3 (scratch,
15965 gen_rtx_SUBREG (SImode, outval, 0),
15966 GEN_INT (8)));
15967 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15968 offset)),
15969 gen_lowpart (QImode, scratch)));
15970 }
15971 else
15972 {
15973 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15974 offset)),
15975 gen_lowpart (QImode, outval)));
15976 emit_insn (gen_lshrsi3 (scratch,
15977 gen_rtx_SUBREG (SImode, outval, 0),
15978 GEN_INT (8)));
15979 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15980 plus_constant (Pmode, base,
15981 offset + 1)),
15982 gen_lowpart (QImode, scratch)));
15983 }
15984 }
15985
15986 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15987 (padded to the size of a word) should be passed in a register. */
15988
15989 static bool
15990 arm_must_pass_in_stack (const function_arg_info &arg)
15991 {
15992 if (TARGET_AAPCS_BASED)
15993 return must_pass_in_stack_var_size (arg);
15994 else
15995 return must_pass_in_stack_var_size_or_pad (arg);
15996 }
15997
15998
15999 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16000 byte of a stack argument has useful data. For legacy APCS ABIs we use
16001 the default. For AAPCS based ABIs small aggregate types are placed
16002 at the lowest memory address. */
16003
16004 static pad_direction
16005 arm_function_arg_padding (machine_mode mode, const_tree type)
16006 {
16007 if (!TARGET_AAPCS_BASED)
16008 return default_function_arg_padding (mode, type);
16009
16010 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16011 return PAD_DOWNWARD;
16012
16013 return PAD_UPWARD;
16014 }
16015
16016
16017 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16018 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16019 register has useful data, and return the opposite if the most
16020 significant byte does. */
16021
16022 bool
16023 arm_pad_reg_upward (machine_mode mode,
16024 tree type, int first ATTRIBUTE_UNUSED)
16025 {
16026 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16027 {
16028 /* For AAPCS, small aggregates, small fixed-point types,
16029 and small complex types are always padded upwards. */
16030 if (type)
16031 {
16032 if ((AGGREGATE_TYPE_P (type)
16033 || TREE_CODE (type) == COMPLEX_TYPE
16034 || FIXED_POINT_TYPE_P (type))
16035 && int_size_in_bytes (type) <= 4)
16036 return true;
16037 }
16038 else
16039 {
16040 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16041 && GET_MODE_SIZE (mode) <= 4)
16042 return true;
16043 }
16044 }
16045
16046 /* Otherwise, use default padding. */
16047 return !BYTES_BIG_ENDIAN;
16048 }
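/* Illustrative example (comment only, not from the original sources): on a
   big-endian AAPCS target a 3-byte aggregate makes arm_pad_reg_upward return
   true, while a plain 'int' falls through to the default rule and returns
   !BYTES_BIG_ENDIAN, i.e. false.  */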
16049
16050 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16051 assuming that the address in the base register is word aligned. */
16052 bool
16053 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16054 {
16055 HOST_WIDE_INT max_offset;
16056
16057 /* Offset must be a multiple of 4 in Thumb mode. */
16058 if (TARGET_THUMB2 && ((offset & 3) != 0))
16059 return false;
16060
16061 if (TARGET_THUMB2)
16062 max_offset = 1020;
16063 else if (TARGET_ARM)
16064 max_offset = 255;
16065 else
16066 return false;
16067
16068 return ((offset <= max_offset) && (offset >= -max_offset));
16069 }
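/* Illustrative example (comment only): an offset of 1020 is accepted for
   Thumb-2 (a multiple of 4 and within 1020) but rejected for ARM state,
   whose limit is 255; an offset of 6 is accepted for ARM state but rejected
   for Thumb-2 because it is not a multiple of 4.  */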
16070
16071 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16072 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16073 Assumes that the address in the base register RN is word aligned. Pattern
16074 guarantees that both memory accesses use the same base register,
16075 the offsets are constants within the range, and the gap between the offsets is 4.
16076 If reload is complete, check that the registers are legal. WBACK indicates whether
16077 the address is updated. LOAD indicates whether the memory access is a load or a store. */
16078 bool
16079 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16080 bool wback, bool load)
16081 {
16082 unsigned int t, t2, n;
16083
16084 if (!reload_completed)
16085 return true;
16086
16087 if (!offset_ok_for_ldrd_strd (offset))
16088 return false;
16089
16090 t = REGNO (rt);
16091 t2 = REGNO (rt2);
16092 n = REGNO (rn);
16093
16094 if ((TARGET_THUMB2)
16095 && ((wback && (n == t || n == t2))
16096 || (t == SP_REGNUM)
16097 || (t == PC_REGNUM)
16098 || (t2 == SP_REGNUM)
16099 || (t2 == PC_REGNUM)
16100 || (!load && (n == PC_REGNUM))
16101 || (load && (t == t2))
16102 /* Triggers Cortex-M3 LDRD errata. */
16103 || (!wback && load && fix_cm3_ldrd && (n == t))))
16104 return false;
16105
16106 if ((TARGET_ARM)
16107 && ((wback && (n == t || n == t2))
16108 || (t2 == PC_REGNUM)
16109 || (t % 2 != 0) /* First destination register is not even. */
16110 || (t2 != t + 1)
16111 /* PC can be used as base register (for offset addressing only),
16112 but it is deprecated. */
16113 || (n == PC_REGNUM)))
16114 return false;
16115
16116 return true;
16117 }
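/* Illustrative example (comment only): after reload, in ARM state the pair
   (r2, r3) with base r4 and an in-range offset is accepted, whereas (r1, r2)
   is rejected because the first destination register must be even and the
   second must be the next consecutive register.  */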
16118
16119 /* Return true if a 64-bit access with alignment ALIGN and with a
16120 constant offset OFFSET from the base pointer is permitted on this
16121 architecture. */
16122 static bool
16123 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16124 {
16125 return (unaligned_access
16126 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16127 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16128 }
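/* Illustrative example (comment only): when unaligned access is enabled a
   32-bit aligned pair with an offset that is a multiple of 4 is acceptable;
   otherwise the data must be 64-bit aligned and the offset a multiple of 8.  */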
16129
16130 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16131 operand MEM's address contains an immediate offset from the base
16132 register and has no side effects, in which case it sets BASE,
16133 OFFSET and ALIGN accordingly. */
16134 static bool
16135 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16136 {
16137 rtx addr;
16138
16139 gcc_assert (base != NULL && offset != NULL);
16140
16141 /* TODO: Handle more general memory operand patterns, such as
16142 PRE_DEC and PRE_INC. */
16143
16144 if (side_effects_p (mem))
16145 return false;
16146
16147 /* Can't deal with subregs. */
16148 if (GET_CODE (mem) == SUBREG)
16149 return false;
16150
16151 gcc_assert (MEM_P (mem));
16152
16153 *offset = const0_rtx;
16154 *align = MEM_ALIGN (mem);
16155
16156 addr = XEXP (mem, 0);
16157
16158 /* If addr isn't valid for DImode, then we can't handle it. */
16159 if (!arm_legitimate_address_p (DImode, addr,
16160 reload_in_progress || reload_completed))
16161 return false;
16162
16163 if (REG_P (addr))
16164 {
16165 *base = addr;
16166 return true;
16167 }
16168 else if (GET_CODE (addr) == PLUS)
16169 {
16170 *base = XEXP (addr, 0);
16171 *offset = XEXP (addr, 1);
16172 return (REG_P (*base) && CONST_INT_P (*offset));
16173 }
16174
16175 return false;
16176 }
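/* Illustrative example (comment only): an operand such as
   (mem:SI (plus:SI (reg:SI r4) (const_int 8))) yields *BASE = r4 and
   *OFFSET = 8, while an auto-modify address such as
   (mem:SI (post_inc:SI (reg:SI r4))) is rejected by the side_effects_p
   check above.  */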
16177
16178 /* Called from a peephole2 to replace two word-size accesses with a
16179 single LDRD/STRD instruction. Returns true iff we can generate a
16180 new instruction sequence. That is, both accesses use the same base
16181 register and the gap between constant offsets is 4. This function
16182 may reorder its operands to match ldrd/strd RTL templates.
16183 OPERANDS are the operands found by the peephole matcher;
16184 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16185 corresponding memory operands. LOAD indicates whether the access
16186 is load or store. CONST_STORE indicates a store of constant
16187 integer values held in OPERANDS[4,5] and assumes that the pattern
16188 is 4 insns long, for the purpose of checking dead registers.
16189 COMMUTE indicates that register operands may be reordered. */
16190 bool
16191 gen_operands_ldrd_strd (rtx *operands, bool load,
16192 bool const_store, bool commute)
16193 {
16194 int nops = 2;
16195 HOST_WIDE_INT offsets[2], offset, align[2];
16196 rtx base = NULL_RTX;
16197 rtx cur_base, cur_offset, tmp;
16198 int i, gap;
16199 HARD_REG_SET regset;
16200
16201 gcc_assert (!const_store || !load);
16202 /* Check that the memory references are immediate offsets from the
16203 same base register. Extract the base register, the destination
16204 registers, and the corresponding memory offsets. */
16205 for (i = 0; i < nops; i++)
16206 {
16207 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16208 &align[i]))
16209 return false;
16210
16211 if (i == 0)
16212 base = cur_base;
16213 else if (REGNO (base) != REGNO (cur_base))
16214 return false;
16215
16216 offsets[i] = INTVAL (cur_offset);
16217 if (GET_CODE (operands[i]) == SUBREG)
16218 {
16219 tmp = SUBREG_REG (operands[i]);
16220 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16221 operands[i] = tmp;
16222 }
16223 }
16224
16225 /* Make sure there is no dependency between the individual loads. */
16226 if (load && REGNO (operands[0]) == REGNO (base))
16227 return false; /* RAW */
16228
16229 if (load && REGNO (operands[0]) == REGNO (operands[1]))
16230 return false; /* WAW */
16231
16232 /* If the same input register is used in both stores
16233 when storing different constants, try to find a free register.
16234 For example, the code
16235 mov r0, 0
16236 str r0, [r2]
16237 mov r0, 1
16238 str r0, [r2, #4]
16239 can be transformed into
16240 mov r1, 0
16241 mov r0, 1
16242 strd r1, r0, [r2]
16243 in Thumb mode assuming that r1 is free.
16244 For ARM mode do the same but only if the starting register
16245 can be made to be even. */
16246 if (const_store
16247 && REGNO (operands[0]) == REGNO (operands[1])
16248 && INTVAL (operands[4]) != INTVAL (operands[5]))
16249 {
16250 if (TARGET_THUMB2)
16251 {
16252 CLEAR_HARD_REG_SET (regset);
16253 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16254 if (tmp == NULL_RTX)
16255 return false;
16256
16257 /* Use the new register in the first load to ensure that
16258 if the original input register is not dead after peephole,
16259 then it will have the correct constant value. */
16260 operands[0] = tmp;
16261 }
16262 else if (TARGET_ARM)
16263 {
16264 int regno = REGNO (operands[0]);
16265 if (!peep2_reg_dead_p (4, operands[0]))
16266 {
16267 /* When the input register is even and is not dead after the
16268 pattern, it has to hold the second constant but we cannot
16269 form a legal STRD in ARM mode with this register as the second
16270 register. */
16271 if (regno % 2 == 0)
16272 return false;
16273
16274 /* Is regno-1 free? */
16275 SET_HARD_REG_SET (regset);
16276 CLEAR_HARD_REG_BIT (regset, regno - 1);
16277 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16278 if (tmp == NULL_RTX)
16279 return false;
16280
16281 operands[0] = tmp;
16282 }
16283 else
16284 {
16285 /* Find a DImode register. */
16286 CLEAR_HARD_REG_SET (regset);
16287 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16288 if (tmp != NULL_RTX)
16289 {
16290 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16291 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16292 }
16293 else
16294 {
16295 /* Can we use the input register to form a DI register? */
16296 SET_HARD_REG_SET (regset);
16297 CLEAR_HARD_REG_BIT (regset,
16298 regno % 2 == 0 ? regno + 1 : regno - 1);
16299 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16300 if (tmp == NULL_RTX)
16301 return false;
16302 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16303 }
16304 }
16305
16306 gcc_assert (operands[0] != NULL_RTX);
16307 gcc_assert (operands[1] != NULL_RTX);
16308 gcc_assert (REGNO (operands[0]) % 2 == 0);
16309 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16310 }
16311 }
16312
16313 /* Make sure the instructions are ordered with lower memory access first. */
16314 if (offsets[0] > offsets[1])
16315 {
16316 gap = offsets[0] - offsets[1];
16317 offset = offsets[1];
16318
16319 /* Swap the instructions such that lower memory is accessed first. */
16320 std::swap (operands[0], operands[1]);
16321 std::swap (operands[2], operands[3]);
16322 std::swap (align[0], align[1]);
16323 if (const_store)
16324 std::swap (operands[4], operands[5]);
16325 }
16326 else
16327 {
16328 gap = offsets[1] - offsets[0];
16329 offset = offsets[0];
16330 }
16331
16332 /* Make sure accesses are to consecutive memory locations. */
16333 if (gap != GET_MODE_SIZE (SImode))
16334 return false;
16335
16336 if (!align_ok_ldrd_strd (align[0], offset))
16337 return false;
16338
16339 /* Make sure we generate legal instructions. */
16340 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16341 false, load))
16342 return true;
16343
16344 /* In Thumb state, where registers are almost unconstrained, there
16345 is little hope of fixing it. */
16346 if (TARGET_THUMB2)
16347 return false;
16348
16349 if (load && commute)
16350 {
16351 /* Try reordering registers. */
16352 std::swap (operands[0], operands[1]);
16353 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16354 false, load))
16355 return true;
16356 }
16357
16358 if (const_store)
16359 {
16360 /* If input registers are dead after this pattern, they can be
16361 reordered or replaced by other registers that are free in the
16362 current pattern. */
16363 if (!peep2_reg_dead_p (4, operands[0])
16364 || !peep2_reg_dead_p (4, operands[1]))
16365 return false;
16366
16367 /* Try to reorder the input registers. */
16368 /* For example, the code
16369 mov r0, 0
16370 mov r1, 1
16371 str r1, [r2]
16372 str r0, [r2, #4]
16373 can be transformed into
16374 mov r1, 0
16375 mov r0, 1
16376 strd r0, r1, [r2]
16377 */
16378 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16379 false, false))
16380 {
16381 std::swap (operands[0], operands[1]);
16382 return true;
16383 }
16384
16385 /* Try to find a free DI register. */
16386 CLEAR_HARD_REG_SET (regset);
16387 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16388 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16389 while (true)
16390 {
16391 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16392 if (tmp == NULL_RTX)
16393 return false;
16394
16395 /* DREG must be an even-numbered register in DImode.
16396 Split it into SI registers. */
16397 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16398 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16399 gcc_assert (operands[0] != NULL_RTX);
16400 gcc_assert (operands[1] != NULL_RTX);
16401 gcc_assert (REGNO (operands[0]) % 2 == 0);
16402 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16403
16404 return (operands_ok_ldrd_strd (operands[0], operands[1],
16405 base, offset,
16406 false, load));
16407 }
16408 }
16409
16410 return false;
16411 }
16412
16413
16414 /* Return true if parallel execution of the two word-size accesses provided
16415 could be satisfied with a single LDRD/STRD instruction. Two word-size
16416 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16417 register operands and OPERANDS[2,3] are the corresponding memory operands.
16418 */
16419 bool
16420 valid_operands_ldrd_strd (rtx *operands, bool load)
16421 {
16422 int nops = 2;
16423 HOST_WIDE_INT offsets[2], offset, align[2];
16424 rtx base = NULL_RTX;
16425 rtx cur_base, cur_offset;
16426 int i, gap;
16427
16428 /* Check that the memory references are immediate offsets from the
16429 same base register. Extract the base register, the destination
16430 registers, and the corresponding memory offsets. */
16431 for (i = 0; i < nops; i++)
16432 {
16433 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16434 &align[i]))
16435 return false;
16436
16437 if (i == 0)
16438 base = cur_base;
16439 else if (REGNO (base) != REGNO (cur_base))
16440 return false;
16441
16442 offsets[i] = INTVAL (cur_offset);
16443 if (GET_CODE (operands[i]) == SUBREG)
16444 return false;
16445 }
16446
16447 if (offsets[0] > offsets[1])
16448 return false;
16449
16450 gap = offsets[1] - offsets[0];
16451 offset = offsets[0];
16452
16453 /* Make sure accesses are to consecutive memory locations. */
16454 if (gap != GET_MODE_SIZE (SImode))
16455 return false;
16456
16457 if (!align_ok_ldrd_strd (align[0], offset))
16458 return false;
16459
16460 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16461 false, load);
16462 }
16463
16464 \f
16465 /* Print a symbolic form of X to the debug file, F. */
16466 static void
16467 arm_print_value (FILE *f, rtx x)
16468 {
16469 switch (GET_CODE (x))
16470 {
16471 case CONST_INT:
16472 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16473 return;
16474
16475 case CONST_DOUBLE:
16476 {
16477 char fpstr[20];
16478 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16479 sizeof (fpstr), 0, 1);
16480 fputs (fpstr, f);
16481 }
16482 return;
16483
16484 case CONST_VECTOR:
16485 {
16486 int i;
16487
16488 fprintf (f, "<");
16489 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16490 {
16491 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16492 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16493 fputc (',', f);
16494 }
16495 fprintf (f, ">");
16496 }
16497 return;
16498
16499 case CONST_STRING:
16500 fprintf (f, "\"%s\"", XSTR (x, 0));
16501 return;
16502
16503 case SYMBOL_REF:
16504 fprintf (f, "`%s'", XSTR (x, 0));
16505 return;
16506
16507 case LABEL_REF:
16508 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16509 return;
16510
16511 case CONST:
16512 arm_print_value (f, XEXP (x, 0));
16513 return;
16514
16515 case PLUS:
16516 arm_print_value (f, XEXP (x, 0));
16517 fprintf (f, "+");
16518 arm_print_value (f, XEXP (x, 1));
16519 return;
16520
16521 case PC:
16522 fprintf (f, "pc");
16523 return;
16524
16525 default:
16526 fprintf (f, "????");
16527 return;
16528 }
16529 }
16530 \f
16531 /* Routines for manipulation of the constant pool. */
16532
16533 /* Arm instructions cannot load a large constant directly into a
16534 register; they have to come from a pc relative load. The constant
16535 must therefore be placed in the addressable range of the pc
16536 relative load. Depending on the precise pc relative load
16537 instruction the range is somewhere between 256 bytes and 4k. This
16538 means that we often have to dump a constant inside a function, and
16539 generate code to branch around it.
16540
16541 It is important to minimize this, since the branches will slow
16542 things down and make the code larger.
16543
16544 Normally we can hide the table after an existing unconditional
16545 branch so that there is no interruption of the flow, but in the
16546 worst case the code looks like this:
16547
16548 ldr rn, L1
16549 ...
16550 b L2
16551 align
16552 L1: .long value
16553 L2:
16554 ...
16555
16556 ldr rn, L3
16557 ...
16558 b L4
16559 align
16560 L3: .long value
16561 L4:
16562 ...
16563
16564 We fix this by performing a scan after scheduling, which notices
16565 which instructions need to have their operands fetched from the
16566 constant table and builds the table.
16567
16568 The algorithm starts by building a table of all the constants that
16569 need fixing up and all the natural barriers in the function (places
16570 where a constant table can be dropped without breaking the flow).
16571 For each fixup we note how far the pc-relative replacement will be
16572 able to reach and the offset of the instruction into the function.
16573
16574 Having built the table we then group the fixes together to form
16575 tables that are as large as possible (subject to addressing
16576 constraints) and emit each table of constants after the last
16577 barrier that is within range of all the instructions in the group.
16578 If a group does not contain a barrier, then we forcibly create one
16579 by inserting a jump instruction into the flow. Once the table has
16580 been inserted, the insns are then modified to reference the
16581 relevant entry in the pool.
16582
16583 Possible enhancements to the algorithm (not implemented) are:
16584
16585 1) For some processors and object formats, there may be benefit in
16586 aligning the pools to the start of cache lines; this alignment
16587 would need to be taken into account when calculating addressability
16588 of a pool. */
16589
16590 /* These typedefs are located at the start of this file, so that
16591 they can be used in the prototypes there. This comment is to
16592 remind readers of that fact so that the following structures
16593 can be understood more easily.
16594
16595 typedef struct minipool_node Mnode;
16596 typedef struct minipool_fixup Mfix; */
16597
16598 struct minipool_node
16599 {
16600 /* Doubly linked chain of entries. */
16601 Mnode * next;
16602 Mnode * prev;
16603 /* The maximum offset into the code at which this entry can be placed. While
16604 pushing fixes for forward references, all entries are sorted in order
16605 of increasing max_address. */
16606 HOST_WIDE_INT max_address;
16607 /* Similarly for an entry inserted for a backwards ref. */
16608 HOST_WIDE_INT min_address;
16609 /* The number of fixes referencing this entry. This can become zero
16610 if we "unpush" an entry. In this case we ignore the entry when we
16611 come to emit the code. */
16612 int refcount;
16613 /* The offset from the start of the minipool. */
16614 HOST_WIDE_INT offset;
16615 /* The value in the table. */
16616 rtx value;
16617 /* The mode of value. */
16618 machine_mode mode;
16619 /* The size of the value. With iWMMXt enabled
16620 sizes > 4 also imply an alignment of 8 bytes. */
16621 int fix_size;
16622 };
16623
16624 struct minipool_fixup
16625 {
16626 Mfix * next;
16627 rtx_insn * insn;
16628 HOST_WIDE_INT address;
16629 rtx * loc;
16630 machine_mode mode;
16631 int fix_size;
16632 rtx value;
16633 Mnode * minipool;
16634 HOST_WIDE_INT forwards;
16635 HOST_WIDE_INT backwards;
16636 };
16637
16638 /* Fixes less than a word need padding out to a word boundary. */
16639 #define MINIPOOL_FIX_SIZE(mode) \
16640 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
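/* For illustration (comment only): a QImode or HImode fix therefore occupies
   4 bytes in the pool, while DImode and larger fixes keep their natural
   size.  */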
16641
16642 static Mnode * minipool_vector_head;
16643 static Mnode * minipool_vector_tail;
16644 static rtx_code_label *minipool_vector_label;
16645 static int minipool_pad;
16646
16647 /* The linked list of all minipool fixes required for this function. */
16648 Mfix * minipool_fix_head;
16649 Mfix * minipool_fix_tail;
16650 /* The fix entry for the current minipool, once it has been placed. */
16651 Mfix * minipool_barrier;
16652
16653 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16654 #define JUMP_TABLES_IN_TEXT_SECTION 0
16655 #endif
16656
16657 static HOST_WIDE_INT
16658 get_jump_table_size (rtx_jump_table_data *insn)
16659 {
16660 /* ADDR_VECs only take room if read-only data goes into the text
16661 section. */
16662 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16663 {
16664 rtx body = PATTERN (insn);
16665 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16666 HOST_WIDE_INT size;
16667 HOST_WIDE_INT modesize;
16668
16669 modesize = GET_MODE_SIZE (GET_MODE (body));
16670 size = modesize * XVECLEN (body, elt);
16671 switch (modesize)
16672 {
16673 case 1:
16674 /* Round up size of TBB table to a halfword boundary. */
16675 size = (size + 1) & ~HOST_WIDE_INT_1;
16676 break;
16677 case 2:
16678 /* No padding necessary for TBH. */
16679 break;
16680 case 4:
16681 /* Add two bytes for alignment on Thumb. */
16682 if (TARGET_THUMB)
16683 size += 2;
16684 break;
16685 default:
16686 gcc_unreachable ();
16687 }
16688 return size;
16689 }
16690
16691 return 0;
16692 }
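/* Illustrative example (comment only), assuming the table is emitted into the
   text section: a 5-entry QImode (TBB) table is rounded up from 5 to 6 bytes,
   a 5-entry HImode (TBH) table needs exactly 10 bytes, and a 5-entry SImode
   table on Thumb needs 20 + 2 = 22 bytes including the alignment padding.  */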
16693
16694 /* Emit insns to load the function address from FUNCDESC (an FDPIC
16695 function descriptor) into a register and the GOT address into the
16696 FDPIC register, returning an rtx for the register holding the
16697 function address. */
16698
16699 rtx
16700 arm_load_function_descriptor (rtx funcdesc)
16701 {
16702 rtx fnaddr_reg = gen_reg_rtx (Pmode);
16703 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
16704 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
16705 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
16706
16707 emit_move_insn (fnaddr_reg, fnaddr);
16708
16709 /* The ABI requires the entry point address to be loaded first, but
16710 since we cannot support lazy binding for lack of atomic load of
16711 two 32-bits values, we do not need to bother to prevent the
16712 previous load from being moved after that of the GOT address. */
16713 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
16714
16715 return fnaddr_reg;
16716 }
16717
16718 /* Return the maximum amount of padding that will be inserted before
16719 label LABEL. */
16720 static HOST_WIDE_INT
16721 get_label_padding (rtx label)
16722 {
16723 HOST_WIDE_INT align, min_insn_size;
16724
16725 align = 1 << label_to_alignment (label).levels[0].log;
16726 min_insn_size = TARGET_THUMB ? 2 : 4;
16727 return align > min_insn_size ? align - min_insn_size : 0;
16728 }
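/* Illustrative example (comment only): a label aligned to 8 bytes in Thumb
   code, where the minimum insn size is 2, may be preceded by up to
   8 - 2 = 6 bytes of padding; an alignment no larger than one instruction
   contributes no padding.  */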
16729
16730 /* Move a minipool fix MP from its current location to before MAX_MP.
16731 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16732 constraints may need updating. */
16733 static Mnode *
16734 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16735 HOST_WIDE_INT max_address)
16736 {
16737 /* The code below assumes these are different. */
16738 gcc_assert (mp != max_mp);
16739
16740 if (max_mp == NULL)
16741 {
16742 if (max_address < mp->max_address)
16743 mp->max_address = max_address;
16744 }
16745 else
16746 {
16747 if (max_address > max_mp->max_address - mp->fix_size)
16748 mp->max_address = max_mp->max_address - mp->fix_size;
16749 else
16750 mp->max_address = max_address;
16751
16752 /* Unlink MP from its current position. Since max_mp is non-null,
16753 mp->prev must be non-null. */
16754 mp->prev->next = mp->next;
16755 if (mp->next != NULL)
16756 mp->next->prev = mp->prev;
16757 else
16758 minipool_vector_tail = mp->prev;
16759
16760 /* Re-insert it before MAX_MP. */
16761 mp->next = max_mp;
16762 mp->prev = max_mp->prev;
16763 max_mp->prev = mp;
16764
16765 if (mp->prev != NULL)
16766 mp->prev->next = mp;
16767 else
16768 minipool_vector_head = mp;
16769 }
16770
16771 /* Save the new entry. */
16772 max_mp = mp;
16773
16774 /* Scan over the preceding entries and adjust their addresses as
16775 required. */
16776 while (mp->prev != NULL
16777 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16778 {
16779 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16780 mp = mp->prev;
16781 }
16782
16783 return max_mp;
16784 }
16785
16786 /* Add a constant to the minipool for a forward reference. Returns the
16787 node added or NULL if the constant will not fit in this pool. */
16788 static Mnode *
16789 add_minipool_forward_ref (Mfix *fix)
16790 {
16791 /* If set, max_mp is the first pool_entry that has a lower
16792 constraint than the one we are trying to add. */
16793 Mnode * max_mp = NULL;
16794 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16795 Mnode * mp;
16796
16797 /* If the minipool starts before the end of FIX->INSN then this FIX
16798 cannot be placed into the current pool. Furthermore, adding the
16799 new constant pool entry may cause the pool to start FIX_SIZE bytes
16800 earlier. */
16801 if (minipool_vector_head &&
16802 (fix->address + get_attr_length (fix->insn)
16803 >= minipool_vector_head->max_address - fix->fix_size))
16804 return NULL;
16805
16806 /* Scan the pool to see if a constant with the same value has
16807 already been added. While we are doing this, also note the
16808 location where we must insert the constant if it doesn't already
16809 exist. */
16810 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16811 {
16812 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16813 && fix->mode == mp->mode
16814 && (!LABEL_P (fix->value)
16815 || (CODE_LABEL_NUMBER (fix->value)
16816 == CODE_LABEL_NUMBER (mp->value)))
16817 && rtx_equal_p (fix->value, mp->value))
16818 {
16819 /* More than one fix references this entry. */
16820 mp->refcount++;
16821 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16822 }
16823
16824 /* Note the insertion point if necessary. */
16825 if (max_mp == NULL
16826 && mp->max_address > max_address)
16827 max_mp = mp;
16828
16829 /* If we are inserting an 8-byte aligned quantity and
16830 we have not already found an insertion point, then
16831 make sure that all such 8-byte aligned quantities are
16832 placed at the start of the pool. */
16833 if (ARM_DOUBLEWORD_ALIGN
16834 && max_mp == NULL
16835 && fix->fix_size >= 8
16836 && mp->fix_size < 8)
16837 {
16838 max_mp = mp;
16839 max_address = mp->max_address;
16840 }
16841 }
16842
16843 /* The value is not currently in the minipool, so we need to create
16844 a new entry for it. If MAX_MP is NULL, the entry will be put on
16845 the end of the list since the placement is less constrained than
16846 any existing entry. Otherwise, we insert the new fix before
16847 MAX_MP and, if necessary, adjust the constraints on the other
16848 entries. */
16849 mp = XNEW (Mnode);
16850 mp->fix_size = fix->fix_size;
16851 mp->mode = fix->mode;
16852 mp->value = fix->value;
16853 mp->refcount = 1;
16854 /* Not yet required for a backwards ref. */
16855 mp->min_address = -65536;
16856
16857 if (max_mp == NULL)
16858 {
16859 mp->max_address = max_address;
16860 mp->next = NULL;
16861 mp->prev = minipool_vector_tail;
16862
16863 if (mp->prev == NULL)
16864 {
16865 minipool_vector_head = mp;
16866 minipool_vector_label = gen_label_rtx ();
16867 }
16868 else
16869 mp->prev->next = mp;
16870
16871 minipool_vector_tail = mp;
16872 }
16873 else
16874 {
16875 if (max_address > max_mp->max_address - mp->fix_size)
16876 mp->max_address = max_mp->max_address - mp->fix_size;
16877 else
16878 mp->max_address = max_address;
16879
16880 mp->next = max_mp;
16881 mp->prev = max_mp->prev;
16882 max_mp->prev = mp;
16883 if (mp->prev != NULL)
16884 mp->prev->next = mp;
16885 else
16886 minipool_vector_head = mp;
16887 }
16888
16889 /* Save the new entry. */
16890 max_mp = mp;
16891
16892 /* Scan over the preceding entries and adjust their addresses as
16893 required. */
16894 while (mp->prev != NULL
16895 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16896 {
16897 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16898 mp = mp->prev;
16899 }
16900
16901 return max_mp;
16902 }
16903
16904 static Mnode *
16905 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16906 HOST_WIDE_INT min_address)
16907 {
16908 HOST_WIDE_INT offset;
16909
16910 /* The code below assumes these are different. */
16911 gcc_assert (mp != min_mp);
16912
16913 if (min_mp == NULL)
16914 {
16915 if (min_address > mp->min_address)
16916 mp->min_address = min_address;
16917 }
16918 else
16919 {
16920 /* We will adjust this below if it is too loose. */
16921 mp->min_address = min_address;
16922
16923 /* Unlink MP from its current position. Since min_mp is non-null,
16924 mp->next must be non-null. */
16925 mp->next->prev = mp->prev;
16926 if (mp->prev != NULL)
16927 mp->prev->next = mp->next;
16928 else
16929 minipool_vector_head = mp->next;
16930
16931 /* Reinsert it after MIN_MP. */
16932 mp->prev = min_mp;
16933 mp->next = min_mp->next;
16934 min_mp->next = mp;
16935 if (mp->next != NULL)
16936 mp->next->prev = mp;
16937 else
16938 minipool_vector_tail = mp;
16939 }
16940
16941 min_mp = mp;
16942
16943 offset = 0;
16944 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16945 {
16946 mp->offset = offset;
16947 if (mp->refcount > 0)
16948 offset += mp->fix_size;
16949
16950 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16951 mp->next->min_address = mp->min_address + mp->fix_size;
16952 }
16953
16954 return min_mp;
16955 }
16956
16957 /* Add a constant to the minipool for a backward reference. Returns the
16958 node added or NULL if the constant will not fit in this pool.
16959
16960 Note that the code for insertion for a backwards reference can be
16961 somewhat confusing because the calculated offsets for each fix do
16962 not take into account the size of the pool (which is still under
16963 construction). */
16964 static Mnode *
16965 add_minipool_backward_ref (Mfix *fix)
16966 {
16967 /* If set, min_mp is the last pool_entry that has a lower constraint
16968 than the one we are trying to add. */
16969 Mnode *min_mp = NULL;
16970 /* This can be negative, since it is only a constraint. */
16971 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16972 Mnode *mp;
16973
16974 /* If we can't reach the current pool from this insn, or if we can't
16975 insert this entry at the end of the pool without pushing other
16976 fixes out of range, then we don't try. This ensures that we
16977 can't fail later on. */
16978 if (min_address >= minipool_barrier->address
16979 || (minipool_vector_tail->min_address + fix->fix_size
16980 >= minipool_barrier->address))
16981 return NULL;
16982
16983 /* Scan the pool to see if a constant with the same value has
16984 already been added. While we are doing this, also note the
16985 location where we must insert the constant if it doesn't already
16986 exist. */
16987 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16988 {
16989 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16990 && fix->mode == mp->mode
16991 && (!LABEL_P (fix->value)
16992 || (CODE_LABEL_NUMBER (fix->value)
16993 == CODE_LABEL_NUMBER (mp->value)))
16994 && rtx_equal_p (fix->value, mp->value)
16995 /* Check that there is enough slack to move this entry to the
16996 end of the table (this is conservative). */
16997 && (mp->max_address
16998 > (minipool_barrier->address
16999 + minipool_vector_tail->offset
17000 + minipool_vector_tail->fix_size)))
17001 {
17002 mp->refcount++;
17003 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17004 }
17005
17006 if (min_mp != NULL)
17007 mp->min_address += fix->fix_size;
17008 else
17009 {
17010 /* Note the insertion point if necessary. */
17011 if (mp->min_address < min_address)
17012 {
17013 /* For now, we do not allow the insertion of 8-byte alignment
17014 requiring nodes anywhere but at the start of the pool. */
17015 if (ARM_DOUBLEWORD_ALIGN
17016 && fix->fix_size >= 8 && mp->fix_size < 8)
17017 return NULL;
17018 else
17019 min_mp = mp;
17020 }
17021 else if (mp->max_address
17022 < minipool_barrier->address + mp->offset + fix->fix_size)
17023 {
17024 /* Inserting before this entry would push the fix beyond
17025 its maximum address (which can happen if we have
17026 re-located a forwards fix); force the new fix to come
17027 after it. */
17028 if (ARM_DOUBLEWORD_ALIGN
17029 && fix->fix_size >= 8 && mp->fix_size < 8)
17030 return NULL;
17031 else
17032 {
17033 min_mp = mp;
17034 min_address = mp->min_address + fix->fix_size;
17035 }
17036 }
17037 /* Do not insert a non-8-byte aligned quantity before 8-byte
17038 aligned quantities. */
17039 else if (ARM_DOUBLEWORD_ALIGN
17040 && fix->fix_size < 8
17041 && mp->fix_size >= 8)
17042 {
17043 min_mp = mp;
17044 min_address = mp->min_address + fix->fix_size;
17045 }
17046 }
17047 }
17048
17049 /* We need to create a new entry. */
17050 mp = XNEW (Mnode);
17051 mp->fix_size = fix->fix_size;
17052 mp->mode = fix->mode;
17053 mp->value = fix->value;
17054 mp->refcount = 1;
17055 mp->max_address = minipool_barrier->address + 65536;
17056
17057 mp->min_address = min_address;
17058
17059 if (min_mp == NULL)
17060 {
17061 mp->prev = NULL;
17062 mp->next = minipool_vector_head;
17063
17064 if (mp->next == NULL)
17065 {
17066 minipool_vector_tail = mp;
17067 minipool_vector_label = gen_label_rtx ();
17068 }
17069 else
17070 mp->next->prev = mp;
17071
17072 minipool_vector_head = mp;
17073 }
17074 else
17075 {
17076 mp->next = min_mp->next;
17077 mp->prev = min_mp;
17078 min_mp->next = mp;
17079
17080 if (mp->next != NULL)
17081 mp->next->prev = mp;
17082 else
17083 minipool_vector_tail = mp;
17084 }
17085
17086 /* Save the new entry. */
17087 min_mp = mp;
17088
17089 if (mp->prev)
17090 mp = mp->prev;
17091 else
17092 mp->offset = 0;
17093
17094 /* Scan over the following entries and adjust their offsets. */
17095 while (mp->next != NULL)
17096 {
17097 if (mp->next->min_address < mp->min_address + mp->fix_size)
17098 mp->next->min_address = mp->min_address + mp->fix_size;
17099
17100 if (mp->refcount)
17101 mp->next->offset = mp->offset + mp->fix_size;
17102 else
17103 mp->next->offset = mp->offset;
17104
17105 mp = mp->next;
17106 }
17107
17108 return min_mp;
17109 }
17110
17111 static void
17112 assign_minipool_offsets (Mfix *barrier)
17113 {
17114 HOST_WIDE_INT offset = 0;
17115 Mnode *mp;
17116
17117 minipool_barrier = barrier;
17118
17119 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17120 {
17121 mp->offset = offset;
17122
17123 if (mp->refcount > 0)
17124 offset += mp->fix_size;
17125 }
17126 }
17127
17128 /* Output the literal table */
17129 static void
17130 dump_minipool (rtx_insn *scan)
17131 {
17132 Mnode * mp;
17133 Mnode * nmp;
17134 int align64 = 0;
17135
17136 if (ARM_DOUBLEWORD_ALIGN)
17137 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17138 if (mp->refcount > 0 && mp->fix_size >= 8)
17139 {
17140 align64 = 1;
17141 break;
17142 }
17143
17144 if (dump_file)
17145 fprintf (dump_file,
17146 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17147 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17148
17149 scan = emit_label_after (gen_label_rtx (), scan);
17150 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17151 scan = emit_label_after (minipool_vector_label, scan);
17152
17153 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17154 {
17155 if (mp->refcount > 0)
17156 {
17157 if (dump_file)
17158 {
17159 fprintf (dump_file,
17160 ";; Offset %u, min %ld, max %ld ",
17161 (unsigned) mp->offset, (unsigned long) mp->min_address,
17162 (unsigned long) mp->max_address);
17163 arm_print_value (dump_file, mp->value);
17164 fputc ('\n', dump_file);
17165 }
17166
17167 rtx val = copy_rtx (mp->value);
17168
17169 switch (GET_MODE_SIZE (mp->mode))
17170 {
17171 #ifdef HAVE_consttable_1
17172 case 1:
17173 scan = emit_insn_after (gen_consttable_1 (val), scan);
17174 break;
17175
17176 #endif
17177 #ifdef HAVE_consttable_2
17178 case 2:
17179 scan = emit_insn_after (gen_consttable_2 (val), scan);
17180 break;
17181
17182 #endif
17183 #ifdef HAVE_consttable_4
17184 case 4:
17185 scan = emit_insn_after (gen_consttable_4 (val), scan);
17186 break;
17187
17188 #endif
17189 #ifdef HAVE_consttable_8
17190 case 8:
17191 scan = emit_insn_after (gen_consttable_8 (val), scan);
17192 break;
17193
17194 #endif
17195 #ifdef HAVE_consttable_16
17196 case 16:
17197 scan = emit_insn_after (gen_consttable_16 (val), scan);
17198 break;
17199
17200 #endif
17201 default:
17202 gcc_unreachable ();
17203 }
17204 }
17205
17206 nmp = mp->next;
17207 free (mp);
17208 }
17209
17210 minipool_vector_head = minipool_vector_tail = NULL;
17211 scan = emit_insn_after (gen_consttable_end (), scan);
17212 scan = emit_barrier_after (scan);
17213 }
17214
17215 /* Return the cost of forcibly inserting a barrier after INSN. */
17216 static int
17217 arm_barrier_cost (rtx_insn *insn)
17218 {
17219 /* Basing the location of the pool on the loop depth is preferable,
17220 but at the moment, the basic block information seems to be
17221 corrupt by this stage of the compilation. */
17222 int base_cost = 50;
17223 rtx_insn *next = next_nonnote_insn (insn);
17224
17225 if (next != NULL && LABEL_P (next))
17226 base_cost -= 20;
17227
17228 switch (GET_CODE (insn))
17229 {
17230 case CODE_LABEL:
17231 /* It will always be better to place the table before the label, rather
17232 than after it. */
17233 return 50;
17234
17235 case INSN:
17236 case CALL_INSN:
17237 return base_cost;
17238
17239 case JUMP_INSN:
17240 return base_cost - 10;
17241
17242 default:
17243 return base_cost + 10;
17244 }
17245 }
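/* Illustrative example (comment only): a plain insn or call costs 50, or 30
   if it is immediately followed by a label; a jump costs 10 less than that,
   so barriers are preferentially placed after existing branches and just
   before labels.  */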
17246
17247 /* Find the best place in the insn stream in the range
17248 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17249 Create the barrier by inserting a jump and add a new fix entry for
17250 it. */
17251 static Mfix *
17252 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17253 {
17254 HOST_WIDE_INT count = 0;
17255 rtx_barrier *barrier;
17256 rtx_insn *from = fix->insn;
17257 /* The instruction after which we will insert the jump. */
17258 rtx_insn *selected = NULL;
17259 int selected_cost;
17260 /* The address at which the jump instruction will be placed. */
17261 HOST_WIDE_INT selected_address;
17262 Mfix * new_fix;
17263 HOST_WIDE_INT max_count = max_address - fix->address;
17264 rtx_code_label *label = gen_label_rtx ();
17265
17266 selected_cost = arm_barrier_cost (from);
17267 selected_address = fix->address;
17268
17269 while (from && count < max_count)
17270 {
17271 rtx_jump_table_data *tmp;
17272 int new_cost;
17273
17274 /* This code shouldn't have been called if there was a natural barrier
17275 within range. */
17276 gcc_assert (!BARRIER_P (from));
17277
17278 /* Count the length of this insn. This must stay in sync with the
17279 code that pushes minipool fixes. */
17280 if (LABEL_P (from))
17281 count += get_label_padding (from);
17282 else
17283 count += get_attr_length (from);
17284
17285 /* If there is a jump table, add its length. */
17286 if (tablejump_p (from, NULL, &tmp))
17287 {
17288 count += get_jump_table_size (tmp);
17289
17290 /* Jump tables aren't in a basic block, so base the cost on
17291 the dispatch insn. If we select this location, we will
17292 still put the pool after the table. */
17293 new_cost = arm_barrier_cost (from);
17294
17295 if (count < max_count
17296 && (!selected || new_cost <= selected_cost))
17297 {
17298 selected = tmp;
17299 selected_cost = new_cost;
17300 selected_address = fix->address + count;
17301 }
17302
17303 /* Continue after the dispatch table. */
17304 from = NEXT_INSN (tmp);
17305 continue;
17306 }
17307
17308 new_cost = arm_barrier_cost (from);
17309
17310 if (count < max_count
17311 && (!selected || new_cost <= selected_cost))
17312 {
17313 selected = from;
17314 selected_cost = new_cost;
17315 selected_address = fix->address + count;
17316 }
17317
17318 from = NEXT_INSN (from);
17319 }
17320
17321 /* Make sure that we found a place to insert the jump. */
17322 gcc_assert (selected);
17323
17324 /* Create a new JUMP_INSN that branches around a barrier. */
17325 from = emit_jump_insn_after (gen_jump (label), selected);
17326 JUMP_LABEL (from) = label;
17327 barrier = emit_barrier_after (from);
17328 emit_label_after (label, barrier);
17329
17330 /* Create a minipool barrier entry for the new barrier. */
17331 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17332 new_fix->insn = barrier;
17333 new_fix->address = selected_address;
17334 new_fix->next = fix->next;
17335 fix->next = new_fix;
17336
17337 return new_fix;
17338 }
17339
17340 /* Record that there is a natural barrier in the insn stream at
17341 ADDRESS. */
17342 static void
17343 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17344 {
17345 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17346
17347 fix->insn = insn;
17348 fix->address = address;
17349
17350 fix->next = NULL;
17351 if (minipool_fix_head != NULL)
17352 minipool_fix_tail->next = fix;
17353 else
17354 minipool_fix_head = fix;
17355
17356 minipool_fix_tail = fix;
17357 }
17358
17359 /* Record INSN, which will need fixing up to load a value from the
17360 minipool. ADDRESS is the offset of the insn since the start of the
17361 function; LOC is a pointer to the part of the insn which requires
17362 fixing; VALUE is the constant that must be loaded, which is of type
17363 MODE. */
17364 static void
17365 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17366 machine_mode mode, rtx value)
17367 {
17368 gcc_assert (!arm_disable_literal_pool);
17369 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17370
17371 fix->insn = insn;
17372 fix->address = address;
17373 fix->loc = loc;
17374 fix->mode = mode;
17375 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17376 fix->value = value;
17377 fix->forwards = get_attr_pool_range (insn);
17378 fix->backwards = get_attr_neg_pool_range (insn);
17379 fix->minipool = NULL;
17380
17381 /* If an insn doesn't have a range defined for it, then it isn't
17382 expecting to be reworked by this code. Better to stop now than
17383 to generate duff assembly code. */
17384 gcc_assert (fix->forwards || fix->backwards);
17385
17386 /* If an entry requires 8-byte alignment then assume all constant pools
17387 require 4 bytes of padding. Trying to do this later on a per-pool
17388 basis is awkward because existing pool entries have to be modified. */
17389 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17390 minipool_pad = 4;
17391
17392 if (dump_file)
17393 {
17394 fprintf (dump_file,
17395 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17396 GET_MODE_NAME (mode),
17397 INSN_UID (insn), (unsigned long) address,
17398 -1 * (long)fix->backwards, (long)fix->forwards);
17399 arm_print_value (dump_file, fix->value);
17400 fprintf (dump_file, "\n");
17401 }
17402
17403 /* Add it to the chain of fixes. */
17404 fix->next = NULL;
17405
17406 if (minipool_fix_head != NULL)
17407 minipool_fix_tail->next = fix;
17408 else
17409 minipool_fix_head = fix;
17410
17411 minipool_fix_tail = fix;
17412 }
17413
17414 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17415 Returns the number of insns needed, or 99 if we always want to synthesize
17416 the value. */
17417 int
17418 arm_max_const_double_inline_cost ()
17419 {
17420 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17421 }
17422
17423 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17424 Returns the number of insns needed, or 99 if we don't know how to
17425 do it. */
17426 int
17427 arm_const_double_inline_cost (rtx val)
17428 {
17429 rtx lowpart, highpart;
17430 machine_mode mode;
17431
17432 mode = GET_MODE (val);
17433
17434 if (mode == VOIDmode)
17435 mode = DImode;
17436
17437 gcc_assert (GET_MODE_SIZE (mode) == 8);
17438
17439 lowpart = gen_lowpart (SImode, val);
17440 highpart = gen_highpart_mode (SImode, mode, val);
17441
17442 gcc_assert (CONST_INT_P (lowpart));
17443 gcc_assert (CONST_INT_P (highpart));
17444
17445 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17446 NULL_RTX, NULL_RTX, 0, 0)
17447 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17448 NULL_RTX, NULL_RTX, 0, 0));
17449 }
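/* Rough illustrative example (comment only): for the 64-bit value
   0x0000000100000001 each 32-bit half is the valid immediate 1, so the total
   cost of 2 stays below arm_max_const_double_inline_cost () and the constant
   is typically synthesized inline; a value whose halves each need several
   MOV/ORR steps exceeds the limit and is usually pushed to the literal pool
   instead.  */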
17450
17451 /* Cost of loading a SImode constant. */
17452 static inline int
17453 arm_const_inline_cost (enum rtx_code code, rtx val)
17454 {
17455 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17456 NULL_RTX, NULL_RTX, 1, 0);
17457 }
17458
17459 /* Return true if it is worthwhile to split a 64-bit constant into two
17460 32-bit operations. This is the case if optimizing for size, or
17461 if we have load delay slots, or if one 32-bit part can be done with
17462 a single data operation. */
17463 bool
17464 arm_const_double_by_parts (rtx val)
17465 {
17466 machine_mode mode = GET_MODE (val);
17467 rtx part;
17468
17469 if (optimize_size || arm_ld_sched)
17470 return true;
17471
17472 if (mode == VOIDmode)
17473 mode = DImode;
17474
17475 part = gen_highpart_mode (SImode, mode, val);
17476
17477 gcc_assert (CONST_INT_P (part));
17478
17479 if (const_ok_for_arm (INTVAL (part))
17480 || const_ok_for_arm (~INTVAL (part)))
17481 return true;
17482
17483 part = gen_lowpart (SImode, val);
17484
17485 gcc_assert (CONST_INT_P (part));
17486
17487 if (const_ok_for_arm (INTVAL (part))
17488 || const_ok_for_arm (~INTVAL (part)))
17489 return true;
17490
17491 return false;
17492 }
17493
17494 /* Return true if it is possible to inline both the high and low parts
17495 of a 64-bit constant into 32-bit data processing instructions. */
17496 bool
17497 arm_const_double_by_immediates (rtx val)
17498 {
17499 machine_mode mode = GET_MODE (val);
17500 rtx part;
17501
17502 if (mode == VOIDmode)
17503 mode = DImode;
17504
17505 part = gen_highpart_mode (SImode, mode, val);
17506
17507 gcc_assert (CONST_INT_P (part));
17508
17509 if (!const_ok_for_arm (INTVAL (part)))
17510 return false;
17511
17512 part = gen_lowpart (SImode, val);
17513
17514 gcc_assert (CONST_INT_P (part));
17515
17516 if (!const_ok_for_arm (INTVAL (part)))
17517 return false;
17518
17519 return true;
17520 }
17521
17522 /* Scan INSN and note any of its operands that need fixing.
17523 If DO_PUSHES is false we do not actually push any of the fixups
17524 needed. */
17525 static void
17526 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17527 {
17528 int opno;
17529
17530 extract_constrain_insn (insn);
17531
17532 if (recog_data.n_alternatives == 0)
17533 return;
17534
17535 /* Fill in recog_op_alt with information about the constraints of
17536 this insn. */
17537 preprocess_constraints (insn);
17538
17539 const operand_alternative *op_alt = which_op_alt ();
17540 for (opno = 0; opno < recog_data.n_operands; opno++)
17541 {
17542 /* Things we need to fix can only occur in inputs. */
17543 if (recog_data.operand_type[opno] != OP_IN)
17544 continue;
17545
17546 /* If this alternative is a memory reference, then any mention
17547 of constants in this alternative is really to fool reload
17548 into allowing us to accept one there. We need to fix them up
17549 now so that we output the right code. */
17550 if (op_alt[opno].memory_ok)
17551 {
17552 rtx op = recog_data.operand[opno];
17553
17554 if (CONSTANT_P (op))
17555 {
17556 if (do_pushes)
17557 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17558 recog_data.operand_mode[opno], op);
17559 }
17560 else if (MEM_P (op)
17561 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17562 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17563 {
17564 if (do_pushes)
17565 {
17566 rtx cop = avoid_constant_pool_reference (op);
17567
17568 /* Casting the address of something to a mode narrower
17569 than a word can cause avoid_constant_pool_reference()
17570 to return the pool reference itself. That's no good to
17571 us here. Let's just hope that we can use the
17572 constant pool value directly. */
17573 if (op == cop)
17574 cop = get_pool_constant (XEXP (op, 0));
17575
17576 push_minipool_fix (insn, address,
17577 recog_data.operand_loc[opno],
17578 recog_data.operand_mode[opno], cop);
17579 }
17580
17581 }
17582 }
17583 }
17584
17585 return;
17586 }
17587
17588 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
17589 and unions in the context of ARMv8-M Security Extensions. It is used as a
17590 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
17591 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
17592 or four masks, depending on whether it is being computed for a
17593 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
17594 respectively. The tree for the type of the argument or a field within an
17595 argument is passed in ARG_TYPE, the current register this argument or field
17596 starts in is kept in the pointer REGNO and updated accordingly, the bit this
17597 argument or field starts at is passed in STARTING_BIT and the last used bit
17598 is kept in LAST_USED_BIT which is also updated accordingly. */
17599
17600 static unsigned HOST_WIDE_INT
17601 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
17602 uint32_t * padding_bits_to_clear,
17603 unsigned starting_bit, int * last_used_bit)
17604
17605 {
17606 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
17607
17608 if (TREE_CODE (arg_type) == RECORD_TYPE)
17609 {
17610 unsigned current_bit = starting_bit;
17611 tree field;
17612 long int offset, size;
17613
17614
17615 field = TYPE_FIELDS (arg_type);
17616 while (field)
17617 {
17618 /* The offset within a structure is always an offset from
17619 the start of that structure. Make sure we take that into account in
17620 the calculation of the register-based offset that we use here. */
17621 offset = starting_bit;
17622 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
17623 offset %= 32;
17624
17625 /* This is the actual size of the field, for bitfields this is the
17626 bitfield width and not the container size. */
17627 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
17628
17629 if (*last_used_bit != offset)
17630 {
17631 if (offset < *last_used_bit)
17632 {
17633 /* This field's offset is before the 'last_used_bit', that
17634 means this field goes on the next register. So we need to
17635 pad the rest of the current register and increase the
17636 register number. */
17637 uint32_t mask;
17638 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
17639 mask++;
17640
17641 padding_bits_to_clear[*regno] |= mask;
17642 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17643 (*regno)++;
17644 }
17645 else
17646 {
17647 /* Otherwise we pad the bits between the last field's end and
17648 the start of the new field. */
17649 uint32_t mask;
17650
17651 mask = ((uint32_t)-1) >> (32 - offset);
17652 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
17653 padding_bits_to_clear[*regno] |= mask;
17654 }
17655 current_bit = offset;
17656 }
17657
17658 /* Calculate further padding bits for inner structs/unions too. */
17659 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
17660 {
17661 *last_used_bit = current_bit;
17662 not_to_clear_reg_mask
17663 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
17664 padding_bits_to_clear, offset,
17665 last_used_bit);
17666 }
17667 else
17668 {
17669 /* Update 'current_bit' with this field's size. If the
17670 'current_bit' lies in a subsequent register, update 'regno' and
17671 reset 'current_bit' to point to the current bit in that new
17672 register. */
17673 current_bit += size;
17674 while (current_bit >= 32)
17675 {
17676 current_bit -= 32;
17677 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17678 (*regno)++;
17679 }
17680 *last_used_bit = current_bit;
17681 }
17682
17683 field = TREE_CHAIN (field);
17684 }
17685 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17686 }
17687 else if (TREE_CODE (arg_type) == UNION_TYPE)
17688 {
17689 tree field, field_t;
17690 int i, regno_t, field_size;
17691 int max_reg = -1;
17692 int max_bit = -1;
17693 uint32_t mask;
17694 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
17695 = {-1, -1, -1, -1};
17696
17697 /* To compute the padding bits in a union we only consider bits as
17698 padding bits if they are always either a padding bit or fall outside a
17699 field's size for all fields in the union. */
17700 field = TYPE_FIELDS (arg_type);
17701 while (field)
17702 {
17703 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
17704 = {0U, 0U, 0U, 0U};
17705 int last_used_bit_t = *last_used_bit;
17706 regno_t = *regno;
17707 field_t = TREE_TYPE (field);
17708
17709 /* If the field's type is either a record or a union make sure to
17710 compute their padding bits too. */
17711 if (RECORD_OR_UNION_TYPE_P (field_t))
17712 not_to_clear_reg_mask
17713 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
17714 &padding_bits_to_clear_t[0],
17715 starting_bit, &last_used_bit_t);
17716 else
17717 {
17718 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
17719 regno_t = (field_size / 32) + *regno;
17720 last_used_bit_t = (starting_bit + field_size) % 32;
17721 }
17722
17723 for (i = *regno; i < regno_t; i++)
17724 {
17725 /* For all but the last register used by this field only keep the
17726 padding bits that were padding bits in this field. */
17727 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
17728 }
17729
17730 /* For the last register, keep all padding bits that were padding
17731 bits in this field and any padding bits that are still valid
17732 as padding bits but fall outside of this field's size. */
17733 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
17734 padding_bits_to_clear_res[regno_t]
17735 &= padding_bits_to_clear_t[regno_t] | mask;
17736
17737 /* Update the maximum size of the fields in terms of registers used
17738 ('max_reg') and the 'last_used_bit' in said register. */
17739 if (max_reg < regno_t)
17740 {
17741 max_reg = regno_t;
17742 max_bit = last_used_bit_t;
17743 }
17744 else if (max_reg == regno_t && max_bit < last_used_bit_t)
17745 max_bit = last_used_bit_t;
17746
17747 field = TREE_CHAIN (field);
17748 }
17749
17750 /* Update the current padding_bits_to_clear using the intersection of the
17751 padding bits of all the fields. */
17752 for (i = *regno; i < max_reg; i++)
17753 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
17754
17755 /* Do not keep trailing padding bits; we do not know yet whether this
17756 is the end of the argument. */
17757 mask = ((uint32_t) 1 << max_bit) - 1;
17758 padding_bits_to_clear[max_reg]
17759 |= padding_bits_to_clear_res[max_reg] & mask;
17760
17761 *regno = max_reg;
17762 *last_used_bit = max_bit;
17763 }
17764 else
17765 /* This function should only be used for structs and unions. */
17766 gcc_unreachable ();
17767
17768 return not_to_clear_reg_mask;
17769 }
17770
17771 /* In the context of the ARMv8-M Security Extensions, this function is used for
17772 both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute
17773 which registers are used when returning or passing arguments, and returns
17774 them as a mask. It also computes a mask indicating the padding/unused
17775 bits of each of these registers, and passes it back through the
17776 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17777 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17778 the starting register used to pass this argument or return value is passed
17779 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17780 for struct and union types. */
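/* Illustrative sketch (added commentary; the type and register choice are
   arbitrary examples, not taken from the sources): for an argument of type

     struct s { char c; int i; };

   passed in r0-r1 under the AAPCS, the returned mask would have the bits for
   r0 and r1 set, and padding_bits_to_clear[0] would have bits 8..31 set to
   cover the three bytes of padding that follow 'c'.  */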
17781
17782 static unsigned HOST_WIDE_INT
17783 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17784 uint32_t * padding_bits_to_clear)
17785
17786 {
17787 int last_used_bit = 0;
17788 unsigned HOST_WIDE_INT not_to_clear_mask;
17789
17790 if (RECORD_OR_UNION_TYPE_P (arg_type))
17791 {
17792 not_to_clear_mask
17793 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17794 padding_bits_to_clear, 0,
17795 &last_used_bit);
17796
17797
17798 /* If 'last_used_bit' is not zero, that means we are still using a
17799 part of the last 'regno'. In such cases we must clear the trailing
17800 bits. Otherwise we are not using regno and we should mark it as to
17801 be cleared. */
17802 if (last_used_bit != 0)
17803 padding_bits_to_clear[regno]
17804 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17805 else
17806 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17807 }
17808 else
17809 {
17810 not_to_clear_mask = 0;
17811 /* We are not dealing with structs or unions, so these arguments may be
17812 passed in floating point registers too. In some cases BLKmode is
17813 used when returning or passing arguments in multiple VFP registers. */
17814 if (GET_MODE (arg_rtx) == BLKmode)
17815 {
17816 int i, arg_regs;
17817 rtx reg;
17818
17819 /* This should really only occur when dealing with the hard-float
17820 ABI. */
17821 gcc_assert (TARGET_HARD_FLOAT_ABI);
17822
17823 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17824 {
17825 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17826 gcc_assert (REG_P (reg));
17827
17828 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17829
17830 /* If we are dealing with DF mode, make sure we don't
17831 clear either of the registers it addresses. */
17832 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17833 if (arg_regs > 1)
17834 {
17835 unsigned HOST_WIDE_INT mask;
17836 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17837 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17838 not_to_clear_mask |= mask;
17839 }
17840 }
17841 }
17842 else
17843 {
17844 /* Otherwise we can rely on the MODE to determine how many registers
17845 are being used by this argument. */
17846 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17847 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17848 if (arg_regs > 1)
17849 {
17850 unsigned HOST_WIDE_INT
17851 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17852 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17853 not_to_clear_mask |= mask;
17854 }
17855 }
17856 }
17857
17858 return not_to_clear_mask;
17859 }
17860
17861 /* Clear the secret contents of registers before doing a cmse_nonsecure_call
17862 or returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
17863 which registers are to be fully cleared, using the value in register
17864 CLEARING_REG if that is more efficient. PADDING_BITS_TO_CLEAR is an array of
17865 PADDING_BITS_LEN entries giving the bits that need to be cleared in
17866 caller-saved core registers, with SCRATCH_REG used as a scratch register.
17867
17868 NOTE: one of the three following conditions must hold:
17869 - SCRATCH_REG is a low register
17870 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
17871 in TO_CLEAR_BITMAP)
17872 - CLEARING_REG is a low register. */
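/* Rough illustration (added commentary, not generated verbatim by this
   function): to clear padding bits 8..31 of r0 the loop below builds the
   inverted mask in the scratch register and ANDs it in, roughly

       mov   ip, #255         @ low half of ~padding_bits_to_clear[0]
       @ a movt-style insert of the high half is emitted only when nonzero
       and   r0, r0, ip

   while registers marked in TO_CLEAR_BITMAP are simply overwritten with
   CLEARING_REG once it has been zeroed.  */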
17873
17874 static void
17875 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17876 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17877 {
17878 bool saved_clearing = false;
17879 rtx saved_clearing_reg = NULL_RTX;
17880 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17881
17882 gcc_assert (arm_arch_cmse);
17883
17884 if (!bitmap_empty_p (to_clear_bitmap))
17885 {
17886 minregno = bitmap_first_set_bit (to_clear_bitmap);
17887 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17888 }
17889 clearing_regno = REGNO (clearing_reg);
17890
17891 /* Clear padding bits. */
17892 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17893 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17894 {
17895 uint64_t mask;
17896 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17897
17898 if (padding_bits_to_clear[i] == 0)
17899 continue;
17900
17901 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17902 CLEARING_REG as scratch. */
17903 if (TARGET_THUMB1
17904 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17905 {
17906 /* clearing_reg is not to be cleared, so save its value into scratch_reg;
17907 we can then use clearing_reg to clear the unused bits in the
17908 arguments. */
17909 if ((clearing_regno > maxregno
17910 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17911 && !saved_clearing)
17912 {
17913 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17914 emit_move_insn (scratch_reg, clearing_reg);
17915 saved_clearing = true;
17916 saved_clearing_reg = scratch_reg;
17917 }
17918 scratch_reg = clearing_reg;
17919 }
17920
17921 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17922 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17923 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17924
17925 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17926 mask = (~padding_bits_to_clear[i]) >> 16;
17927 rtx16 = gen_int_mode (16, SImode);
17928 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17929 if (mask)
17930 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17931
17932 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17933 }
17934 if (saved_clearing)
17935 emit_move_insn (clearing_reg, saved_clearing_reg);
17936
17937
17938 /* Clear full registers. */
17939
17940 /* If not marked for clearing, clearing_reg already does not contain
17941 any secret. */
17942 if (clearing_regno <= maxregno
17943 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17944 {
17945 emit_move_insn (clearing_reg, const0_rtx);
17946 emit_use (clearing_reg);
17947 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17948 }
17949
17950 for (regno = minregno; regno <= maxregno; regno++)
17951 {
17952 if (!bitmap_bit_p (to_clear_bitmap, regno))
17953 continue;
17954
17955 if (IS_VFP_REGNUM (regno))
17956 {
17957 /* If regno is an even vfp register and its successor is also to
17958 be cleared, use vmov. */
17959 if (TARGET_VFP_DOUBLE
17960 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17961 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17962 {
17963 emit_move_insn (gen_rtx_REG (DFmode, regno),
17964 CONST1_RTX (DFmode));
17965 emit_use (gen_rtx_REG (DFmode, regno));
17966 regno++;
17967 }
17968 else
17969 {
17970 emit_move_insn (gen_rtx_REG (SFmode, regno),
17971 CONST1_RTX (SFmode));
17972 emit_use (gen_rtx_REG (SFmode, regno));
17973 }
17974 }
17975 else
17976 {
17977 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17978 emit_use (gen_rtx_REG (SImode, regno));
17979 }
17980 }
17981 }
17982
17983 /* Clear the caller-saved registers not used to pass arguments before a
17984 cmse_nonsecure_call. Saving, clearing and restoring of the callee-saved
17985 registers is done in the __gnu_cmse_nonsecure_call libcall.
17986 See libgcc/config/arm/cmse_nonsecure_call.S. */
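/* Illustrative example (added commentary; the signature is an arbitrary
   assumption): for a non-secure call taking a single 'int' with
   -mfloat-abi=soft, r0 carries the argument and the register holding the
   call address is preserved, so only the remaining registers among r1-r3
   are zeroed before the call; with -mfloat-abi=hard the caller-saved VFP
   argument registers d0-d7 are also cleared when they carry no
   arguments.  */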
17987
17988 static void
17989 cmse_nonsecure_call_clear_caller_saved (void)
17990 {
17991 basic_block bb;
17992
17993 FOR_EACH_BB_FN (bb, cfun)
17994 {
17995 rtx_insn *insn;
17996
17997 FOR_BB_INSNS (bb, insn)
17998 {
17999 unsigned address_regnum, regno, maxregno =
18000 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
18001 auto_sbitmap to_clear_bitmap (maxregno + 1);
18002 rtx_insn *seq;
18003 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18004 rtx address;
18005 CUMULATIVE_ARGS args_so_far_v;
18006 cumulative_args_t args_so_far;
18007 tree arg_type, fntype;
18008 bool first_param = true;
18009 function_args_iterator args_iter;
18010 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18011
18012 if (!NONDEBUG_INSN_P (insn))
18013 continue;
18014
18015 if (!CALL_P (insn))
18016 continue;
18017
18018 pat = PATTERN (insn);
18019 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18020 call = XVECEXP (pat, 0, 0);
18021
18022 /* Get the real call RTX if the insn sets a value, i.e. the call returns. */
18023 if (GET_CODE (call) == SET)
18024 call = SET_SRC (call);
18025
18026 /* Check if it is a cmse_nonsecure_call. */
18027 unspec = XEXP (call, 0);
18028 if (GET_CODE (unspec) != UNSPEC
18029 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18030 continue;
18031
18032 /* Determine the caller-saved registers we need to clear. */
18033 bitmap_clear (to_clear_bitmap);
18034 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
18035
18036 /* Only look at the caller-saved floating point registers in the case of
18037 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18038 lazy stores and loads which clear both caller- and callee-saved
18039 registers. */
18040 if (TARGET_HARD_FLOAT_ABI)
18041 {
18042 auto_sbitmap float_bitmap (maxregno + 1);
18043
18044 bitmap_clear (float_bitmap);
18045 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18046 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
18047 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18048 }
18049
18050 /* Make sure the register used to hold the function address is not
18051 cleared. */
18052 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18053 gcc_assert (MEM_P (address));
18054 gcc_assert (REG_P (XEXP (address, 0)));
18055 address_regnum = REGNO (XEXP (address, 0));
18056 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
18057 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18058
18059 /* Set basic block of call insn so that df rescan is performed on
18060 insns inserted here. */
18061 set_block_for_insn (insn, bb);
18062 df_set_flags (DF_DEFER_INSN_RESCAN);
18063 start_sequence ();
18064
18065 /* Make sure the scheduler doesn't schedule other insns beyond
18066 here. */
18067 emit_insn (gen_blockage ());
18068
18069 /* Walk through all arguments and clear registers
18070 appropriately. */
18071 fntype = TREE_TYPE (MEM_EXPR (address));
18072 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18073 NULL_TREE);
18074 args_so_far = pack_cumulative_args (&args_so_far_v);
18075 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18076 {
18077 rtx arg_rtx;
18078 uint64_t to_clear_args_mask;
18079
18080 if (VOID_TYPE_P (arg_type))
18081 continue;
18082
18083 function_arg_info arg (arg_type, /*named=*/true);
18084 if (!first_param)
18085 /* ??? We should advance after processing the argument and pass
18086 the argument we're advancing past. */
18087 arm_function_arg_advance (args_so_far, arg);
18088
18089 arg_rtx = arm_function_arg (args_so_far, arg);
18090 gcc_assert (REG_P (arg_rtx));
18091 to_clear_args_mask
18092 = compute_not_to_clear_mask (arg_type, arg_rtx,
18093 REGNO (arg_rtx),
18094 &padding_bits_to_clear[0]);
18095 if (to_clear_args_mask)
18096 {
18097 for (regno = R0_REGNUM; regno <= maxregno; regno++)
18098 {
18099 if (to_clear_args_mask & (1ULL << regno))
18100 bitmap_clear_bit (to_clear_bitmap, regno);
18101 }
18102 }
18103
18104 first_param = false;
18105 }
18106
18107 /* We use right shift and left shift to clear the LSB of the address
18108 we jump to instead of using bic, to avoid having to use an extra
18109 register on Thumb-1. */
18110 clearing_reg = XEXP (address, 0);
18111 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18112 emit_insn (gen_rtx_SET (clearing_reg, shift));
18113 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18114 emit_insn (gen_rtx_SET (clearing_reg, shift));
18115
18116 /* Clear the caller-saved registers that might leak information before
18117 doing a non-secure call. */
18118 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
18119 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
18120 NUM_ARG_REGS, ip_reg, clearing_reg);
18121
18122 seq = get_insns ();
18123 end_sequence ();
18124 emit_insn_before (seq, insn);
18125 }
18126 }
18127 }
18128
18129 /* Rewrite a move insn into a subtract of 0 if the condition codes will
18130 be useful in the next conditional jump insn. */
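/* Illustrative example (added commentary; register numbers are arbitrary):
   a Thumb-1 sequence such as

       movs  r1, r0
       ...
       cmp   r1, #0
       bne   .L2

   can have the move rewritten as 'subs r1, r0, #0', whose RTL pattern sets
   the condition codes, so the later compare against zero can typically be
   omitted.  */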
18131
18132 static void
18133 thumb1_reorg (void)
18134 {
18135 basic_block bb;
18136
18137 FOR_EACH_BB_FN (bb, cfun)
18138 {
18139 rtx dest, src;
18140 rtx cmp, op0, op1, set = NULL;
18141 rtx_insn *prev, *insn = BB_END (bb);
18142 bool insn_clobbered = false;
18143
18144 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
18145 insn = PREV_INSN (insn);
18146
18147 /* Find the last cbranchsi4_insn in basic block BB. */
18148 if (insn == BB_HEAD (bb)
18149 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
18150 continue;
18151
18152 /* Get the register with which we are comparing. */
18153 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
18154 op0 = XEXP (cmp, 0);
18155 op1 = XEXP (cmp, 1);
18156
18157 /* Check that comparison is against ZERO. */
18158 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
18159 continue;
18160
18161 /* Find the first flag setting insn before INSN in basic block BB. */
18162 gcc_assert (insn != BB_HEAD (bb));
18163 for (prev = PREV_INSN (insn);
18164 (!insn_clobbered
18165 && prev != BB_HEAD (bb)
18166 && (NOTE_P (prev)
18167 || DEBUG_INSN_P (prev)
18168 || ((set = single_set (prev)) != NULL
18169 && get_attr_conds (prev) == CONDS_NOCOND)));
18170 prev = PREV_INSN (prev))
18171 {
18172 if (reg_set_p (op0, prev))
18173 insn_clobbered = true;
18174 }
18175
18176 /* Skip if op0 is clobbered by an insn other than PREV. */
18177 if (insn_clobbered)
18178 continue;
18179
18180 if (!set)
18181 continue;
18182
18183 dest = SET_DEST (set);
18184 src = SET_SRC (set);
18185 if (!low_register_operand (dest, SImode)
18186 || !low_register_operand (src, SImode))
18187 continue;
18188
18189 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
18190 in INSN. Both src and dest of the move insn are checked. */
18191 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
18192 {
18193 dest = copy_rtx (dest);
18194 src = copy_rtx (src);
18195 src = gen_rtx_MINUS (SImode, src, const0_rtx);
18196 PATTERN (prev) = gen_rtx_SET (dest, src);
18197 INSN_CODE (prev) = -1;
18198 /* Set test register in INSN to dest. */
18199 XEXP (cmp, 0) = copy_rtx (dest);
18200 INSN_CODE (insn) = -1;
18201 }
18202 }
18203 }
18204
18205 /* Convert instructions to their cc-clobbering variant if possible, since
18206 that allows us to use smaller encodings. */
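/* Illustrative example (added commentary; registers are arbitrary): when the
   condition flags are dead after the insn, a 32-bit 'add r0, r1, r2' can be
   rewritten as the flag-setting 'adds r0, r1, r2', which has a 16-bit
   encoding.  */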
18207
18208 static void
18209 thumb2_reorg (void)
18210 {
18211 basic_block bb;
18212 regset_head live;
18213
18214 INIT_REG_SET (&live);
18215
18216 /* We are freeing block_for_insn in the toplev to keep compatibility
18217 with old MDEP_REORGS that are not CFG based. Recompute it now. */
18218 compute_bb_for_insn ();
18219 df_analyze ();
18220
18221 enum Convert_Action {SKIP, CONV, SWAP_CONV};
18222
18223 FOR_EACH_BB_FN (bb, cfun)
18224 {
18225 if ((current_tune->disparage_flag_setting_t16_encodings
18226 == tune_params::DISPARAGE_FLAGS_ALL)
18227 && optimize_bb_for_speed_p (bb))
18228 continue;
18229
18230 rtx_insn *insn;
18231 Convert_Action action = SKIP;
18232 Convert_Action action_for_partial_flag_setting
18233 = ((current_tune->disparage_flag_setting_t16_encodings
18234 != tune_params::DISPARAGE_FLAGS_NEITHER)
18235 && optimize_bb_for_speed_p (bb))
18236 ? SKIP : CONV;
18237
18238 COPY_REG_SET (&live, DF_LR_OUT (bb));
18239 df_simulate_initialize_backwards (bb, &live);
18240 FOR_BB_INSNS_REVERSE (bb, insn)
18241 {
18242 if (NONJUMP_INSN_P (insn)
18243 && !REGNO_REG_SET_P (&live, CC_REGNUM)
18244 && GET_CODE (PATTERN (insn)) == SET)
18245 {
18246 action = SKIP;
18247 rtx pat = PATTERN (insn);
18248 rtx dst = XEXP (pat, 0);
18249 rtx src = XEXP (pat, 1);
18250 rtx op0 = NULL_RTX, op1 = NULL_RTX;
18251
18252 if (UNARY_P (src) || BINARY_P (src))
18253 op0 = XEXP (src, 0);
18254
18255 if (BINARY_P (src))
18256 op1 = XEXP (src, 1);
18257
18258 if (low_register_operand (dst, SImode))
18259 {
18260 switch (GET_CODE (src))
18261 {
18262 case PLUS:
18263 /* Adding two registers and storing the result
18264 in the first source is already a 16-bit
18265 operation. */
18266 if (rtx_equal_p (dst, op0)
18267 && register_operand (op1, SImode))
18268 break;
18269
18270 if (low_register_operand (op0, SImode))
18271 {
18272 /* ADDS <Rd>,<Rn>,<Rm> */
18273 if (low_register_operand (op1, SImode))
18274 action = CONV;
18275 /* ADDS <Rdn>,#<imm8> */
18276 /* SUBS <Rdn>,#<imm8> */
18277 else if (rtx_equal_p (dst, op0)
18278 && CONST_INT_P (op1)
18279 && IN_RANGE (INTVAL (op1), -255, 255))
18280 action = CONV;
18281 /* ADDS <Rd>,<Rn>,#<imm3> */
18282 /* SUBS <Rd>,<Rn>,#<imm3> */
18283 else if (CONST_INT_P (op1)
18284 && IN_RANGE (INTVAL (op1), -7, 7))
18285 action = CONV;
18286 }
18287 /* ADCS <Rd>, <Rn> */
18288 else if (GET_CODE (XEXP (src, 0)) == PLUS
18289 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
18290 && low_register_operand (XEXP (XEXP (src, 0), 1),
18291 SImode)
18292 && COMPARISON_P (op1)
18293 && cc_register (XEXP (op1, 0), VOIDmode)
18294 && maybe_get_arm_condition_code (op1) == ARM_CS
18295 && XEXP (op1, 1) == const0_rtx)
18296 action = CONV;
18297 break;
18298
18299 case MINUS:
18300 /* RSBS <Rd>,<Rn>,#0
18301 Not handled here: see NEG below. */
18302 /* SUBS <Rd>,<Rn>,#<imm3>
18303 SUBS <Rdn>,#<imm8>
18304 Not handled here: see PLUS above. */
18305 /* SUBS <Rd>,<Rn>,<Rm> */
18306 if (low_register_operand (op0, SImode)
18307 && low_register_operand (op1, SImode))
18308 action = CONV;
18309 break;
18310
18311 case MULT:
18312 /* MULS <Rdm>,<Rn>,<Rdm>
18313 As an exception to the rule, this is only used
18314 when optimizing for size since MULS is slow on all
18315 known implementations. We do not even want to use
18316 MULS in cold code, if optimizing for speed, so we
18317 test the global flag here. */
18318 if (!optimize_size)
18319 break;
18320 /* Fall through. */
18321 case AND:
18322 case IOR:
18323 case XOR:
18324 /* ANDS <Rdn>,<Rm> */
18325 if (rtx_equal_p (dst, op0)
18326 && low_register_operand (op1, SImode))
18327 action = action_for_partial_flag_setting;
18328 else if (rtx_equal_p (dst, op1)
18329 && low_register_operand (op0, SImode))
18330 action = action_for_partial_flag_setting == SKIP
18331 ? SKIP : SWAP_CONV;
18332 break;
18333
18334 case ASHIFTRT:
18335 case ASHIFT:
18336 case LSHIFTRT:
18337 /* ASRS <Rdn>,<Rm> */
18338 /* LSRS <Rdn>,<Rm> */
18339 /* LSLS <Rdn>,<Rm> */
18340 if (rtx_equal_p (dst, op0)
18341 && low_register_operand (op1, SImode))
18342 action = action_for_partial_flag_setting;
18343 /* ASRS <Rd>,<Rm>,#<imm5> */
18344 /* LSRS <Rd>,<Rm>,#<imm5> */
18345 /* LSLS <Rd>,<Rm>,#<imm5> */
18346 else if (low_register_operand (op0, SImode)
18347 && CONST_INT_P (op1)
18348 && IN_RANGE (INTVAL (op1), 0, 31))
18349 action = action_for_partial_flag_setting;
18350 break;
18351
18352 case ROTATERT:
18353 /* RORS <Rdn>,<Rm> */
18354 if (rtx_equal_p (dst, op0)
18355 && low_register_operand (op1, SImode))
18356 action = action_for_partial_flag_setting;
18357 break;
18358
18359 case NOT:
18360 /* MVNS <Rd>,<Rm> */
18361 if (low_register_operand (op0, SImode))
18362 action = action_for_partial_flag_setting;
18363 break;
18364
18365 case NEG:
18366 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
18367 if (low_register_operand (op0, SImode))
18368 action = CONV;
18369 break;
18370
18371 case CONST_INT:
18372 /* MOVS <Rd>,#<imm8> */
18373 if (CONST_INT_P (src)
18374 && IN_RANGE (INTVAL (src), 0, 255))
18375 action = action_for_partial_flag_setting;
18376 break;
18377
18378 case REG:
18379 /* MOVS and MOV<c> with registers have different
18380 encodings, so are not relevant here. */
18381 break;
18382
18383 default:
18384 break;
18385 }
18386 }
18387
18388 if (action != SKIP)
18389 {
18390 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18391 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18392 rtvec vec;
18393
18394 if (action == SWAP_CONV)
18395 {
18396 src = copy_rtx (src);
18397 XEXP (src, 0) = op1;
18398 XEXP (src, 1) = op0;
18399 pat = gen_rtx_SET (dst, src);
18400 vec = gen_rtvec (2, pat, clobber);
18401 }
18402 else /* action == CONV */
18403 vec = gen_rtvec (2, pat, clobber);
18404
18405 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
18406 INSN_CODE (insn) = -1;
18407 }
18408 }
18409
18410 if (NONDEBUG_INSN_P (insn))
18411 df_simulate_one_insn_backwards (bb, insn, &live);
18412 }
18413 }
18414
18415 CLEAR_REG_SET (&live);
18416 }
18417
18418 /* GCC puts the constant pool in the wrong place for ARM, since we can
18419 only load addresses from within a limited distance of the PC. We do
18420 some special munging to move the constant pool values to the correct
18421 point in the code. */
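/* Illustrative example (added commentary): a load of a constant that cannot
   be built from immediates is left by earlier passes as a PC-relative load
   from a literal pool entry; the code below records every such use as a
   "fix" and dumps the accumulated "minipool" after a suitable barrier so
   that each entry stays within the addressing range of the load that
   references it.  */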
18422 static void
18423 arm_reorg (void)
18424 {
18425 rtx_insn *insn;
18426 HOST_WIDE_INT address = 0;
18427 Mfix * fix;
18428
18429 if (use_cmse)
18430 cmse_nonsecure_call_clear_caller_saved ();
18431
18432 /* We cannot run the Thumb passes for thunks because there is no CFG. */
18433 if (cfun->is_thunk)
18434 ;
18435 else if (TARGET_THUMB1)
18436 thumb1_reorg ();
18437 else if (TARGET_THUMB2)
18438 thumb2_reorg ();
18439
18440 /* Ensure all insns that must be split have been split at this point.
18441 Otherwise, the pool placement code below may compute incorrect
18442 insn lengths. Note that when optimizing, all insns have already
18443 been split at this point. */
18444 if (!optimize)
18445 split_all_insns_noflow ();
18446
18447 /* If literal pools are disabled, make sure we do not attempt to create
18448 one: none should be necessary at this point. */
18449 if (arm_disable_literal_pool)
18450 return;
18451
18452 minipool_fix_head = minipool_fix_tail = NULL;
18453
18454 /* The first insn must always be a note, or the code below won't
18455 scan it properly. */
18456 insn = get_insns ();
18457 gcc_assert (NOTE_P (insn));
18458 minipool_pad = 0;
18459
18460 /* Scan all the insns and record the operands that will need fixing. */
18461 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
18462 {
18463 if (BARRIER_P (insn))
18464 push_minipool_barrier (insn, address);
18465 else if (INSN_P (insn))
18466 {
18467 rtx_jump_table_data *table;
18468
18469 note_invalid_constants (insn, address, true);
18470 address += get_attr_length (insn);
18471
18472 /* If the insn is a vector jump, add the size of the table
18473 and skip the table. */
18474 if (tablejump_p (insn, NULL, &table))
18475 {
18476 address += get_jump_table_size (table);
18477 insn = table;
18478 }
18479 }
18480 else if (LABEL_P (insn))
18481 /* Add the worst-case padding due to alignment. We don't add
18482 the _current_ padding because the minipool insertions
18483 themselves might change it. */
18484 address += get_label_padding (insn);
18485 }
18486
18487 fix = minipool_fix_head;
18488
18489 /* Now scan the fixups and perform the required changes. */
18490 while (fix)
18491 {
18492 Mfix * ftmp;
18493 Mfix * fdel;
18494 Mfix * last_added_fix;
18495 Mfix * last_barrier = NULL;
18496 Mfix * this_fix;
18497
18498 /* Skip any further barriers before the next fix. */
18499 while (fix && BARRIER_P (fix->insn))
18500 fix = fix->next;
18501
18502 /* No more fixes. */
18503 if (fix == NULL)
18504 break;
18505
18506 last_added_fix = NULL;
18507
18508 for (ftmp = fix; ftmp; ftmp = ftmp->next)
18509 {
18510 if (BARRIER_P (ftmp->insn))
18511 {
18512 if (ftmp->address >= minipool_vector_head->max_address)
18513 break;
18514
18515 last_barrier = ftmp;
18516 }
18517 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
18518 break;
18519
18520 last_added_fix = ftmp; /* Keep track of the last fix added. */
18521 }
18522
18523 /* If we found a barrier, drop back to that; any fixes that we
18524 could have reached but come after the barrier will now go in
18525 the next mini-pool. */
18526 if (last_barrier != NULL)
18527 {
18528 /* Reduce the refcount for those fixes that won't go into this
18529 pool after all. */
18530 for (fdel = last_barrier->next;
18531 fdel && fdel != ftmp;
18532 fdel = fdel->next)
18533 {
18534 fdel->minipool->refcount--;
18535 fdel->minipool = NULL;
18536 }
18537
18538 ftmp = last_barrier;
18539 }
18540 else
18541 {
18542 /* ftmp is the first fix that we can't fit into this pool and
18543 there are no natural barriers that we could use. Insert a
18544 new barrier in the code somewhere between the previous
18545 fix and this one, and arrange to jump around it. */
18546 HOST_WIDE_INT max_address;
18547
18548 /* The last item on the list of fixes must be a barrier, so
18549 we can never run off the end of the list of fixes without
18550 last_barrier being set. */
18551 gcc_assert (ftmp);
18552
18553 max_address = minipool_vector_head->max_address;
18554 /* Check that there isn't another fix that is in range that
18555 we couldn't fit into this pool because the pool was
18556 already too large: we need to put the pool before such an
18557 instruction. The pool itself may come just after the
18558 fix because create_fix_barrier also allows space for a
18559 jump instruction. */
18560 if (ftmp->address < max_address)
18561 max_address = ftmp->address + 1;
18562
18563 last_barrier = create_fix_barrier (last_added_fix, max_address);
18564 }
18565
18566 assign_minipool_offsets (last_barrier);
18567
18568 while (ftmp)
18569 {
18570 if (!BARRIER_P (ftmp->insn)
18571 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
18572 == NULL))
18573 break;
18574
18575 ftmp = ftmp->next;
18576 }
18577
18578 /* Scan over the fixes we have identified for this pool, fixing them
18579 up and adding the constants to the pool itself. */
18580 for (this_fix = fix; this_fix && ftmp != this_fix;
18581 this_fix = this_fix->next)
18582 if (!BARRIER_P (this_fix->insn))
18583 {
18584 rtx addr
18585 = plus_constant (Pmode,
18586 gen_rtx_LABEL_REF (VOIDmode,
18587 minipool_vector_label),
18588 this_fix->minipool->offset);
18589 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
18590 }
18591
18592 dump_minipool (last_barrier->insn);
18593 fix = ftmp;
18594 }
18595
18596 /* From now on we must synthesize any constants that we can't handle
18597 directly. This can happen if the RTL gets split during final
18598 instruction generation. */
18599 cfun->machine->after_arm_reorg = 1;
18600
18601 /* Free the minipool memory. */
18602 obstack_free (&minipool_obstack, minipool_startobj);
18603 }
18604 \f
18605 /* Routines to output assembly language. */
18606
18607 /* Return the string representation of the real value passed in. */
18608 static const char *
18609 fp_const_from_val (REAL_VALUE_TYPE *r)
18610 {
18611 if (!fp_consts_inited)
18612 init_fp_table ();
18613
18614 gcc_assert (real_equal (r, &value_fp0));
18615 return "0";
18616 }
18617
18618 /* OPERANDS[0] is the entire list of insns that constitute the pop,
18619 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
18620 is in the list, UPDATE is true iff the list contains an explicit
18621 update of the base register. */
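/* Typical outputs (added commentary, illustrative register lists):
   'pop {r4, r5, pc}' when the base is SP with an update, or
   'ldmia r3!, {r4, r5}' / 'ldm r3, {r4, r5}' for other base registers.  */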
18622 void
18623 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
18624 bool update)
18625 {
18626 int i;
18627 char pattern[100];
18628 int offset;
18629 const char *conditional;
18630 int num_saves = XVECLEN (operands[0], 0);
18631 unsigned int regno;
18632 unsigned int regno_base = REGNO (operands[1]);
18633 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
18634
18635 offset = 0;
18636 offset += update ? 1 : 0;
18637 offset += return_pc ? 1 : 0;
18638
18639 /* Is the base register in the list? */
18640 for (i = offset; i < num_saves; i++)
18641 {
18642 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
18643 /* If SP is in the list, then the base register must be SP. */
18644 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
18645 /* If base register is in the list, there must be no explicit update. */
18646 if (regno == regno_base)
18647 gcc_assert (!update);
18648 }
18649
18650 conditional = reverse ? "%?%D0" : "%?%d0";
18651 /* Can't use POP if returning from an interrupt. */
18652 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
18653 sprintf (pattern, "pop%s\t{", conditional);
18654 else
18655 {
18656 /* Output ldmfd when the base register is SP, otherwise output ldmia.
18657 It's just a convention; their semantics are identical. */
18658 if (regno_base == SP_REGNUM)
18659 sprintf (pattern, "ldmfd%s\t", conditional);
18660 else if (update)
18661 sprintf (pattern, "ldmia%s\t", conditional);
18662 else
18663 sprintf (pattern, "ldm%s\t", conditional);
18664
18665 strcat (pattern, reg_names[regno_base]);
18666 if (update)
18667 strcat (pattern, "!, {");
18668 else
18669 strcat (pattern, ", {");
18670 }
18671
18672 /* Output the first destination register. */
18673 strcat (pattern,
18674 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
18675
18676 /* Output the rest of the destination registers. */
18677 for (i = offset + 1; i < num_saves; i++)
18678 {
18679 strcat (pattern, ", ");
18680 strcat (pattern,
18681 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
18682 }
18683
18684 strcat (pattern, "}");
18685
18686 if (interrupt_p && return_pc)
18687 strcat (pattern, "^");
18688
18689 output_asm_insn (pattern, &cond);
18690 }
18691
18692
18693 /* Output the assembly for a store multiple. */
18694
18695 const char *
18696 vfp_output_vstmd (rtx * operands)
18697 {
18698 char pattern[100];
18699 int p;
18700 int base;
18701 int i;
18702 rtx addr_reg = REG_P (XEXP (operands[0], 0))
18703 ? XEXP (operands[0], 0)
18704 : XEXP (XEXP (operands[0], 0), 0);
18705 bool push_p = REGNO (addr_reg) == SP_REGNUM;
18706
18707 if (push_p)
18708 strcpy (pattern, "vpush%?.64\t{%P1");
18709 else
18710 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
18711
18712 p = strlen (pattern);
18713
18714 gcc_assert (REG_P (operands[1]));
18715
18716 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
18717 for (i = 1; i < XVECLEN (operands[2], 0); i++)
18718 {
18719 p += sprintf (&pattern[p], ", d%d", base + i);
18720 }
18721 strcpy (&pattern[p], "}");
18722
18723 output_asm_insn (pattern, operands);
18724 return "";
18725 }
18726
18727
18728 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
18729 number of bytes pushed. */
18730
18731 static int
18732 vfp_emit_fstmd (int base_reg, int count)
18733 {
18734 rtx par;
18735 rtx dwarf;
18736 rtx tmp, reg;
18737 int i;
18738
18739 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly
18740 two register pairs are stored by a store multiple insn. We avoid this
18741 by pushing an extra pair. */
18742 if (count == 2 && !arm_arch6)
18743 {
18744 if (base_reg == LAST_VFP_REGNUM - 3)
18745 base_reg -= 2;
18746 count++;
18747 }
18748
18749 /* FSTMD may not store more than 16 doubleword registers at once. Split
18750 larger stores into multiple parts (up to a maximum of two, in
18751 practice). */
18752 if (count > 16)
18753 {
18754 int saved;
18755 /* NOTE: base_reg is an internal register number, so each D register
18756 counts as 2. */
18757 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
18758 saved += vfp_emit_fstmd (base_reg, 16);
18759 return saved;
18760 }
18761
18762 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18763 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18764
18765 reg = gen_rtx_REG (DFmode, base_reg);
18766 base_reg += 2;
18767
18768 XVECEXP (par, 0, 0)
18769 = gen_rtx_SET (gen_frame_mem
18770 (BLKmode,
18771 gen_rtx_PRE_MODIFY (Pmode,
18772 stack_pointer_rtx,
18773 plus_constant
18774 (Pmode, stack_pointer_rtx,
18775 - (count * 8)))
18776 ),
18777 gen_rtx_UNSPEC (BLKmode,
18778 gen_rtvec (1, reg),
18779 UNSPEC_PUSH_MULT));
18780
18781 tmp = gen_rtx_SET (stack_pointer_rtx,
18782 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18783 RTX_FRAME_RELATED_P (tmp) = 1;
18784 XVECEXP (dwarf, 0, 0) = tmp;
18785
18786 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18787 RTX_FRAME_RELATED_P (tmp) = 1;
18788 XVECEXP (dwarf, 0, 1) = tmp;
18789
18790 for (i = 1; i < count; i++)
18791 {
18792 reg = gen_rtx_REG (DFmode, base_reg);
18793 base_reg += 2;
18794 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18795
18796 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18797 plus_constant (Pmode,
18798 stack_pointer_rtx,
18799 i * 8)),
18800 reg);
18801 RTX_FRAME_RELATED_P (tmp) = 1;
18802 XVECEXP (dwarf, 0, i + 1) = tmp;
18803 }
18804
18805 par = emit_insn (par);
18806 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18807 RTX_FRAME_RELATED_P (par) = 1;
18808
18809 return count * 8;
18810 }
18811
18812 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
18813 has the cmse_nonsecure_call attribute; return false otherwise. */
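/* Illustrative usage (added commentary; the typedef name is an arbitrary
   assumption):

     typedef void __attribute__((cmse_nonsecure_call)) ns_fn (void);

   Calls made through a pointer to such a type are treated as non-secure
   calls.  */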
18814
18815 bool
18816 detect_cmse_nonsecure_call (tree addr)
18817 {
18818 if (!addr)
18819 return FALSE;
18820
18821 tree fntype = TREE_TYPE (addr);
18822 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18823 TYPE_ATTRIBUTES (fntype)))
18824 return TRUE;
18825 return FALSE;
18826 }
18827
18828
18829 /* Emit a call instruction with pattern PAT. ADDR is the address of
18830 the call target. */
18831
18832 void
18833 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18834 {
18835 rtx insn;
18836
18837 insn = emit_call_insn (pat);
18838
18839 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18840 If the call might use such an entry, add a use of the PIC register
18841 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18842 if (TARGET_VXWORKS_RTP
18843 && flag_pic
18844 && !sibcall
18845 && GET_CODE (addr) == SYMBOL_REF
18846 && (SYMBOL_REF_DECL (addr)
18847 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18848 : !SYMBOL_REF_LOCAL_P (addr)))
18849 {
18850 require_pic_register (NULL_RTX, false /*compute_now*/);
18851 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18852 }
18853
18854 if (TARGET_FDPIC)
18855 {
18856 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
18857 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
18858 }
18859
18860 if (TARGET_AAPCS_BASED)
18861 {
18862 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18863 linker. We need to add an IP clobber to allow setting
18864 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18865 is not needed since it's a fixed register. */
18866 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18867 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18868 }
18869 }
18870
18871 /* Output a 'call' insn. */
18872 const char *
18873 output_call (rtx *operands)
18874 {
18875 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18876
18877 /* Handle calls to lr using ip (which may be clobbered by the subroutine anyway). */
18878 if (REGNO (operands[0]) == LR_REGNUM)
18879 {
18880 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18881 output_asm_insn ("mov%?\t%0, %|lr", operands);
18882 }
18883
18884 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18885
18886 if (TARGET_INTERWORK || arm_arch4t)
18887 output_asm_insn ("bx%?\t%0", operands);
18888 else
18889 output_asm_insn ("mov%?\t%|pc, %0", operands);
18890
18891 return "";
18892 }
18893
18894 /* Output a move of a long double from ARM registers to ARM registers.
18895 OPERANDS[0] is the destination.
18896 OPERANDS[1] is the source. */
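/* Added commentary (illustrative): copying {r1,r2,r3} into {r0,r1,r2}
   proceeds from the lowest register upwards, while the opposite overlap is
   handled by copying downwards, so no source register is overwritten before
   it has been read.  */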
18897 const char *
18898 output_mov_long_double_arm_from_arm (rtx *operands)
18899 {
18900 /* We have to be careful here because the two might overlap. */
18901 int dest_start = REGNO (operands[0]);
18902 int src_start = REGNO (operands[1]);
18903 rtx ops[2];
18904 int i;
18905
18906 if (dest_start < src_start)
18907 {
18908 for (i = 0; i < 3; i++)
18909 {
18910 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18911 ops[1] = gen_rtx_REG (SImode, src_start + i);
18912 output_asm_insn ("mov%?\t%0, %1", ops);
18913 }
18914 }
18915 else
18916 {
18917 for (i = 2; i >= 0; i--)
18918 {
18919 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18920 ops[1] = gen_rtx_REG (SImode, src_start + i);
18921 output_asm_insn ("mov%?\t%0, %1", ops);
18922 }
18923 }
18924
18925 return "";
18926 }
18927
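/* Emit one or two insns to set DEST to the value of SRC: for constant
   integers the low 16 bits are set first and the upper 16 bits, when
   nonzero, are then inserted; for symbolic values a HIGH/LO_SUM pair is
   emitted.  A REG_EQUAL note records the full value.  (Added descriptive
   comment summarising the code below.)  */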
18928 void
18929 arm_emit_movpair (rtx dest, rtx src)
18930 {
18931 /* If the src is an immediate, simplify it. */
18932 if (CONST_INT_P (src))
18933 {
18934 HOST_WIDE_INT val = INTVAL (src);
18935 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18936 if ((val >> 16) & 0x0000ffff)
18937 {
18938 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18939 GEN_INT (16)),
18940 GEN_INT ((val >> 16) & 0x0000ffff));
18941 rtx_insn *insn = get_last_insn ();
18942 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18943 }
18944 return;
18945 }
18946 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18947 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18948 rtx_insn *insn = get_last_insn ();
18949 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18950 }
18951
18952 /* Output a move between double words. It must be REG<-MEM
18953 or MEM<-REG. */
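/* Typical outputs (added commentary, illustrative): 'ldrd r0, [r2]' or
   'ldmia r2, {r0, r1}' for a load and 'strd r0, [r2]' or 'stm r2, {r0, r1}'
   for a store, depending on the addressing mode and on whether LDRD/STRD
   are available.  */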
18954 const char *
18955 output_move_double (rtx *operands, bool emit, int *count)
18956 {
18957 enum rtx_code code0 = GET_CODE (operands[0]);
18958 enum rtx_code code1 = GET_CODE (operands[1]);
18959 rtx otherops[3];
18960 if (count)
18961 *count = 1;
18962
18963 /* The only case in which this can happen is when
18964 computing the length of a DImode instruction
18965 that has an invalid constant in it. */
18966 if (code0 == REG && code1 != MEM)
18967 {
18968 gcc_assert (!emit);
18969 *count = 2;
18970 return "";
18971 }
18972
18973 if (code0 == REG)
18974 {
18975 unsigned int reg0 = REGNO (operands[0]);
18976
18977 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18978
18979 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18980
18981 switch (GET_CODE (XEXP (operands[1], 0)))
18982 {
18983 case REG:
18984
18985 if (emit)
18986 {
18987 if (TARGET_LDRD
18988 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18989 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18990 else
18991 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18992 }
18993 break;
18994
18995 case PRE_INC:
18996 gcc_assert (TARGET_LDRD);
18997 if (emit)
18998 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18999 break;
19000
19001 case PRE_DEC:
19002 if (emit)
19003 {
19004 if (TARGET_LDRD)
19005 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19006 else
19007 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19008 }
19009 break;
19010
19011 case POST_INC:
19012 if (emit)
19013 {
19014 if (TARGET_LDRD)
19015 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
19016 else
19017 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
19018 }
19019 break;
19020
19021 case POST_DEC:
19022 gcc_assert (TARGET_LDRD);
19023 if (emit)
19024 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
19025 break;
19026
19027 case PRE_MODIFY:
19028 case POST_MODIFY:
19029 /* Autoincrement addressing modes should never have overlapping
19030 base and destination registers, and overlapping index registers
19031 are already prohibited, so this doesn't need to worry about
19032 fix_cm3_ldrd. */
19033 otherops[0] = operands[0];
19034 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
19035 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
19036
19037 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
19038 {
19039 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
19040 {
19041 /* Registers overlap so split out the increment. */
19042 if (emit)
19043 {
19044 output_asm_insn ("add%?\t%1, %1, %2", otherops);
19045 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
19046 }
19047 if (count)
19048 *count = 2;
19049 }
19050 else
19051 {
19052 /* Use a single insn if we can.
19053 FIXME: IWMMXT allows offsets larger than ldrd can
19054 handle, fix these up with a pair of ldr. */
19055 if (TARGET_THUMB2
19056 || !CONST_INT_P (otherops[2])
19057 || (INTVAL (otherops[2]) > -256
19058 && INTVAL (otherops[2]) < 256))
19059 {
19060 if (emit)
19061 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
19062 }
19063 else
19064 {
19065 if (emit)
19066 {
19067 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
19068 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19069 }
19070 if (count)
19071 *count = 2;
19072
19073 }
19074 }
19075 }
19076 else
19077 {
19078 /* Use a single insn if we can.
19079 FIXME: IWMMXT allows offsets larger than ldrd can handle,
19080 fix these up with a pair of ldr. */
19081 if (TARGET_THUMB2
19082 || !CONST_INT_P (otherops[2])
19083 || (INTVAL (otherops[2]) > -256
19084 && INTVAL (otherops[2]) < 256))
19085 {
19086 if (emit)
19087 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
19088 }
19089 else
19090 {
19091 if (emit)
19092 {
19093 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19094 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
19095 }
19096 if (count)
19097 *count = 2;
19098 }
19099 }
19100 break;
19101
19102 case LABEL_REF:
19103 case CONST:
19104 /* We might be able to use ldrd %0, %1 here. However, the range is
19105 different from that of ldr/adr, and it is broken on some ARMv7-M
19106 implementations. */
19107 /* Use the second register of the pair to avoid problematic
19108 overlap. */
19109 otherops[1] = operands[1];
19110 if (emit)
19111 output_asm_insn ("adr%?\t%0, %1", otherops);
19112 operands[1] = otherops[0];
19113 if (emit)
19114 {
19115 if (TARGET_LDRD)
19116 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19117 else
19118 output_asm_insn ("ldmia%?\t%1, %M0", operands);
19119 }
19120
19121 if (count)
19122 *count = 2;
19123 break;
19124
19125 /* ??? This needs checking for thumb2. */
19126 default:
19127 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
19128 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
19129 {
19130 otherops[0] = operands[0];
19131 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
19132 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
19133
19134 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
19135 {
19136 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19137 {
19138 switch ((int) INTVAL (otherops[2]))
19139 {
19140 case -8:
19141 if (emit)
19142 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
19143 return "";
19144 case -4:
19145 if (TARGET_THUMB2)
19146 break;
19147 if (emit)
19148 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
19149 return "";
19150 case 4:
19151 if (TARGET_THUMB2)
19152 break;
19153 if (emit)
19154 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
19155 return "";
19156 }
19157 }
19158 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
19159 operands[1] = otherops[0];
19160 if (TARGET_LDRD
19161 && (REG_P (otherops[2])
19162 || TARGET_THUMB2
19163 || (CONST_INT_P (otherops[2])
19164 && INTVAL (otherops[2]) > -256
19165 && INTVAL (otherops[2]) < 256)))
19166 {
19167 if (reg_overlap_mentioned_p (operands[0],
19168 otherops[2]))
19169 {
19170 /* Swap base and index registers over to
19171 avoid a conflict. */
19172 std::swap (otherops[1], otherops[2]);
19173 }
19174 /* If both registers conflict, it will usually
19175 have been fixed by a splitter. */
19176 if (reg_overlap_mentioned_p (operands[0], otherops[2])
19177 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
19178 {
19179 if (emit)
19180 {
19181 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19182 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19183 }
19184 if (count)
19185 *count = 2;
19186 }
19187 else
19188 {
19189 otherops[0] = operands[0];
19190 if (emit)
19191 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
19192 }
19193 return "";
19194 }
19195
19196 if (CONST_INT_P (otherops[2]))
19197 {
19198 if (emit)
19199 {
19200 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
19201 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
19202 else
19203 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19204 }
19205 }
19206 else
19207 {
19208 if (emit)
19209 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19210 }
19211 }
19212 else
19213 {
19214 if (emit)
19215 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
19216 }
19217
19218 if (count)
19219 *count = 2;
19220
19221 if (TARGET_LDRD)
19222 return "ldrd%?\t%0, [%1]";
19223
19224 return "ldmia%?\t%1, %M0";
19225 }
19226 else
19227 {
19228 otherops[1] = adjust_address (operands[1], SImode, 4);
19229 /* Take care of overlapping base/data reg. */
19230 if (reg_mentioned_p (operands[0], operands[1]))
19231 {
19232 if (emit)
19233 {
19234 output_asm_insn ("ldr%?\t%0, %1", otherops);
19235 output_asm_insn ("ldr%?\t%0, %1", operands);
19236 }
19237 if (count)
19238 *count = 2;
19239
19240 }
19241 else
19242 {
19243 if (emit)
19244 {
19245 output_asm_insn ("ldr%?\t%0, %1", operands);
19246 output_asm_insn ("ldr%?\t%0, %1", otherops);
19247 }
19248 if (count)
19249 *count = 2;
19250 }
19251 }
19252 }
19253 }
19254 else
19255 {
19256 /* Constraints should ensure this. */
19257 gcc_assert (code0 == MEM && code1 == REG);
19258 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
19259 || (TARGET_ARM && TARGET_LDRD));
19260
19261 /* For TARGET_ARM the first source register of an STRD
19262 must be even. This is usually the case for double-word
19263 values but user assembly constraints can force an odd
19264 starting register. */
19265 bool allow_strd = TARGET_LDRD
19266 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
19267 switch (GET_CODE (XEXP (operands[0], 0)))
19268 {
19269 case REG:
19270 if (emit)
19271 {
19272 if (allow_strd)
19273 output_asm_insn ("strd%?\t%1, [%m0]", operands);
19274 else
19275 output_asm_insn ("stm%?\t%m0, %M1", operands);
19276 }
19277 break;
19278
19279 case PRE_INC:
19280 gcc_assert (allow_strd);
19281 if (emit)
19282 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
19283 break;
19284
19285 case PRE_DEC:
19286 if (emit)
19287 {
19288 if (allow_strd)
19289 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
19290 else
19291 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
19292 }
19293 break;
19294
19295 case POST_INC:
19296 if (emit)
19297 {
19298 if (allow_strd)
19299 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
19300 else
19301 output_asm_insn ("stm%?\t%m0!, %M1", operands);
19302 }
19303 break;
19304
19305 case POST_DEC:
19306 gcc_assert (allow_strd);
19307 if (emit)
19308 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
19309 break;
19310
19311 case PRE_MODIFY:
19312 case POST_MODIFY:
19313 otherops[0] = operands[1];
19314 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
19315 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
19316
19317 /* IWMMXT allows offsets larger than strd can handle;
19318 fix these up with a pair of str instructions. */
19319 if (!TARGET_THUMB2
19320 && CONST_INT_P (otherops[2])
19321 && (INTVAL (otherops[2]) <= -256
19322 || INTVAL (otherops[2]) >= 256))
19323 {
19324 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
19325 {
19326 if (emit)
19327 {
19328 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
19329 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
19330 }
19331 if (count)
19332 *count = 2;
19333 }
19334 else
19335 {
19336 if (emit)
19337 {
19338 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
19339 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
19340 }
19341 if (count)
19342 *count = 2;
19343 }
19344 }
19345 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
19346 {
19347 if (emit)
19348 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
19349 }
19350 else
19351 {
19352 if (emit)
19353 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
19354 }
19355 break;
19356
19357 case PLUS:
19358 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
19359 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19360 {
19361 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
19362 {
19363 case -8:
19364 if (emit)
19365 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
19366 return "";
19367
19368 case -4:
19369 if (TARGET_THUMB2)
19370 break;
19371 if (emit)
19372 output_asm_insn ("stmda%?\t%m0, %M1", operands);
19373 return "";
19374
19375 case 4:
19376 if (TARGET_THUMB2)
19377 break;
19378 if (emit)
19379 output_asm_insn ("stmib%?\t%m0, %M1", operands);
19380 return "";
19381 }
19382 }
19383 if (allow_strd
19384 && (REG_P (otherops[2])
19385 || TARGET_THUMB2
19386 || (CONST_INT_P (otherops[2])
19387 && INTVAL (otherops[2]) > -256
19388 && INTVAL (otherops[2]) < 256)))
19389 {
19390 otherops[0] = operands[1];
19391 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
19392 if (emit)
19393 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
19394 return "";
19395 }
19396 /* Fall through */
19397
19398 default:
19399 otherops[0] = adjust_address (operands[0], SImode, 4);
19400 otherops[1] = operands[1];
19401 if (emit)
19402 {
19403 output_asm_insn ("str%?\t%1, %0", operands);
19404 output_asm_insn ("str%?\t%H1, %0", otherops);
19405 }
19406 if (count)
19407 *count = 2;
19408 }
19409 }
19410
19411 return "";
19412 }
19413
19414 /* Output a move, load or store for quad-word vectors in ARM registers. Only
19415 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
19416
19417 const char *
19418 output_move_quad (rtx *operands)
19419 {
19420 if (REG_P (operands[0]))
19421 {
19422 /* Load, or reg->reg move. */
19423
19424 if (MEM_P (operands[1]))
19425 {
19426 switch (GET_CODE (XEXP (operands[1], 0)))
19427 {
19428 case REG:
19429 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19430 break;
19431
19432 case LABEL_REF:
19433 case CONST:
19434 output_asm_insn ("adr%?\t%0, %1", operands);
19435 output_asm_insn ("ldmia%?\t%0, %M0", operands);
19436 break;
19437
19438 default:
19439 gcc_unreachable ();
19440 }
19441 }
19442 else
19443 {
19444 rtx ops[2];
19445 int dest, src, i;
19446
19447 gcc_assert (REG_P (operands[1]));
19448
19449 dest = REGNO (operands[0]);
19450 src = REGNO (operands[1]);
19451
19452 /* This seems pretty dumb, but hopefully GCC won't try to do it
19453 very often. */
19454 if (dest < src)
19455 for (i = 0; i < 4; i++)
19456 {
19457 ops[0] = gen_rtx_REG (SImode, dest + i);
19458 ops[1] = gen_rtx_REG (SImode, src + i);
19459 output_asm_insn ("mov%?\t%0, %1", ops);
19460 }
19461 else
19462 for (i = 3; i >= 0; i--)
19463 {
19464 ops[0] = gen_rtx_REG (SImode, dest + i);
19465 ops[1] = gen_rtx_REG (SImode, src + i);
19466 output_asm_insn ("mov%?\t%0, %1", ops);
19467 }
19468 }
19469 }
19470 else
19471 {
19472 gcc_assert (MEM_P (operands[0]));
19473 gcc_assert (REG_P (operands[1]));
19474 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
19475
19476 switch (GET_CODE (XEXP (operands[0], 0)))
19477 {
19478 case REG:
19479 output_asm_insn ("stm%?\t%m0, %M1", operands);
19480 break;
19481
19482 default:
19483 gcc_unreachable ();
19484 }
19485 }
19486
19487 return "";
19488 }
19489
19490 /* Output a VFP load or store instruction. */
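/* Typical outputs (added commentary, illustrative): 'vldr.64 d0, [r0]' or
   'vstr.32 s0, [r0]' for plain addresses, and the vldmia/vstmdb forms for
   post-increment and pre-decrement addresses.  */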
19491
19492 const char *
19493 output_move_vfp (rtx *operands)
19494 {
19495 rtx reg, mem, addr, ops[2];
19496 int load = REG_P (operands[0]);
19497 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
19498 int sp = (!TARGET_VFP_FP16INST
19499 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
19500 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
19501 const char *templ;
19502 char buff[50];
19503 machine_mode mode;
19504
19505 reg = operands[!load];
19506 mem = operands[load];
19507
19508 mode = GET_MODE (reg);
19509
19510 gcc_assert (REG_P (reg));
19511 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
19512 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
19513 || mode == SFmode
19514 || mode == DFmode
19515 || mode == HImode
19516 || mode == SImode
19517 || mode == DImode
19518 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
19519 gcc_assert (MEM_P (mem));
19520
19521 addr = XEXP (mem, 0);
19522
19523 switch (GET_CODE (addr))
19524 {
19525 case PRE_DEC:
19526 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
19527 ops[0] = XEXP (addr, 0);
19528 ops[1] = reg;
19529 break;
19530
19531 case POST_INC:
19532 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
19533 ops[0] = XEXP (addr, 0);
19534 ops[1] = reg;
19535 break;
19536
19537 default:
19538 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
19539 ops[0] = reg;
19540 ops[1] = mem;
19541 break;
19542 }
19543
19544 sprintf (buff, templ,
19545 load ? "ld" : "st",
19546 dp ? "64" : sp ? "32" : "16",
19547 dp ? "P" : "",
19548 integer_p ? "\t%@ int" : "");
19549 output_asm_insn (buff, ops);
19550
19551 return "";
19552 }
19553
19554 /* Output a Neon double-word or quad-word load or store, or a load
19555 or store for larger structure modes.
19556
19557 WARNING: The ordering of elements is weird in big-endian mode,
19558 because the EABI requires that vectors stored in memory appear
19559 as though they were stored by a VSTM instruction.
19560 GCC RTL defines element ordering based on in-memory order.
19561 This can be different from the architectural ordering of elements
19562 within a NEON register. The intrinsics defined in arm_neon.h use the
19563 NEON register element ordering, not the GCC RTL element ordering.
19564
19565 For example, the in-memory ordering of a big-endian quadword
19566 vector with 16-bit elements when stored from register pair {d0,d1}
19567 will be (lowest address first, d0[N] is NEON register element N):
19568
19569 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
19570
19571 When necessary, quadword registers (dN, dN+1) are moved to ARM
19572 registers from rN in the order:
19573
19574 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
19575
19576 So that STM/LDM can be used on vectors in ARM registers, and the
19577 same memory layout will result as if VSTM/VLDM were used.
19578
19579 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
19580 possible, which allows use of appropriate alignment tags.
19581 Note that the choice of "64" is independent of the actual vector
19582 element size; this size simply ensures that the behavior is
19583 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
19584
19585 Due to limitations of those instructions, use of VST1.64/VLD1.64
19586 is not possible if:
19587 - the address contains PRE_DEC, or
19588 - the mode refers to more than 4 double-word registers
19589
19590 In those cases, it would be possible to replace VSTM/VLDM by a
19591 sequence of instructions; this is not currently implemented since
19592 this is not certain to actually improve performance. */
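/* Typical outputs (added commentary, roughly illustrative):
   'vld1.64 {d0-d1}, [r0]' for a quad-word load with a plain register
   address (possibly carrying an alignment hint), or 'vldmia r0, {d0-d7}'
   for the larger structure modes that vld1/vst1 cannot handle.  */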
19593
19594 const char *
19595 output_move_neon (rtx *operands)
19596 {
19597 rtx reg, mem, addr, ops[2];
19598 int regno, nregs, load = REG_P (operands[0]);
19599 const char *templ;
19600 char buff[50];
19601 machine_mode mode;
19602
19603 reg = operands[!load];
19604 mem = operands[load];
19605
19606 mode = GET_MODE (reg);
19607
19608 gcc_assert (REG_P (reg));
19609 regno = REGNO (reg);
19610 nregs = REG_NREGS (reg) / 2;
19611 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
19612 || NEON_REGNO_OK_FOR_QUAD (regno));
19613 gcc_assert (VALID_NEON_DREG_MODE (mode)
19614 || VALID_NEON_QREG_MODE (mode)
19615 || VALID_NEON_STRUCT_MODE (mode));
19616 gcc_assert (MEM_P (mem));
19617
19618 addr = XEXP (mem, 0);
19619
19620 /* Strip off const from addresses like (const (plus (...))). */
19621 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19622 addr = XEXP (addr, 0);
19623
19624 switch (GET_CODE (addr))
19625 {
19626 case POST_INC:
19627 /* We have to use vldm / vstm for too-large modes. */
19628 if (nregs > 4)
19629 {
19630 templ = "v%smia%%?\t%%0!, %%h1";
19631 ops[0] = XEXP (addr, 0);
19632 }
19633 else
19634 {
19635 templ = "v%s1.64\t%%h1, %%A0";
19636 ops[0] = mem;
19637 }
19638 ops[1] = reg;
19639 break;
19640
19641 case PRE_DEC:
19642 /* We have to use vldm / vstm in this case, since there is no
19643 pre-decrement form of the vld1 / vst1 instructions. */
19644 templ = "v%smdb%%?\t%%0!, %%h1";
19645 ops[0] = XEXP (addr, 0);
19646 ops[1] = reg;
19647 break;
19648
19649 case POST_MODIFY:
19650 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
19651 gcc_unreachable ();
19652
19653 case REG:
19654 /* We have to use vldm / vstm for too-large modes. */
19655 if (nregs > 1)
19656 {
19657 if (nregs > 4)
19658 templ = "v%smia%%?\t%%m0, %%h1";
19659 else
19660 templ = "v%s1.64\t%%h1, %%A0";
19661
19662 ops[0] = mem;
19663 ops[1] = reg;
19664 break;
19665 }
19666 /* Fall through. */
19667 case LABEL_REF:
19668 case PLUS:
19669 {
19670 int i;
19671 int overlap = -1;
19672 for (i = 0; i < nregs; i++)
19673 {
19674 /* We're only using DImode here because it's a convenient size. */
19675 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
19676 ops[1] = adjust_address (mem, DImode, 8 * i);
19677 if (reg_overlap_mentioned_p (ops[0], mem))
19678 {
19679 gcc_assert (overlap == -1);
19680 overlap = i;
19681 }
19682 else
19683 {
19684 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19685 output_asm_insn (buff, ops);
19686 }
19687 }
19688 if (overlap != -1)
19689 {
19690 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
19691 ops[1] = adjust_address (mem, SImode, 8 * overlap);
19692 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19693 output_asm_insn (buff, ops);
19694 }
19695
19696 return "";
19697 }
19698
19699 default:
19700 gcc_unreachable ();
19701 }
19702
19703 sprintf (buff, templ, load ? "ld" : "st");
19704 output_asm_insn (buff, ops);
19705
19706 return "";
19707 }
19708
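/* Illustrative sketch only, not part of GCC: a standalone restatement of
   the instruction-selection policy of output_move_neon above.  The enum
   and parameter names are hypothetical; the function merely classifies
   which instruction family the code above would emit for a given address
   shape and D-register count.  */

enum neon_addr_kind_sketch { NA_POST_INC, NA_PRE_DEC, NA_REG, NA_REG_OFFSET };
enum neon_move_kind_sketch { NM_VLD1_VST1, NM_VLDM_VSTM, NM_VLDR_VSTR };

static enum neon_move_kind_sketch
neon_move_kind_sketch (enum neon_addr_kind_sketch addr, int nregs)
{
  switch (addr)
    {
    case NA_PRE_DEC:
      /* vld1/vst1 have no pre-decrement form.  */
      return NM_VLDM_VSTM;
    case NA_POST_INC:
      /* vld1/vst1 handle at most four D registers.  */
      return nregs > 4 ? NM_VLDM_VSTM : NM_VLD1_VST1;
    case NA_REG:
      if (nregs == 1)
        return NM_VLDR_VSTR;    /* falls through to the per-register loop  */
      return nregs > 4 ? NM_VLDM_VSTM : NM_VLD1_VST1;
    default:
      /* Reg + offset or label: one vldr/vstr per D register.  */
      return NM_VLDR_VSTR;
    }
}
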
19709 /* Compute and return the length of neon_mov<mode>, where <mode> is
19710 one of VSTRUCT modes: EI, OI, CI or XI. */
19711 int
19712 arm_attr_length_move_neon (rtx_insn *insn)
19713 {
19714 rtx reg, mem, addr;
19715 int load;
19716 machine_mode mode;
19717
19718 extract_insn_cached (insn);
19719
19720 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
19721 {
19722 mode = GET_MODE (recog_data.operand[0]);
19723 switch (mode)
19724 {
19725 case E_EImode:
19726 case E_OImode:
19727 return 8;
19728 case E_CImode:
19729 return 12;
19730 case E_XImode:
19731 return 16;
19732 default:
19733 gcc_unreachable ();
19734 }
19735 }
19736
19737 load = REG_P (recog_data.operand[0]);
19738 reg = recog_data.operand[!load];
19739 mem = recog_data.operand[load];
19740
19741 gcc_assert (MEM_P (mem));
19742
19743 addr = XEXP (mem, 0);
19744
19745 /* Strip off const from addresses like (const (plus (...))). */
19746 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19747 addr = XEXP (addr, 0);
19748
19749 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
19750 {
19751 int insns = REG_NREGS (reg) / 2;
19752 return insns * 4;
19753 }
19754 else
19755 return 4;
19756 }
19757
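/* Illustrative sketch only, not part of GCC: the length computation of
   arm_attr_length_move_neon above, restated over a plain D-register
   count.  NREGS_D, REG_TO_REG and OFFSET_ADDRESS are hypothetical
   parameters describing the operands.  */

static int
neon_move_length_sketch (int nregs_d, int reg_to_reg, int offset_address)
{
  if (reg_to_reg)
    /* Structure moves copy one Q register (two D registers) per 4-byte
       instruction: EI/OI -> 8, CI -> 12, XI -> 16.  */
    return 4 * ((nregs_d + 1) / 2);
  if (offset_address)
    /* One vldr/vstr (4 bytes) per D register for reg+offset or label
       addresses.  */
    return 4 * nregs_d;
  /* Everything else is a single vld1/vst1 or vldm/vstm.  */
  return 4;
}
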
19758 /* Return nonzero if the offset in the address is an immediate. Otherwise,
19759 return zero. */
19760
19761 int
19762 arm_address_offset_is_imm (rtx_insn *insn)
19763 {
19764 rtx mem, addr;
19765
19766 extract_insn_cached (insn);
19767
19768 if (REG_P (recog_data.operand[0]))
19769 return 0;
19770
19771 mem = recog_data.operand[0];
19772
19773 gcc_assert (MEM_P (mem));
19774
19775 addr = XEXP (mem, 0);
19776
19777 if (REG_P (addr)
19778 || (GET_CODE (addr) == PLUS
19779 && REG_P (XEXP (addr, 0))
19780 && CONST_INT_P (XEXP (addr, 1))))
19781 return 1;
19782 else
19783 return 0;
19784 }
19785
19786 /* Output an ADD r, s, #n where n may be too big for one instruction.
19787 If adding zero to one register, output nothing. */
19788 const char *
19789 output_add_immediate (rtx *operands)
19790 {
19791 HOST_WIDE_INT n = INTVAL (operands[2]);
19792
19793 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19794 {
19795 if (n < 0)
19796 output_multi_immediate (operands,
19797 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19798 -n);
19799 else
19800 output_multi_immediate (operands,
19801 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19802 n);
19803 }
19804
19805 return "";
19806 }
19807
19808 /* Output a multiple immediate operation.
19809 OPERANDS is the vector of operands referred to in the output patterns.
19810 INSTR1 is the output pattern to use for the first constant.
19811 INSTR2 is the output pattern to use for subsequent constants.
19812 IMMED_OP is the index of the constant slot in OPERANDS.
19813 N is the constant value. */
19814 static const char *
19815 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19816 int immed_op, HOST_WIDE_INT n)
19817 {
19818 #if HOST_BITS_PER_WIDE_INT > 32
19819 n &= 0xffffffff;
19820 #endif
19821
19822 if (n == 0)
19823 {
19824 /* Quick and easy output. */
19825 operands[immed_op] = const0_rtx;
19826 output_asm_insn (instr1, operands);
19827 }
19828 else
19829 {
19830 int i;
19831 const char * instr = instr1;
19832
19833 /* Note that n is never zero here (which would give no output). */
19834 for (i = 0; i < 32; i += 2)
19835 {
19836 if (n & (3 << i))
19837 {
19838 operands[immed_op] = GEN_INT (n & (255 << i));
19839 output_asm_insn (instr, operands);
19840 instr = instr2;
19841 i += 6;
19842 }
19843 }
19844 }
19845
19846 return "";
19847 }
19848
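/* Illustrative sketch only, not part of GCC: the constant-splitting loop
   of output_multi_immediate above, extracted into a standalone helper
   that records each 8-bit chunk instead of printing an instruction.
   CHUNKS needs room for at most four entries; the return value is the
   number of chunks, i.e. the number of add/sub instructions needed.  */

static int
split_arm_immediate_sketch (unsigned int n, unsigned int chunks[4])
{
  int count = 0;
  for (int i = 0; i < 32; i += 2)
    if (n & (3u << i))
      {
        /* Peel off an 8-bit field starting at even bit position I; such
           a field is always a legal ARM rotated immediate.  */
        chunks[count++] = n & (255u << i);
        i += 6;                 /* skip the rest of this byte  */
      }
  return count;
}

/* For example, 0x12345 splits into 0x45, 0x2300 and 0x10000, so an
   "add r0, r1, #0x12345" is emitted as three add instructions.  */
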
19849 /* Return the name of a shifter operation. */
19850 static const char *
19851 arm_shift_nmem (enum rtx_code code)
19852 {
19853 switch (code)
19854 {
19855 case ASHIFT:
19856 return ARM_LSL_NAME;
19857
19858 case ASHIFTRT:
19859 return "asr";
19860
19861 case LSHIFTRT:
19862 return "lsr";
19863
19864 case ROTATERT:
19865 return "ror";
19866
19867 default:
19868 abort ();
19869 }
19870 }
19871
19872 /* Return the appropriate ARM instruction for the operation code.
19873 The returned result should not be overwritten. OP is the rtx of the
19874 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19875 was shifted. */
19876 const char *
19877 arithmetic_instr (rtx op, int shift_first_arg)
19878 {
19879 switch (GET_CODE (op))
19880 {
19881 case PLUS:
19882 return "add";
19883
19884 case MINUS:
19885 return shift_first_arg ? "rsb" : "sub";
19886
19887 case IOR:
19888 return "orr";
19889
19890 case XOR:
19891 return "eor";
19892
19893 case AND:
19894 return "and";
19895
19896 case ASHIFT:
19897 case ASHIFTRT:
19898 case LSHIFTRT:
19899 case ROTATERT:
19900 return arm_shift_nmem (GET_CODE (op));
19901
19902 default:
19903 gcc_unreachable ();
19904 }
19905 }
19906
19907 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19908 for the operation code. The returned result should not be overwritten.
19909 OP is the rtx code of the shift.
19910 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19911 constant shift amount otherwise. */
19912 static const char *
19913 shift_op (rtx op, HOST_WIDE_INT *amountp)
19914 {
19915 const char * mnem;
19916 enum rtx_code code = GET_CODE (op);
19917
19918 switch (code)
19919 {
19920 case ROTATE:
19921 if (!CONST_INT_P (XEXP (op, 1)))
19922 {
19923 output_operand_lossage ("invalid shift operand");
19924 return NULL;
19925 }
19926
19927 code = ROTATERT;
19928 *amountp = 32 - INTVAL (XEXP (op, 1));
19929 mnem = "ror";
19930 break;
19931
19932 case ASHIFT:
19933 case ASHIFTRT:
19934 case LSHIFTRT:
19935 case ROTATERT:
19936 mnem = arm_shift_nmem (code);
19937 if (CONST_INT_P (XEXP (op, 1)))
19938 {
19939 *amountp = INTVAL (XEXP (op, 1));
19940 }
19941 else if (REG_P (XEXP (op, 1)))
19942 {
19943 *amountp = -1;
19944 return mnem;
19945 }
19946 else
19947 {
19948 output_operand_lossage ("invalid shift operand");
19949 return NULL;
19950 }
19951 break;
19952
19953 case MULT:
19954 /* We never have to worry about the amount being other than a
19955 power of 2, since this case can never be reloaded from a reg. */
19956 if (!CONST_INT_P (XEXP (op, 1)))
19957 {
19958 output_operand_lossage ("invalid shift operand");
19959 return NULL;
19960 }
19961
19962 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19963
19964 /* Amount must be a power of two. */
19965 if (*amountp & (*amountp - 1))
19966 {
19967 output_operand_lossage ("invalid shift operand");
19968 return NULL;
19969 }
19970
19971 *amountp = exact_log2 (*amountp);
19972 gcc_assert (IN_RANGE (*amountp, 0, 31));
19973 return ARM_LSL_NAME;
19974
19975 default:
19976 output_operand_lossage ("invalid shift operand");
19977 return NULL;
19978 }
19979
19980 /* This is not 100% correct, but follows from the desire to merge
19981 multiplication by a power of 2 with the recognizer for a
19982 shift. >=32 is not a valid shift for "lsl", so we must try and
19983 output a shift that produces the correct arithmetical result.
19984 Using lsr #32 is identical except for the fact that the carry bit
19985 is not set correctly if we set the flags; but we never use the
19986 carry bit from such an operation, so we can ignore that. */
19987 if (code == ROTATERT)
19988 /* Rotate is just modulo 32. */
19989 *amountp &= 31;
19990 else if (*amountp != (*amountp & 31))
19991 {
19992 if (code == ASHIFT)
19993 mnem = "lsr";
19994 *amountp = 32;
19995 }
19996
19997 /* Shifts of 0 are no-ops. */
19998 if (*amountp == 0)
19999 return NULL;
20000
20001 return mnem;
20002 }
20003
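/* Illustrative sketch only, not part of GCC: the amount normalisation
   performed by shift_op above, restated over plain integers.  The SC_*
   constants are hypothetical stand-ins for the rtx codes; the function
   returns the mnemonic, or NULL for a shift by zero.  For SC_MUL_POW2
   the caller is assumed to have already taken the log2 of the amount.  */

enum shift_code_sketch { SC_LSL, SC_ASR, SC_LSR, SC_ROR, SC_ROL, SC_MUL_POW2 };

static const char *
shift_op_sketch (enum shift_code_sketch code, long *amountp)
{
  if (code == SC_MUL_POW2)
    return "lsl";               /* multiply by 2^n is lsl #n  */

  if (code == SC_ROL)
    {
      /* A left rotate by n is a right rotate by 32 - n.  */
      code = SC_ROR;
      *amountp = 32 - *amountp;
    }

  if (code == SC_ROR)
    *amountp &= 31;             /* rotates are modulo 32  */
  else if (*amountp != (*amountp & 31))
    {
      /* lsl #32 or more is not encodable; lsr #32 gives the same
         arithmetic result (zero) if the carry flag is ignored.  */
      if (code == SC_LSL)
        code = SC_LSR;
      *amountp = 32;
    }

  if (*amountp == 0)
    return NULL;                /* a shift by zero is a no-op  */

  return code == SC_LSL ? "lsl" : code == SC_ASR ? "asr"
         : code == SC_LSR ? "lsr" : "ror";
}
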
20004 /* Output a .ascii pseudo-op, keeping track of lengths. This is
20005 because /bin/as is horribly restrictive. The judgement about
20006 whether or not each character is 'printable' (and can be output as
20007 is) or not (and must be printed with an octal escape) must be made
20008 with reference to the *host* character set -- the situation is
20009 similar to that discussed in the comments above pp_c_char in
20010 c-pretty-print.c. */
20011
20012 #define MAX_ASCII_LEN 51
20013
20014 void
20015 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
20016 {
20017 int i;
20018 int len_so_far = 0;
20019
20020 fputs ("\t.ascii\t\"", stream);
20021
20022 for (i = 0; i < len; i++)
20023 {
20024 int c = p[i];
20025
20026 if (len_so_far >= MAX_ASCII_LEN)
20027 {
20028 fputs ("\"\n\t.ascii\t\"", stream);
20029 len_so_far = 0;
20030 }
20031
20032 if (ISPRINT (c))
20033 {
20034 if (c == '\\' || c == '\"')
20035 {
20036 putc ('\\', stream);
20037 len_so_far++;
20038 }
20039 putc (c, stream);
20040 len_so_far++;
20041 }
20042 else
20043 {
20044 fprintf (stream, "\\%03o", c);
20045 len_so_far += 4;
20046 }
20047 }
20048
20049 fputs ("\"\n", stream);
20050 }
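
/* Illustrative sketch only, not part of GCC: the length accounting used
   by output_ascii_pseudo_op above.  It returns how many ".ascii"
   directives would be emitted for LEN bytes at P, using the same
   per-character cost (1 for a printable character, 2 for an escaped
   quote or backslash, 4 for an octal escape) and the same MAX_ASCII_LEN
   threshold.  */

static int
count_ascii_directives_sketch (const unsigned char *p, int len)
{
  int directives = 1, len_so_far = 0;

  for (int i = 0; i < len; i++)
    {
      if (len_so_far >= MAX_ASCII_LEN)
        {
          directives++;         /* close the string, start a new .ascii  */
          len_so_far = 0;
        }
      if (ISPRINT (p[i]))
        len_so_far += (p[i] == '\\' || p[i] == '\"') ? 2 : 1;
      else
        len_so_far += 4;        /* a "\ooo" octal escape  */
    }
  return directives;
}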
20051 \f
20052
20053 /* Compute the register save mask for registers 0 through 12
20054 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
20055
20056 static unsigned long
20057 arm_compute_save_reg0_reg12_mask (void)
20058 {
20059 unsigned long func_type = arm_current_func_type ();
20060 unsigned long save_reg_mask = 0;
20061 unsigned int reg;
20062
20063 if (IS_INTERRUPT (func_type))
20064 {
20065 unsigned int max_reg;
20066 /* Interrupt functions must not corrupt any registers,
20067 even call clobbered ones. If this is a leaf function
20068 we can just examine the registers used by the RTL, but
20069 otherwise we have to assume that whatever function is
20070 called might clobber anything, and so we have to save
20071 all the call-clobbered registers as well. */
20072 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
20073 /* FIQ handlers have registers r8 - r12 banked, so
20074 we only need to check r0 - r7.  Normal ISRs only
20075 bank r14 and r15, so we must check up to r12.
20076 r13 is the stack pointer which is always preserved,
20077 so we do not need to consider it here. */
20078 max_reg = 7;
20079 else
20080 max_reg = 12;
20081
20082 for (reg = 0; reg <= max_reg; reg++)
20083 if (df_regs_ever_live_p (reg)
20084 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)))
20085 save_reg_mask |= (1 << reg);
20086
20087 /* Also save the pic base register if necessary. */
20088 if (PIC_REGISTER_MAY_NEED_SAVING
20089 && crtl->uses_pic_offset_table)
20090 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20091 }
20092 else if (IS_VOLATILE (func_type))
20093 {
20094 /* For noreturn functions we historically omitted register saves
20095 altogether. However this really messes up debugging. As a
20096 compromise save just the frame pointers. Combined with the link
20097 register saved elsewhere this should be sufficient to get
20098 a backtrace. */
20099 if (frame_pointer_needed)
20100 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20101 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
20102 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
20103 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
20104 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
20105 }
20106 else
20107 {
20108 /* In the normal case we only need to save those registers
20109 which are call saved and which are used by this function. */
20110 for (reg = 0; reg <= 11; reg++)
20111 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
20112 save_reg_mask |= (1 << reg);
20113
20114 /* Handle the frame pointer as a special case. */
20115 if (frame_pointer_needed)
20116 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20117
20118 /* If we aren't loading the PIC register,
20119 don't stack it even though it may be live. */
20120 if (PIC_REGISTER_MAY_NEED_SAVING
20121 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
20122 || crtl->uses_pic_offset_table))
20123 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20124
20125 /* The prologue will copy SP into R0, so save it. */
20126 if (IS_STACKALIGN (func_type))
20127 save_reg_mask |= 1;
20128 }
20129
20130 /* Save registers so the exception handler can modify them. */
20131 if (crtl->calls_eh_return)
20132 {
20133 unsigned int i;
20134
20135 for (i = 0; ; i++)
20136 {
20137 reg = EH_RETURN_DATA_REGNO (i);
20138 if (reg == INVALID_REGNUM)
20139 break;
20140 save_reg_mask |= 1 << reg;
20141 }
20142 }
20143
20144 return save_reg_mask;
20145 }
20146
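/* Illustrative sketch only, not part of GCC: the "normal function" case
   of arm_compute_save_reg0_reg12_mask above, restated over plain arrays.
   LIVE[] and CALLEE_SAVED[] are hypothetical stand-ins for
   df_regs_ever_live_p and callee_saved_reg_p; PIC_REGNUM is whichever
   register holds the PIC base.  */

static unsigned long
save_reg0_reg12_mask_sketch (const int live[13], const int callee_saved[13],
                             int frame_pointer, int uses_pic, int pic_regnum)
{
  unsigned long mask = 0;

  /* Save the call-saved registers that this function actually uses.  */
  for (int r = 0; r <= 11; r++)
    if (live[r] && callee_saved[r])
      mask |= 1ul << r;

  /* The frame pointer (r11 in ARM state) is handled separately.  */
  if (frame_pointer)
    mask |= 1ul << 11;

  /* Keep the PIC base register across calls when it is in use.  */
  if (uses_pic)
    mask |= 1ul << pic_regnum;

  return mask;
}
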
20147 /* Return true if r3 is live at the start of the function. */
20148
20149 static bool
20150 arm_r3_live_at_start_p (void)
20151 {
20152 /* Just look at cfg info, which is still close enough to correct at this
20153 point. This gives false positives for broken functions that might use
20154 uninitialized data that happens to be allocated in r3, but who cares? */
20155 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
20156 }
20157
20158 /* Compute the number of bytes used to store the static chain register on the
20159 stack, above the stack frame. We need to know this accurately to get the
20160 alignment of the rest of the stack frame correct. */
20161
20162 static int
20163 arm_compute_static_chain_stack_bytes (void)
20164 {
20165 /* Once the value is updated from the init value of -1, do not
20166 re-compute. */
20167 if (cfun->machine->static_chain_stack_bytes != -1)
20168 return cfun->machine->static_chain_stack_bytes;
20169
20170 /* See the defining assertion in arm_expand_prologue. */
20171 if (IS_NESTED (arm_current_func_type ())
20172 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20173 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
20174 || flag_stack_clash_protection)
20175 && !df_regs_ever_live_p (LR_REGNUM)))
20176 && arm_r3_live_at_start_p ()
20177 && crtl->args.pretend_args_size == 0)
20178 return 4;
20179
20180 return 0;
20181 }
20182
20183 /* Compute a bit mask of which core registers need to be
20184 saved on the stack for the current function.
20185 This is used by arm_compute_frame_layout, which may add extra registers. */
20186
20187 static unsigned long
20188 arm_compute_save_core_reg_mask (void)
20189 {
20190 unsigned int save_reg_mask = 0;
20191 unsigned long func_type = arm_current_func_type ();
20192 unsigned int reg;
20193
20194 if (IS_NAKED (func_type))
20195 /* This should never really happen. */
20196 return 0;
20197
20198 /* If we are creating a stack frame, then we must save the frame pointer,
20199 IP (which will hold the old stack pointer), LR and the PC. */
20200 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20201 save_reg_mask |=
20202 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
20203 | (1 << IP_REGNUM)
20204 | (1 << LR_REGNUM)
20205 | (1 << PC_REGNUM);
20206
20207 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
20208
20209 /* Decide if we need to save the link register.
20210 Interrupt routines have their own banked link register,
20211 so they never need to save it.
20212 Otherwise if we do not use the link register we do not need to save
20213 it. If we are pushing other registers onto the stack however, we
20214 can save an instruction in the epilogue by pushing the link register
20215 now and then popping it back into the PC. This incurs extra memory
20216 accesses though, so we only do it when optimizing for size, and only
20217 if we know that we will not need a fancy return sequence. */
20218 if (df_regs_ever_live_p (LR_REGNUM)
20219 || (save_reg_mask
20220 && optimize_size
20221 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
20222 && !crtl->tail_call_emit
20223 && !crtl->calls_eh_return))
20224 save_reg_mask |= 1 << LR_REGNUM;
20225
20226 if (cfun->machine->lr_save_eliminated)
20227 save_reg_mask &= ~ (1 << LR_REGNUM);
20228
20229 if (TARGET_REALLY_IWMMXT
20230 && ((bit_count (save_reg_mask)
20231 + ARM_NUM_INTS (crtl->args.pretend_args_size +
20232 arm_compute_static_chain_stack_bytes())
20233 ) % 2) != 0)
20234 {
20235 /* The total number of registers that are going to be pushed
20236 onto the stack is odd. We need to ensure that the stack
20237 is 64-bit aligned before we start to save iWMMXt registers,
20238 and also before we start to create locals. (A local variable
20239 might be a double or long long which we will load/store using
20240 an iWMMXt instruction). Therefore we need to push another
20241 ARM register, so that the stack will be 64-bit aligned. We
20242 try to avoid using the arg registers (r0 -r3) as they might be
20243 used to pass values in a tail call. */
20244 for (reg = 4; reg <= 12; reg++)
20245 if ((save_reg_mask & (1 << reg)) == 0)
20246 break;
20247
20248 if (reg <= 12)
20249 save_reg_mask |= (1 << reg);
20250 else
20251 {
20252 cfun->machine->sibcall_blocked = 1;
20253 save_reg_mask |= (1 << 3);
20254 }
20255 }
20256
20257 /* We may need to push an additional register for use initializing the
20258 PIC base register. */
20259 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
20260 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
20261 {
20262 reg = thumb_find_work_register (1 << 4);
20263 if (!call_used_or_fixed_reg_p (reg))
20264 save_reg_mask |= (1 << reg);
20265 }
20266
20267 return save_reg_mask;
20268 }
20269
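/* Illustrative sketch only, not part of GCC: the 64-bit stack-alignment
   fix-up applied above when iWMMXt registers will be saved.  If an odd
   number of words would be pushed, one more core register is added,
   preferring r4-r12 so that the argument registers stay free for tail
   calls.  EXTRA_WORDS stands for the pretend-args and static-chain words
   counted above, and *USED_R3 is set when r3 had to be taken (which
   blocks sibcalls there).  */

static unsigned long
pad_push_mask_to_doubleword_sketch (unsigned long mask, int extra_words,
                                    int *used_r3)
{
  int words = extra_words;
  *used_r3 = 0;

  for (int r = 0; r <= 14; r++)         /* count the registers in the mask  */
    if (mask & (1ul << r))
      words++;

  if ((words & 1) == 0)
    return mask;                        /* already 8-byte aligned  */

  for (int r = 4; r <= 12; r++)         /* prefer a non-argument register  */
    if ((mask & (1ul << r)) == 0)
      return mask | (1ul << r);

  *used_r3 = 1;                         /* last resort: push r3  */
  return mask | (1ul << 3);
}
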
20270 /* Compute a bit mask of which core registers need to be
20271 saved on the stack for the current function. */
20272 static unsigned long
20273 thumb1_compute_save_core_reg_mask (void)
20274 {
20275 unsigned long mask;
20276 unsigned reg;
20277
20278 mask = 0;
20279 for (reg = 0; reg < 12; reg ++)
20280 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
20281 mask |= 1 << reg;
20282
20283 /* Handle the frame pointer as a special case. */
20284 if (frame_pointer_needed)
20285 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20286
20287 if (flag_pic
20288 && !TARGET_SINGLE_PIC_BASE
20289 && arm_pic_register != INVALID_REGNUM
20290 && crtl->uses_pic_offset_table)
20291 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20292
20293 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
20294 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20295 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
20296
20297 /* LR will also be pushed if any lo regs are pushed. */
20298 if (mask & 0xff || thumb_force_lr_save ())
20299 mask |= (1 << LR_REGNUM);
20300
20301 bool call_clobbered_scratch
20302 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
20303 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
20304
20305 /* Make sure we have a low work register if we need one. We will
20306 need one if we are going to push a high register, but we are not
20307 currently intending to push a low register. However if both the
20308 prologue and epilogue have a spare call-clobbered low register,
20309 then we won't need to find an additional work register. It does
20310 not need to be the same register in the prologue and
20311 epilogue. */
20312 if ((mask & 0xff) == 0
20313 && !call_clobbered_scratch
20314 && ((mask & 0x0f00) || TARGET_BACKTRACE))
20315 {
20316 /* Use thumb_find_work_register to choose which register
20317 we will use. If the register is live then we will
20318 have to push it. Use LAST_LO_REGNUM as our fallback
20319 choice for the register to select. */
20320 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
20321 /* Make sure the register returned by thumb_find_work_register is
20322 not part of the return value. */
20323 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
20324 reg = LAST_LO_REGNUM;
20325
20326 if (callee_saved_reg_p (reg))
20327 mask |= 1 << reg;
20328 }
20329
20330 /* The 504 below is 8 bytes less than 512 because there are two possible
20331 alignment words. We can't tell here if they will be present or not so we
20332 have to play it safe and assume that they are. */
20333 if ((CALLER_INTERWORKING_SLOT_SIZE +
20334 ROUND_UP_WORD (get_frame_size ()) +
20335 crtl->outgoing_args_size) >= 504)
20336 {
20337 /* This is the same as the code in thumb1_expand_prologue() which
20338 determines which register to use for stack decrement. */
20339 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
20340 if (mask & (1 << reg))
20341 break;
20342
20343 if (reg > LAST_LO_REGNUM)
20344 {
20345 /* Make sure we have a register available for stack decrement. */
20346 mask |= 1 << LAST_LO_REGNUM;
20347 }
20348 }
20349
20350 return mask;
20351 }
20352
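/* Illustrative sketch only, not part of GCC: the frame-size test above
   that decides whether Thumb-1 must keep a low register available for
   the stack decrement.  504 is 512 minus 8 bytes for the two possible
   alignment words; the parameters are hypothetical stand-ins for
   CALLER_INTERWORKING_SLOT_SIZE, get_frame_size and
   crtl->outgoing_args_size.  */

static int
thumb1_needs_stack_decrement_reg_sketch (int interwork_slot, int frame_size,
                                         int outgoing_args)
{
  int rounded = (frame_size + 3) & ~3;  /* ROUND_UP_WORD  */
  return interwork_slot + rounded + outgoing_args >= 504;
}
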
20353
20354 /* Return the number of bytes required to save VFP registers. */
20355 static int
20356 arm_get_vfp_saved_size (void)
20357 {
20358 unsigned int regno;
20359 int count;
20360 int saved;
20361
20362 saved = 0;
20363 /* Space for saved VFP registers. */
20364 if (TARGET_HARD_FLOAT)
20365 {
20366 count = 0;
20367 for (regno = FIRST_VFP_REGNUM;
20368 regno < LAST_VFP_REGNUM;
20369 regno += 2)
20370 {
20371 if ((!df_regs_ever_live_p (regno)
20372 || call_used_or_fixed_reg_p (regno))
20373 && (!df_regs_ever_live_p (regno + 1)
20374 || call_used_or_fixed_reg_p (regno + 1)))
20375 {
20376 if (count > 0)
20377 {
20378 /* Workaround ARM10 VFPr1 bug. */
20379 if (count == 2 && !arm_arch6)
20380 count++;
20381 saved += count * 8;
20382 }
20383 count = 0;
20384 }
20385 else
20386 count++;
20387 }
20388 if (count > 0)
20389 {
20390 if (count == 2 && !arm_arch6)
20391 count++;
20392 saved += count * 8;
20393 }
20394 }
20395 return saved;
20396 }
20397
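/* Illustrative sketch only, not part of GCC: the byte count computed by
   arm_get_vfp_saved_size above, restated over a hypothetical
   NEEDS_SAVE[] array indexed by D-register number.  Each saved register
   costs 8 bytes, and a run of exactly two registers is padded to three
   on pre-v6 cores (the ARM10 VFPr1 workaround above).  */

static int
vfp_saved_size_sketch (const int needs_save[32], int ndregs, int arch6)
{
  int saved = 0, count = 0;

  for (int d = 0; d <= ndregs; d++)
    {
      if (d < ndregs && needs_save[d])
        count++;
      else if (count > 0)
        {
          if (count == 2 && !arch6)
            count++;            /* pad the two-register run to three  */
          saved += count * 8;
          count = 0;
        }
    }
  return saved;
}
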
20398
20399 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
20400 everything bar the final return instruction. If simple_return is true,
20401 then do not output epilogue, because it has already been emitted in RTL.
20402
20403 Note: do not forget to update length attribute of corresponding insn pattern
20404 when changing assembly output (eg. length attribute of
20405 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
20406 register clearing sequences). */
20407 const char *
20408 output_return_instruction (rtx operand, bool really_return, bool reverse,
20409 bool simple_return)
20410 {
20411 char conditional[10];
20412 char instr[100];
20413 unsigned reg;
20414 unsigned long live_regs_mask;
20415 unsigned long func_type;
20416 arm_stack_offsets *offsets;
20417
20418 func_type = arm_current_func_type ();
20419
20420 if (IS_NAKED (func_type))
20421 return "";
20422
20423 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
20424 {
20425 /* If this function was declared non-returning, and we have
20426 found a tail call, then we have to trust that the called
20427 function won't return. */
20428 if (really_return)
20429 {
20430 rtx ops[2];
20431
20432 /* Otherwise, trap an attempted return by aborting. */
20433 ops[0] = operand;
20434 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
20435 : "abort");
20436 assemble_external_libcall (ops[1]);
20437 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
20438 }
20439
20440 return "";
20441 }
20442
20443 gcc_assert (!cfun->calls_alloca || really_return);
20444
20445 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
20446
20447 cfun->machine->return_used_this_function = 1;
20448
20449 offsets = arm_get_frame_offsets ();
20450 live_regs_mask = offsets->saved_regs_mask;
20451
20452 if (!simple_return && live_regs_mask)
20453 {
20454 const char * return_reg;
20455
20456 /* If we do not have any special requirements for function exit
20457 (e.g. interworking) then we can load the return address
20458 directly into the PC. Otherwise we must load it into LR. */
20459 if (really_return
20460 && !IS_CMSE_ENTRY (func_type)
20461 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
20462 return_reg = reg_names[PC_REGNUM];
20463 else
20464 return_reg = reg_names[LR_REGNUM];
20465
20466 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
20467 {
20468 /* There are three possible reasons for the IP register
20469 being saved. 1) a stack frame was created, in which case
20470 IP contains the old stack pointer, or 2) an ISR routine
20471 corrupted it, or 3) it was saved to align the stack on
20472 iWMMXt. In case 1, restore IP into SP, otherwise just
20473 restore IP. */
20474 if (frame_pointer_needed)
20475 {
20476 live_regs_mask &= ~ (1 << IP_REGNUM);
20477 live_regs_mask |= (1 << SP_REGNUM);
20478 }
20479 else
20480 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
20481 }
20482
20483 /* On some ARM architectures it is faster to use LDR rather than
20484 LDM to load a single register. On other architectures, the
20485 cost is the same. In 26 bit mode, or for exception handlers,
20486 we have to use LDM to load the PC so that the CPSR is also
20487 restored. */
20488 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
20489 if (live_regs_mask == (1U << reg))
20490 break;
20491
20492 if (reg <= LAST_ARM_REGNUM
20493 && (reg != LR_REGNUM
20494 || ! really_return
20495 || ! IS_INTERRUPT (func_type)))
20496 {
20497 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
20498 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
20499 }
20500 else
20501 {
20502 char *p;
20503 int first = 1;
20504
20505 /* Generate the load multiple instruction to restore the
20506 registers. Note we can get here, even if
20507 frame_pointer_needed is true, but only if sp already
20508 points to the base of the saved core registers. */
20509 if (live_regs_mask & (1 << SP_REGNUM))
20510 {
20511 unsigned HOST_WIDE_INT stack_adjust;
20512
20513 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
20514 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
20515
20516 if (stack_adjust && arm_arch5t && TARGET_ARM)
20517 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
20518 else
20519 {
20520 /* If we can't use ldmib (SA110 bug),
20521 then try to pop r3 instead. */
20522 if (stack_adjust)
20523 live_regs_mask |= 1 << 3;
20524
20525 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
20526 }
20527 }
20528 /* For interrupt returns we have to use an LDM rather than
20529 a POP so that we can use the exception return variant. */
20530 else if (IS_INTERRUPT (func_type))
20531 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
20532 else
20533 sprintf (instr, "pop%s\t{", conditional);
20534
20535 p = instr + strlen (instr);
20536
20537 for (reg = 0; reg <= SP_REGNUM; reg++)
20538 if (live_regs_mask & (1 << reg))
20539 {
20540 int l = strlen (reg_names[reg]);
20541
20542 if (first)
20543 first = 0;
20544 else
20545 {
20546 memcpy (p, ", ", 2);
20547 p += 2;
20548 }
20549
20550 memcpy (p, "%|", 2);
20551 memcpy (p + 2, reg_names[reg], l);
20552 p += l + 2;
20553 }
20554
20555 if (live_regs_mask & (1 << LR_REGNUM))
20556 {
20557 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
20558 /* If returning from an interrupt, restore the CPSR. */
20559 if (IS_INTERRUPT (func_type))
20560 strcat (p, "^");
20561 }
20562 else
20563 strcpy (p, "}");
20564 }
20565
20566 output_asm_insn (instr, & operand);
20567
20568 /* See if we need to generate an extra instruction to
20569 perform the actual function return. */
20570 if (really_return
20571 && func_type != ARM_FT_INTERWORKED
20572 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
20573 {
20574 /* The return has already been handled
20575 by loading the LR into the PC. */
20576 return "";
20577 }
20578 }
20579
20580 if (really_return)
20581 {
20582 switch ((int) ARM_FUNC_TYPE (func_type))
20583 {
20584 case ARM_FT_ISR:
20585 case ARM_FT_FIQ:
20586 /* ??? This is wrong for unified assembly syntax. */
20587 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
20588 break;
20589
20590 case ARM_FT_INTERWORKED:
20591 gcc_assert (arm_arch5t || arm_arch4t);
20592 sprintf (instr, "bx%s\t%%|lr", conditional);
20593 break;
20594
20595 case ARM_FT_EXCEPTION:
20596 /* ??? This is wrong for unified assembly syntax. */
20597 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
20598 break;
20599
20600 default:
20601 if (IS_CMSE_ENTRY (func_type))
20602 {
20603 /* Check if we have to clear the 'GE bits', which are only used if
20604 parallel addition and subtraction instructions are available. */
20605 if (TARGET_INT_SIMD)
20606 snprintf (instr, sizeof (instr),
20607 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
20608 else
20609 snprintf (instr, sizeof (instr),
20610 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
20611
20612 output_asm_insn (instr, & operand);
20613 if (TARGET_HARD_FLOAT)
20614 {
20615 /* Clear the cumulative exception-status bits (0-4,7) and the
20616 condition code bits (28-31) of the FPSCR. We need to
20617 remember to clear the first scratch register used (IP) and
20618 save and restore the second (r4). */
20619 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
20620 output_asm_insn (instr, & operand);
20621 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
20622 output_asm_insn (instr, & operand);
20623 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
20624 output_asm_insn (instr, & operand);
20625 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
20626 output_asm_insn (instr, & operand);
20627 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
20628 output_asm_insn (instr, & operand);
20629 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
20630 output_asm_insn (instr, & operand);
20631 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
20632 output_asm_insn (instr, & operand);
20633 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
20634 output_asm_insn (instr, & operand);
20635 }
20636 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
20637 }
20638 /* Use bx if it's available. */
20639 else if (arm_arch5t || arm_arch4t)
20640 sprintf (instr, "bx%s\t%%|lr", conditional);
20641 else
20642 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
20643 break;
20644 }
20645
20646 output_asm_insn (instr, & operand);
20647 }
20648
20649 return "";
20650 }
20651
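/* Illustrative sketch only, not part of GCC: how the register list of
   the epilogue "pop"/"ldm" is assembled above.  BUF is assumed to be
   large enough, NAMES is a hypothetical register-name table, and the
   slot saved for lr (bit 14) is printed as RETURN_REG, matching the way
   the code above pops the return address either into pc or into lr.  */

static void
build_pop_list_sketch (char *buf, unsigned long mask,
                       const char *const names[15], const char *return_reg)
{
  char *p = buf + sprintf (buf, "pop\t{");
  int first = 1;

  for (int r = 0; r <= 13; r++)         /* r0 .. sp  */
    if (mask & (1ul << r))
      {
        p += sprintf (p, "%s%s", first ? "" : ", ", names[r]);
        first = 0;
      }

  if (mask & (1ul << 14))               /* lr slot: pop into pc or lr  */
    p += sprintf (p, "%s%s", first ? "" : ", ", return_reg);

  strcpy (p, "}");
}
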
20652 /* Output in FILE asm statements needed to declare the NAME of the function
20653 defined by its DECL node. */
20654
20655 void
20656 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
20657 {
20658 size_t cmse_name_len;
20659 char *cmse_name = 0;
20660 char cmse_prefix[] = "__acle_se_";
20661
20662 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
20663 extra function label for each function with the 'cmse_nonsecure_entry'
20664 attribute. This extra function label should be prepended with
20665 '__acle_se_', telling the linker that it needs to create secure gateway
20666 veneers for this function. */
20667 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
20668 DECL_ATTRIBUTES (decl)))
20669 {
20670 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
20671 cmse_name = XALLOCAVEC (char, cmse_name_len);
20672 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
20673 targetm.asm_out.globalize_label (file, cmse_name);
20674
20675 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
20676 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
20677 }
20678
20679 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
20680 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20681 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20682 ASM_OUTPUT_LABEL (file, name);
20683
20684 if (cmse_name)
20685 ASM_OUTPUT_LABEL (file, cmse_name);
20686
20687 ARM_OUTPUT_FN_UNWIND (file, TRUE);
20688 }
20689
20690 /* Write the function name into the code section, directly preceding
20691 the function prologue.
20692
20693 Code will be output similar to this:
20694 t0
20695 .ascii "arm_poke_function_name", 0
20696 .align
20697 t1
20698 .word 0xff000000 + (t1 - t0)
20699 arm_poke_function_name
20700 mov ip, sp
20701 stmfd sp!, {fp, ip, lr, pc}
20702 sub fp, ip, #4
20703
20704 When performing a stack backtrace, code can inspect the value
20705 of 'pc' stored at 'fp' + 0. If the trace function then looks
20706 at location pc - 12 and the top 8 bits are set, then we know
20707 that there is a function name embedded immediately preceding this
20708 location, and that its length is ((pc[-3]) & ~0xff000000).
20709
20710 We assume that pc is declared as a pointer to an unsigned long.
20711
20712 It is of no benefit to output the function name if we are assembling
20713 a leaf function. These function types will not contain a stack
20714 backtrace structure, therefore it is not possible to determine the
20715 function name. */
20716 void
20717 arm_poke_function_name (FILE *stream, const char *name)
20718 {
20719 unsigned long alignlength;
20720 unsigned long length;
20721 rtx x;
20722
20723 length = strlen (name) + 1;
20724 alignlength = ROUND_UP_WORD (length);
20725
20726 ASM_OUTPUT_ASCII (stream, name, length);
20727 ASM_OUTPUT_ALIGN (stream, 2);
20728 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
20729 assemble_aligned_integer (UNITS_PER_WORD, x);
20730 }
20731
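/* Illustrative sketch only, not part of GCC: the marker word emitted by
   arm_poke_function_name above.  The name, including its terminating
   NUL, is padded to a word boundary, and the padded length is added to
   0xff000000; a backtracer that sees the top byte set at pc[-3] can
   then recover the length of the name stored just before it.  */

static unsigned long
poke_marker_word_sketch (const char *name)
{
  unsigned long length = strlen (name) + 1;     /* include the NUL  */
  unsigned long aligned = (length + 3) & ~3ul;  /* ROUND_UP_WORD  */
  return 0xff000000ul + aligned;
}
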
20732 /* Place some comments into the assembler stream
20733 describing the current function. */
20734 static void
20735 arm_output_function_prologue (FILE *f)
20736 {
20737 unsigned long func_type;
20738
20739 /* Sanity check. */
20740 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
20741
20742 func_type = arm_current_func_type ();
20743
20744 switch ((int) ARM_FUNC_TYPE (func_type))
20745 {
20746 default:
20747 case ARM_FT_NORMAL:
20748 break;
20749 case ARM_FT_INTERWORKED:
20750 asm_fprintf (f, "\t%@ Function supports interworking.\n");
20751 break;
20752 case ARM_FT_ISR:
20753 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
20754 break;
20755 case ARM_FT_FIQ:
20756 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
20757 break;
20758 case ARM_FT_EXCEPTION:
20759 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
20760 break;
20761 }
20762
20763 if (IS_NAKED (func_type))
20764 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20765
20766 if (IS_VOLATILE (func_type))
20767 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
20768
20769 if (IS_NESTED (func_type))
20770 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
20771 if (IS_STACKALIGN (func_type))
20772 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20773 if (IS_CMSE_ENTRY (func_type))
20774 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
20775
20776 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20777 (HOST_WIDE_INT) crtl->args.size,
20778 crtl->args.pretend_args_size,
20779 (HOST_WIDE_INT) get_frame_size ());
20780
20781 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20782 frame_pointer_needed,
20783 cfun->machine->uses_anonymous_args);
20784
20785 if (cfun->machine->lr_save_eliminated)
20786 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20787
20788 if (crtl->calls_eh_return)
20789 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20790
20791 }
20792
20793 static void
20794 arm_output_function_epilogue (FILE *)
20795 {
20796 arm_stack_offsets *offsets;
20797
20798 if (TARGET_THUMB1)
20799 {
20800 int regno;
20801
20802 /* Emit any call-via-reg trampolines that are needed for v4t support
20803 of call_reg and call_value_reg type insns. */
20804 for (regno = 0; regno < LR_REGNUM; regno++)
20805 {
20806 rtx label = cfun->machine->call_via[regno];
20807
20808 if (label != NULL)
20809 {
20810 switch_to_section (function_section (current_function_decl));
20811 targetm.asm_out.internal_label (asm_out_file, "L",
20812 CODE_LABEL_NUMBER (label));
20813 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20814 }
20815 }
20816
20817 /* ??? Probably not safe to set this here, since it assumes that a
20818 function will be emitted as assembly immediately after we generate
20819 RTL for it. This does not happen for inline functions. */
20820 cfun->machine->return_used_this_function = 0;
20821 }
20822 else /* TARGET_32BIT */
20823 {
20824 /* We need to take into account any stack-frame rounding. */
20825 offsets = arm_get_frame_offsets ();
20826
20827 gcc_assert (!use_return_insn (FALSE, NULL)
20828 || (cfun->machine->return_used_this_function != 0)
20829 || offsets->saved_regs == offsets->outgoing_args
20830 || frame_pointer_needed);
20831 }
20832 }
20833
20834 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20835 STR and STRD. If an even number of registers are being pushed, one
20836 or more STRD patterns are created for each register pair. If an
20837 odd number of registers are pushed, emit an initial STR followed by
20838 as many STRD instructions as are needed. This works best when the
20839 stack is initially 64-bit aligned (the normal case), since it
20840 ensures that each STRD is also 64-bit aligned. */
20841 static void
20842 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20843 {
20844 int num_regs = 0;
20845 int i;
20846 int regno;
20847 rtx par = NULL_RTX;
20848 rtx dwarf = NULL_RTX;
20849 rtx tmp;
20850 bool first = true;
20851
20852 num_regs = bit_count (saved_regs_mask);
20853
20854 /* Must be at least one register to save, and can't save SP or PC. */
20855 gcc_assert (num_regs > 0 && num_regs <= 14);
20856 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20857 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20858
20859 /* Create sequence for DWARF info. All the frame-related data for
20860 debugging is held in this wrapper. */
20861 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20862
20863 /* Describe the stack adjustment. */
20864 tmp = gen_rtx_SET (stack_pointer_rtx,
20865 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20866 RTX_FRAME_RELATED_P (tmp) = 1;
20867 XVECEXP (dwarf, 0, 0) = tmp;
20868
20869 /* Find the first register. */
20870 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20871 ;
20872
20873 i = 0;
20874
20875 /* If there's an odd number of registers to push, start off by
20876 pushing a single register. This ensures that subsequent strd
20877 operations are dword aligned (assuming that SP was originally
20878 64-bit aligned). */
20879 if ((num_regs & 1) != 0)
20880 {
20881 rtx reg, mem, insn;
20882
20883 reg = gen_rtx_REG (SImode, regno);
20884 if (num_regs == 1)
20885 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20886 stack_pointer_rtx));
20887 else
20888 mem = gen_frame_mem (Pmode,
20889 gen_rtx_PRE_MODIFY
20890 (Pmode, stack_pointer_rtx,
20891 plus_constant (Pmode, stack_pointer_rtx,
20892 -4 * num_regs)));
20893
20894 tmp = gen_rtx_SET (mem, reg);
20895 RTX_FRAME_RELATED_P (tmp) = 1;
20896 insn = emit_insn (tmp);
20897 RTX_FRAME_RELATED_P (insn) = 1;
20898 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20899 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20900 RTX_FRAME_RELATED_P (tmp) = 1;
20901 i++;
20902 regno++;
20903 XVECEXP (dwarf, 0, i) = tmp;
20904 first = false;
20905 }
20906
20907 while (i < num_regs)
20908 if (saved_regs_mask & (1 << regno))
20909 {
20910 rtx reg1, reg2, mem1, mem2;
20911 rtx tmp0, tmp1, tmp2;
20912 int regno2;
20913
20914 /* Find the register to pair with this one. */
20915 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20916 regno2++)
20917 ;
20918
20919 reg1 = gen_rtx_REG (SImode, regno);
20920 reg2 = gen_rtx_REG (SImode, regno2);
20921
20922 if (first)
20923 {
20924 rtx insn;
20925
20926 first = false;
20927 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20928 stack_pointer_rtx,
20929 -4 * num_regs));
20930 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20931 stack_pointer_rtx,
20932 -4 * (num_regs - 1)));
20933 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20934 plus_constant (Pmode, stack_pointer_rtx,
20935 -4 * (num_regs)));
20936 tmp1 = gen_rtx_SET (mem1, reg1);
20937 tmp2 = gen_rtx_SET (mem2, reg2);
20938 RTX_FRAME_RELATED_P (tmp0) = 1;
20939 RTX_FRAME_RELATED_P (tmp1) = 1;
20940 RTX_FRAME_RELATED_P (tmp2) = 1;
20941 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20942 XVECEXP (par, 0, 0) = tmp0;
20943 XVECEXP (par, 0, 1) = tmp1;
20944 XVECEXP (par, 0, 2) = tmp2;
20945 insn = emit_insn (par);
20946 RTX_FRAME_RELATED_P (insn) = 1;
20947 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20948 }
20949 else
20950 {
20951 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20952 stack_pointer_rtx,
20953 4 * i));
20954 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20955 stack_pointer_rtx,
20956 4 * (i + 1)));
20957 tmp1 = gen_rtx_SET (mem1, reg1);
20958 tmp2 = gen_rtx_SET (mem2, reg2);
20959 RTX_FRAME_RELATED_P (tmp1) = 1;
20960 RTX_FRAME_RELATED_P (tmp2) = 1;
20961 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20962 XVECEXP (par, 0, 0) = tmp1;
20963 XVECEXP (par, 0, 1) = tmp2;
20964 emit_insn (par);
20965 }
20966
20967 /* Create unwind information. This is an approximation. */
20968 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20969 plus_constant (Pmode,
20970 stack_pointer_rtx,
20971 4 * i)),
20972 reg1);
20973 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20974 plus_constant (Pmode,
20975 stack_pointer_rtx,
20976 4 * (i + 1))),
20977 reg2);
20978
20979 RTX_FRAME_RELATED_P (tmp1) = 1;
20980 RTX_FRAME_RELATED_P (tmp2) = 1;
20981 XVECEXP (dwarf, 0, i + 1) = tmp1;
20982 XVECEXP (dwarf, 0, i + 2) = tmp2;
20983 i += 2;
20984 regno = regno2 + 1;
20985 }
20986 else
20987 regno++;
20988
20989 return;
20990 }
20991
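/* Illustrative sketch only, not part of GCC: the pairing strategy of
   thumb2_emit_strd_push above.  With an odd register count a single STR
   with writeback goes first, so every following STRD stays double-word
   aligned; PAIRS receives the register numbers used by each STRD and
   the return value is the register of the initial STR, or -1 when the
   count is even.  The array sizes assume at most 14 saved registers.  */

static int
thumb2_strd_pairs_sketch (unsigned long mask, int pairs[7][2], int *npairs)
{
  int regs[15], n = 0, first_single = -1, start = 0;

  for (int r = 0; r <= 14; r++)         /* collect the saved registers  */
    if (mask & (1ul << r))
      regs[n++] = r;

  if (n & 1)                            /* odd count: one STR goes first  */
    {
      first_single = regs[0];
      start = 1;
    }

  *npairs = 0;
  for (int i = start; i + 1 < n; i += 2)
    {
      pairs[*npairs][0] = regs[i];      /* Thumb-2 STRD does not require  */
      pairs[*npairs][1] = regs[i + 1];  /* consecutive registers.  */
      (*npairs)++;
    }
  return first_single;
}
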
20992 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20993 whenever possible, otherwise it emits single-word stores. The first store
20994 also allocates stack space for all saved registers, using writeback with
20995 post-addressing mode. All other stores use offset addressing. If no STRD
20996 can be emitted, this function emits a sequence of single-word stores,
20997 and not an STM as before, because single-word stores give the scheduler
20998 more freedom and can be turned into an STM by peephole optimizations. */
20999 static void
21000 arm_emit_strd_push (unsigned long saved_regs_mask)
21001 {
21002 int num_regs = 0;
21003 int i, j, dwarf_index = 0;
21004 int offset = 0;
21005 rtx dwarf = NULL_RTX;
21006 rtx insn = NULL_RTX;
21007 rtx tmp, mem;
21008
21009 /* TODO: More efficient code could be emitted by changing the
21010 layout, e.g., first push all pairs that can use STRD to keep the
21011 stack aligned, and then push all other registers. */
21012 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21013 if (saved_regs_mask & (1 << i))
21014 num_regs++;
21015
21016 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21017 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21018 gcc_assert (num_regs > 0);
21019
21020 /* Create sequence for DWARF info. */
21021 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21022
21023 /* For dwarf info, we generate explicit stack update. */
21024 tmp = gen_rtx_SET (stack_pointer_rtx,
21025 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21026 RTX_FRAME_RELATED_P (tmp) = 1;
21027 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21028
21029 /* Save registers. */
21030 offset = - 4 * num_regs;
21031 j = 0;
21032 while (j <= LAST_ARM_REGNUM)
21033 if (saved_regs_mask & (1 << j))
21034 {
21035 if ((j % 2 == 0)
21036 && (saved_regs_mask & (1 << (j + 1))))
21037 {
21038 /* The current register and the next register form a register pair for
21039 which STRD can be generated. */
21040 if (offset < 0)
21041 {
21042 /* Allocate stack space for all saved registers. */
21043 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21044 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21045 mem = gen_frame_mem (DImode, tmp);
21046 offset = 0;
21047 }
21048 else if (offset > 0)
21049 mem = gen_frame_mem (DImode,
21050 plus_constant (Pmode,
21051 stack_pointer_rtx,
21052 offset));
21053 else
21054 mem = gen_frame_mem (DImode, stack_pointer_rtx);
21055
21056 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
21057 RTX_FRAME_RELATED_P (tmp) = 1;
21058 tmp = emit_insn (tmp);
21059
21060 /* Record the first store insn. */
21061 if (dwarf_index == 1)
21062 insn = tmp;
21063
21064 /* Generate dwarf info. */
21065 mem = gen_frame_mem (SImode,
21066 plus_constant (Pmode,
21067 stack_pointer_rtx,
21068 offset));
21069 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21070 RTX_FRAME_RELATED_P (tmp) = 1;
21071 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21072
21073 mem = gen_frame_mem (SImode,
21074 plus_constant (Pmode,
21075 stack_pointer_rtx,
21076 offset + 4));
21077 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
21078 RTX_FRAME_RELATED_P (tmp) = 1;
21079 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21080
21081 offset += 8;
21082 j += 2;
21083 }
21084 else
21085 {
21086 /* Emit a single word store. */
21087 if (offset < 0)
21088 {
21089 /* Allocate stack space for all saved registers. */
21090 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21091 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21092 mem = gen_frame_mem (SImode, tmp);
21093 offset = 0;
21094 }
21095 else if (offset > 0)
21096 mem = gen_frame_mem (SImode,
21097 plus_constant (Pmode,
21098 stack_pointer_rtx,
21099 offset));
21100 else
21101 mem = gen_frame_mem (SImode, stack_pointer_rtx);
21102
21103 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21104 RTX_FRAME_RELATED_P (tmp) = 1;
21105 tmp = emit_insn (tmp);
21106
21107 /* Record the first store insn. */
21108 if (dwarf_index == 1)
21109 insn = tmp;
21110
21111 /* Generate dwarf info. */
21112 mem = gen_frame_mem (SImode,
21113 plus_constant (Pmode,
21114 stack_pointer_rtx,
21115 offset));
21116 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21117 RTX_FRAME_RELATED_P (tmp) = 1;
21118 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21119
21120 offset += 4;
21121 j += 1;
21122 }
21123 }
21124 else
21125 j++;
21126
21127 /* Attach dwarf info to the first insn we generate. */
21128 gcc_assert (insn != NULL_RTX);
21129 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21130 RTX_FRAME_RELATED_P (insn) = 1;
21131 }
21132
21133 /* Generate and emit an insn that we will recognize as a push_multi.
21134 Unfortunately, since this insn does not reflect very well the actual
21135 semantics of the operation, we need to annotate the insn for the benefit
21136 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
21137 MASK for registers that should be annotated for DWARF2 frame unwind
21138 information. */
21139 static rtx
21140 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
21141 {
21142 int num_regs = 0;
21143 int num_dwarf_regs = 0;
21144 int i, j;
21145 rtx par;
21146 rtx dwarf;
21147 int dwarf_par_index;
21148 rtx tmp, reg;
21149
21150 /* We don't record the PC in the dwarf frame information. */
21151 dwarf_regs_mask &= ~(1 << PC_REGNUM);
21152
21153 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21154 {
21155 if (mask & (1 << i))
21156 num_regs++;
21157 if (dwarf_regs_mask & (1 << i))
21158 num_dwarf_regs++;
21159 }
21160
21161 gcc_assert (num_regs && num_regs <= 16);
21162 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
21163
21164 /* For the body of the insn we are going to generate an UNSPEC in
21165 parallel with several USEs. This allows the insn to be recognized
21166 by the push_multi pattern in the arm.md file.
21167
21168 The body of the insn looks something like this:
21169
21170 (parallel [
21171 (set (mem:BLK (pre_modify:SI (reg:SI sp)
21172 (const_int:SI <num>)))
21173 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
21174 (use (reg:SI XX))
21175 (use (reg:SI YY))
21176 ...
21177 ])
21178
21179 For the frame note however, we try to be more explicit and actually
21180 show each register being stored into the stack frame, plus a (single)
21181 decrement of the stack pointer. We do it this way in order to be
21182 friendly to the stack unwinding code, which only wants to see a single
21183 stack decrement per instruction. The RTL we generate for the note looks
21184 something like this:
21185
21186 (sequence [
21187 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
21188 (set (mem:SI (reg:SI sp)) (reg:SI r4))
21189 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
21190 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
21191 ...
21192 ])
21193
21194 FIXME:: In an ideal world the PRE_MODIFY would not exist and
21195 instead we'd have a parallel expression detailing all
21196 the stores to the various memory addresses so that debug
21197 information is more up-to-date. Remember however while writing
21198 this to take care of the constraints with the push instruction.
21199
21200 Note also that this has to be taken care of for the VFP registers.
21201
21202 For more see PR43399. */
21203
21204 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
21205 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
21206 dwarf_par_index = 1;
21207
21208 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21209 {
21210 if (mask & (1 << i))
21211 {
21212 reg = gen_rtx_REG (SImode, i);
21213
21214 XVECEXP (par, 0, 0)
21215 = gen_rtx_SET (gen_frame_mem
21216 (BLKmode,
21217 gen_rtx_PRE_MODIFY (Pmode,
21218 stack_pointer_rtx,
21219 plus_constant
21220 (Pmode, stack_pointer_rtx,
21221 -4 * num_regs))
21222 ),
21223 gen_rtx_UNSPEC (BLKmode,
21224 gen_rtvec (1, reg),
21225 UNSPEC_PUSH_MULT));
21226
21227 if (dwarf_regs_mask & (1 << i))
21228 {
21229 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
21230 reg);
21231 RTX_FRAME_RELATED_P (tmp) = 1;
21232 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
21233 }
21234
21235 break;
21236 }
21237 }
21238
21239 for (j = 1, i++; j < num_regs; i++)
21240 {
21241 if (mask & (1 << i))
21242 {
21243 reg = gen_rtx_REG (SImode, i);
21244
21245 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
21246
21247 if (dwarf_regs_mask & (1 << i))
21248 {
21249 tmp
21250 = gen_rtx_SET (gen_frame_mem
21251 (SImode,
21252 plus_constant (Pmode, stack_pointer_rtx,
21253 4 * j)),
21254 reg);
21255 RTX_FRAME_RELATED_P (tmp) = 1;
21256 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
21257 }
21258
21259 j++;
21260 }
21261 }
21262
21263 par = emit_insn (par);
21264
21265 tmp = gen_rtx_SET (stack_pointer_rtx,
21266 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21267 RTX_FRAME_RELATED_P (tmp) = 1;
21268 XVECEXP (dwarf, 0, 0) = tmp;
21269
21270 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
21271
21272 return par;
21273 }
21274
21275 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
21276 SIZE is the offset to be adjusted.
21277 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
21278 static void
21279 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
21280 {
21281 rtx dwarf;
21282
21283 RTX_FRAME_RELATED_P (insn) = 1;
21284 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
21285 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
21286 }
21287
21288 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
21289 SAVED_REGS_MASK shows which registers need to be restored.
21290
21291 Unfortunately, since this insn does not reflect very well the actual
21292 semantics of the operation, we need to annotate the insn for the benefit
21293 of DWARF2 frame unwind information. */
21294 static void
21295 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
21296 {
21297 int num_regs = 0;
21298 int i, j;
21299 rtx par;
21300 rtx dwarf = NULL_RTX;
21301 rtx tmp, reg;
21302 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
21303 int offset_adj;
21304 int emit_update;
21305
21306 offset_adj = return_in_pc ? 1 : 0;
21307 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21308 if (saved_regs_mask & (1 << i))
21309 num_regs++;
21310
21311 gcc_assert (num_regs && num_regs <= 16);
21312
21313 /* If SP is in reglist, then we don't emit SP update insn. */
21314 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
21315
21316 /* The parallel needs to hold num_regs SETs
21317 and one SET for the stack update. */
21318 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
21319
21320 if (return_in_pc)
21321 XVECEXP (par, 0, 0) = ret_rtx;
21322
21323 if (emit_update)
21324 {
21325 /* Increment the stack pointer, based on there being
21326 num_regs 4-byte registers to restore. */
21327 tmp = gen_rtx_SET (stack_pointer_rtx,
21328 plus_constant (Pmode,
21329 stack_pointer_rtx,
21330 4 * num_regs));
21331 RTX_FRAME_RELATED_P (tmp) = 1;
21332 XVECEXP (par, 0, offset_adj) = tmp;
21333 }
21334
21335 /* Now restore every reg, which may include PC. */
21336 for (j = 0, i = 0; j < num_regs; i++)
21337 if (saved_regs_mask & (1 << i))
21338 {
21339 reg = gen_rtx_REG (SImode, i);
21340 if ((num_regs == 1) && emit_update && !return_in_pc)
21341 {
21342 /* Emit single load with writeback. */
21343 tmp = gen_frame_mem (SImode,
21344 gen_rtx_POST_INC (Pmode,
21345 stack_pointer_rtx));
21346 tmp = emit_insn (gen_rtx_SET (reg, tmp));
21347 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21348 return;
21349 }
21350
21351 tmp = gen_rtx_SET (reg,
21352 gen_frame_mem
21353 (SImode,
21354 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
21355 RTX_FRAME_RELATED_P (tmp) = 1;
21356 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
21357
21358 /* We need to maintain a sequence for DWARF info too. As dwarf info
21359 should not have PC, skip PC. */
21360 if (i != PC_REGNUM)
21361 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21362
21363 j++;
21364 }
21365
21366 if (return_in_pc)
21367 par = emit_jump_insn (par);
21368 else
21369 par = emit_insn (par);
21370
21371 REG_NOTES (par) = dwarf;
21372 if (!return_in_pc)
21373 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
21374 stack_pointer_rtx, stack_pointer_rtx);
21375 }
21376
21377 /* Generate and emit an insn pattern that we will recognize as a pop_multi
21378 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
21379
21380 Unfortunately, since this insn does not reflect very well the actual
21381 semantics of the operation, we need to annotate the insn for the benefit
21382 of DWARF2 frame unwind information. */
21383 static void
21384 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
21385 {
21386 int i, j;
21387 rtx par;
21388 rtx dwarf = NULL_RTX;
21389 rtx tmp, reg;
21390
21391 gcc_assert (num_regs && num_regs <= 32);
21392
21393 /* Workaround ARM10 VFPr1 bug. */
21394 if (num_regs == 2 && !arm_arch6)
21395 {
21396 if (first_reg == 15)
21397 first_reg--;
21398
21399 num_regs++;
21400 }
21401
21402 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
21403 there could be up to 32 D-registers to restore.
21404 If there are more than 16 D-registers, make two recursive calls,
21405 each of which emits one pop_multi instruction. */
21406 if (num_regs > 16)
21407 {
21408 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
21409 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
21410 return;
21411 }
21412
21413 /* The parallel needs to hold num_regs SETs
21414 and one SET for the stack update. */
21415 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
21416
21417 /* Increment the stack pointer, based on there being
21418 num_regs 8-byte registers to restore. */
21419 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
21420 RTX_FRAME_RELATED_P (tmp) = 1;
21421 XVECEXP (par, 0, 0) = tmp;
21422
21423 /* Now show every reg that will be restored, using a SET for each. */
21424 for (j = 0, i = first_reg; j < num_regs; i += 2)
21425 {
21426 reg = gen_rtx_REG (DFmode, i);
21427
21428 tmp = gen_rtx_SET (reg,
21429 gen_frame_mem
21430 (DFmode,
21431 plus_constant (Pmode, base_reg, 8 * j)));
21432 RTX_FRAME_RELATED_P (tmp) = 1;
21433 XVECEXP (par, 0, j + 1) = tmp;
21434
21435 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21436
21437 j++;
21438 }
21439
21440 par = emit_insn (par);
21441 REG_NOTES (par) = dwarf;
21442
21443 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
21444 if (REGNO (base_reg) == IP_REGNUM)
21445 {
21446 RTX_FRAME_RELATED_P (par) = 1;
21447 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
21448 }
21449 else
21450 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
21451 base_reg, base_reg);
21452 }
21453
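/* Illustrative sketch only, not part of GCC: the range splitting done by
   arm_emit_vfp_multi_reg_pop above, with the actual emission abstracted
   into the hypothetical EMIT_RANGE callback.  A pop of exactly two D
   registers is widened to three on pre-v6 cores (ARM10 VFPr1
   workaround), and anything larger than 16 registers is split into two
   pops; the register-start adjustment above is omitted here.  */

static void
vfp_pop_ranges_sketch (int first_dreg, int num_regs, int arch6,
                       void (*emit_range) (int first, int count))
{
  if (num_regs == 2 && !arch6)
    num_regs++;                 /* widen the two-register pop  */

  if (num_regs > 16)
    {
      /* At most 16 D registers fit in one pop_multi.  */
      vfp_pop_ranges_sketch (first_dreg, 16, arch6, emit_range);
      vfp_pop_ranges_sketch (first_dreg + 16, num_regs - 16, arch6,
                             emit_range);
      return;
    }

  emit_range (first_dreg, num_regs);
}
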
21454 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
21455 even number of registers is being popped, multiple LDRD patterns are created
21456 for all register pairs. If an odd number of registers is popped, the last
21457 register is loaded using an LDR pattern. */
21458 static void
21459 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
21460 {
21461 int num_regs = 0;
21462 int i, j;
21463 rtx par = NULL_RTX;
21464 rtx dwarf = NULL_RTX;
21465 rtx tmp, reg, tmp1;
21466 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
21467
21468 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21469 if (saved_regs_mask & (1 << i))
21470 num_regs++;
21471
21472 gcc_assert (num_regs && num_regs <= 16);
21473
21474 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
21475 to be popped. So, if num_regs is even, now it will become odd,
21476 and we can generate pop with PC. If num_regs is odd, it will be
21477 even now, and ldr with return can be generated for PC. */
21478 if (return_in_pc)
21479 num_regs--;
21480
21481 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21482
21483 /* Var j iterates over all the registers to gather all the registers in
21484 saved_regs_mask. Var i gives the index of saved registers in the stack
21485 frame. A PARALLEL RTX of a register pair is created here, so that the
21486 pattern for LDRD can be matched. As PC is always the last register to be
21487 popped, and we have already decremented num_regs if PC is present, we
21488 don't have to worry about PC in this loop. */
21489 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
21490 if (saved_regs_mask & (1 << j))
21491 {
21492 /* Create RTX for memory load. */
21493 reg = gen_rtx_REG (SImode, j);
21494 tmp = gen_rtx_SET (reg,
21495 gen_frame_mem (SImode,
21496 plus_constant (Pmode,
21497 stack_pointer_rtx, 4 * i)));
21498 RTX_FRAME_RELATED_P (tmp) = 1;
21499
21500 if (i % 2 == 0)
21501 {
21502 /* When saved-register index (i) is even, the RTX to be emitted is
21503 yet to be created. Hence create it first. The LDRD pattern we
21504 are generating is :
21505 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
21506 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
21507 where target registers need not be consecutive. */
21508 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21509 dwarf = NULL_RTX;
21510 }
21511
21512 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
21513 added as 0th element and if i is odd, reg_i is added as 1st element
21514 of LDRD pattern shown above. */
21515 XVECEXP (par, 0, (i % 2)) = tmp;
21516 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21517
21518 if ((i % 2) == 1)
21519 {
21520 /* When saved-register index (i) is odd, RTXs for both the registers
21521 to be loaded are generated in above given LDRD pattern, and the
21522 pattern can be emitted now. */
21523 par = emit_insn (par);
21524 REG_NOTES (par) = dwarf;
21525 RTX_FRAME_RELATED_P (par) = 1;
21526 }
21527
21528 i++;
21529 }
21530
21531 /* If the number of registers pushed is odd and return_in_pc is false, or
21532 the number of registers is even and return_in_pc is true, the last
21533 register is popped using LDR. It can be PC as well. Hence, adjust the
21534 stack first and then use LDR with post-increment. */
21535
21536 /* Increment the stack pointer, based on there being
21537 num_regs 4-byte registers to restore. */
21538 tmp = gen_rtx_SET (stack_pointer_rtx,
21539 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
21540 RTX_FRAME_RELATED_P (tmp) = 1;
21541 tmp = emit_insn (tmp);
21542 if (!return_in_pc)
21543 {
21544 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
21545 stack_pointer_rtx, stack_pointer_rtx);
21546 }
21547
21548 dwarf = NULL_RTX;
21549
21550 if (((num_regs % 2) == 1 && !return_in_pc)
21551 || ((num_regs % 2) == 0 && return_in_pc))
21552 {
21553 /* Scan for the single register to be popped. Skip until the saved
21554 register is found. */
21555 for (; (saved_regs_mask & (1 << j)) == 0; j++);
21556
21557 /* Gen LDR with post increment here. */
21558 tmp1 = gen_rtx_MEM (SImode,
21559 gen_rtx_POST_INC (SImode,
21560 stack_pointer_rtx));
21561 set_mem_alias_set (tmp1, get_frame_alias_set ());
21562
21563 reg = gen_rtx_REG (SImode, j);
21564 tmp = gen_rtx_SET (reg, tmp1);
21565 RTX_FRAME_RELATED_P (tmp) = 1;
21566 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21567
21568 if (return_in_pc)
21569 {
21570 /* If return_in_pc, j must be PC_REGNUM. */
21571 gcc_assert (j == PC_REGNUM);
21572 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21573 XVECEXP (par, 0, 0) = ret_rtx;
21574 XVECEXP (par, 0, 1) = tmp;
21575 par = emit_jump_insn (par);
21576 }
21577 else
21578 {
21579 par = emit_insn (tmp);
21580 REG_NOTES (par) = dwarf;
21581 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
21582 stack_pointer_rtx, stack_pointer_rtx);
21583 }
21584
21585 }
21586 else if ((num_regs % 2) == 1 && return_in_pc)
21587 {
21588 /* There are 2 registers to be popped. So, generate the pattern
21589 pop_multiple_with_stack_update_and_return to pop in PC. */
21590 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
21591 }
21592
21593 return;
21594 }
21595
21596 /* LDRD in ARM mode needs consecutive registers as operands. This function
21597 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
21598 offset addressing and then generates one separate stack update. This provides
21599 more scheduling freedom, compared to writeback on every load. However,
21600 if the function returns using load into PC directly
21601 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
21602 before the last load. TODO: Add a peephole optimization to recognize
21603 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
21604 peephole optimization to merge the load at stack-offset zero
21605 with the stack update instruction using load with writeback
21606 in post-index addressing mode. */
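/* Illustrative sketch only: for SAVED_REGS_MASK covering {r4, r5, r6, pc}
   this emits roughly
       ldrd    r4, r5, [sp]
       ldr     r6, [sp, #8]
       add     sp, sp, #12
       ldr     pc, [sp], #4
   where the final post-indexed load into PC performs the return.  */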
21607 static void
21608 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
21609 {
21610 int j = 0;
21611 int offset = 0;
21612 rtx par = NULL_RTX;
21613 rtx dwarf = NULL_RTX;
21614 rtx tmp, mem;
21615
21616 /* Restore saved registers. */
21617 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
21618 j = 0;
21619 while (j <= LAST_ARM_REGNUM)
21620 if (saved_regs_mask & (1 << j))
21621 {
21622 if ((j % 2) == 0
21623 && (saved_regs_mask & (1 << (j + 1)))
21624 && (j + 1) != PC_REGNUM)
21625 {
21626 /* Current register and next register form register pair for which
21627 LDRD can be generated. PC is always the last register popped, and
21628 we handle it separately. */
21629 if (offset > 0)
21630 mem = gen_frame_mem (DImode,
21631 plus_constant (Pmode,
21632 stack_pointer_rtx,
21633 offset));
21634 else
21635 mem = gen_frame_mem (DImode, stack_pointer_rtx);
21636
21637 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
21638 tmp = emit_insn (tmp);
21639 RTX_FRAME_RELATED_P (tmp) = 1;
21640
21641 /* Generate dwarf info. */
21642
21643 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21644 gen_rtx_REG (SImode, j),
21645 NULL_RTX);
21646 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21647 gen_rtx_REG (SImode, j + 1),
21648 dwarf);
21649
21650 REG_NOTES (tmp) = dwarf;
21651
21652 offset += 8;
21653 j += 2;
21654 }
21655 else if (j != PC_REGNUM)
21656 {
21657 /* Emit a single word load. */
21658 if (offset > 0)
21659 mem = gen_frame_mem (SImode,
21660 plus_constant (Pmode,
21661 stack_pointer_rtx,
21662 offset));
21663 else
21664 mem = gen_frame_mem (SImode, stack_pointer_rtx);
21665
21666 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
21667 tmp = emit_insn (tmp);
21668 RTX_FRAME_RELATED_P (tmp) = 1;
21669
21670 /* Generate dwarf info. */
21671 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
21672 gen_rtx_REG (SImode, j),
21673 NULL_RTX);
21674
21675 offset += 4;
21676 j += 1;
21677 }
21678 else /* j == PC_REGNUM */
21679 j++;
21680 }
21681 else
21682 j++;
21683
21684 /* Update the stack. */
21685 if (offset > 0)
21686 {
21687 tmp = gen_rtx_SET (stack_pointer_rtx,
21688 plus_constant (Pmode,
21689 stack_pointer_rtx,
21690 offset));
21691 tmp = emit_insn (tmp);
21692 arm_add_cfa_adjust_cfa_note (tmp, offset,
21693 stack_pointer_rtx, stack_pointer_rtx);
21694 offset = 0;
21695 }
21696
21697 if (saved_regs_mask & (1 << PC_REGNUM))
21698 {
21699 /* Only PC is to be popped. */
21700 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21701 XVECEXP (par, 0, 0) = ret_rtx;
21702 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
21703 gen_frame_mem (SImode,
21704 gen_rtx_POST_INC (SImode,
21705 stack_pointer_rtx)));
21706 RTX_FRAME_RELATED_P (tmp) = 1;
21707 XVECEXP (par, 0, 1) = tmp;
21708 par = emit_jump_insn (par);
21709
21710 /* Generate dwarf info. */
21711 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21712 gen_rtx_REG (SImode, PC_REGNUM),
21713 NULL_RTX);
21714 REG_NOTES (par) = dwarf;
21715 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
21716 stack_pointer_rtx, stack_pointer_rtx);
21717 }
21718 }
21719
21720 /* Calculate the size of the return value that is passed in registers. */
21721 static unsigned
21722 arm_size_return_regs (void)
21723 {
21724 machine_mode mode;
21725
21726 if (crtl->return_rtx != 0)
21727 mode = GET_MODE (crtl->return_rtx);
21728 else
21729 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21730
21731 return GET_MODE_SIZE (mode);
21732 }
21733
21734 /* Return true if the current function needs to save/restore LR. */
21735 static bool
21736 thumb_force_lr_save (void)
21737 {
21738 return !cfun->machine->lr_save_eliminated
21739 && (!crtl->is_leaf
21740 || thumb_far_jump_used_p ()
21741 || df_regs_ever_live_p (LR_REGNUM));
21742 }
21743
21744 /* We do not know whether r3 will be available
21745 when an indirect tailcall happens in this
21746 particular case. */
21747 static bool
21748 is_indirect_tailcall_p (rtx call)
21749 {
21750 rtx pat = PATTERN (call);
21751
21752 /* Indirect tail call. */
21753 pat = XVECEXP (pat, 0, 0);
21754 if (GET_CODE (pat) == SET)
21755 pat = SET_SRC (pat);
21756
21757 pat = XEXP (XEXP (pat, 0), 0);
21758 return REG_P (pat);
21759 }
21760
21761 /* Return true if r3 is used by any of the tail call insns in the
21762 current function. */
21763 static bool
21764 any_sibcall_could_use_r3 (void)
21765 {
21766 edge_iterator ei;
21767 edge e;
21768
21769 if (!crtl->tail_call_emit)
21770 return false;
21771 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21772 if (e->flags & EDGE_SIBCALL)
21773 {
21774 rtx_insn *call = BB_END (e->src);
21775 if (!CALL_P (call))
21776 call = prev_nonnote_nondebug_insn (call);
21777 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
21778 if (find_regno_fusage (call, USE, 3)
21779 || is_indirect_tailcall_p (call))
21780 return true;
21781 }
21782 return false;
21783 }
21784
21785
21786 /* Compute the distance from register FROM to register TO.
21787 These can be the arg pointer (26), the soft frame pointer (25),
21788 the stack pointer (13) or the hard frame pointer (11).
21789 In thumb mode r7 is used as the soft frame pointer, if needed.
21790 Typical stack layout looks like this:
21791
21792 old stack pointer -> | |
21793 ----
21794 | | \
21795 | | saved arguments for
21796 | | vararg functions
21797 | | /
21798 --
21799 hard FP & arg pointer -> | | \
21800 | | stack
21801 | | frame
21802 | | /
21803 --
21804 | | \
21805 | | call saved
21806 | | registers
21807 soft frame pointer -> | | /
21808 --
21809 | | \
21810 | | local
21811 | | variables
21812 locals base pointer -> | | /
21813 --
21814 | | \
21815 | | outgoing
21816 | | arguments
21817 current stack pointer -> | | /
21818 --
21819
21820 For a given function some or all of these stack components
21821 may not be needed, giving rise to the possibility of
21822 eliminating some of the registers.
21823
21824 The values returned by this function must reflect the behavior
21825 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21826
21827 The sign of the number returned reflects the direction of stack
21828 growth, so the values are positive for all eliminations except
21829 from the soft frame pointer to the hard frame pointer.
21830
21831 SFP may point just inside the local variables block to ensure correct
21832 alignment. */
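/* A worked example, under the assumptions that we are in ARM state with
   ARM_DOUBLEWORD_ALIGN, no frame pointer, no static chain, no coprocessor
   saves and CALLER_INTERWORKING_SLOT_SIZE == 0 (hypothetical numbers, for
   illustration only): a function that saves {r4, r5, lr} (12 bytes), has
   8 bytes of locals and no outgoing arguments gets saved_args = 0,
   saved_regs = 12, soft_frame = 16 (bumped by 4 for doubleword alignment),
   locals_base = 24 and outgoing_args = 24.  The resulting elimination
   offsets are then 20 from the arg pointer to SP, 16 from the arg pointer
   to the soft frame pointer and 8 from the soft frame pointer to SP.  */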
21833
21834
21835 /* Return cached stack offsets. */
21836
21837 static arm_stack_offsets *
21838 arm_get_frame_offsets (void)
21839 {
21840 struct arm_stack_offsets *offsets;
21841
21842 offsets = &cfun->machine->stack_offsets;
21843
21844 return offsets;
21845 }
21846
21847
21848 /* Calculate stack offsets. These are used to calculate register elimination
21849 offsets and in prologue/epilogue code. Also calculates which registers
21850 should be saved. */
21851
21852 static void
21853 arm_compute_frame_layout (void)
21854 {
21855 struct arm_stack_offsets *offsets;
21856 unsigned long func_type;
21857 int saved;
21858 int core_saved;
21859 HOST_WIDE_INT frame_size;
21860 int i;
21861
21862 offsets = &cfun->machine->stack_offsets;
21863
21864 /* Initially this is the size of the local variables. It will be translated
21865 into an offset once we have determined the size of preceding data. */
21866 frame_size = ROUND_UP_WORD (get_frame_size ());
21867
21868 /* Space for variadic functions. */
21869 offsets->saved_args = crtl->args.pretend_args_size;
21870
21871 /* In Thumb mode this is incorrect, but never used. */
21872 offsets->frame
21873 = (offsets->saved_args
21874 + arm_compute_static_chain_stack_bytes ()
21875 + (frame_pointer_needed ? 4 : 0));
21876
21877 if (TARGET_32BIT)
21878 {
21879 unsigned int regno;
21880
21881 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21882 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21883 saved = core_saved;
21884
21885 /* We know that SP will be doubleword aligned on entry, and we must
21886 preserve that condition at any subroutine call. We also require the
21887 soft frame pointer to be doubleword aligned. */
21888
21889 if (TARGET_REALLY_IWMMXT)
21890 {
21891 /* Check for the call-saved iWMMXt registers. */
21892 for (regno = FIRST_IWMMXT_REGNUM;
21893 regno <= LAST_IWMMXT_REGNUM;
21894 regno++)
21895 if (df_regs_ever_live_p (regno)
21896 && !call_used_or_fixed_reg_p (regno))
21897 saved += 8;
21898 }
21899
21900 func_type = arm_current_func_type ();
21901 /* Space for saved VFP registers. */
21902 if (! IS_VOLATILE (func_type)
21903 && TARGET_HARD_FLOAT)
21904 saved += arm_get_vfp_saved_size ();
21905 }
21906 else /* TARGET_THUMB1 */
21907 {
21908 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21909 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21910 saved = core_saved;
21911 if (TARGET_BACKTRACE)
21912 saved += 16;
21913 }
21914
21915 /* Saved registers include the stack frame. */
21916 offsets->saved_regs
21917 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21918 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21919
21920 /* A leaf function does not need any stack alignment if it has nothing
21921 on the stack. */
21922 if (crtl->is_leaf && frame_size == 0
21923 /* However if it calls alloca(), we have a dynamically allocated
21924 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21925 && ! cfun->calls_alloca)
21926 {
21927 offsets->outgoing_args = offsets->soft_frame;
21928 offsets->locals_base = offsets->soft_frame;
21929 return;
21930 }
21931
21932 /* Ensure SFP has the correct alignment. */
21933 if (ARM_DOUBLEWORD_ALIGN
21934 && (offsets->soft_frame & 7))
21935 {
21936 offsets->soft_frame += 4;
21937 /* Try to align stack by pushing an extra reg. Don't bother doing this
21938 when there is a stack frame as the alignment will be rolled into
21939 the normal stack adjustment. */
21940 if (frame_size + crtl->outgoing_args_size == 0)
21941 {
21942 int reg = -1;
21943
21944 /* Register r3 is caller-saved. Normally it does not need to be
21945 saved on entry by the prologue. However if we choose to save
21946 it for padding then we may confuse the compiler into thinking
21947 a prologue sequence is required when in fact it is not. This
21948 will occur when shrink-wrapping if r3 is used as a scratch
21949 register and there are no other callee-saved writes.
21950
21951 This situation can be avoided, when other callee-saved registers
21952 are available and r3 is not mandatory, by choosing a callee-saved
21953 register for the padding. */
21954 bool prefer_callee_reg_p = false;
21955
21956 /* If it is safe to use r3, then do so. This sometimes
21957 generates better code on Thumb-2 by avoiding the need to
21958 use 32-bit push/pop instructions. */
21959 if (! any_sibcall_could_use_r3 ()
21960 && arm_size_return_regs () <= 12
21961 && (offsets->saved_regs_mask & (1 << 3)) == 0
21962 && (TARGET_THUMB2
21963 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21964 {
21965 reg = 3;
21966 if (!TARGET_THUMB2)
21967 prefer_callee_reg_p = true;
21968 }
21969 if (reg == -1
21970 || prefer_callee_reg_p)
21971 {
21972 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21973 {
21974 /* Avoid fixed registers; they may be changed at
21975 arbitrary times so it's unsafe to restore them
21976 during the epilogue. */
21977 if (!fixed_regs[i]
21978 && (offsets->saved_regs_mask & (1 << i)) == 0)
21979 {
21980 reg = i;
21981 break;
21982 }
21983 }
21984 }
21985
21986 if (reg != -1)
21987 {
21988 offsets->saved_regs += 4;
21989 offsets->saved_regs_mask |= (1 << reg);
21990 }
21991 }
21992 }
21993
21994 offsets->locals_base = offsets->soft_frame + frame_size;
21995 offsets->outgoing_args = (offsets->locals_base
21996 + crtl->outgoing_args_size);
21997
21998 if (ARM_DOUBLEWORD_ALIGN)
21999 {
22000 /* Ensure SP remains doubleword aligned. */
22001 if (offsets->outgoing_args & 7)
22002 offsets->outgoing_args += 4;
22003 gcc_assert (!(offsets->outgoing_args & 7));
22004 }
22005 }
22006
22007
22008 /* Calculate the relative offsets for the different stack pointers. Positive
22009 offsets are in the direction of stack growth. */
22010
22011 HOST_WIDE_INT
22012 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22013 {
22014 arm_stack_offsets *offsets;
22015
22016 offsets = arm_get_frame_offsets ();
22017
22018 /* OK, now we have enough information to compute the distances.
22019 There must be an entry in these switch tables for each pair
22020 of registers in ELIMINABLE_REGS, even if some of the entries
22021 seem to be redundant or useless. */
22022 switch (from)
22023 {
22024 case ARG_POINTER_REGNUM:
22025 switch (to)
22026 {
22027 case THUMB_HARD_FRAME_POINTER_REGNUM:
22028 return 0;
22029
22030 case FRAME_POINTER_REGNUM:
22031 /* This is the reverse of the soft frame pointer
22032 to hard frame pointer elimination below. */
22033 return offsets->soft_frame - offsets->saved_args;
22034
22035 case ARM_HARD_FRAME_POINTER_REGNUM:
22036 /* This is only non-zero in the case where the static chain register
22037 is stored above the frame. */
22038 return offsets->frame - offsets->saved_args - 4;
22039
22040 case STACK_POINTER_REGNUM:
22041 /* If nothing has been pushed on the stack at all
22042 then this will return -4. This *is* correct! */
22043 return offsets->outgoing_args - (offsets->saved_args + 4);
22044
22045 default:
22046 gcc_unreachable ();
22047 }
22048 gcc_unreachable ();
22049
22050 case FRAME_POINTER_REGNUM:
22051 switch (to)
22052 {
22053 case THUMB_HARD_FRAME_POINTER_REGNUM:
22054 return 0;
22055
22056 case ARM_HARD_FRAME_POINTER_REGNUM:
22057 /* The hard frame pointer points to the top entry in the
22058 stack frame. The soft frame pointer to the bottom entry
22059 in the stack frame. If there is no stack frame at all,
22060 then they are identical. */
22061
22062 return offsets->frame - offsets->soft_frame;
22063
22064 case STACK_POINTER_REGNUM:
22065 return offsets->outgoing_args - offsets->soft_frame;
22066
22067 default:
22068 gcc_unreachable ();
22069 }
22070 gcc_unreachable ();
22071
22072 default:
22073 /* You cannot eliminate from the stack pointer.
22074 In theory you could eliminate from the hard frame
22075 pointer to the stack pointer, but this will never
22076 happen, since if a stack frame is not needed the
22077 hard frame pointer will never be used. */
22078 gcc_unreachable ();
22079 }
22080 }
22081
22082 /* Given FROM and TO register numbers, say whether this elimination is
22083 allowed. Frame pointer elimination is automatically handled.
22084
22085 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
22086 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
22087 pointer, we must eliminate FRAME_POINTER_REGNUM into
22088 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
22089 ARG_POINTER_REGNUM. */
22090
22091 bool
22092 arm_can_eliminate (const int from, const int to)
22093 {
22094 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
22095 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
22096 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
22097 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
22098 true);
22099 }
22100
22101 /* Emit RTL to save coprocessor registers on function entry. Returns the
22102 number of bytes pushed. */
22103
22104 static int
22105 arm_save_coproc_regs(void)
22106 {
22107 int saved_size = 0;
22108 unsigned reg;
22109 unsigned start_reg;
22110 rtx insn;
22111
22112 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
22113 if (df_regs_ever_live_p (reg) && !call_used_or_fixed_reg_p (reg))
22114 {
22115 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22116 insn = gen_rtx_MEM (V2SImode, insn);
22117 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
22118 RTX_FRAME_RELATED_P (insn) = 1;
22119 saved_size += 8;
22120 }
22121
22122 if (TARGET_HARD_FLOAT)
22123 {
22124 start_reg = FIRST_VFP_REGNUM;
22125
22126 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
22127 {
22128 if ((!df_regs_ever_live_p (reg) || call_used_or_fixed_reg_p (reg))
22129 && (!df_regs_ever_live_p (reg + 1)
22130 || call_used_or_fixed_reg_p (reg + 1)))
22131 {
22132 if (start_reg != reg)
22133 saved_size += vfp_emit_fstmd (start_reg,
22134 (reg - start_reg) / 2);
22135 start_reg = reg + 2;
22136 }
22137 }
22138 if (start_reg != reg)
22139 saved_size += vfp_emit_fstmd (start_reg,
22140 (reg - start_reg) / 2);
22141 }
22142 return saved_size;
22143 }
22144
22145
22146 /* Set the Thumb frame pointer from the stack pointer. */
22147
22148 static void
22149 thumb_set_frame_pointer (arm_stack_offsets *offsets)
22150 {
22151 HOST_WIDE_INT amount;
22152 rtx insn, dwarf;
22153
22154 amount = offsets->outgoing_args - offsets->locals_base;
22155 if (amount < 1024)
22156 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22157 stack_pointer_rtx, GEN_INT (amount)));
22158 else
22159 {
22160 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
22161 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
22162 expects the first two operands to be the same. */
22163 if (TARGET_THUMB2)
22164 {
22165 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22166 stack_pointer_rtx,
22167 hard_frame_pointer_rtx));
22168 }
22169 else
22170 {
22171 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22172 hard_frame_pointer_rtx,
22173 stack_pointer_rtx));
22174 }
22175 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
22176 plus_constant (Pmode, stack_pointer_rtx, amount));
22177 RTX_FRAME_RELATED_P (dwarf) = 1;
22178 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22179 }
22180
22181 RTX_FRAME_RELATED_P (insn) = 1;
22182 }
22183
22184 struct scratch_reg {
22185 rtx reg;
22186 bool saved;
22187 };
22188
22189 /* Return a short-lived scratch register for use as a 2nd scratch register on
22190 function entry after the registers are saved in the prologue. This register
22191 must be released by means of release_scratch_register_on_entry. IP is not
22192 considered since it is always used as the 1st scratch register if available.
22193
22194 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
22195 mask of live registers. */
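/* Loosely illustrative: if REGNO1 is IP and LR is in LIVE_REGS, LR is handed
   back with no spill needed; if none of LR or r4-r10 qualifies, the fallback
   is r3 (or r2 when REGNO1 is already r3), which is pushed here with a
   pre-decrement store and reloaded by release_scratch_register_on_entry.  */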
22196
22197 static void
22198 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
22199 unsigned long live_regs)
22200 {
22201 int regno = -1;
22202
22203 sr->saved = false;
22204
22205 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
22206 regno = LR_REGNUM;
22207 else
22208 {
22209 unsigned int i;
22210
22211 for (i = 4; i < 11; i++)
22212 if (regno1 != i && (live_regs & (1 << i)) != 0)
22213 {
22214 regno = i;
22215 break;
22216 }
22217
22218 if (regno < 0)
22219 {
22220 /* If IP is used as the 1st scratch register for a nested function,
22221 then either r3 wasn't available or is used to preserve IP. */
22222 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
22223 regno1 = 3;
22224 regno = (regno1 == 3 ? 2 : 3);
22225 sr->saved
22226 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
22227 regno);
22228 }
22229 }
22230
22231 sr->reg = gen_rtx_REG (SImode, regno);
22232 if (sr->saved)
22233 {
22234 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22235 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
22236 rtx x = gen_rtx_SET (stack_pointer_rtx,
22237 plus_constant (Pmode, stack_pointer_rtx, -4));
22238 RTX_FRAME_RELATED_P (insn) = 1;
22239 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
22240 }
22241 }
22242
22243 /* Release a scratch register obtained from the preceding function. */
22244
22245 static void
22246 release_scratch_register_on_entry (struct scratch_reg *sr)
22247 {
22248 if (sr->saved)
22249 {
22250 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
22251 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
22252 rtx x = gen_rtx_SET (stack_pointer_rtx,
22253 plus_constant (Pmode, stack_pointer_rtx, 4));
22254 RTX_FRAME_RELATED_P (insn) = 1;
22255 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
22256 }
22257 }
22258
22259 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22260
22261 #if PROBE_INTERVAL > 4096
22262 #error Cannot use indexed addressing mode for stack probing
22263 #endif
22264
22265 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22266 inclusive. These are offsets from the current stack pointer. REGNO1
22267 is the index number of the 1st scratch register and LIVE_REGS is the
22268 mask of live registers. */
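/* A hedged illustration only (assuming PROBE_INTERVAL == 4096): with
   FIRST == 4096, SIZE == 1024 and IP as the scratch register, the
   single-probe case boils down to something like
       mov     ip, #8192
       sub     ip, sp, ip
       str     r0, [ip, #3072]     @ probe at SP - (FIRST + SIZE)
   where the store is whatever emit_stack_probe chooses to emit.  */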
22269
22270 static void
22271 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
22272 unsigned int regno1, unsigned long live_regs)
22273 {
22274 rtx reg1 = gen_rtx_REG (Pmode, regno1);
22275
22276 /* See if we have a constant small number of probes to generate. If so,
22277 that's the easy case. */
22278 if (size <= PROBE_INTERVAL)
22279 {
22280 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
22281 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
22282 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
22283 }
22284
22285 /* The run-time loop is made up of 10 insns in the generic case while the
22286 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
22287 else if (size <= 5 * PROBE_INTERVAL)
22288 {
22289 HOST_WIDE_INT i, rem;
22290
22291 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
22292 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
22293 emit_stack_probe (reg1);
22294
22295 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
22296 it exceeds SIZE. If only two probes are needed, this will not
22297 generate any code. Then probe at FIRST + SIZE. */
22298 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22299 {
22300 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
22301 emit_stack_probe (reg1);
22302 }
22303
22304 rem = size - (i - PROBE_INTERVAL);
22305 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
22306 {
22307 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
22308 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
22309 }
22310 else
22311 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
22312 }
22313
22314 /* Otherwise, do the same as above, but in a loop. Note that we must be
22315 extra careful with variables wrapping around because we might be at
22316 the very top (or the very bottom) of the address space and we have
22317 to be able to handle this case properly; in particular, we use an
22318 equality test for the loop condition. */
22319 else
22320 {
22321 HOST_WIDE_INT rounded_size;
22322 struct scratch_reg sr;
22323
22324 get_scratch_register_on_entry (&sr, regno1, live_regs);
22325
22326 emit_move_insn (reg1, GEN_INT (first));
22327
22328
22329 /* Step 1: round SIZE to the previous multiple of the interval. */
22330
22331 rounded_size = size & -PROBE_INTERVAL;
22332 emit_move_insn (sr.reg, GEN_INT (rounded_size));
22333
22334
22335 /* Step 2: compute initial and final value of the loop counter. */
22336
22337 /* TEST_ADDR = SP + FIRST. */
22338 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
22339
22340 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22341 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
22342
22343
22344 /* Step 3: the loop
22345
22346 do
22347 {
22348 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22349 probe at TEST_ADDR
22350 }
22351 while (TEST_ADDR != LAST_ADDR)
22352
22353 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22354 until it is equal to ROUNDED_SIZE. */
22355
22356 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
22357
22358
22359 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22360 that SIZE is equal to ROUNDED_SIZE. */
22361
22362 if (size != rounded_size)
22363 {
22364 HOST_WIDE_INT rem = size - rounded_size;
22365
22366 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
22367 {
22368 emit_set_insn (sr.reg,
22369 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
22370 emit_stack_probe (plus_constant (Pmode, sr.reg,
22371 PROBE_INTERVAL - rem));
22372 }
22373 else
22374 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
22375 }
22376
22377 release_scratch_register_on_entry (&sr);
22378 }
22379
22380 /* Make sure nothing is scheduled before we are done. */
22381 emit_insn (gen_blockage ());
22382 }
22383
22384 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22385 absolute addresses. */
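/* The emitted sequence is, schematically (register names here are only
   placeholders for REG1 and REG2, and PROBE_INTERVAL is assumed to be 4096):
   .LPSRL0:
       sub     ip, ip, #4096
       str     r0, [ip, #0]
       cmp     ip, r4
       bne     .LPSRL0
   */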
22386
22387 const char *
22388 output_probe_stack_range (rtx reg1, rtx reg2)
22389 {
22390 static int labelno = 0;
22391 char loop_lab[32];
22392 rtx xops[2];
22393
22394 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
22395
22396 /* Loop. */
22397 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22398
22399 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22400 xops[0] = reg1;
22401 xops[1] = GEN_INT (PROBE_INTERVAL);
22402 output_asm_insn ("sub\t%0, %0, %1", xops);
22403
22404 /* Probe at TEST_ADDR. */
22405 output_asm_insn ("str\tr0, [%0, #0]", xops);
22406
22407 /* Test if TEST_ADDR == LAST_ADDR. */
22408 xops[1] = reg2;
22409 output_asm_insn ("cmp\t%0, %1", xops);
22410
22411 /* Branch. */
22412 fputs ("\tbne\t", asm_out_file);
22413 assemble_name_raw (asm_out_file, loop_lab);
22414 fputc ('\n', asm_out_file);
22415
22416 return "";
22417 }
22418
22419 /* Generate the prologue instructions for entry into an ARM or Thumb-2
22420 function. */
22421 void
22422 arm_expand_prologue (void)
22423 {
22424 rtx amount;
22425 rtx insn;
22426 rtx ip_rtx;
22427 unsigned long live_regs_mask;
22428 unsigned long func_type;
22429 int fp_offset = 0;
22430 int saved_pretend_args = 0;
22431 int saved_regs = 0;
22432 unsigned HOST_WIDE_INT args_to_push;
22433 HOST_WIDE_INT size;
22434 arm_stack_offsets *offsets;
22435 bool clobber_ip;
22436
22437 func_type = arm_current_func_type ();
22438
22439 /* Naked functions don't have prologues. */
22440 if (IS_NAKED (func_type))
22441 {
22442 if (flag_stack_usage_info)
22443 current_function_static_stack_size = 0;
22444 return;
22445 }
22446
22447 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
22448 args_to_push = crtl->args.pretend_args_size;
22449
22450 /* Compute which register we will have to save onto the stack. */
22451 offsets = arm_get_frame_offsets ();
22452 live_regs_mask = offsets->saved_regs_mask;
22453
22454 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
22455
22456 if (IS_STACKALIGN (func_type))
22457 {
22458 rtx r0, r1;
22459
22460 /* Handle a word-aligned stack pointer. We generate the following:
22461
22462 mov r0, sp
22463 bic r1, r0, #7
22464 mov sp, r1
22465 <save and restore r0 in normal prologue/epilogue>
22466 mov sp, r0
22467 bx lr
22468
22469 The unwinder doesn't need to know about the stack realignment.
22470 Just tell it we saved SP in r0. */
22471 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
22472
22473 r0 = gen_rtx_REG (SImode, R0_REGNUM);
22474 r1 = gen_rtx_REG (SImode, R1_REGNUM);
22475
22476 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
22477 RTX_FRAME_RELATED_P (insn) = 1;
22478 add_reg_note (insn, REG_CFA_REGISTER, NULL);
22479
22480 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
22481
22482 /* ??? The CFA changes here, which may cause GDB to conclude that it
22483 has entered a different function. That said, the unwind info is
22484 correct, individually, before and after this instruction because
22485 we've described the save of SP, which will override the default
22486 handling of SP as restoring from the CFA. */
22487 emit_insn (gen_movsi (stack_pointer_rtx, r1));
22488 }
22489
22490 /* Let's compute the static_chain_stack_bytes required and store it. Right
22491 now the value must be -1 as stored by arm_init_machine_status (). */
22492 cfun->machine->static_chain_stack_bytes
22493 = arm_compute_static_chain_stack_bytes ();
22494
22495 /* The static chain register is the same as the IP register. If it is
22496 clobbered when creating the frame, we need to save and restore it. */
22497 clobber_ip = IS_NESTED (func_type)
22498 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22499 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
22500 || flag_stack_clash_protection)
22501 && !df_regs_ever_live_p (LR_REGNUM)
22502 && arm_r3_live_at_start_p ()));
22503
22504 /* Find somewhere to store IP whilst the frame is being created.
22505 We try the following places in order:
22506
22507 1. The last argument register r3 if it is available.
22508 2. A slot on the stack above the frame if there are no
22509 arguments to push onto the stack.
22510 3. Register r3 again, after pushing the argument registers
22511 onto the stack, if this is a varargs function.
22512 4. The last slot on the stack created for the arguments to
22513 push, if this isn't a varargs function.
22514
22515 Note - we only need to tell the dwarf2 backend about the SP
22516 adjustment in the second variant; the static chain register
22517 doesn't need to be unwound, as it doesn't contain a value
22518 inherited from the caller. */
22519 if (clobber_ip)
22520 {
22521 if (!arm_r3_live_at_start_p ())
22522 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
22523 else if (args_to_push == 0)
22524 {
22525 rtx addr, dwarf;
22526
22527 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
22528 saved_regs += 4;
22529
22530 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22531 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
22532 fp_offset = 4;
22533
22534 /* Just tell the dwarf backend that we adjusted SP. */
22535 dwarf = gen_rtx_SET (stack_pointer_rtx,
22536 plus_constant (Pmode, stack_pointer_rtx,
22537 -fp_offset));
22538 RTX_FRAME_RELATED_P (insn) = 1;
22539 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22540 }
22541 else
22542 {
22543 /* Store the args on the stack. */
22544 if (cfun->machine->uses_anonymous_args)
22545 {
22546 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
22547 (0xf0 >> (args_to_push / 4)) & 0xf);
22548 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
22549 saved_pretend_args = 1;
22550 }
22551 else
22552 {
22553 rtx addr, dwarf;
22554
22555 if (args_to_push == 4)
22556 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22557 else
22558 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
22559 plus_constant (Pmode,
22560 stack_pointer_rtx,
22561 -args_to_push));
22562
22563 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
22564
22565 /* Just tell the dwarf backend that we adjusted SP. */
22566 dwarf = gen_rtx_SET (stack_pointer_rtx,
22567 plus_constant (Pmode, stack_pointer_rtx,
22568 -args_to_push));
22569 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22570 }
22571
22572 RTX_FRAME_RELATED_P (insn) = 1;
22573 fp_offset = args_to_push;
22574 args_to_push = 0;
22575 }
22576 }
22577
22578 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22579 {
22580 if (IS_INTERRUPT (func_type))
22581 {
22582 /* Interrupt functions must not corrupt any registers.
22583 Creating a frame pointer however, corrupts the IP
22584 register, so we must push it first. */
22585 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
22586
22587 /* Do not set RTX_FRAME_RELATED_P on this insn.
22588 The dwarf stack unwinding code only wants to see one
22589 stack decrement per function, and this is not it. If
22590 this instruction is labeled as being part of the frame
22591 creation sequence then dwarf2out_frame_debug_expr will
22592 die when it encounters the assignment of IP to FP
22593 later on, since the use of SP here establishes SP as
22594 the CFA register and not IP.
22595
22596 Anyway this instruction is not really part of the stack
22597 frame creation although it is part of the prologue. */
22598 }
22599
22600 insn = emit_set_insn (ip_rtx,
22601 plus_constant (Pmode, stack_pointer_rtx,
22602 fp_offset));
22603 RTX_FRAME_RELATED_P (insn) = 1;
22604 }
22605
22606 if (args_to_push)
22607 {
22608 /* Push the argument registers, or reserve space for them. */
22609 if (cfun->machine->uses_anonymous_args)
22610 insn = emit_multi_reg_push
22611 ((0xf0 >> (args_to_push / 4)) & 0xf,
22612 (0xf0 >> (args_to_push / 4)) & 0xf);
22613 else
22614 insn = emit_insn
22615 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22616 GEN_INT (- args_to_push)));
22617 RTX_FRAME_RELATED_P (insn) = 1;
22618 }
22619
22620 /* If this is an interrupt service routine, and the link register
22621 is going to be pushed, and we're not generating the extra
22622 push of IP (needed when a frame is needed and the frame layout is APCS),
22623 then subtracting four from LR now will mean that the function return
22624 can be done with a single instruction. */
22625 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
22626 && (live_regs_mask & (1 << LR_REGNUM)) != 0
22627 && !(frame_pointer_needed && TARGET_APCS_FRAME)
22628 && TARGET_ARM)
22629 {
22630 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
22631
22632 emit_set_insn (lr, plus_constant (SImode, lr, -4));
22633 }
22634
22635 if (live_regs_mask)
22636 {
22637 unsigned long dwarf_regs_mask = live_regs_mask;
22638
22639 saved_regs += bit_count (live_regs_mask) * 4;
22640 if (optimize_size && !frame_pointer_needed
22641 && saved_regs == offsets->saved_regs - offsets->saved_args)
22642 {
22643 /* If no coprocessor registers are being pushed and we don't have
22644 to worry about a frame pointer then push extra registers to
22645 create the stack frame. This is done in a way that does not
22646 alter the frame layout, so is independent of the epilogue. */
22647 int n;
22648 int frame;
22649 n = 0;
22650 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
22651 n++;
22652 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
22653 if (frame && n * 4 >= frame)
22654 {
22655 n = frame / 4;
22656 live_regs_mask |= (1 << n) - 1;
22657 saved_regs += frame;
22658 }
22659 }
22660
22661 if (TARGET_LDRD
22662 && current_tune->prefer_ldrd_strd
22663 && !optimize_function_for_size_p (cfun))
22664 {
22665 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
22666 if (TARGET_THUMB2)
22667 thumb2_emit_strd_push (live_regs_mask);
22668 else if (TARGET_ARM
22669 && !TARGET_APCS_FRAME
22670 && !IS_INTERRUPT (func_type))
22671 arm_emit_strd_push (live_regs_mask);
22672 else
22673 {
22674 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
22675 RTX_FRAME_RELATED_P (insn) = 1;
22676 }
22677 }
22678 else
22679 {
22680 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
22681 RTX_FRAME_RELATED_P (insn) = 1;
22682 }
22683 }
22684
22685 if (! IS_VOLATILE (func_type))
22686 saved_regs += arm_save_coproc_regs ();
22687
22688 if (frame_pointer_needed && TARGET_ARM)
22689 {
22690 /* Create the new frame pointer. */
22691 if (TARGET_APCS_FRAME)
22692 {
22693 insn = GEN_INT (-(4 + args_to_push + fp_offset));
22694 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
22695 RTX_FRAME_RELATED_P (insn) = 1;
22696 }
22697 else
22698 {
22699 insn = GEN_INT (saved_regs - (4 + fp_offset));
22700 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22701 stack_pointer_rtx, insn));
22702 RTX_FRAME_RELATED_P (insn) = 1;
22703 }
22704 }
22705
22706 size = offsets->outgoing_args - offsets->saved_args;
22707 if (flag_stack_usage_info)
22708 current_function_static_stack_size = size;
22709
22710 /* If this isn't an interrupt service routine and we have a frame, then do
22711 stack checking. We use IP as the first scratch register, except for the
22712 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
22713 if (!IS_INTERRUPT (func_type)
22714 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
22715 || flag_stack_clash_protection))
22716 {
22717 unsigned int regno;
22718
22719 if (!IS_NESTED (func_type) || clobber_ip)
22720 regno = IP_REGNUM;
22721 else if (df_regs_ever_live_p (LR_REGNUM))
22722 regno = LR_REGNUM;
22723 else
22724 regno = 3;
22725
22726 if (crtl->is_leaf && !cfun->calls_alloca)
22727 {
22728 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
22729 arm_emit_probe_stack_range (get_stack_check_protect (),
22730 size - get_stack_check_protect (),
22731 regno, live_regs_mask);
22732 }
22733 else if (size > 0)
22734 arm_emit_probe_stack_range (get_stack_check_protect (), size,
22735 regno, live_regs_mask);
22736 }
22737
22738 /* Recover the static chain register. */
22739 if (clobber_ip)
22740 {
22741 if (!arm_r3_live_at_start_p () || saved_pretend_args)
22742 insn = gen_rtx_REG (SImode, 3);
22743 else
22744 {
22745 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
22746 insn = gen_frame_mem (SImode, insn);
22747 }
22748 emit_set_insn (ip_rtx, insn);
22749 emit_insn (gen_force_register_use (ip_rtx));
22750 }
22751
22752 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
22753 {
22754 /* This add can produce multiple insns for a large constant, so we
22755 need to get tricky. */
22756 rtx_insn *last = get_last_insn ();
22757
22758 amount = GEN_INT (offsets->saved_args + saved_regs
22759 - offsets->outgoing_args);
22760
22761 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22762 amount));
22763 do
22764 {
22765 last = last ? NEXT_INSN (last) : get_insns ();
22766 RTX_FRAME_RELATED_P (last) = 1;
22767 }
22768 while (last != insn);
22769
22770 /* If the frame pointer is needed, emit a special barrier that
22771 will prevent the scheduler from moving stores to the frame
22772 before the stack adjustment. */
22773 if (frame_pointer_needed)
22774 emit_insn (gen_stack_tie (stack_pointer_rtx,
22775 hard_frame_pointer_rtx));
22776 }
22777
22778
22779 if (frame_pointer_needed && TARGET_THUMB2)
22780 thumb_set_frame_pointer (offsets);
22781
22782 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22783 {
22784 unsigned long mask;
22785
22786 mask = live_regs_mask;
22787 mask &= THUMB2_WORK_REGS;
22788 if (!IS_NESTED (func_type))
22789 mask |= (1 << IP_REGNUM);
22790 arm_load_pic_register (mask, NULL_RTX);
22791 }
22792
22793 /* If we are profiling, make sure no instructions are scheduled before
22794 the call to mcount. Similarly if the user has requested no
22795 scheduling in the prolog. Similarly if we want non-call exceptions
22796 using the EABI unwinder, to prevent faulting instructions from being
22797 swapped with a stack adjustment. */
22798 if (crtl->profile || !TARGET_SCHED_PROLOG
22799 || (arm_except_unwind_info (&global_options) == UI_TARGET
22800 && cfun->can_throw_non_call_exceptions))
22801 emit_insn (gen_blockage ());
22802
22803 /* If the link register is being kept alive, with the return address in it,
22804 then make sure that it does not get reused by the ce2 pass. */
22805 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22806 cfun->machine->lr_save_eliminated = 1;
22807 }
22808 \f
22809 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22810 static void
22811 arm_print_condition (FILE *stream)
22812 {
22813 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22814 {
22815 /* Branch conversion is not implemented for Thumb-2. */
22816 if (TARGET_THUMB)
22817 {
22818 output_operand_lossage ("predicated Thumb instruction");
22819 return;
22820 }
22821 if (current_insn_predicate != NULL)
22822 {
22823 output_operand_lossage
22824 ("predicated instruction in conditional sequence");
22825 return;
22826 }
22827
22828 fputs (arm_condition_codes[arm_current_cc], stream);
22829 }
22830 else if (current_insn_predicate)
22831 {
22832 enum arm_cond_code code;
22833
22834 if (TARGET_THUMB1)
22835 {
22836 output_operand_lossage ("predicated Thumb instruction");
22837 return;
22838 }
22839
22840 code = get_arm_condition_code (current_insn_predicate);
22841 fputs (arm_condition_codes[code], stream);
22842 }
22843 }
22844
22845
22846 /* Globally reserved letters: acln
22847 Punctuation letters currently used: @_|?().!#
22848 Lower case letters currently used: bcdefhimpqtvwxyz
22849 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22850 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22851
22852 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22853
22854 If CODE is 'd', then X is a condition operand and the instruction
22855 should only be executed if the condition is true.
22856 If CODE is 'D', then X is a condition operand and the instruction
22857 should only be executed if the condition is false: however, if the mode
22858 of the comparison is CCFPEmode, then always execute the instruction -- we
22859 do this because in these circumstances !GE does not necessarily imply LT;
22860 in these cases the instruction pattern will take care to make sure that
22861 an instruction containing %d will follow, thereby undoing the effects of
22862 doing this instruction unconditionally.
22863 If CODE is 'N' then X is a floating point operand that must be negated
22864 before output.
22865 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22866 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
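/* For instance (an illustrative, hypothetical template rather than one taken
   from arm.md): printing a 16-byte value held in r4 with "%M0" gives
   "{r4-r7}", and "%B0" applied to the constant 0 prints -1, its
   bitwise-inverted value.  */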
22867 static void
22868 arm_print_operand (FILE *stream, rtx x, int code)
22869 {
22870 switch (code)
22871 {
22872 case '@':
22873 fputs (ASM_COMMENT_START, stream);
22874 return;
22875
22876 case '_':
22877 fputs (user_label_prefix, stream);
22878 return;
22879
22880 case '|':
22881 fputs (REGISTER_PREFIX, stream);
22882 return;
22883
22884 case '?':
22885 arm_print_condition (stream);
22886 return;
22887
22888 case '.':
22889 /* The current condition code for a condition code setting instruction.
22890 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22891 fputc('s', stream);
22892 arm_print_condition (stream);
22893 return;
22894
22895 case '!':
22896 /* If the instruction is conditionally executed then print
22897 the current condition code, otherwise print 's'. */
22898 gcc_assert (TARGET_THUMB2);
22899 if (current_insn_predicate)
22900 arm_print_condition (stream);
22901 else
22902 fputc('s', stream);
22903 break;
22904
22905 /* %# is a "break" sequence. It doesn't output anything, but is used to
22906 separate e.g. operand numbers from following text, if that text consists
22907 of further digits which we don't want to be part of the operand
22908 number. */
22909 case '#':
22910 return;
22911
22912 case 'N':
22913 {
22914 REAL_VALUE_TYPE r;
22915 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22916 fprintf (stream, "%s", fp_const_from_val (&r));
22917 }
22918 return;
22919
22920 /* An integer or symbol address without a preceding # sign. */
22921 case 'c':
22922 switch (GET_CODE (x))
22923 {
22924 case CONST_INT:
22925 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22926 break;
22927
22928 case SYMBOL_REF:
22929 output_addr_const (stream, x);
22930 break;
22931
22932 case CONST:
22933 if (GET_CODE (XEXP (x, 0)) == PLUS
22934 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22935 {
22936 output_addr_const (stream, x);
22937 break;
22938 }
22939 /* Fall through. */
22940
22941 default:
22942 output_operand_lossage ("Unsupported operand for code '%c'", code);
22943 }
22944 return;
22945
22946 /* An integer that we want to print in HEX. */
22947 case 'x':
22948 switch (GET_CODE (x))
22949 {
22950 case CONST_INT:
22951 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22952 break;
22953
22954 default:
22955 output_operand_lossage ("Unsupported operand for code '%c'", code);
22956 }
22957 return;
22958
22959 case 'B':
22960 if (CONST_INT_P (x))
22961 {
22962 HOST_WIDE_INT val;
22963 val = ARM_SIGN_EXTEND (~INTVAL (x));
22964 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22965 }
22966 else
22967 {
22968 putc ('~', stream);
22969 output_addr_const (stream, x);
22970 }
22971 return;
22972
22973 case 'b':
22974 /* Print the log2 of a CONST_INT. */
22975 {
22976 HOST_WIDE_INT val;
22977
22978 if (!CONST_INT_P (x)
22979 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22980 output_operand_lossage ("Unsupported operand for code '%c'", code);
22981 else
22982 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22983 }
22984 return;
22985
22986 case 'L':
22987 /* The low 16 bits of an immediate constant. */
22988 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22989 return;
22990
22991 case 'i':
22992 fprintf (stream, "%s", arithmetic_instr (x, 1));
22993 return;
22994
22995 case 'I':
22996 fprintf (stream, "%s", arithmetic_instr (x, 0));
22997 return;
22998
22999 case 'S':
23000 {
23001 HOST_WIDE_INT val;
23002 const char *shift;
23003
23004 shift = shift_op (x, &val);
23005
23006 if (shift)
23007 {
23008 fprintf (stream, ", %s ", shift);
23009 if (val == -1)
23010 arm_print_operand (stream, XEXP (x, 1), 0);
23011 else
23012 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23013 }
23014 }
23015 return;
23016
23017 /* An explanation of the 'Q', 'R' and 'H' register operands:
23018
23019 In a pair of registers containing a DI or DF value the 'Q'
23020 operand returns the register number of the register containing
23021 the least significant part of the value. The 'R' operand returns
23022 the register number of the register containing the most
23023 significant part of the value.
23024
23025 The 'H' operand returns the higher of the two register numbers.
23026 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
23027 same as the 'Q' operand, since the most significant part of the
23028 value is held in the lower number register. The reverse is true
23029 on systems where WORDS_BIG_ENDIAN is false.
23030
23031 The purpose of these operands is to distinguish between cases
23032 where the endian-ness of the values is important (for example
23033 when they are added together), and cases where the endian-ness
23034 is irrelevant, but the order of register operations is important.
23035 For example when loading a value from memory into a register
23036 pair, the endian-ness does not matter. Provided that the value
23037 from the lower memory address is put into the lower numbered
23038 register, and the value from the higher address is put into the
23039 higher numbered register, the load will work regardless of whether
23040 the value being loaded is big-wordian or little-wordian. The
23041 order of the two register loads can matter however, if the address
23042 of the memory location is actually held in one of the registers
23043 being overwritten by the load.
23044
23045 The 'Q' and 'R' constraints are also available for 64-bit
23046 constants. */
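/* A small, purely illustrative example: on a little-endian target a DImode
   value held in {r2, r3} prints r2 for '%Q' and r3 for '%R', while the
   64-bit constant 0x100000002 prints #2 for '%Q' and #1 for '%R'.  */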
23047 case 'Q':
23048 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23049 {
23050 rtx part = gen_lowpart (SImode, x);
23051 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23052 return;
23053 }
23054
23055 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23056 {
23057 output_operand_lossage ("invalid operand for code '%c'", code);
23058 return;
23059 }
23060
23061 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
23062 return;
23063
23064 case 'R':
23065 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23066 {
23067 machine_mode mode = GET_MODE (x);
23068 rtx part;
23069
23070 if (mode == VOIDmode)
23071 mode = DImode;
23072 part = gen_highpart_mode (SImode, mode, x);
23073 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23074 return;
23075 }
23076
23077 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23078 {
23079 output_operand_lossage ("invalid operand for code '%c'", code);
23080 return;
23081 }
23082
23083 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
23084 return;
23085
23086 case 'H':
23087 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23088 {
23089 output_operand_lossage ("invalid operand for code '%c'", code);
23090 return;
23091 }
23092
23093 asm_fprintf (stream, "%r", REGNO (x) + 1);
23094 return;
23095
23096 case 'J':
23097 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23098 {
23099 output_operand_lossage ("invalid operand for code '%c'", code);
23100 return;
23101 }
23102
23103 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
23104 return;
23105
23106 case 'K':
23107 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23108 {
23109 output_operand_lossage ("invalid operand for code '%c'", code);
23110 return;
23111 }
23112
23113 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
23114 return;
23115
23116 case 'm':
23117 asm_fprintf (stream, "%r",
23118 REG_P (XEXP (x, 0))
23119 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
23120 return;
23121
23122 case 'M':
23123 asm_fprintf (stream, "{%r-%r}",
23124 REGNO (x),
23125 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
23126 return;
23127
23128 /* Like 'M', but writing doubleword vector registers, for use by Neon
23129 insns. */
23130 case 'h':
23131 {
23132 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
23133 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
23134 if (numregs == 1)
23135 asm_fprintf (stream, "{d%d}", regno);
23136 else
23137 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
23138 }
23139 return;
23140
23141 case 'd':
23142 /* CONST_TRUE_RTX means always -- that's the default. */
23143 if (x == const_true_rtx)
23144 return;
23145
23146 if (!COMPARISON_P (x))
23147 {
23148 output_operand_lossage ("invalid operand for code '%c'", code);
23149 return;
23150 }
23151
23152 fputs (arm_condition_codes[get_arm_condition_code (x)],
23153 stream);
23154 return;
23155
23156 case 'D':
23157 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
23158 want to do that. */
23159 if (x == const_true_rtx)
23160 {
23161 output_operand_lossage ("instruction never executed");
23162 return;
23163 }
23164 if (!COMPARISON_P (x))
23165 {
23166 output_operand_lossage ("invalid operand for code '%c'", code);
23167 return;
23168 }
23169
23170 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
23171 (get_arm_condition_code (x))],
23172 stream);
23173 return;
23174
23175 case 's':
23176 case 'V':
23177 case 'W':
23178 case 'X':
23179 case 'Y':
23180 case 'Z':
23181 /* Former Maverick support, removed after GCC-4.7. */
23182 output_operand_lossage ("obsolete Maverick format code '%c'", code);
23183 return;
23184
23185 case 'U':
23186 if (!REG_P (x)
23187 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
23188 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
23189 /* Bad value for wCG register number. */
23190 {
23191 output_operand_lossage ("invalid operand for code '%c'", code);
23192 return;
23193 }
23194
23195 else
23196 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
23197 return;
23198
23199 /* Print an iWMMXt control register name. */
23200 case 'w':
23201 if (!CONST_INT_P (x)
23202 || INTVAL (x) < 0
23203 || INTVAL (x) >= 16)
23204 /* Bad value for wC register number. */
23205 {
23206 output_operand_lossage ("invalid operand for code '%c'", code);
23207 return;
23208 }
23209
23210 else
23211 {
23212 static const char * wc_reg_names [16] =
23213 {
23214 "wCID", "wCon", "wCSSF", "wCASF",
23215 "wC4", "wC5", "wC6", "wC7",
23216 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
23217 "wC12", "wC13", "wC14", "wC15"
23218 };
23219
23220 fputs (wc_reg_names [INTVAL (x)], stream);
23221 }
23222 return;
23223
23224 /* Print the high single-precision register of a VFP double-precision
23225 register. */
23226 case 'p':
23227 {
23228 machine_mode mode = GET_MODE (x);
23229 int regno;
23230
23231 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
23232 {
23233 output_operand_lossage ("invalid operand for code '%c'", code);
23234 return;
23235 }
23236
23237 regno = REGNO (x);
23238 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
23239 {
23240 output_operand_lossage ("invalid operand for code '%c'", code);
23241 return;
23242 }
23243
23244 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
23245 }
23246 return;
23247
23248 /* Print a VFP/Neon double precision or quad precision register name. */
23249 case 'P':
23250 case 'q':
23251 {
23252 machine_mode mode = GET_MODE (x);
23253 int is_quad = (code == 'q');
23254 int regno;
23255
23256 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
23257 {
23258 output_operand_lossage ("invalid operand for code '%c'", code);
23259 return;
23260 }
23261
23262 if (!REG_P (x)
23263 || !IS_VFP_REGNUM (REGNO (x)))
23264 {
23265 output_operand_lossage ("invalid operand for code '%c'", code);
23266 return;
23267 }
23268
23269 regno = REGNO (x);
23270 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
23271 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
23272 {
23273 output_operand_lossage ("invalid operand for code '%c'", code);
23274 return;
23275 }
23276
23277 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
23278 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
23279 }
23280 return;
23281
23282 /* These two codes print the low/high doubleword register of a Neon quad
23283 register, respectively. For pair-structure types, can also print
23284 low/high quadword registers. */
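    /* For example (illustrative operand): for a quad vector held in q3,
       which is composed of d6 and d7, 'e' prints "d6" and 'f' prints "d7".  */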
23285 case 'e':
23286 case 'f':
23287 {
23288 machine_mode mode = GET_MODE (x);
23289 int regno;
23290
23291 if ((GET_MODE_SIZE (mode) != 16
23292 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
23293 {
23294 output_operand_lossage ("invalid operand for code '%c'", code);
23295 return;
23296 }
23297
23298 regno = REGNO (x);
23299 if (!NEON_REGNO_OK_FOR_QUAD (regno))
23300 {
23301 output_operand_lossage ("invalid operand for code '%c'", code);
23302 return;
23303 }
23304
23305 if (GET_MODE_SIZE (mode) == 16)
23306 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
23307 + (code == 'f' ? 1 : 0));
23308 else
23309 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
23310 + (code == 'f' ? 1 : 0));
23311 }
23312 return;
23313
23314 /* Print a VFPv3 floating-point constant, represented as an integer
23315 index. */
23316 case 'G':
23317 {
23318 int index = vfp3_const_double_index (x);
23319 gcc_assert (index != -1);
23320 fprintf (stream, "%d", index);
23321 }
23322 return;
23323
23324 /* Print bits representing opcode features for Neon.
23325
23326 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
23327 and polynomials as unsigned.
23328
23329 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
23330
23331 Bit 2 is 1 for rounding functions, 0 otherwise. */
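    /* For illustration (hypothetical operand value): an INTVAL of 5 (binary
       101) describes a signed, non-float, rounding operation, so 'T' prints
       's', 'F' prints 'i', 't' prints 's' and 'O' prints "r".  */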
23332
23333 /* Identify the type as 's', 'u', 'p' or 'f'. */
23334 case 'T':
23335 {
23336 HOST_WIDE_INT bits = INTVAL (x);
23337 fputc ("uspf"[bits & 3], stream);
23338 }
23339 return;
23340
23341 /* Likewise, but signed and unsigned integers are both 'i'. */
23342 case 'F':
23343 {
23344 HOST_WIDE_INT bits = INTVAL (x);
23345 fputc ("iipf"[bits & 3], stream);
23346 }
23347 return;
23348
23349 /* As for 'T', but emit 'u' instead of 'p'. */
23350 case 't':
23351 {
23352 HOST_WIDE_INT bits = INTVAL (x);
23353 fputc ("usuf"[bits & 3], stream);
23354 }
23355 return;
23356
23357 /* Bit 2: rounding (vs none). */
23358 case 'O':
23359 {
23360 HOST_WIDE_INT bits = INTVAL (x);
23361 fputs ((bits & 4) != 0 ? "r" : "", stream);
23362 }
23363 return;
23364
23365 /* Memory operand for vld1/vst1 instruction. */
23366 case 'A':
23367 {
23368 rtx addr;
23369 bool postinc = FALSE;
23370 rtx postinc_reg = NULL;
23371 unsigned align, memsize, align_bits;
23372
23373 gcc_assert (MEM_P (x));
23374 addr = XEXP (x, 0);
23375 if (GET_CODE (addr) == POST_INC)
23376 {
23377 postinc = 1;
23378 addr = XEXP (addr, 0);
23379 }
23380 if (GET_CODE (addr) == POST_MODIFY)
23381 {
23382 	    postinc_reg = XEXP (XEXP (addr, 1), 1);
23383 addr = XEXP (addr, 0);
23384 }
23385 asm_fprintf (stream, "[%r", REGNO (addr));
23386
23387 /* We know the alignment of this access, so we can emit a hint in the
23388 instruction (for some alignments) as an aid to the memory subsystem
23389 of the target. */
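      /* For instance (illustrative values): a 16-byte access through r0 that
	 is known to be 16-byte aligned is printed as "[r0:128]"; if no
	 suitable alignment is known, plain "[r0]" is emitted.  */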
23390 align = MEM_ALIGN (x) >> 3;
23391 memsize = MEM_SIZE (x);
23392
23393 /* Only certain alignment specifiers are supported by the hardware. */
23394 if (memsize == 32 && (align % 32) == 0)
23395 align_bits = 256;
23396 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
23397 align_bits = 128;
23398 else if (memsize >= 8 && (align % 8) == 0)
23399 align_bits = 64;
23400 else
23401 align_bits = 0;
23402
23403 if (align_bits != 0)
23404 asm_fprintf (stream, ":%d", align_bits);
23405
23406 asm_fprintf (stream, "]");
23407
23408 if (postinc)
23409 fputs("!", stream);
23410 if (postinc_reg)
23411 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
23412 }
23413 return;
23414
23415 case 'C':
23416 {
23417 rtx addr;
23418
23419 gcc_assert (MEM_P (x));
23420 addr = XEXP (x, 0);
23421 gcc_assert (REG_P (addr));
23422 asm_fprintf (stream, "[%r]", REGNO (addr));
23423 }
23424 return;
23425
23426 /* Translate an S register number into a D register number and element index. */
23427 case 'y':
23428 {
23429 machine_mode mode = GET_MODE (x);
23430 int regno;
23431
23432 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
23433 {
23434 output_operand_lossage ("invalid operand for code '%c'", code);
23435 return;
23436 }
23437
23438 regno = REGNO (x);
23439 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
23440 {
23441 output_operand_lossage ("invalid operand for code '%c'", code);
23442 return;
23443 }
23444
23445 regno = regno - FIRST_VFP_REGNUM;
23446 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
23447 }
23448 return;
23449
23450 case 'v':
23451 gcc_assert (CONST_DOUBLE_P (x));
23452 int result;
23453 result = vfp3_const_double_for_fract_bits (x);
23454 if (result == 0)
23455 result = vfp3_const_double_for_bits (x);
23456 fprintf (stream, "#%d", result);
23457 return;
23458
23459 /* Register specifier for vld1.16/vst1.16. Translate the S register
23460 number into a D register number and element index. */
23461 case 'z':
23462 {
23463 machine_mode mode = GET_MODE (x);
23464 int regno;
23465
23466 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
23467 {
23468 output_operand_lossage ("invalid operand for code '%c'", code);
23469 return;
23470 }
23471
23472 regno = REGNO (x);
23473 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
23474 {
23475 output_operand_lossage ("invalid operand for code '%c'", code);
23476 return;
23477 }
23478
23479 regno = regno - FIRST_VFP_REGNUM;
23480 	fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
23481 }
23482 return;
23483
23484 default:
23485 if (x == 0)
23486 {
23487 output_operand_lossage ("missing operand");
23488 return;
23489 }
23490
23491 switch (GET_CODE (x))
23492 {
23493 case REG:
23494 asm_fprintf (stream, "%r", REGNO (x));
23495 break;
23496
23497 case MEM:
23498 output_address (GET_MODE (x), XEXP (x, 0));
23499 break;
23500
23501 case CONST_DOUBLE:
23502 {
23503 char fpstr[20];
23504 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
23505 sizeof (fpstr), 0, 1);
23506 fprintf (stream, "#%s", fpstr);
23507 }
23508 break;
23509
23510 default:
23511 gcc_assert (GET_CODE (x) != NEG);
23512 fputc ('#', stream);
23513 if (GET_CODE (x) == HIGH)
23514 {
23515 fputs (":lower16:", stream);
23516 x = XEXP (x, 0);
23517 }
23518
23519 output_addr_const (stream, x);
23520 break;
23521 }
23522 }
23523 }
23524 \f
23525 /* Target hook for printing a memory address. */
23526 static void
23527 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
23528 {
23529 if (TARGET_32BIT)
23530 {
23531 int is_minus = GET_CODE (x) == MINUS;
23532
23533 if (REG_P (x))
23534 asm_fprintf (stream, "[%r]", REGNO (x));
23535 else if (GET_CODE (x) == PLUS || is_minus)
23536 {
23537 rtx base = XEXP (x, 0);
23538 rtx index = XEXP (x, 1);
23539 HOST_WIDE_INT offset = 0;
23540 if (!REG_P (base)
23541 || (REG_P (index) && REGNO (index) == SP_REGNUM))
23542 {
23543 /* Ensure that BASE is a register. */
23544 /* (one of them must be). */
23545 	  /* Also ensure the SP is not used as an index register.  */
23546 std::swap (base, index);
23547 }
23548 switch (GET_CODE (index))
23549 {
23550 case CONST_INT:
23551 offset = INTVAL (index);
23552 if (is_minus)
23553 offset = -offset;
23554 asm_fprintf (stream, "[%r, #%wd]",
23555 REGNO (base), offset);
23556 break;
23557
23558 case REG:
23559 asm_fprintf (stream, "[%r, %s%r]",
23560 REGNO (base), is_minus ? "-" : "",
23561 REGNO (index));
23562 break;
23563
23564 case MULT:
23565 case ASHIFTRT:
23566 case LSHIFTRT:
23567 case ASHIFT:
23568 case ROTATERT:
23569 {
23570 asm_fprintf (stream, "[%r, %s%r",
23571 REGNO (base), is_minus ? "-" : "",
23572 REGNO (XEXP (index, 0)));
23573 arm_print_operand (stream, index, 'S');
23574 fputs ("]", stream);
23575 break;
23576 }
23577
23578 default:
23579 gcc_unreachable ();
23580 }
23581 }
23582 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
23583 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
23584 {
23585 gcc_assert (REG_P (XEXP (x, 0)));
23586
23587 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
23588 asm_fprintf (stream, "[%r, #%s%d]!",
23589 REGNO (XEXP (x, 0)),
23590 GET_CODE (x) == PRE_DEC ? "-" : "",
23591 GET_MODE_SIZE (mode));
23592 else
23593 asm_fprintf (stream, "[%r], #%s%d",
23594 REGNO (XEXP (x, 0)),
23595 GET_CODE (x) == POST_DEC ? "-" : "",
23596 GET_MODE_SIZE (mode));
23597 }
23598 else if (GET_CODE (x) == PRE_MODIFY)
23599 {
23600 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
23601 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
23602 asm_fprintf (stream, "#%wd]!",
23603 INTVAL (XEXP (XEXP (x, 1), 1)));
23604 else
23605 asm_fprintf (stream, "%r]!",
23606 REGNO (XEXP (XEXP (x, 1), 1)));
23607 }
23608 else if (GET_CODE (x) == POST_MODIFY)
23609 {
23610 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
23611 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
23612 asm_fprintf (stream, "#%wd",
23613 INTVAL (XEXP (XEXP (x, 1), 1)));
23614 else
23615 asm_fprintf (stream, "%r",
23616 REGNO (XEXP (XEXP (x, 1), 1)));
23617 }
23618 else output_addr_const (stream, x);
23619 }
23620 else
23621 {
23622 if (REG_P (x))
23623 asm_fprintf (stream, "[%r]", REGNO (x));
23624 else if (GET_CODE (x) == POST_INC)
23625 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
23626 else if (GET_CODE (x) == PLUS)
23627 {
23628 gcc_assert (REG_P (XEXP (x, 0)));
23629 if (CONST_INT_P (XEXP (x, 1)))
23630 asm_fprintf (stream, "[%r, #%wd]",
23631 REGNO (XEXP (x, 0)),
23632 INTVAL (XEXP (x, 1)));
23633 else
23634 asm_fprintf (stream, "[%r, %r]",
23635 REGNO (XEXP (x, 0)),
23636 REGNO (XEXP (x, 1)));
23637 }
23638 else
23639 output_addr_const (stream, x);
23640 }
23641 }
23642 \f
23643 /* Target hook for indicating whether a punctuation character for
23644 TARGET_PRINT_OPERAND is valid. */
23645 static bool
23646 arm_print_operand_punct_valid_p (unsigned char code)
23647 {
23648 return (code == '@' || code == '|' || code == '.'
23649 || code == '(' || code == ')' || code == '#'
23650 || (TARGET_32BIT && (code == '?'))
23651 || (TARGET_THUMB2 && (code == '!'))
23652 || (TARGET_THUMB && (code == '_')));
23653 }
23654 \f
23655 /* Target hook for assembling integer objects. The ARM version needs to
23656 handle word-sized values specially. */
23657 static bool
23658 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
23659 {
23660 machine_mode mode;
23661
23662 if (size == UNITS_PER_WORD && aligned_p)
23663 {
23664 fputs ("\t.word\t", asm_out_file);
23665 output_addr_const (asm_out_file, x);
23666
23667 /* Mark symbols as position independent. We only do this in the
23668 .text segment, not in the .data segment. */
23669 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
23670 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
23671 {
23672 /* See legitimize_pic_address for an explanation of the
23673 TARGET_VXWORKS_RTP check. */
23674 /* References to weak symbols cannot be resolved locally:
23675 they may be overridden by a non-weak definition at link
23676 time. */
23677 if (!arm_pic_data_is_text_relative
23678 || (GET_CODE (x) == SYMBOL_REF
23679 && (!SYMBOL_REF_LOCAL_P (x)
23680 || (SYMBOL_REF_DECL (x)
23681 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
23682 || (SYMBOL_REF_FUNCTION_P (x)
23683 && !arm_fdpic_local_funcdesc_p (x)))))
23684 {
23685 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
23686 fputs ("(GOTFUNCDESC)", asm_out_file);
23687 else
23688 fputs ("(GOT)", asm_out_file);
23689 }
23690 else
23691 {
23692 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
23693 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
23694 else
23695 {
23696 bool is_readonly;
23697
23698 if (!TARGET_FDPIC
23699 || arm_is_segment_info_known (x, &is_readonly))
23700 fputs ("(GOTOFF)", asm_out_file);
23701 else
23702 fputs ("(GOT)", asm_out_file);
23703 }
23704 }
23705 }
23706
23707 /* For FDPIC we also have to mark symbol for .data section. */
23708 if (TARGET_FDPIC
23709 && !making_const_table
23710 && SYMBOL_REF_P (x)
23711 && SYMBOL_REF_FUNCTION_P (x))
23712 fputs ("(FUNCDESC)", asm_out_file);
23713
23714 fputc ('\n', asm_out_file);
23715 return true;
23716 }
23717
23718 mode = GET_MODE (x);
23719
23720 if (arm_vector_mode_supported_p (mode))
23721 {
23722 int i, units;
23723
23724 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23725
23726 units = CONST_VECTOR_NUNITS (x);
23727 size = GET_MODE_UNIT_SIZE (mode);
23728
23729 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23730 for (i = 0; i < units; i++)
23731 {
23732 rtx elt = CONST_VECTOR_ELT (x, i);
23733 assemble_integer
23734 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
23735 }
23736 else
23737 for (i = 0; i < units; i++)
23738 {
23739 rtx elt = CONST_VECTOR_ELT (x, i);
23740 assemble_real
23741 (*CONST_DOUBLE_REAL_VALUE (elt),
23742 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
23743 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
23744 }
23745
23746 return true;
23747 }
23748
23749 return default_assemble_integer (x, size, aligned_p);
23750 }
23751
23752 static void
23753 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
23754 {
23755 section *s;
23756
23757 if (!TARGET_AAPCS_BASED)
23758 {
23759 (is_ctor ?
23760 default_named_section_asm_out_constructor
23761 : default_named_section_asm_out_destructor) (symbol, priority);
23762 return;
23763 }
23764
23765 /* Put these in the .init_array section, using a special relocation. */
23766 if (priority != DEFAULT_INIT_PRIORITY)
23767 {
23768 char buf[18];
23769 sprintf (buf, "%s.%.5u",
23770 is_ctor ? ".init_array" : ".fini_array",
23771 priority);
23772 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23773 }
23774 else if (is_ctor)
23775 s = ctors_section;
23776 else
23777 s = dtors_section;
23778
23779 switch_to_section (s);
23780 assemble_align (POINTER_SIZE);
23781 fputs ("\t.word\t", asm_out_file);
23782 output_addr_const (asm_out_file, symbol);
23783 fputs ("(target1)\n", asm_out_file);
23784 }
23785
23786 /* Add a function to the list of static constructors. */
23787
23788 static void
23789 arm_elf_asm_constructor (rtx symbol, int priority)
23790 {
23791 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
23792 }
23793
23794 /* Add a function to the list of static destructors. */
23795
23796 static void
23797 arm_elf_asm_destructor (rtx symbol, int priority)
23798 {
23799 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23800 }
23801 \f
23802 /* A finite state machine takes care of noticing whether or not instructions
23803 can be conditionally executed, and thus decrease execution time and code
23804 size by deleting branch instructions. The fsm is controlled by
23805 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23806
23807 /* The states of the fsm controlling condition codes are:
23808 0: normal, do nothing special
23809 1: make ASM_OUTPUT_OPCODE not output this instruction
23810 2: make ASM_OUTPUT_OPCODE not output this instruction
23811 3: make instructions conditional
23812 4: make instructions conditional
23813
23814 State transitions (state->state by whom under condition):
23815 0 -> 1 final_prescan_insn if the `target' is a label
23816 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23817 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23818 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23819 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23820 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23821 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23822 (the target insn is arm_target_insn).
23823
23824 If the jump clobbers the conditions then we use states 2 and 4.
23825
23826 A similar thing can be done with conditional return insns.
23827
23828 XXX In case the `target' is an unconditional branch, this conditionalising
23829 of the instructions always reduces code size, but not always execution
23830 time. But then, I want to reduce the code size to somewhere near what
23831 /bin/cc produces. */
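/* For example (illustrative ARM code), a conditional branch around a single
   instruction:

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be replaced by a conditionally executed instruction:

	cmp	r0, #0
	addne	r1, r1, #1  */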
23832
23833 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23834 instructions. When a COND_EXEC instruction is seen the subsequent
23835 instructions are scanned so that multiple conditional instructions can be
23836 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23837 specify the length and true/false mask for the IT block. These will be
23838 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
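/* For example (illustrative): three consecutive COND_EXEC insns, the first
   two predicated on EQ and the third on NE, give arm_condexec_masklen == 3
   and arm_condexec_mask == 0b011, and thumb2_asm_output_opcode then emits
   "itte eq" before the first insn of the group.  */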
23839
23840 /* Returns the index of the ARM condition code string in
23841 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23842 COMPARISON should be an rtx like `(eq (...) (...))'. */
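/* For instance (illustrative rtx): (eq (reg:CC CC_REGNUM) (const_int 0))
   is handled by the E_CCmode case below and yields ARM_EQ.  */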
23843
23844 enum arm_cond_code
23845 maybe_get_arm_condition_code (rtx comparison)
23846 {
23847 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23848 enum arm_cond_code code;
23849 enum rtx_code comp_code = GET_CODE (comparison);
23850
23851 if (GET_MODE_CLASS (mode) != MODE_CC)
23852 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23853 XEXP (comparison, 1));
23854
23855 switch (mode)
23856 {
23857 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23858 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23859 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23860 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23861 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23862 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23863 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23864 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23865 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23866 case E_CC_DLTUmode: code = ARM_CC;
23867
23868 dominance:
23869 if (comp_code == EQ)
23870 return ARM_INVERSE_CONDITION_CODE (code);
23871 if (comp_code == NE)
23872 return code;
23873 return ARM_NV;
23874
23875 case E_CC_NOOVmode:
23876 switch (comp_code)
23877 {
23878 case NE: return ARM_NE;
23879 case EQ: return ARM_EQ;
23880 case GE: return ARM_PL;
23881 case LT: return ARM_MI;
23882 default: return ARM_NV;
23883 }
23884
23885 case E_CC_Zmode:
23886 switch (comp_code)
23887 {
23888 case NE: return ARM_NE;
23889 case EQ: return ARM_EQ;
23890 default: return ARM_NV;
23891 }
23892
23893 case E_CC_Nmode:
23894 switch (comp_code)
23895 {
23896 case NE: return ARM_MI;
23897 case EQ: return ARM_PL;
23898 default: return ARM_NV;
23899 }
23900
23901 case E_CCFPEmode:
23902 case E_CCFPmode:
23903 /* We can handle all cases except UNEQ and LTGT. */
23904 switch (comp_code)
23905 {
23906 case GE: return ARM_GE;
23907 case GT: return ARM_GT;
23908 case LE: return ARM_LS;
23909 case LT: return ARM_MI;
23910 case NE: return ARM_NE;
23911 case EQ: return ARM_EQ;
23912 case ORDERED: return ARM_VC;
23913 case UNORDERED: return ARM_VS;
23914 case UNLT: return ARM_LT;
23915 case UNLE: return ARM_LE;
23916 case UNGT: return ARM_HI;
23917 case UNGE: return ARM_PL;
23918 /* UNEQ and LTGT do not have a representation. */
23919 case UNEQ: /* Fall through. */
23920 case LTGT: /* Fall through. */
23921 default: return ARM_NV;
23922 }
23923
23924 case E_CC_SWPmode:
23925 switch (comp_code)
23926 {
23927 case NE: return ARM_NE;
23928 case EQ: return ARM_EQ;
23929 case GE: return ARM_LE;
23930 case GT: return ARM_LT;
23931 case LE: return ARM_GE;
23932 case LT: return ARM_GT;
23933 case GEU: return ARM_LS;
23934 case GTU: return ARM_CC;
23935 case LEU: return ARM_CS;
23936 case LTU: return ARM_HI;
23937 default: return ARM_NV;
23938 }
23939
23940 case E_CC_Cmode:
23941 switch (comp_code)
23942 {
23943 case LTU: return ARM_CS;
23944 case GEU: return ARM_CC;
23945 default: return ARM_NV;
23946 }
23947
23948 case E_CC_NVmode:
23949 switch (comp_code)
23950 {
23951 case GE: return ARM_GE;
23952 case LT: return ARM_LT;
23953 default: return ARM_NV;
23954 }
23955
23956 case E_CC_Bmode:
23957 switch (comp_code)
23958 {
23959 case GEU: return ARM_CS;
23960 case LTU: return ARM_CC;
23961 default: return ARM_NV;
23962 }
23963
23964 case E_CC_Vmode:
23965 switch (comp_code)
23966 {
23967 case NE: return ARM_VS;
23968 case EQ: return ARM_VC;
23969 default: return ARM_NV;
23970 }
23971
23972 case E_CC_ADCmode:
23973 switch (comp_code)
23974 {
23975 case GEU: return ARM_CS;
23976 case LTU: return ARM_CC;
23977 default: return ARM_NV;
23978 }
23979
23980 case E_CCmode:
23981 case E_CC_RSBmode:
23982 switch (comp_code)
23983 {
23984 case NE: return ARM_NE;
23985 case EQ: return ARM_EQ;
23986 case GE: return ARM_GE;
23987 case GT: return ARM_GT;
23988 case LE: return ARM_LE;
23989 case LT: return ARM_LT;
23990 case GEU: return ARM_CS;
23991 case GTU: return ARM_HI;
23992 case LEU: return ARM_LS;
23993 case LTU: return ARM_CC;
23994 default: return ARM_NV;
23995 }
23996
23997 default: gcc_unreachable ();
23998 }
23999 }
24000
24001 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
24002 static enum arm_cond_code
24003 get_arm_condition_code (rtx comparison)
24004 {
24005 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
24006 gcc_assert (code != ARM_NV);
24007 return code;
24008 }
24009
24010 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
24011 code registers when not targeting Thumb1. The VFP condition register
24012 only exists when generating hard-float code. */
24013 static bool
24014 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
24015 {
24016 if (!TARGET_32BIT)
24017 return false;
24018
24019 *p1 = CC_REGNUM;
24020 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
24021 return true;
24022 }
24023
24024 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
24025 instructions. */
24026 void
24027 thumb2_final_prescan_insn (rtx_insn *insn)
24028 {
24029 rtx_insn *first_insn = insn;
24030 rtx body = PATTERN (insn);
24031 rtx predicate;
24032 enum arm_cond_code code;
24033 int n;
24034 int mask;
24035 int max;
24036
24037 /* max_insns_skipped in the tune was already taken into account in the
24038 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
24039 just emit the IT blocks as best we can. It does not make sense to split
24040 the IT blocks. */
24041 max = MAX_INSN_PER_IT_BLOCK;
24042
24043 /* Remove the previous insn from the count of insns to be output. */
24044 if (arm_condexec_count)
24045 arm_condexec_count--;
24046
24047 /* Nothing to do if we are already inside a conditional block. */
24048 if (arm_condexec_count)
24049 return;
24050
24051 if (GET_CODE (body) != COND_EXEC)
24052 return;
24053
24054 /* Conditional jumps are implemented directly. */
24055 if (JUMP_P (insn))
24056 return;
24057
24058 predicate = COND_EXEC_TEST (body);
24059 arm_current_cc = get_arm_condition_code (predicate);
24060
24061 n = get_attr_ce_count (insn);
24062 arm_condexec_count = 1;
24063 arm_condexec_mask = (1 << n) - 1;
24064 arm_condexec_masklen = n;
24065 /* See if subsequent instructions can be combined into the same block. */
24066 for (;;)
24067 {
24068 insn = next_nonnote_insn (insn);
24069
24070 /* Jumping into the middle of an IT block is illegal, so a label or
24071 barrier terminates the block. */
24072 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
24073 break;
24074
24075 body = PATTERN (insn);
24076 /* USE and CLOBBER aren't really insns, so just skip them. */
24077 if (GET_CODE (body) == USE
24078 || GET_CODE (body) == CLOBBER)
24079 continue;
24080
24081 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
24082 if (GET_CODE (body) != COND_EXEC)
24083 break;
24084 /* Maximum number of conditionally executed instructions in a block. */
24085 n = get_attr_ce_count (insn);
24086 if (arm_condexec_masklen + n > max)
24087 break;
24088
24089 predicate = COND_EXEC_TEST (body);
24090 code = get_arm_condition_code (predicate);
24091 mask = (1 << n) - 1;
24092 if (arm_current_cc == code)
24093 arm_condexec_mask |= (mask << arm_condexec_masklen);
24094 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
24095 break;
24096
24097 arm_condexec_count++;
24098 arm_condexec_masklen += n;
24099
24100 /* A jump must be the last instruction in a conditional block. */
24101 if (JUMP_P (insn))
24102 break;
24103 }
24104 /* Restore recog_data (getting the attributes of other insns can
24105 destroy this array, but final.c assumes that it remains intact
24106 across this call). */
24107 extract_constrain_insn_cached (first_insn);
24108 }
24109
24110 void
24111 arm_final_prescan_insn (rtx_insn *insn)
24112 {
24113 /* BODY will hold the body of INSN. */
24114 rtx body = PATTERN (insn);
24115
24116 /* This will be 1 if trying to repeat the trick, and things need to be
24117 reversed if it appears to fail. */
24118 int reverse = 0;
24119
24120 /* If we start with a return insn, we only succeed if we find another one. */
24121 int seeking_return = 0;
24122 enum rtx_code return_code = UNKNOWN;
24123
24124 /* START_INSN will hold the insn from where we start looking. This is the
24125 first insn after the following code_label if REVERSE is true. */
24126 rtx_insn *start_insn = insn;
24127
24128 /* If in state 4, check if the target branch is reached, in order to
24129 change back to state 0. */
24130 if (arm_ccfsm_state == 4)
24131 {
24132 if (insn == arm_target_insn)
24133 {
24134 arm_target_insn = NULL;
24135 arm_ccfsm_state = 0;
24136 }
24137 return;
24138 }
24139
24140 /* If in state 3, it is possible to repeat the trick, if this insn is an
24141 unconditional branch to a label, and immediately following this branch
24142 is the previous target label which is only used once, and the label this
24143 branch jumps to is not too far off. */
24144 if (arm_ccfsm_state == 3)
24145 {
24146 if (simplejump_p (insn))
24147 {
24148 start_insn = next_nonnote_insn (start_insn);
24149 if (BARRIER_P (start_insn))
24150 {
24151 /* XXX Isn't this always a barrier? */
24152 start_insn = next_nonnote_insn (start_insn);
24153 }
24154 if (LABEL_P (start_insn)
24155 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24156 && LABEL_NUSES (start_insn) == 1)
24157 reverse = TRUE;
24158 else
24159 return;
24160 }
24161 else if (ANY_RETURN_P (body))
24162 {
24163 start_insn = next_nonnote_insn (start_insn);
24164 if (BARRIER_P (start_insn))
24165 start_insn = next_nonnote_insn (start_insn);
24166 if (LABEL_P (start_insn)
24167 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24168 && LABEL_NUSES (start_insn) == 1)
24169 {
24170 reverse = TRUE;
24171 seeking_return = 1;
24172 return_code = GET_CODE (body);
24173 }
24174 else
24175 return;
24176 }
24177 else
24178 return;
24179 }
24180
24181 gcc_assert (!arm_ccfsm_state || reverse);
24182 if (!JUMP_P (insn))
24183 return;
24184
24185 /* This jump might be paralleled with a clobber of the condition codes;
24186 the jump should always come first. */
24187 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
24188 body = XVECEXP (body, 0, 0);
24189
24190 if (reverse
24191 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
24192 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
24193 {
24194 int insns_skipped;
24195 int fail = FALSE, succeed = FALSE;
24196 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
24197 int then_not_else = TRUE;
24198 rtx_insn *this_insn = start_insn;
24199 rtx label = 0;
24200
24201 /* Register the insn jumped to. */
24202 if (reverse)
24203 {
24204 if (!seeking_return)
24205 label = XEXP (SET_SRC (body), 0);
24206 }
24207 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
24208 label = XEXP (XEXP (SET_SRC (body), 1), 0);
24209 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
24210 {
24211 label = XEXP (XEXP (SET_SRC (body), 2), 0);
24212 then_not_else = FALSE;
24213 }
24214 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
24215 {
24216 seeking_return = 1;
24217 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
24218 }
24219 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
24220 {
24221 seeking_return = 1;
24222 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
24223 then_not_else = FALSE;
24224 }
24225 else
24226 gcc_unreachable ();
24227
24228 /* See how many insns this branch skips, and what kind of insns. If all
24229 insns are okay, and the label or unconditional branch to the same
24230 label is not too far away, succeed. */
24231 for (insns_skipped = 0;
24232 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
24233 {
24234 rtx scanbody;
24235
24236 this_insn = next_nonnote_insn (this_insn);
24237 if (!this_insn)
24238 break;
24239
24240 switch (GET_CODE (this_insn))
24241 {
24242 case CODE_LABEL:
24243 /* Succeed if it is the target label, otherwise fail since
24244 control falls in from somewhere else. */
24245 if (this_insn == label)
24246 {
24247 arm_ccfsm_state = 1;
24248 succeed = TRUE;
24249 }
24250 else
24251 fail = TRUE;
24252 break;
24253
24254 case BARRIER:
24255 /* Succeed if the following insn is the target label.
24256 Otherwise fail.
24257 If return insns are used then the last insn in a function
24258 will be a barrier. */
24259 this_insn = next_nonnote_insn (this_insn);
24260 if (this_insn && this_insn == label)
24261 {
24262 arm_ccfsm_state = 1;
24263 succeed = TRUE;
24264 }
24265 else
24266 fail = TRUE;
24267 break;
24268
24269 case CALL_INSN:
24270 /* The AAPCS says that conditional calls should not be
24271 used since they make interworking inefficient (the
24272 linker can't transform BL<cond> into BLX). That's
24273 only a problem if the machine has BLX. */
24274 if (arm_arch5t)
24275 {
24276 fail = TRUE;
24277 break;
24278 }
24279
24280 /* Succeed if the following insn is the target label, or
24281 if the following two insns are a barrier and the
24282 target label. */
24283 this_insn = next_nonnote_insn (this_insn);
24284 if (this_insn && BARRIER_P (this_insn))
24285 this_insn = next_nonnote_insn (this_insn);
24286
24287 if (this_insn && this_insn == label
24288 && insns_skipped < max_insns_skipped)
24289 {
24290 arm_ccfsm_state = 1;
24291 succeed = TRUE;
24292 }
24293 else
24294 fail = TRUE;
24295 break;
24296
24297 case JUMP_INSN:
24298 /* If this is an unconditional branch to the same label, succeed.
24299 If it is to another label, do nothing. If it is conditional,
24300 fail. */
24301 /* XXX Probably, the tests for SET and the PC are
24302 unnecessary. */
24303
24304 scanbody = PATTERN (this_insn);
24305 if (GET_CODE (scanbody) == SET
24306 && GET_CODE (SET_DEST (scanbody)) == PC)
24307 {
24308 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
24309 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
24310 {
24311 arm_ccfsm_state = 2;
24312 succeed = TRUE;
24313 }
24314 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
24315 fail = TRUE;
24316 }
24317 /* Fail if a conditional return is undesirable (e.g. on a
24318 StrongARM), but still allow this if optimizing for size. */
24319 else if (GET_CODE (scanbody) == return_code
24320 && !use_return_insn (TRUE, NULL)
24321 && !optimize_size)
24322 fail = TRUE;
24323 else if (GET_CODE (scanbody) == return_code)
24324 {
24325 arm_ccfsm_state = 2;
24326 succeed = TRUE;
24327 }
24328 else if (GET_CODE (scanbody) == PARALLEL)
24329 {
24330 switch (get_attr_conds (this_insn))
24331 {
24332 case CONDS_NOCOND:
24333 break;
24334 default:
24335 fail = TRUE;
24336 break;
24337 }
24338 }
24339 else
24340 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
24341
24342 break;
24343
24344 case INSN:
24345 /* Instructions using or affecting the condition codes make it
24346 fail. */
24347 scanbody = PATTERN (this_insn);
24348 if (!(GET_CODE (scanbody) == SET
24349 || GET_CODE (scanbody) == PARALLEL)
24350 || get_attr_conds (this_insn) != CONDS_NOCOND)
24351 fail = TRUE;
24352 break;
24353
24354 default:
24355 break;
24356 }
24357 }
24358 if (succeed)
24359 {
24360 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
24361 arm_target_label = CODE_LABEL_NUMBER (label);
24362 else
24363 {
24364 gcc_assert (seeking_return || arm_ccfsm_state == 2);
24365
24366 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
24367 {
24368 this_insn = next_nonnote_insn (this_insn);
24369 gcc_assert (!this_insn
24370 || (!BARRIER_P (this_insn)
24371 && !LABEL_P (this_insn)));
24372 }
24373 if (!this_insn)
24374 {
24375 /* Oh, dear! We ran off the end... give up. */
24376 extract_constrain_insn_cached (insn);
24377 arm_ccfsm_state = 0;
24378 arm_target_insn = NULL;
24379 return;
24380 }
24381 arm_target_insn = this_insn;
24382 }
24383
24384 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
24385 what it was. */
24386 if (!reverse)
24387 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
24388
24389 if (reverse || then_not_else)
24390 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
24391 }
24392
24393 /* Restore recog_data (getting the attributes of other insns can
24394 destroy this array, but final.c assumes that it remains intact
24395 across this call). */
24396 extract_constrain_insn_cached (insn);
24397 }
24398 }
24399
24400 /* Output IT instructions. */
24401 void
24402 thumb2_asm_output_opcode (FILE * stream)
24403 {
24404 char buff[5];
24405 int n;
24406
24407 if (arm_condexec_mask)
24408 {
24409 for (n = 0; n < arm_condexec_masklen; n++)
24410 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
24411 buff[n] = 0;
24412 asm_fprintf(stream, "i%s\t%s\n\t", buff,
24413 arm_condition_codes[arm_current_cc]);
24414 arm_condexec_mask = 0;
24415 }
24416 }
24417
24418 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
24419 UNITS_PER_WORD bytes wide. */
24420 static unsigned int
24421 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
24422 {
24423 if (TARGET_32BIT
24424 && regno > PC_REGNUM
24425 && regno != FRAME_POINTER_REGNUM
24426 && regno != ARG_POINTER_REGNUM
24427 && !IS_VFP_REGNUM (regno))
24428 return 1;
24429
24430 return ARM_NUM_REGS (mode);
24431 }
24432
24433 /* Implement TARGET_HARD_REGNO_MODE_OK. */
24434 static bool
24435 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
24436 {
24437 if (GET_MODE_CLASS (mode) == MODE_CC)
24438 return (regno == CC_REGNUM
24439 || (TARGET_HARD_FLOAT
24440 && regno == VFPCC_REGNUM));
24441
24442 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
24443 return false;
24444
24445 if (TARGET_THUMB1)
24446 /* For the Thumb we only allow values bigger than SImode in
24447 registers 0 - 6, so that there is always a second low
24448 register available to hold the upper part of the value.
24449 We probably ought to ensure that the register is the
24450 start of an even numbered register pair. */
24451 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
24452
24453 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
24454 {
24455 if (mode == SFmode || mode == SImode)
24456 return VFP_REGNO_OK_FOR_SINGLE (regno);
24457
24458 if (mode == DFmode)
24459 return VFP_REGNO_OK_FOR_DOUBLE (regno);
24460
24461 if (mode == HFmode)
24462 return VFP_REGNO_OK_FOR_SINGLE (regno);
24463
24464 /* VFP registers can hold HImode values. */
24465 if (mode == HImode)
24466 return VFP_REGNO_OK_FOR_SINGLE (regno);
24467
24468 if (TARGET_NEON)
24469 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
24470 || (VALID_NEON_QREG_MODE (mode)
24471 && NEON_REGNO_OK_FOR_QUAD (regno))
24472 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
24473 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
24474 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
24475 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
24476 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
24477
24478 return false;
24479 }
24480
24481 if (TARGET_REALLY_IWMMXT)
24482 {
24483 if (IS_IWMMXT_GR_REGNUM (regno))
24484 return mode == SImode;
24485
24486 if (IS_IWMMXT_REGNUM (regno))
24487 return VALID_IWMMXT_REG_MODE (mode);
24488 }
24489
24490 /* We allow almost any value to be stored in the general registers.
24491 Restrict doubleword quantities to even register pairs in ARM state
24492 so that we can use ldrd. Do not allow very large Neon structure
24493 opaque modes in general registers; they would use too many. */
24494 if (regno <= LAST_ARM_REGNUM)
24495 {
24496 if (ARM_NUM_REGS (mode) > 4)
24497 return false;
24498
24499 if (TARGET_THUMB2)
24500 return true;
24501
24502 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
24503 }
24504
24505 if (regno == FRAME_POINTER_REGNUM
24506 || regno == ARG_POINTER_REGNUM)
24507 /* We only allow integers in the fake hard registers. */
24508 return GET_MODE_CLASS (mode) == MODE_INT;
24509
24510 return false;
24511 }
24512
24513 /* Implement TARGET_MODES_TIEABLE_P. */
24514
24515 static bool
24516 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
24517 {
24518 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
24519 return true;
24520
24521 /* We specifically want to allow elements of "structure" modes to
24522 be tieable to the structure. This more general condition allows
24523 other rarer situations too. */
24524 if (TARGET_NEON
24525 && (VALID_NEON_DREG_MODE (mode1)
24526 || VALID_NEON_QREG_MODE (mode1)
24527 || VALID_NEON_STRUCT_MODE (mode1))
24528 && (VALID_NEON_DREG_MODE (mode2)
24529 || VALID_NEON_QREG_MODE (mode2)
24530 || VALID_NEON_STRUCT_MODE (mode2)))
24531 return true;
24532
24533 return false;
24534 }
24535
24536 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
24537 not used in arm mode. */
24538
24539 enum reg_class
24540 arm_regno_class (int regno)
24541 {
24542 if (regno == PC_REGNUM)
24543 return NO_REGS;
24544
24545 if (TARGET_THUMB1)
24546 {
24547 if (regno == STACK_POINTER_REGNUM)
24548 return STACK_REG;
24549 if (regno == CC_REGNUM)
24550 return CC_REG;
24551 if (regno < 8)
24552 return LO_REGS;
24553 return HI_REGS;
24554 }
24555
24556 if (TARGET_THUMB2 && regno < 8)
24557 return LO_REGS;
24558
24559 if ( regno <= LAST_ARM_REGNUM
24560 || regno == FRAME_POINTER_REGNUM
24561 || regno == ARG_POINTER_REGNUM)
24562 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
24563
24564 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
24565 return TARGET_THUMB2 ? CC_REG : NO_REGS;
24566
24567 if (IS_VFP_REGNUM (regno))
24568 {
24569 if (regno <= D7_VFP_REGNUM)
24570 return VFP_D0_D7_REGS;
24571 else if (regno <= LAST_LO_VFP_REGNUM)
24572 return VFP_LO_REGS;
24573 else
24574 return VFP_HI_REGS;
24575 }
24576
24577 if (IS_IWMMXT_REGNUM (regno))
24578 return IWMMXT_REGS;
24579
24580 if (IS_IWMMXT_GR_REGNUM (regno))
24581 return IWMMXT_GR_REGS;
24582
24583 return NO_REGS;
24584 }
24585
24586 /* Handle a special case when computing the offset
24587 of an argument from the frame pointer. */
24588 int
24589 arm_debugger_arg_offset (int value, rtx addr)
24590 {
24591 rtx_insn *insn;
24592
24593 /* We are only interested if dbxout_parms() failed to compute the offset. */
24594 if (value != 0)
24595 return 0;
24596
24597 /* We can only cope with the case where the address is held in a register. */
24598 if (!REG_P (addr))
24599 return 0;
24600
24601 /* If we are using the frame pointer to point at the argument, then
24602 an offset of 0 is correct. */
24603 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
24604 return 0;
24605
24606 /* If we are using the stack pointer to point at the
24607 argument, then an offset of 0 is correct. */
24608 /* ??? Check this is consistent with thumb2 frame layout. */
24609 if ((TARGET_THUMB || !frame_pointer_needed)
24610 && REGNO (addr) == SP_REGNUM)
24611 return 0;
24612
24613 /* Oh dear. The argument is pointed to by a register rather
24614 than being held in a register, or being stored at a known
24615 offset from the frame pointer. Since GDB only understands
24616 those two kinds of argument we must translate the address
24617 held in the register into an offset from the frame pointer.
24618 We do this by searching through the insns for the function
24619 looking to see where this register gets its value. If the
24620 register is initialized from the frame pointer plus an offset
24621 then we are in luck and we can continue, otherwise we give up.
24622
24623 This code is exercised by producing debugging information
24624 for a function with arguments like this:
24625
24626 double func (double a, double b, int c, double d) {return d;}
24627
24628 Without this code the stab for parameter 'd' will be set to
24629 an offset of 0 from the frame pointer, rather than 8. */
24630
24631 /* The if() statement says:
24632
24633 If the insn is a normal instruction
24634 and if the insn is setting the value in a register
24635 and if the register being set is the register holding the address of the argument
24636 and if the address is computed by an addition
24637 that involves adding to a register
24638 which is the frame pointer
24639 a constant integer
24640
24641 then... */
24642
24643 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24644 {
24645 if ( NONJUMP_INSN_P (insn)
24646 && GET_CODE (PATTERN (insn)) == SET
24647 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
24648 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
24649 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
24650 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
24651 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
24652 )
24653 {
24654 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
24655
24656 break;
24657 }
24658 }
24659
24660 if (value == 0)
24661 {
24662 debug_rtx (addr);
24663 warning (0, "unable to compute real location of stacked parameter");
24664 value = 8; /* XXX magic hack */
24665 }
24666
24667 return value;
24668 }
24669 \f
24670 /* Implement TARGET_PROMOTED_TYPE. */
24671
24672 static tree
24673 arm_promoted_type (const_tree t)
24674 {
24675 if (SCALAR_FLOAT_TYPE_P (t)
24676 && TYPE_PRECISION (t) == 16
24677 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
24678 return float_type_node;
24679 return NULL_TREE;
24680 }
24681
24682 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24683 This simply adds HFmode as a supported mode; even though we don't
24684 implement arithmetic on this type directly, it's supported by
24685 optabs conversions, much the way the double-word arithmetic is
24686 special-cased in the default hook. */
24687
24688 static bool
24689 arm_scalar_mode_supported_p (scalar_mode mode)
24690 {
24691 if (mode == HFmode)
24692 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24693 else if (ALL_FIXED_POINT_MODE_P (mode))
24694 return true;
24695 else
24696 return default_scalar_mode_supported_p (mode);
24697 }
24698
24699 /* Set the value of FLT_EVAL_METHOD.
24700 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
24701
24702 0: evaluate all operations and constants, whose semantic type has at
24703 most the range and precision of type float, to the range and
24704 precision of float; evaluate all other operations and constants to
24705 the range and precision of the semantic type;
24706
24707 N, where _FloatN is a supported interchange floating type
24708 evaluate all operations and constants, whose semantic type has at
24709 most the range and precision of _FloatN type, to the range and
24710 precision of the _FloatN type; evaluate all other operations and
24711 constants to the range and precision of the semantic type;
24712
24713 If we have the ARMv8.2-A extensions then we support _Float16 in native
24714 precision, so we should set this to 16. Otherwise, we support the type,
24715 but want to evaluate expressions in float precision, so set this to
24716 0. */
24717
24718 static enum flt_eval_method
24719 arm_excess_precision (enum excess_precision_type type)
24720 {
24721 switch (type)
24722 {
24723 case EXCESS_PRECISION_TYPE_FAST:
24724 case EXCESS_PRECISION_TYPE_STANDARD:
24725 /* We can calculate either in 16-bit range and precision or
24726 32-bit range and precision. Make that decision based on whether
24727 we have native support for the ARMv8.2-A 16-bit floating-point
24728 instructions or not. */
24729 return (TARGET_VFP_FP16INST
24730 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24731 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
24732 case EXCESS_PRECISION_TYPE_IMPLICIT:
24733 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24734 default:
24735 gcc_unreachable ();
24736 }
24737 return FLT_EVAL_METHOD_UNPREDICTABLE;
24738 }
24739
24740
24741 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
24742 _Float16 if we are using anything other than ieee format for 16-bit
24743 floating point. Otherwise, punt to the default implementation. */
24744 static opt_scalar_float_mode
24745 arm_floatn_mode (int n, bool extended)
24746 {
24747 if (!extended && n == 16)
24748 {
24749 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
24750 return HFmode;
24751 return opt_scalar_float_mode ();
24752 }
24753
24754 return default_floatn_mode (n, extended);
24755 }
24756
24757
24758 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24759 not to early-clobber SRC registers in the process.
24760
24761 We assume that the operands described by SRC and DEST represent a
24762 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24763 number of components into which the copy has been decomposed. */
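/* For example (illustrative registers): copying {d1, d2} into {d0, d1} is
   ordered low-to-high (d0 <- d1, then d1 <- d2), whereas copying {d0, d1}
   into {d1, d2} is ordered high-to-low (d2 <- d1, then d1 <- d0), so no
   source register is clobbered before it has been read.  */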
24764 void
24765 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24766 {
24767 unsigned int i;
24768
24769 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24770 || REGNO (operands[0]) < REGNO (operands[1]))
24771 {
24772 for (i = 0; i < count; i++)
24773 {
24774 operands[2 * i] = dest[i];
24775 operands[2 * i + 1] = src[i];
24776 }
24777 }
24778 else
24779 {
24780 for (i = 0; i < count; i++)
24781 {
24782 operands[2 * i] = dest[count - i - 1];
24783 operands[2 * i + 1] = src[count - i - 1];
24784 }
24785 }
24786 }
24787
24788 /* Split operands into moves from op[1] + op[2] into op[0]. */
24789
24790 void
24791 neon_split_vcombine (rtx operands[3])
24792 {
24793 unsigned int dest = REGNO (operands[0]);
24794 unsigned int src1 = REGNO (operands[1]);
24795 unsigned int src2 = REGNO (operands[2]);
24796 machine_mode halfmode = GET_MODE (operands[1]);
24797 unsigned int halfregs = REG_NREGS (operands[1]);
24798 rtx destlo, desthi;
24799
24800 if (src1 == dest && src2 == dest + halfregs)
24801 {
24802 /* No-op move. Can't split to nothing; emit something. */
24803 emit_note (NOTE_INSN_DELETED);
24804 return;
24805 }
24806
24807 /* Preserve register attributes for variable tracking. */
24808 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24809 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24810 GET_MODE_SIZE (halfmode));
24811
24812 /* Special case of reversed high/low parts. Use VSWP. */
24813 if (src2 == dest && src1 == dest + halfregs)
24814 {
24815 rtx x = gen_rtx_SET (destlo, operands[1]);
24816 rtx y = gen_rtx_SET (desthi, operands[2]);
24817 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24818 return;
24819 }
24820
24821 if (!reg_overlap_mentioned_p (operands[2], destlo))
24822 {
24823 /* Try to avoid unnecessary moves if part of the result
24824 is in the right place already. */
24825 if (src1 != dest)
24826 emit_move_insn (destlo, operands[1]);
24827 if (src2 != dest + halfregs)
24828 emit_move_insn (desthi, operands[2]);
24829 }
24830 else
24831 {
24832 if (src2 != dest + halfregs)
24833 emit_move_insn (desthi, operands[2]);
24834 if (src1 != dest)
24835 emit_move_insn (destlo, operands[1]);
24836 }
24837 }
24838 \f
24839 /* Return the number (counting from 0) of
24840 the least significant set bit in MASK. */
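/* For example, number_of_first_bit_set (0x28) is 3.  */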
24841
24842 inline static int
24843 number_of_first_bit_set (unsigned mask)
24844 {
24845 return ctz_hwi (mask);
24846 }
24847
24848 /* Like emit_multi_reg_push, but allowing for a different set of
24849 registers to be described as saved. MASK is the set of registers
24850 to be saved; REAL_REGS is the set of registers to be described as
24851 saved. If REAL_REGS is 0, only describe the stack adjustment. */
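/* For instance (illustrative values): MASK == REAL_REGS == {r4, r5, lr}
   describes a "push {r4, r5, lr}"; the stack pointer is pre-decremented by
   12 bytes and three SImode stores are recorded for the unwind information.  */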
24852
24853 static rtx_insn *
24854 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24855 {
24856 unsigned long regno;
24857 rtx par[10], tmp, reg;
24858 rtx_insn *insn;
24859 int i, j;
24860
24861 /* Build the parallel of the registers actually being stored. */
24862 for (i = 0; mask; ++i, mask &= mask - 1)
24863 {
24864 regno = ctz_hwi (mask);
24865 reg = gen_rtx_REG (SImode, regno);
24866
24867 if (i == 0)
24868 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24869 else
24870 tmp = gen_rtx_USE (VOIDmode, reg);
24871
24872 par[i] = tmp;
24873 }
24874
24875 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24876 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24877 tmp = gen_frame_mem (BLKmode, tmp);
24878 tmp = gen_rtx_SET (tmp, par[0]);
24879 par[0] = tmp;
24880
24881 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24882 insn = emit_insn (tmp);
24883
24884 /* Always build the stack adjustment note for unwind info. */
24885 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24886 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24887 par[0] = tmp;
24888
24889 /* Build the parallel of the registers recorded as saved for unwind. */
24890 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24891 {
24892 regno = ctz_hwi (real_regs);
24893 reg = gen_rtx_REG (SImode, regno);
24894
24895 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24896 tmp = gen_frame_mem (SImode, tmp);
24897 tmp = gen_rtx_SET (tmp, reg);
24898 RTX_FRAME_RELATED_P (tmp) = 1;
24899 par[j + 1] = tmp;
24900 }
24901
24902 if (j == 0)
24903 tmp = par[0];
24904 else
24905 {
24906 RTX_FRAME_RELATED_P (par[0]) = 1;
24907 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24908 }
24909
24910 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24911
24912 return insn;
24913 }
24914
24915 /* Emit code to pop registers from the stack. F is the
24916 assembly file. MASK is the registers to pop. */
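/* For example (illustrative call): thumb_pop (f, (1 << 4) | (1 << 5))
   emits "pop {r4, r5}".  */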
24917 static void
24918 thumb_pop (FILE *f, unsigned long mask)
24919 {
24920 int regno;
24921 int lo_mask = mask & 0xFF;
24922
24923 gcc_assert (mask);
24924
24925 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24926 {
24927 /* Special case. Do not generate a POP PC statement here, do it in
24928 thumb_exit(). */
24929 thumb_exit (f, -1);
24930 return;
24931 }
24932
24933 fprintf (f, "\tpop\t{");
24934
24935 /* Look at the low registers first. */
24936 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24937 {
24938 if (lo_mask & 1)
24939 {
24940 asm_fprintf (f, "%r", regno);
24941
24942 if ((lo_mask & ~1) != 0)
24943 fprintf (f, ", ");
24944 }
24945 }
24946
24947 if (mask & (1 << PC_REGNUM))
24948 {
24949 /* Catch popping the PC. */
24950 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24951 || IS_CMSE_ENTRY (arm_current_func_type ()))
24952 {
24953 /* The PC is never popped directly; instead
24954 it is popped into r3 and then BX is used. */
24955 fprintf (f, "}\n");
24956
24957 thumb_exit (f, -1);
24958
24959 return;
24960 }
24961 else
24962 {
24963 if (mask & 0xFF)
24964 fprintf (f, ", ");
24965
24966 asm_fprintf (f, "%r", PC_REGNUM);
24967 }
24968 }
24969
24970 fprintf (f, "}\n");
24971 }
24972
24973 /* Generate code to return from a thumb function.
24974 If 'reg_containing_return_addr' is -1, then the return address is
24975 actually on the stack, at the stack pointer.
24976
24977 Note: do not forget to update length attribute of corresponding insn pattern
24978 when changing assembly output (eg. length attribute of epilogue_insns when
24979 updating Armv8-M Baseline Security Extensions register clearing
24980 sequences). */
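/* For example (simplest case): with REG_CONTAINING_RETURN_ADDR == LR_REGNUM,
   no backtrace structure and nothing to pop, this emits just "bx lr".  */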
24981 static void
24982 thumb_exit (FILE *f, int reg_containing_return_addr)
24983 {
24984 unsigned regs_available_for_popping;
24985 unsigned regs_to_pop;
24986 int pops_needed;
24987 unsigned available;
24988 unsigned required;
24989 machine_mode mode;
24990 int size;
24991 int restore_a4 = FALSE;
24992
24993 /* Compute the registers we need to pop. */
24994 regs_to_pop = 0;
24995 pops_needed = 0;
24996
24997 if (reg_containing_return_addr == -1)
24998 {
24999 regs_to_pop |= 1 << LR_REGNUM;
25000 ++pops_needed;
25001 }
25002
25003 if (TARGET_BACKTRACE)
25004 {
25005 /* Restore the (ARM) frame pointer and stack pointer. */
25006 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25007 pops_needed += 2;
25008 }
25009
25010 /* If there is nothing to pop then just emit the BX instruction and
25011 return. */
25012 if (pops_needed == 0)
25013 {
25014 if (crtl->calls_eh_return)
25015 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25016
25017 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25018 {
25019 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
25020 reg_containing_return_addr);
25021 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
25022 }
25023 else
25024 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25025 return;
25026 }
25027 /* Otherwise if we are not supporting interworking and we have not created
25028 a backtrace structure and the function was not entered in ARM mode then
25029 just pop the return address straight into the PC. */
25030 else if (!TARGET_INTERWORK
25031 && !TARGET_BACKTRACE
25032 && !is_called_in_ARM_mode (current_function_decl)
25033 && !crtl->calls_eh_return
25034 && !IS_CMSE_ENTRY (arm_current_func_type ()))
25035 {
25036 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25037 return;
25038 }
25039
25040 /* Find out how many of the (return) argument registers we can corrupt. */
25041 regs_available_for_popping = 0;
25042
25043 /* If returning via __builtin_eh_return, the bottom three registers
25044 all contain information needed for the return. */
25045 if (crtl->calls_eh_return)
25046 size = 12;
25047 else
25048 {
25049 /* We can deduce the registers used from the function's
25050 return value. This is more reliable than examining
25051 df_regs_ever_live_p () because that will be set if the register is
25052 ever used in the function, not just if the register is used
25053 to hold a return value. */
25054
25055 if (crtl->return_rtx != 0)
25056 mode = GET_MODE (crtl->return_rtx);
25057 else
25058 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25059
25060 size = GET_MODE_SIZE (mode);
25061
25062 if (size == 0)
25063 {
25064 /* In a void function we can use any argument register.
25065 In a function that returns a structure on the stack
25066 we can use the second and third argument registers. */
25067 if (mode == VOIDmode)
25068 regs_available_for_popping =
25069 (1 << ARG_REGISTER (1))
25070 | (1 << ARG_REGISTER (2))
25071 | (1 << ARG_REGISTER (3));
25072 else
25073 regs_available_for_popping =
25074 (1 << ARG_REGISTER (2))
25075 | (1 << ARG_REGISTER (3));
25076 }
25077 else if (size <= 4)
25078 regs_available_for_popping =
25079 (1 << ARG_REGISTER (2))
25080 | (1 << ARG_REGISTER (3));
25081 else if (size <= 8)
25082 regs_available_for_popping =
25083 (1 << ARG_REGISTER (3));
25084 }
25085
25086 /* Match registers to be popped with registers into which we pop them. */
25087 for (available = regs_available_for_popping,
25088 required = regs_to_pop;
25089 required != 0 && available != 0;
25090 available &= ~(available & - available),
25091 required &= ~(required & - required))
25092 -- pops_needed;
25093
25094 /* If we have any popping registers left over, remove them. */
25095 if (available > 0)
25096 regs_available_for_popping &= ~available;
25097
25098 /* Otherwise if we need another popping register we can use
25099 the fourth argument register. */
25100 else if (pops_needed)
25101 {
25102 /* If we have not found any free argument registers and
25103 reg a4 contains the return address, we must move it. */
25104 if (regs_available_for_popping == 0
25105 && reg_containing_return_addr == LAST_ARG_REGNUM)
25106 {
25107 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25108 reg_containing_return_addr = LR_REGNUM;
25109 }
25110 else if (size > 12)
25111 {
25112 /* Register a4 is being used to hold part of the return value,
25113 but we have dire need of a free, low register. */
25114 restore_a4 = TRUE;
25115
25116 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
25117 }
25118
25119 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25120 {
25121 /* The fourth argument register is available. */
25122 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25123
25124 --pops_needed;
25125 }
25126 }
25127
25128 /* Pop as many registers as we can. */
25129 thumb_pop (f, regs_available_for_popping);
25130
25131 /* Process the registers we popped. */
25132 if (reg_containing_return_addr == -1)
25133 {
25134 /* The return address was popped into the lowest numbered register. */
25135 regs_to_pop &= ~(1 << LR_REGNUM);
25136
25137 reg_containing_return_addr =
25138 number_of_first_bit_set (regs_available_for_popping);
25139
25140 /* Remove this register from the mask of available registers, so that
25141 the return address will not be corrupted by further pops. */
25142 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25143 }
25144
25145 /* If we popped other registers then handle them here. */
25146 if (regs_available_for_popping)
25147 {
25148 int frame_pointer;
25149
25150 /* Work out which register currently contains the frame pointer. */
25151 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
25152
25153 /* Move it into the correct place. */
25154 asm_fprintf (f, "\tmov\t%r, %r\n",
25155 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
25156
25157 /* (Temporarily) remove it from the mask of popped registers. */
25158 regs_available_for_popping &= ~(1 << frame_pointer);
25159 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
25160
25161 if (regs_available_for_popping)
25162 {
25163 int stack_pointer;
25164
25165 /* We popped the stack pointer as well,
25166 find the register that contains it. */
25167 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
25168
25169 /* Move it into the stack register. */
25170 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
25171
25172 /* At this point we have popped all necessary registers, so
25173 do not worry about restoring regs_available_for_popping
25174 to its correct value:
25175
25176 assert (pops_needed == 0)
25177 assert (regs_available_for_popping == (1 << frame_pointer))
25178 assert (regs_to_pop == (1 << STACK_POINTER)) */
25179 }
25180 else
25181 {
25182 /* Since we have just moved the popped value into the frame
25183 pointer, the popping register is available for reuse, and
25184 we know that we still have the stack pointer left to pop. */
25185 regs_available_for_popping |= (1 << frame_pointer);
25186 }
25187 }
25188
25189 /* If we still have registers left on the stack, but we no longer have
25190 any registers into which we can pop them, then we must move the return
25191 address into the link register and make available the register that
25192 contained it. */
25193 if (regs_available_for_popping == 0 && pops_needed > 0)
25194 {
25195 regs_available_for_popping |= 1 << reg_containing_return_addr;
25196
25197 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
25198 reg_containing_return_addr);
25199
25200 reg_containing_return_addr = LR_REGNUM;
25201 }
25202
25203 /* If we have registers left on the stack then pop some more.
25204 We know that at most we will want to pop FP and SP. */
25205 if (pops_needed > 0)
25206 {
25207 int popped_into;
25208 int move_to;
25209
25210 thumb_pop (f, regs_available_for_popping);
25211
25212 /* We have popped either FP or SP.
25213 Move whichever one it is into the correct register. */
25214 popped_into = number_of_first_bit_set (regs_available_for_popping);
25215 move_to = number_of_first_bit_set (regs_to_pop);
25216
25217 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
25218 --pops_needed;
25219 }
25220
25221 /* If we still have not popped everything then we must have only
25222 had one register available to us and we are now popping the SP. */
25223 if (pops_needed > 0)
25224 {
25225 int popped_into;
25226
25227 thumb_pop (f, regs_available_for_popping);
25228
25229 popped_into = number_of_first_bit_set (regs_available_for_popping);
25230
25231 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
25232 /*
25233 assert (regs_to_pop == (1 << STACK_POINTER))
25234 assert (pops_needed == 1)
25235 */
25236 }
25237
25238 /* If necessary restore the a4 register. */
25239 if (restore_a4)
25240 {
25241 if (reg_containing_return_addr != LR_REGNUM)
25242 {
25243 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25244 reg_containing_return_addr = LR_REGNUM;
25245 }
25246
25247 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
25248 }
25249
25250 if (crtl->calls_eh_return)
25251 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25252
25253 /* Return to caller. */
25254 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25255 {
25256 /* This is for the cases where LR is not being used to contain the return
25257 address. It may therefore contain information that we might not want
25258 to leak, hence it must be cleared. The value in R0 will never be a
25259 secret at this point, so it is safe to use it, see the clearing code
25260 in 'cmse_nonsecure_entry_clear_before_return'. */
25261 if (reg_containing_return_addr != LR_REGNUM)
25262 asm_fprintf (f, "\tmov\tlr, r0\n");
25263
25264 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
25265 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
25266 }
25267 else
25268 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25269 }
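/* For illustration, assuming the return address ended up in LR and there
   was nothing left to pop, the sequence emitted above would look roughly
   like:

	msr	APSR_nzcvq, lr		@ CMSE entry: scrub condition flags
	bxns	lr			@ non-secure return

   for a cmse_nonsecure_entry function, or simply "bx lr" otherwise.  */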
25270 \f
25271 /* Scan INSN just before assembler is output for it.
25272 For Thumb-1, we track the status of the condition codes; this
25273 information is used in the cbranchsi4_insn pattern. */
25274 void
25275 thumb1_final_prescan_insn (rtx_insn *insn)
25276 {
25277 if (flag_print_asm_name)
25278 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
25279 INSN_ADDRESSES (INSN_UID (insn)));
25280 /* Don't overwrite the previous setter when we get to a cbranch. */
25281 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
25282 {
25283 enum attr_conds conds;
25284
25285 if (cfun->machine->thumb1_cc_insn)
25286 {
25287 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
25288 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
25289 CC_STATUS_INIT;
25290 }
25291 conds = get_attr_conds (insn);
25292 if (conds == CONDS_SET)
25293 {
25294 rtx set = single_set (insn);
25295 cfun->machine->thumb1_cc_insn = insn;
25296 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
25297 cfun->machine->thumb1_cc_op1 = const0_rtx;
25298 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
25299 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
25300 {
25301 rtx src1 = XEXP (SET_SRC (set), 1);
25302 if (src1 == const0_rtx)
25303 cfun->machine->thumb1_cc_mode = CCmode;
25304 }
25305 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
25306 {
25307 /* Record the src register operand instead of dest because the
25308 cprop_hardreg pass propagates src. */
25309 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
25310 }
25311 }
25312 else if (conds != CONDS_NOCOND)
25313 cfun->machine->thumb1_cc_insn = NULL_RTX;
25314 }
25315
25316 /* Check if an unexpected far jump is used. */
25317 if (cfun->machine->lr_save_eliminated
25318 && get_attr_far_jump (insn) == FAR_JUMP_YES)
25319 internal_error ("Unexpected thumb1 far jump");
25320 }
25321
25322 int
25323 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
25324 {
25325 unsigned HOST_WIDE_INT mask = 0xff;
25326 int i;
25327
25328 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
25329 if (val == 0) /* XXX */
25330 return 0;
25331
25332 for (i = 0; i < 25; i++)
25333 if ((val & (mask << i)) == val)
25334 return 1;
25335
25336 return 0;
25337 }
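/* A few illustrative values for the predicate above (hypothetical checks,
   not part of the build): any 8-bit field shifted left by 0..24 bits is
   accepted, anything needing two separate fields is not.  */
#if 0 /* illustration only */
static void
thumb_shiftable_const_examples (void)
{
  gcc_assert (thumb_shiftable_const (0x000000ff));	/* 0xff << 0   */
  gcc_assert (thumb_shiftable_const (0x0001fe00));	/* 0xff << 9   */
  gcc_assert (thumb_shiftable_const (0xff000000));	/* 0xff << 24  */
  gcc_assert (!thumb_shiftable_const (0x00ff00ff));	/* two fields  */
  gcc_assert (!thumb_shiftable_const (0));		/* rejected explicitly */
}
#endif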
25338
25339 /* Returns nonzero if the current function contains,
25340 or might contain, a far jump. */
25341 static int
25342 thumb_far_jump_used_p (void)
25343 {
25344 rtx_insn *insn;
25345 bool far_jump = false;
25346 unsigned int func_size = 0;
25347
25348 /* If we have already decided that far jumps may be used,
25349 do not bother checking again, and always return true even if
25350 it turns out that they are not being used. Once we have made
25351 the decision that far jumps are present (and that hence the link
25352 register will be pushed onto the stack) we cannot go back on it. */
25353 if (cfun->machine->far_jump_used)
25354 return 1;
25355
25356 /* If this function is not being called from the prologue/epilogue
25357 generation code then it must be being called from the
25358 INITIAL_ELIMINATION_OFFSET macro. */
25359 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
25360 {
25361 /* In this case we know that we are being asked about the elimination
25362 of the arg pointer register. If that register is not being used,
25363 then there are no arguments on the stack, and we do not have to
25364 worry that a far jump might force the prologue to push the link
25365 register, changing the stack offsets. In this case we can just
25366 return false, since the presence of far jumps in the function will
25367 not affect stack offsets.
25368
25369 If the arg pointer is live (or if it was live, but has now been
25370 eliminated and so set to dead) then we do have to test to see if
25371 the function might contain a far jump. This test can lead to some
25372 false negatives, since before reload is completed, the length of
25373 branch instructions is not known, so gcc defaults to returning their
25374 longest length, which in turn sets the far jump attribute to true.
25375
25376 A false negative will not result in bad code being generated, but it
25377 will result in a needless push and pop of the link register. We
25378 hope that this does not occur too often.
25379
25380 If we need doubleword stack alignment this could affect the other
25381 elimination offsets so we can't risk getting it wrong. */
25382 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
25383 cfun->machine->arg_pointer_live = 1;
25384 else if (!cfun->machine->arg_pointer_live)
25385 return 0;
25386 }
25387
25388 /* We should not change far_jump_used during or after reload, as there is
25389 no chance to change stack frame layout. */
25390 if (reload_in_progress || reload_completed)
25391 return 0;
25392
25393 /* Check to see if the function contains a branch
25394 insn with the far jump attribute set. */
25395 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25396 {
25397 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
25398 {
25399 far_jump = true;
25400 }
25401 func_size += get_attr_length (insn);
25402 }
25403
25404 /* The far_jump attribute will always be true for thumb1 before the
25405 shorten_branch pass, so checking the far_jump attribute before
25406 shorten_branch is not very useful.
25407
25408 The following heuristic tries to estimate more accurately whether a far
25409 jump may finally be used. The heuristic is very conservative, as there
25410 is no chance to roll back a decision not to use a far jump.
25411
25412 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
25413 that each 2-byte insn is associated with a 4-byte constant pool entry.
25414 Using function size 2048/3 as the threshold is conservative enough. */
25415 if (far_jump)
25416 {
25417 if ((func_size * 3) >= 2048)
25418 {
25419 /* Record the fact that we have decided that
25420 the function does use far jumps. */
25421 cfun->machine->far_jump_used = 1;
25422 return 1;
25423 }
25424 }
25425
25426 return 0;
25427 }
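/* A hedged sketch of the same arithmetic in isolation: each 2-byte insn
   may drag a 4-byte literal-pool entry along with it, so func_size bytes
   of code can span up to roughly func_size * 3 bytes, and once that
   exceeds the ~2 KiB reach of a Thumb-1 branch a far jump may be needed.
   Hypothetical helper, not part of this file.  */
#if 0 /* illustration only */
static int
far_jump_may_be_needed (unsigned int func_size)
{
  /* Worst case: 2 bytes of insn + 4 bytes of constant pool = 3x growth.  */
  return (func_size * 3) >= 2048;
}
#endif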
25428
25429 /* Return nonzero if FUNC must be entered in ARM mode. */
25430 static bool
25431 is_called_in_ARM_mode (tree func)
25432 {
25433 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
25434
25435 /* Ignore the problem of functions whose address is taken. */
25436 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
25437 return true;
25438
25439 #ifdef ARM_PE
25440 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
25441 #else
25442 return false;
25443 #endif
25444 }
25445
25446 /* Given the stack offsets and register mask in OFFSETS, decide how
25447 many additional registers to push instead of subtracting a constant
25448 from SP. For epilogues the principle is the same except we use pop.
25449 FOR_PROLOGUE indicates which we're generating. */
25450 static int
25451 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
25452 {
25453 HOST_WIDE_INT amount;
25454 unsigned long live_regs_mask = offsets->saved_regs_mask;
25455 /* Extract a mask of the ones we can give to the Thumb's push/pop
25456 instruction. */
25457 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
25458 /* Then count how many other high registers will need to be pushed. */
25459 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25460 int n_free, reg_base, size;
25461
25462 if (!for_prologue && frame_pointer_needed)
25463 amount = offsets->locals_base - offsets->saved_regs;
25464 else
25465 amount = offsets->outgoing_args - offsets->saved_regs;
25466
25467 /* If the stack frame size is 512 exactly, we can save one load
25468 instruction, which should make this a win even when optimizing
25469 for speed. */
25470 if (!optimize_size && amount != 512)
25471 return 0;
25472
25473 /* Can't do this if there are high registers to push. */
25474 if (high_regs_pushed != 0)
25475 return 0;
25476
25477 /* Shouldn't do it in the prologue if no registers would normally
25478 be pushed at all. In the epilogue, also allow it if we'll have
25479 a pop insn for the PC. */
25480 if (l_mask == 0
25481 && (for_prologue
25482 || TARGET_BACKTRACE
25483 || (live_regs_mask & 1 << LR_REGNUM) == 0
25484 || TARGET_INTERWORK
25485 || crtl->args.pretend_args_size != 0))
25486 return 0;
25487
25488 /* Don't do this if thumb_expand_prologue wants to emit instructions
25489 between the push and the stack frame allocation. */
25490 if (for_prologue
25491 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
25492 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
25493 return 0;
25494
25495 reg_base = 0;
25496 n_free = 0;
25497 if (!for_prologue)
25498 {
25499 size = arm_size_return_regs ();
25500 reg_base = ARM_NUM_INTS (size);
25501 live_regs_mask >>= reg_base;
25502 }
25503
25504 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
25505 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
25506 {
25507 live_regs_mask >>= 1;
25508 n_free++;
25509 }
25510
25511 if (n_free == 0)
25512 return 0;
25513 gcc_assert (amount / 4 * 4 == amount);
25514
25515 if (amount >= 512 && (amount - n_free * 4) < 512)
25516 return (amount - 508) / 4;
25517 if (amount <= n_free * 4)
25518 return amount / 4;
25519 return 0;
25520 }
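/* Worked example of the final decision above: with amount == 512 and
   n_free == 2 free low registers, amount >= 512 and
   amount - n_free * 4 == 504 < 512, so the function returns
   (512 - 508) / 4 == 1.  Pushing that one extra register brings the
   remaining SP adjustment down to 508, which fits a single Thumb-1
   immediate add/sub.  */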
25521
25522 /* The bits which aren't usefully expanded as rtl. */
25523 const char *
25524 thumb1_unexpanded_epilogue (void)
25525 {
25526 arm_stack_offsets *offsets;
25527 int regno;
25528 unsigned long live_regs_mask = 0;
25529 int high_regs_pushed = 0;
25530 int extra_pop;
25531 int had_to_push_lr;
25532 int size;
25533
25534 if (cfun->machine->return_used_this_function != 0)
25535 return "";
25536
25537 if (IS_NAKED (arm_current_func_type ()))
25538 return "";
25539
25540 offsets = arm_get_frame_offsets ();
25541 live_regs_mask = offsets->saved_regs_mask;
25542 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25543
25544 /* We can deduce the registers used from the function's return value.
25545 This is more reliable than examining df_regs_ever_live_p () because that
25546 will be set if the register is ever used in the function, not just if
25547 the register is used to hold a return value. */
25548 size = arm_size_return_regs ();
25549
25550 extra_pop = thumb1_extra_regs_pushed (offsets, false);
25551 if (extra_pop > 0)
25552 {
25553 unsigned long extra_mask = (1 << extra_pop) - 1;
25554 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
25555 }
25556
25557 /* The prologue may have pushed some high registers to use as
25558 work registers. For example, the testsuite file:
25559 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
25560 compiles to produce:
25561 push {r4, r5, r6, r7, lr}
25562 mov r7, r9
25563 mov r6, r8
25564 push {r6, r7}
25565 as part of the prolog. We have to undo that pushing here. */
25566
25567 if (high_regs_pushed)
25568 {
25569 unsigned long mask = live_regs_mask & 0xff;
25570 int next_hi_reg;
25571
25572 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
25573
25574 if (mask == 0)
25575 /* Oh dear! We have no low registers into which we can pop
25576 high registers! */
25577 internal_error
25578 ("no low registers available for popping high registers");
25579
25580 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25581 if (live_regs_mask & (1 << next_hi_reg))
25582 break;
25583
25584 while (high_regs_pushed)
25585 {
25586 /* Find lo register(s) into which the high register(s) can
25587 be popped. */
25588 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
25589 {
25590 if (mask & (1 << regno))
25591 high_regs_pushed--;
25592 if (high_regs_pushed == 0)
25593 break;
25594 }
25595
25596 if (high_regs_pushed == 0 && regno >= 0)
25597 mask &= ~((1 << regno) - 1);
25598
25599 /* Pop the values into the low register(s). */
25600 thumb_pop (asm_out_file, mask);
25601
25602 /* Move the value(s) into the high registers. */
25603 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
25604 {
25605 if (mask & (1 << regno))
25606 {
25607 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
25608 regno);
25609
25610 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
25611 next_hi_reg--)
25612 if (live_regs_mask & (1 << next_hi_reg))
25613 break;
25614 }
25615 }
25616 }
25617 live_regs_mask &= ~0x0f00;
25618 }
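/* Continuing the example from the comment above, the loop just executed
   emits the mirror image of that prologue sequence, roughly:

	pop	{r6, r7}
	mov	r9, r7
	mov	r8, r6

   after which the remaining low registers (and possibly the PC) are
   popped by the code below.  The exact low registers used depend on
   MASK, so this is only a sketch.  */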
25619
25620 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
25621 live_regs_mask &= 0xff;
25622
25623 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
25624 {
25625 /* Pop the return address into the PC. */
25626 if (had_to_push_lr)
25627 live_regs_mask |= 1 << PC_REGNUM;
25628
25629 /* Either no argument registers were pushed or a backtrace
25630 structure was created which includes an adjusted stack
25631 pointer, so just pop everything. */
25632 if (live_regs_mask)
25633 thumb_pop (asm_out_file, live_regs_mask);
25634
25635 /* We have either just popped the return address into the
25636 PC or it was kept in LR for the entire function.
25637 Note that thumb_pop has already called thumb_exit if the
25638 PC was in the list. */
25639 if (!had_to_push_lr)
25640 thumb_exit (asm_out_file, LR_REGNUM);
25641 }
25642 else
25643 {
25644 /* Pop everything but the return address. */
25645 if (live_regs_mask)
25646 thumb_pop (asm_out_file, live_regs_mask);
25647
25648 if (had_to_push_lr)
25649 {
25650 if (size > 12)
25651 {
25652 /* We have no free low regs, so save one. */
25653 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
25654 LAST_ARG_REGNUM);
25655 }
25656
25657 /* Get the return address into a temporary register. */
25658 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
25659
25660 if (size > 12)
25661 {
25662 /* Move the return address to lr. */
25663 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
25664 LAST_ARG_REGNUM);
25665 /* Restore the low register. */
25666 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
25667 IP_REGNUM);
25668 regno = LR_REGNUM;
25669 }
25670 else
25671 regno = LAST_ARG_REGNUM;
25672 }
25673 else
25674 regno = LR_REGNUM;
25675
25676 /* Remove the argument registers that were pushed onto the stack. */
25677 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
25678 SP_REGNUM, SP_REGNUM,
25679 crtl->args.pretend_args_size);
25680
25681 thumb_exit (asm_out_file, regno);
25682 }
25683
25684 return "";
25685 }
25686
25687 /* Functions to save and restore machine-specific function data. */
25688 static struct machine_function *
25689 arm_init_machine_status (void)
25690 {
25691 struct machine_function *machine;
25692 machine = ggc_cleared_alloc<machine_function> ();
25693
25694 #if ARM_FT_UNKNOWN != 0
25695 machine->func_type = ARM_FT_UNKNOWN;
25696 #endif
25697 machine->static_chain_stack_bytes = -1;
25698 return machine;
25699 }
25700
25701 /* Return an RTX indicating where the return address to the
25702 calling function can be found. */
25703 rtx
25704 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
25705 {
25706 if (count != 0)
25707 return NULL_RTX;
25708
25709 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
25710 }
25711
25712 /* Do anything needed before RTL is emitted for each function. */
25713 void
25714 arm_init_expanders (void)
25715 {
25716 /* Arrange to initialize and mark the machine per-function status. */
25717 init_machine_status = arm_init_machine_status;
25718
25719 /* This is to stop the combine pass optimizing away the alignment
25720 adjustment of va_arg. */
25721 /* ??? It is claimed that this should not be necessary. */
25722 if (cfun)
25723 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
25724 }
25725
25726 /* Check whether FUNC is compiled for a different (ARM/Thumb) mode. */
25727
25728 bool
25729 arm_change_mode_p (tree func)
25730 {
25731 if (TREE_CODE (func) != FUNCTION_DECL)
25732 return false;
25733
25734 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
25735
25736 if (!callee_tree)
25737 callee_tree = target_option_default_node;
25738
25739 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25740 int flags = callee_opts->x_target_flags;
25741
25742 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
25743 }
25744
25745 /* Like arm_compute_initial_elimination_offset. Simpler because there
25746 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25747 to point at the base of the local variables after static stack
25748 space for a function has been allocated. */
25749
25750 HOST_WIDE_INT
25751 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
25752 {
25753 arm_stack_offsets *offsets;
25754
25755 offsets = arm_get_frame_offsets ();
25756
25757 switch (from)
25758 {
25759 case ARG_POINTER_REGNUM:
25760 switch (to)
25761 {
25762 case STACK_POINTER_REGNUM:
25763 return offsets->outgoing_args - offsets->saved_args;
25764
25765 case FRAME_POINTER_REGNUM:
25766 return offsets->soft_frame - offsets->saved_args;
25767
25768 case ARM_HARD_FRAME_POINTER_REGNUM:
25769 return offsets->saved_regs - offsets->saved_args;
25770
25771 case THUMB_HARD_FRAME_POINTER_REGNUM:
25772 return offsets->locals_base - offsets->saved_args;
25773
25774 default:
25775 gcc_unreachable ();
25776 }
25777 break;
25778
25779 case FRAME_POINTER_REGNUM:
25780 switch (to)
25781 {
25782 case STACK_POINTER_REGNUM:
25783 return offsets->outgoing_args - offsets->soft_frame;
25784
25785 case ARM_HARD_FRAME_POINTER_REGNUM:
25786 return offsets->saved_regs - offsets->soft_frame;
25787
25788 case THUMB_HARD_FRAME_POINTER_REGNUM:
25789 return offsets->locals_base - offsets->soft_frame;
25790
25791 default:
25792 gcc_unreachable ();
25793 }
25794 break;
25795
25796 default:
25797 gcc_unreachable ();
25798 }
25799 }
25800
25801 /* Generate the function's prologue. */
25802
25803 void
25804 thumb1_expand_prologue (void)
25805 {
25806 rtx_insn *insn;
25807
25808 HOST_WIDE_INT amount;
25809 HOST_WIDE_INT size;
25810 arm_stack_offsets *offsets;
25811 unsigned long func_type;
25812 int regno;
25813 unsigned long live_regs_mask;
25814 unsigned long l_mask;
25815 unsigned high_regs_pushed = 0;
25816 bool lr_needs_saving;
25817
25818 func_type = arm_current_func_type ();
25819
25820 /* Naked functions don't have prologues. */
25821 if (IS_NAKED (func_type))
25822 {
25823 if (flag_stack_usage_info)
25824 current_function_static_stack_size = 0;
25825 return;
25826 }
25827
25828 if (IS_INTERRUPT (func_type))
25829 {
25830 error ("interrupt Service Routines cannot be coded in Thumb mode");
25831 return;
25832 }
25833
25834 if (is_called_in_ARM_mode (current_function_decl))
25835 emit_insn (gen_prologue_thumb1_interwork ());
25836
25837 offsets = arm_get_frame_offsets ();
25838 live_regs_mask = offsets->saved_regs_mask;
25839 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25840
25841 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25842 l_mask = live_regs_mask & 0x40ff;
25843 /* Then count how many other high registers will need to be pushed. */
25844 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25845
25846 if (crtl->args.pretend_args_size)
25847 {
25848 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25849
25850 if (cfun->machine->uses_anonymous_args)
25851 {
25852 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25853 unsigned long mask;
25854
25855 mask = 1ul << (LAST_ARG_REGNUM + 1);
25856 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25857
25858 insn = thumb1_emit_multi_reg_push (mask, 0);
25859 }
25860 else
25861 {
25862 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25863 stack_pointer_rtx, x));
25864 }
25865 RTX_FRAME_RELATED_P (insn) = 1;
25866 }
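/* Worked example of the mask computed above, assuming LAST_ARG_REGNUM
   is 3 (r3): pretend_args_size == 8 gives num_pushes == 2, so
   mask == (1ul << 4) - (1ul << 2) == 0xc, i.e. a push of {r2, r3}.  */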
25867
25868 if (TARGET_BACKTRACE)
25869 {
25870 HOST_WIDE_INT offset = 0;
25871 unsigned work_register;
25872 rtx work_reg, x, arm_hfp_rtx;
25873
25874 /* We have been asked to create a stack backtrace structure.
25875 The code looks like this:
25876
25877 0 .align 2
25878 0 func:
25879 0 sub SP, #16 Reserve space for 4 registers.
25880 2 push {R7} Push low registers.
25881 4 add R7, SP, #20 Get the stack pointer before the push.
25882 6 str R7, [SP, #8] Store the stack pointer
25883 (before reserving the space).
25884 8 mov R7, PC Get hold of the start of this code + 12.
25885 10 str R7, [SP, #16] Store it.
25886 12 mov R7, FP Get hold of the current frame pointer.
25887 14 str R7, [SP, #4] Store it.
25888 16 mov R7, LR Get hold of the current return address.
25889 18 str R7, [SP, #12] Store it.
25890 20 add R7, SP, #16 Point at the start of the
25891 backtrace structure.
25892 22 mov FP, R7 Put this value into the frame pointer. */
25893
25894 work_register = thumb_find_work_register (live_regs_mask);
25895 work_reg = gen_rtx_REG (SImode, work_register);
25896 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25897
25898 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25899 stack_pointer_rtx, GEN_INT (-16)));
25900 RTX_FRAME_RELATED_P (insn) = 1;
25901
25902 if (l_mask)
25903 {
25904 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25905 RTX_FRAME_RELATED_P (insn) = 1;
25906 lr_needs_saving = false;
25907
25908 offset = bit_count (l_mask) * UNITS_PER_WORD;
25909 }
25910
25911 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25912 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25913
25914 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25915 x = gen_frame_mem (SImode, x);
25916 emit_move_insn (x, work_reg);
25917
25918 /* Make sure that the instruction fetching the PC is in the right place
25919 to calculate "start of backtrace creation code + 12". */
25920 /* ??? The stores using the common WORK_REG ought to be enough to
25921 prevent the scheduler from doing anything weird. Failing that
25922 we could always move all of the following into an UNSPEC_VOLATILE. */
25923 if (l_mask)
25924 {
25925 x = gen_rtx_REG (SImode, PC_REGNUM);
25926 emit_move_insn (work_reg, x);
25927
25928 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25929 x = gen_frame_mem (SImode, x);
25930 emit_move_insn (x, work_reg);
25931
25932 emit_move_insn (work_reg, arm_hfp_rtx);
25933
25934 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25935 x = gen_frame_mem (SImode, x);
25936 emit_move_insn (x, work_reg);
25937 }
25938 else
25939 {
25940 emit_move_insn (work_reg, arm_hfp_rtx);
25941
25942 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25943 x = gen_frame_mem (SImode, x);
25944 emit_move_insn (x, work_reg);
25945
25946 x = gen_rtx_REG (SImode, PC_REGNUM);
25947 emit_move_insn (work_reg, x);
25948
25949 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25950 x = gen_frame_mem (SImode, x);
25951 emit_move_insn (x, work_reg);
25952 }
25953
25954 x = gen_rtx_REG (SImode, LR_REGNUM);
25955 emit_move_insn (work_reg, x);
25956
25957 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25958 x = gen_frame_mem (SImode, x);
25959 emit_move_insn (x, work_reg);
25960
25961 x = GEN_INT (offset + 12);
25962 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25963
25964 emit_move_insn (arm_hfp_rtx, work_reg);
25965 }
25966 /* Optimization: If we are not pushing any low registers but we are going
25967 to push some high registers then delay our first push. This will just
25968 be a push of LR and we can combine it with the push of the first high
25969 register. */
25970 else if ((l_mask & 0xff) != 0
25971 || (high_regs_pushed == 0 && lr_needs_saving))
25972 {
25973 unsigned long mask = l_mask;
25974 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25975 insn = thumb1_emit_multi_reg_push (mask, mask);
25976 RTX_FRAME_RELATED_P (insn) = 1;
25977 lr_needs_saving = false;
25978 }
25979
25980 if (high_regs_pushed)
25981 {
25982 unsigned pushable_regs;
25983 unsigned next_hi_reg;
25984 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25985 : crtl->args.info.nregs;
25986 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25987
25988 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25989 if (live_regs_mask & (1 << next_hi_reg))
25990 break;
25991
25992 /* Here we need to mask out registers used for passing arguments,
25993 even if they could be pushed. This is to avoid using them to
25994 stash the high registers, since such a stash could clobber
25995 argument values that are still needed. */
25996 pushable_regs = l_mask & (~arg_regs_mask);
25997 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
25998
25999 /* Normally, LR can be used as a scratch register once it has been
26000 saved; but if the function examines its own return address then
26001 the value is still live and we need to avoid using it. */
26002 bool return_addr_live
26003 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
26004 LR_REGNUM);
26005
26006 if (lr_needs_saving || return_addr_live)
26007 pushable_regs &= ~(1 << LR_REGNUM);
26008
26009 if (pushable_regs == 0)
26010 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26011
26012 while (high_regs_pushed > 0)
26013 {
26014 unsigned long real_regs_mask = 0;
26015 unsigned long push_mask = 0;
26016
26017 for (regno = LR_REGNUM; regno >= 0; regno --)
26018 {
26019 if (pushable_regs & (1 << regno))
26020 {
26021 emit_move_insn (gen_rtx_REG (SImode, regno),
26022 gen_rtx_REG (SImode, next_hi_reg));
26023
26024 high_regs_pushed --;
26025 real_regs_mask |= (1 << next_hi_reg);
26026 push_mask |= (1 << regno);
26027
26028 if (high_regs_pushed)
26029 {
26030 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26031 next_hi_reg --)
26032 if (live_regs_mask & (1 << next_hi_reg))
26033 break;
26034 }
26035 else
26036 break;
26037 }
26038 }
26039
26040 /* If we had to find a work register and we have not yet
26041 saved the LR then add it to the list of regs to push. */
26042 if (lr_needs_saving)
26043 {
26044 push_mask |= 1 << LR_REGNUM;
26045 real_regs_mask |= 1 << LR_REGNUM;
26046 lr_needs_saving = false;
26047 /* If the return address is not live at this point, we
26048 can add LR to the list of registers that we can use
26049 for pushes. */
26050 if (!return_addr_live)
26051 pushable_regs |= 1 << LR_REGNUM;
26052 }
26053
26054 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
26055 RTX_FRAME_RELATED_P (insn) = 1;
26056 }
26057 }
26058
26059 /* Load the pic register before setting the frame pointer,
26060 so we can use r7 as a temporary work register. */
26061 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26062 arm_load_pic_register (live_regs_mask, NULL_RTX);
26063
26064 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26065 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26066 stack_pointer_rtx);
26067
26068 size = offsets->outgoing_args - offsets->saved_args;
26069 if (flag_stack_usage_info)
26070 current_function_static_stack_size = size;
26071
26072 /* If we have a frame, then do stack checking. FIXME: not implemented. */
26073 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
26074 || flag_stack_clash_protection)
26075 && size)
26076 sorry ("%<-fstack-check=specific%> for Thumb-1");
26077
26078 amount = offsets->outgoing_args - offsets->saved_regs;
26079 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26080 if (amount)
26081 {
26082 if (amount < 512)
26083 {
26084 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26085 GEN_INT (- amount)));
26086 RTX_FRAME_RELATED_P (insn) = 1;
26087 }
26088 else
26089 {
26090 rtx reg, dwarf;
26091
26092 /* The stack decrement is too big for an immediate value in a single
26093 insn. In theory we could issue multiple subtracts, but after
26094 three of them it becomes more space efficient to place the full
26095 value in the constant pool and load into a register. (Also the
26096 ARM debugger really likes to see only one stack decrement per
26097 function). So instead we look for a scratch register into which
26098 we can load the decrement, and then we subtract this from the
26099 stack pointer. Unfortunately on the thumb the only available
26100 scratch registers are the argument registers, and we cannot use
26101 these as they may hold arguments to the function. Instead we
26102 attempt to locate a call preserved register which is used by this
26103 function. If we can find one, then we know that it will have
26104 been pushed at the start of the prologue and so we can corrupt
26105 it now. */
26106 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26107 if (live_regs_mask & (1 << regno))
26108 break;
26109
26110 gcc_assert (regno <= LAST_LO_REGNUM);
26111
26112 reg = gen_rtx_REG (SImode, regno);
26113
26114 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26115
26116 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26117 stack_pointer_rtx, reg));
26118
26119 dwarf = gen_rtx_SET (stack_pointer_rtx,
26120 plus_constant (Pmode, stack_pointer_rtx,
26121 -amount));
26122 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26123 RTX_FRAME_RELATED_P (insn) = 1;
26124 }
26125 }
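/* Sketch of what the branch above might emit for amount == 1024 when,
   say, r4 is in live_regs_mask (so it was saved in the prologue and can
   be corrupted here):

	ldr	r4, =-1024		@ gen_movsi; may become a literal-pool load
	add	sp, sp, r4

   together with a REG_FRAME_RELATED_EXPR note recording sp = sp - 1024.
   The exact instructions depend on how gen_movsi expands the constant.  */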
26126
26127 if (frame_pointer_needed)
26128 thumb_set_frame_pointer (offsets);
26129
26130 /* If we are profiling, make sure no instructions are scheduled before
26131 the call to mcount. Similarly if the user has requested no
26132 scheduling in the prolog. Similarly if we want non-call exceptions
26133 using the EABI unwinder, to prevent faulting instructions from being
26134 swapped with a stack adjustment. */
26135 if (crtl->profile || !TARGET_SCHED_PROLOG
26136 || (arm_except_unwind_info (&global_options) == UI_TARGET
26137 && cfun->can_throw_non_call_exceptions))
26138 emit_insn (gen_blockage ());
26139
26140 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26141 if (live_regs_mask & 0xff)
26142 cfun->machine->lr_save_eliminated = 0;
26143 }
26144
26145 /* Clear caller-saved registers that are not used to pass return values, and
26146 leaked condition flags, before exiting a cmse_nonsecure_entry function. */
26147
26148 void
26149 cmse_nonsecure_entry_clear_before_return (void)
26150 {
26151 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
26152 uint32_t padding_bits_to_clear = 0;
26153 auto_sbitmap to_clear_bitmap (maxregno + 1);
26154 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
26155 tree result_type;
26156
26157 bitmap_clear (to_clear_bitmap);
26158 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
26159 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
26160
26161 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
26162 registers. */
26163 if (TARGET_HARD_FLOAT)
26164 {
26165 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
26166
26167 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
26168
26169 /* Make sure we don't clear the two scratch registers used to clear the
26170 relevant FPSCR bits in output_return_instruction. */
26171 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
26172 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
26173 emit_use (gen_rtx_REG (SImode, 4));
26174 bitmap_clear_bit (to_clear_bitmap, 4);
26175 }
26176
26177 /* If the user has defined registers to be caller saved, these are no longer
26178 restored by the function before returning and must thus be cleared for
26179 security purposes. */
26180 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
26181 {
26182 /* We do not touch registers that can be used to pass arguments as per
26183 the AAPCS, since these should never be made callee-saved by user
26184 options. */
26185 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
26186 continue;
26187 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
26188 continue;
26189 if (call_used_or_fixed_reg_p (regno))
26190 bitmap_set_bit (to_clear_bitmap, regno);
26191 }
26192
26193 /* Make sure we do not clear the registers used to return the result in. */
26194 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
26195 if (!VOID_TYPE_P (result_type))
26196 {
26197 uint64_t to_clear_return_mask;
26198 result_rtl = arm_function_value (result_type, current_function_decl, 0);
26199
26200 /* No need to check that we return in registers, because we don't
26201 support returning on stack yet. */
26202 gcc_assert (REG_P (result_rtl));
26203 to_clear_return_mask
26204 = compute_not_to_clear_mask (result_type, result_rtl, 0,
26205 &padding_bits_to_clear);
26206 if (to_clear_return_mask)
26207 {
26208 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
26209 for (regno = R0_REGNUM; regno <= maxregno; regno++)
26210 {
26211 if (to_clear_return_mask & (1ULL << regno))
26212 bitmap_clear_bit (to_clear_bitmap, regno);
26213 }
26214 }
26215 }
26216
26217 if (padding_bits_to_clear != 0)
26218 {
26219 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
26220 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
26221
26222 /* Padding_bits_to_clear is not 0 so we know we are dealing with
26223 returning a composite type, which only uses r0. Let's make sure that
26224 r1-r3 are cleared too. */
26225 bitmap_clear (to_clear_arg_regs_bitmap);
26226 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
26227 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
26228 }
26229
26230 /* Clear full registers that leak before returning. */
26231 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
26232 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
26233 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
26234 clearing_reg);
26235 }
26236
26237 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
26238 single POP instruction can be generated. LR should be replaced by PC.
26239 All the checks required are already done by USE_RETURN_INSN (). Hence,
26240 all we really need to check here is whether a single register or
26241 multiple registers are to be popped on return. */
26242 void
26243 thumb2_expand_return (bool simple_return)
26244 {
26245 int i, num_regs;
26246 unsigned long saved_regs_mask;
26247 arm_stack_offsets *offsets;
26248
26249 offsets = arm_get_frame_offsets ();
26250 saved_regs_mask = offsets->saved_regs_mask;
26251
26252 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
26253 if (saved_regs_mask & (1 << i))
26254 num_regs++;
26255
26256 if (!simple_return && saved_regs_mask)
26257 {
26258 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
26259 functions or adapt code to handle according to ACLE. This path should
26260 not be reachable for cmse_nonsecure_entry functions though we prefer
26261 to assert it for now to ensure that future code changes do not silently
26262 change this behavior. */
26263 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
26264 if (num_regs == 1)
26265 {
26266 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26267 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
26268 rtx addr = gen_rtx_MEM (SImode,
26269 gen_rtx_POST_INC (SImode,
26270 stack_pointer_rtx));
26271 set_mem_alias_set (addr, get_frame_alias_set ());
26272 XVECEXP (par, 0, 0) = ret_rtx;
26273 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
26274 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
26275 emit_jump_insn (par);
26276 }
26277 else
26278 {
26279 saved_regs_mask &= ~ (1 << LR_REGNUM);
26280 saved_regs_mask |= (1 << PC_REGNUM);
26281 arm_emit_multi_reg_pop (saved_regs_mask);
26282 }
26283 }
26284 else
26285 {
26286 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26287 cmse_nonsecure_entry_clear_before_return ();
26288 emit_jump_insn (simple_return_rtx);
26289 }
26290 }
26291
26292 void
26293 thumb1_expand_epilogue (void)
26294 {
26295 HOST_WIDE_INT amount;
26296 arm_stack_offsets *offsets;
26297 int regno;
26298
26299 /* Naked functions don't have epilogues. */
26300 if (IS_NAKED (arm_current_func_type ()))
26301 return;
26302
26303 offsets = arm_get_frame_offsets ();
26304 amount = offsets->outgoing_args - offsets->saved_regs;
26305
26306 if (frame_pointer_needed)
26307 {
26308 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
26309 amount = offsets->locals_base - offsets->saved_regs;
26310 }
26311 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
26312
26313 gcc_assert (amount >= 0);
26314 if (amount)
26315 {
26316 emit_insn (gen_blockage ());
26317
26318 if (amount < 512)
26319 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26320 GEN_INT (amount)));
26321 else
26322 {
26323 /* r3 is always free in the epilogue. */
26324 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
26325
26326 emit_insn (gen_movsi (reg, GEN_INT (amount)));
26327 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
26328 }
26329 }
26330
26331 /* Emit a USE (stack_pointer_rtx), so that
26332 the stack adjustment will not be deleted. */
26333 emit_insn (gen_force_register_use (stack_pointer_rtx));
26334
26335 if (crtl->profile || !TARGET_SCHED_PROLOG)
26336 emit_insn (gen_blockage ());
26337
26338 /* Emit a clobber for each register that will be restored in the epilogue,
26339 so that flow2 will get register lifetimes correct. */
26340 for (regno = 0; regno < 13; regno++)
26341 if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
26342 emit_clobber (gen_rtx_REG (SImode, regno));
26343
26344 if (! df_regs_ever_live_p (LR_REGNUM))
26345 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
26346
26347 /* Clear all caller-saved regs that are not used to return. */
26348 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26349 cmse_nonsecure_entry_clear_before_return ();
26350 }
26351
26352 /* Epilogue code for APCS frame. */
26353 static void
26354 arm_expand_epilogue_apcs_frame (bool really_return)
26355 {
26356 unsigned long func_type;
26357 unsigned long saved_regs_mask;
26358 int num_regs = 0;
26359 int i;
26360 int floats_from_frame = 0;
26361 arm_stack_offsets *offsets;
26362
26363 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
26364 func_type = arm_current_func_type ();
26365
26366 /* Get frame offsets for ARM. */
26367 offsets = arm_get_frame_offsets ();
26368 saved_regs_mask = offsets->saved_regs_mask;
26369
26370 /* Find the offset of the floating-point save area in the frame. */
26371 floats_from_frame
26372 = (offsets->saved_args
26373 + arm_compute_static_chain_stack_bytes ()
26374 - offsets->frame);
26375
26376 /* Compute how many core registers are saved and how far away the floats are. */
26377 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26378 if (saved_regs_mask & (1 << i))
26379 {
26380 num_regs++;
26381 floats_from_frame += 4;
26382 }
26383
26384 if (TARGET_HARD_FLOAT)
26385 {
26386 int start_reg;
26387 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
26388
26389 /* The offset is from IP_REGNUM. */
26390 int saved_size = arm_get_vfp_saved_size ();
26391 if (saved_size > 0)
26392 {
26393 rtx_insn *insn;
26394 floats_from_frame += saved_size;
26395 insn = emit_insn (gen_addsi3 (ip_rtx,
26396 hard_frame_pointer_rtx,
26397 GEN_INT (-floats_from_frame)));
26398 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
26399 ip_rtx, hard_frame_pointer_rtx);
26400 }
26401
26402 /* Generate VFP register multi-pop. */
26403 start_reg = FIRST_VFP_REGNUM;
26404
26405 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
26406 /* Look for a case where a reg does not need restoring. */
26407 if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
26408 && (!df_regs_ever_live_p (i + 1)
26409 || call_used_or_fixed_reg_p (i + 1)))
26410 {
26411 if (start_reg != i)
26412 arm_emit_vfp_multi_reg_pop (start_reg,
26413 (i - start_reg) / 2,
26414 gen_rtx_REG (SImode,
26415 IP_REGNUM));
26416 start_reg = i + 2;
26417 }
26418
26419 /* Restore the remaining regs that we have discovered (or possibly
26420 even all of them, if the conditional in the for loop never
26421 fired). */
26422 if (start_reg != i)
26423 arm_emit_vfp_multi_reg_pop (start_reg,
26424 (i - start_reg) / 2,
26425 gen_rtx_REG (SImode, IP_REGNUM));
26426 }
26427
26428 if (TARGET_IWMMXT)
26429 {
26430 /* The frame pointer is guaranteed to be non-double-word aligned, as
26431 it is set to double-word-aligned old_stack_pointer - 4. */
26432 rtx_insn *insn;
26433 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
26434
26435 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
26436 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
26437 {
26438 rtx addr = gen_frame_mem (V2SImode,
26439 plus_constant (Pmode, hard_frame_pointer_rtx,
26440 - lrm_count * 4));
26441 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26442 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26443 gen_rtx_REG (V2SImode, i),
26444 NULL_RTX);
26445 lrm_count += 2;
26446 }
26447 }
26448
26449 /* saved_regs_mask should contain IP, which holds the old stack pointer
26450 at the time the activation record was created. Since SP and IP are
26451 adjacent registers, we can restore the value directly into SP. */
26452 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
26453 saved_regs_mask &= ~(1 << IP_REGNUM);
26454 saved_regs_mask |= (1 << SP_REGNUM);
26455
26456 /* There are two registers left in saved_regs_mask - LR and PC. We
26457 only need to restore LR (the return address), but to
26458 save time we can load it directly into PC, unless we need a
26459 special function exit sequence, or we are not really returning. */
26460 if (really_return
26461 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
26462 && !crtl->calls_eh_return)
26463 /* Delete LR from the register mask, so that LR on
26464 the stack is loaded into the PC in the register mask. */
26465 saved_regs_mask &= ~(1 << LR_REGNUM);
26466 else
26467 saved_regs_mask &= ~(1 << PC_REGNUM);
26468
26469 num_regs = bit_count (saved_regs_mask);
26470 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
26471 {
26472 rtx_insn *insn;
26473 emit_insn (gen_blockage ());
26474 /* Unwind the stack to just below the saved registers. */
26475 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26476 hard_frame_pointer_rtx,
26477 GEN_INT (- 4 * num_regs)));
26478
26479 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
26480 stack_pointer_rtx, hard_frame_pointer_rtx);
26481 }
26482
26483 arm_emit_multi_reg_pop (saved_regs_mask);
26484
26485 if (IS_INTERRUPT (func_type))
26486 {
26487 /* Interrupt handlers will have pushed the
26488 IP onto the stack, so restore it now. */
26489 rtx_insn *insn;
26490 rtx addr = gen_rtx_MEM (SImode,
26491 gen_rtx_POST_INC (SImode,
26492 stack_pointer_rtx));
26493 set_mem_alias_set (addr, get_frame_alias_set ());
26494 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
26495 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26496 gen_rtx_REG (SImode, IP_REGNUM),
26497 NULL_RTX);
26498 }
26499
26500 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
26501 return;
26502
26503 if (crtl->calls_eh_return)
26504 emit_insn (gen_addsi3 (stack_pointer_rtx,
26505 stack_pointer_rtx,
26506 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26507
26508 if (IS_STACKALIGN (func_type))
26509 /* Restore the original stack pointer. Before prologue, the stack was
26510 realigned and the original stack pointer saved in r0. For details,
26511 see comment in arm_expand_prologue. */
26512 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
26513
26514 emit_jump_insn (simple_return_rtx);
26515 }
26516
26517 /* Generate RTL to represent ARM epilogue. Really_return is true if the
26518 function is not a sibcall. */
26519 void
26520 arm_expand_epilogue (bool really_return)
26521 {
26522 unsigned long func_type;
26523 unsigned long saved_regs_mask;
26524 int num_regs = 0;
26525 int i;
26526 int amount;
26527 arm_stack_offsets *offsets;
26528
26529 func_type = arm_current_func_type ();
26530
26531 /* Naked functions don't have epilogues. Hence, generate a return pattern and
26532 let output_return_instruction take care of any instruction emission. */
26533 if (IS_NAKED (func_type)
26534 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
26535 {
26536 if (really_return)
26537 emit_jump_insn (simple_return_rtx);
26538 return;
26539 }
26540
26541 /* If we are throwing an exception, then we really must be doing a
26542 return, so we can't tail-call. */
26543 gcc_assert (!crtl->calls_eh_return || really_return);
26544
26545 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
26546 {
26547 arm_expand_epilogue_apcs_frame (really_return);
26548 return;
26549 }
26550
26551 /* Get frame offsets for ARM. */
26552 offsets = arm_get_frame_offsets ();
26553 saved_regs_mask = offsets->saved_regs_mask;
26554 num_regs = bit_count (saved_regs_mask);
26555
26556 if (frame_pointer_needed)
26557 {
26558 rtx_insn *insn;
26559 /* Restore stack pointer if necessary. */
26560 if (TARGET_ARM)
26561 {
26562 /* In ARM mode, the frame pointer points to the first saved register.
26563 Restore the stack pointer to the last saved register. */
26564 amount = offsets->frame - offsets->saved_regs;
26565
26566 /* Force out any pending memory operations that reference stacked data
26567 before stack de-allocation occurs. */
26568 emit_insn (gen_blockage ());
26569 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26570 hard_frame_pointer_rtx,
26571 GEN_INT (amount)));
26572 arm_add_cfa_adjust_cfa_note (insn, amount,
26573 stack_pointer_rtx,
26574 hard_frame_pointer_rtx);
26575
26576 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26577 deleted. */
26578 emit_insn (gen_force_register_use (stack_pointer_rtx));
26579 }
26580 else
26581 {
26582 /* In Thumb-2 mode, the frame pointer points to the last saved
26583 register. */
26584 amount = offsets->locals_base - offsets->saved_regs;
26585 if (amount)
26586 {
26587 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
26588 hard_frame_pointer_rtx,
26589 GEN_INT (amount)));
26590 arm_add_cfa_adjust_cfa_note (insn, amount,
26591 hard_frame_pointer_rtx,
26592 hard_frame_pointer_rtx);
26593 }
26594
26595 /* Force out any pending memory operations that reference stacked data
26596 before stack de-allocation occurs. */
26597 emit_insn (gen_blockage ());
26598 insn = emit_insn (gen_movsi (stack_pointer_rtx,
26599 hard_frame_pointer_rtx));
26600 arm_add_cfa_adjust_cfa_note (insn, 0,
26601 stack_pointer_rtx,
26602 hard_frame_pointer_rtx);
26603 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26604 deleted. */
26605 emit_insn (gen_force_register_use (stack_pointer_rtx));
26606 }
26607 }
26608 else
26609 {
26610 /* Pop off outgoing args and local frame to adjust stack pointer to
26611 last saved register. */
26612 amount = offsets->outgoing_args - offsets->saved_regs;
26613 if (amount)
26614 {
26615 rtx_insn *tmp;
26616 /* Force out any pending memory operations that reference stacked data
26617 before stack de-allocation occurs. */
26618 emit_insn (gen_blockage ());
26619 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
26620 stack_pointer_rtx,
26621 GEN_INT (amount)));
26622 arm_add_cfa_adjust_cfa_note (tmp, amount,
26623 stack_pointer_rtx, stack_pointer_rtx);
26624 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
26625 not deleted. */
26626 emit_insn (gen_force_register_use (stack_pointer_rtx));
26627 }
26628 }
26629
26630 if (TARGET_HARD_FLOAT)
26631 {
26632 /* Generate VFP register multi-pop. */
26633 int end_reg = LAST_VFP_REGNUM + 1;
26634
26635 /* Scan the registers in reverse order. We need to match
26636 any groupings made in the prologue and generate matching
26637 vldm operations. The need to match groups is because,
26638 unlike pop, vldm can only do consecutive regs. */
26639 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
26640 /* Look for a case where a reg does not need restoring. */
26641 if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
26642 && (!df_regs_ever_live_p (i + 1)
26643 || call_used_or_fixed_reg_p (i + 1)))
26644 {
26645 /* Restore the regs discovered so far (from reg+2 to
26646 end_reg). */
26647 if (end_reg > i + 2)
26648 arm_emit_vfp_multi_reg_pop (i + 2,
26649 (end_reg - (i + 2)) / 2,
26650 stack_pointer_rtx);
26651 end_reg = i;
26652 }
26653
26654 /* Restore the remaining regs that we have discovered (or possibly
26655 even all of them, if the conditional in the for loop never
26656 fired). */
26657 if (end_reg > i + 2)
26658 arm_emit_vfp_multi_reg_pop (i + 2,
26659 (end_reg - (i + 2)) / 2,
26660 stack_pointer_rtx);
26661 }
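/* A hedged standalone sketch of the grouping walk above: given a
   predicate over D registers saying which ones need restoring, report
   each (first, count) run that one vldm could cover.  The real code
   tests df_regs_ever_live_p/call_used_or_fixed_reg_p on S-register
   pairs; a plain array stands in for that here.  Hypothetical helper,
   not part of this file.  */
#if 0 /* illustration only */
static void
sketch_vfp_pop_groups (const bool *dreg_needs_restore, int num_dregs,
		       void (*emit_group) (int first_dreg, int count))
{
  int end = num_dregs;		/* One past the last member of the group.  */
  for (int i = num_dregs - 1; i >= 0; i--)
    if (!dreg_needs_restore[i])
      {
	/* Registers above this dead one form a consecutive run.  */
	if (end > i + 1)
	  emit_group (i + 1, end - (i + 1));
	end = i;
      }
  /* Flush the final run, possibly covering every register.  */
  if (end > 0)
    emit_group (0, end);
}
#endif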
26662
26663 if (TARGET_IWMMXT)
26664 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
26665 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
26666 {
26667 rtx_insn *insn;
26668 rtx addr = gen_rtx_MEM (V2SImode,
26669 gen_rtx_POST_INC (SImode,
26670 stack_pointer_rtx));
26671 set_mem_alias_set (addr, get_frame_alias_set ());
26672 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26673 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26674 gen_rtx_REG (V2SImode, i),
26675 NULL_RTX);
26676 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26677 stack_pointer_rtx, stack_pointer_rtx);
26678 }
26679
26680 if (saved_regs_mask)
26681 {
26682 rtx insn;
26683 bool return_in_pc = false;
26684
26685 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
26686 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
26687 && !IS_CMSE_ENTRY (func_type)
26688 && !IS_STACKALIGN (func_type)
26689 && really_return
26690 && crtl->args.pretend_args_size == 0
26691 && saved_regs_mask & (1 << LR_REGNUM)
26692 && !crtl->calls_eh_return)
26693 {
26694 saved_regs_mask &= ~(1 << LR_REGNUM);
26695 saved_regs_mask |= (1 << PC_REGNUM);
26696 return_in_pc = true;
26697 }
26698
26699 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
26700 {
26701 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26702 if (saved_regs_mask & (1 << i))
26703 {
26704 rtx addr = gen_rtx_MEM (SImode,
26705 gen_rtx_POST_INC (SImode,
26706 stack_pointer_rtx));
26707 set_mem_alias_set (addr, get_frame_alias_set ());
26708
26709 if (i == PC_REGNUM)
26710 {
26711 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26712 XVECEXP (insn, 0, 0) = ret_rtx;
26713 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
26714 addr);
26715 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
26716 insn = emit_jump_insn (insn);
26717 }
26718 else
26719 {
26720 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
26721 addr));
26722 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26723 gen_rtx_REG (SImode, i),
26724 NULL_RTX);
26725 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26726 stack_pointer_rtx,
26727 stack_pointer_rtx);
26728 }
26729 }
26730 }
26731 else
26732 {
26733 if (TARGET_LDRD
26734 && current_tune->prefer_ldrd_strd
26735 && !optimize_function_for_size_p (cfun))
26736 {
26737 if (TARGET_THUMB2)
26738 thumb2_emit_ldrd_pop (saved_regs_mask);
26739 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
26740 arm_emit_ldrd_pop (saved_regs_mask);
26741 else
26742 arm_emit_multi_reg_pop (saved_regs_mask);
26743 }
26744 else
26745 arm_emit_multi_reg_pop (saved_regs_mask);
26746 }
26747
26748 if (return_in_pc)
26749 return;
26750 }
26751
26752 amount
26753 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
26754 if (amount)
26755 {
26756 int i, j;
26757 rtx dwarf = NULL_RTX;
26758 rtx_insn *tmp =
26759 emit_insn (gen_addsi3 (stack_pointer_rtx,
26760 stack_pointer_rtx,
26761 GEN_INT (amount)));
26762
26763 RTX_FRAME_RELATED_P (tmp) = 1;
26764
26765 if (cfun->machine->uses_anonymous_args)
26766 {
26767 /* Restore pretend args. See arm_expand_prologue for how the
26768 pretend args are saved on the stack. */
26769 int num_regs = crtl->args.pretend_args_size / 4;
26770 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
26771 for (j = 0, i = 0; j < num_regs; i++)
26772 if (saved_regs_mask & (1 << i))
26773 {
26774 rtx reg = gen_rtx_REG (SImode, i);
26775 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
26776 j++;
26777 }
26778 REG_NOTES (tmp) = dwarf;
26779 }
26780 arm_add_cfa_adjust_cfa_note (tmp, amount,
26781 stack_pointer_rtx, stack_pointer_rtx);
26782 }
26783
26784 /* Clear all caller-saved regs that are not used to return. */
26785 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26786 {
26787 /* CMSE_ENTRY always returns. */
26788 gcc_assert (really_return);
26789 cmse_nonsecure_entry_clear_before_return ();
26790 }
26791
26792 if (!really_return)
26793 return;
26794
26795 if (crtl->calls_eh_return)
26796 emit_insn (gen_addsi3 (stack_pointer_rtx,
26797 stack_pointer_rtx,
26798 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26799
26800 if (IS_STACKALIGN (func_type))
26801 /* Restore the original stack pointer. Before the prologue, the stack was
26802 realigned and the original stack pointer saved in r0. For details,
26803 see comment in arm_expand_prologue. */
26804 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
26805
26806 emit_jump_insn (simple_return_rtx);
26807 }
26808
26809 /* Implementation of insn prologue_thumb1_interwork. This is the first
26810 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26811
26812 const char *
26813 thumb1_output_interwork (void)
26814 {
26815 const char * name;
26816 FILE *f = asm_out_file;
26817
26818 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26819 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26820 == SYMBOL_REF);
26821 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26822
26823 /* Generate code sequence to switch us into Thumb mode. */
26824 /* The .code 32 directive has already been emitted by
26825 ASM_DECLARE_FUNCTION_NAME. */
26826 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26827 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26828
26829 /* Generate a label, so that the debugger will notice the
26830 change in instruction sets. This label is also used by
26831 the assembler to bypass the ARM code when this function
26832 is called from a Thumb encoded function elsewhere in the
26833 same file. Hence the definition of STUB_NAME here must
26834 agree with the definition in gas/config/tc-arm.c. */
26835
26836 #define STUB_NAME ".real_start_of"
26837
26838 fprintf (f, "\t.code\t16\n");
26839 #ifdef ARM_PE
26840 if (arm_dllexport_name_p (name))
26841 name = arm_strip_name_encoding (name);
26842 #endif
26843 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26844 fprintf (f, "\t.thumb_func\n");
26845 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26846
26847 return "";
26848 }
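
/* Illustrative sketch (not part of the compiled code): for a function `foo'
   the sequence printed by thumb1_output_interwork looks roughly like

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_offoo
	.thumb_func
   .real_start_offoo:

   In ARM state the pc reads eight bytes ahead, so `orr ip, pc, #1' forms
   the address of the Thumb code that follows with the low bit set, and
   `bx ip' switches to Thumb state.  The `%U' user-label prefix is shown
   here as empty for brevity.  */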
26849
26850 /* Handle the case of a double word load into a low register from
26851 a computed memory address. The computed address may involve a
26852 register which is overwritten by the load. */
26853 const char *
26854 thumb_load_double_from_address (rtx *operands)
26855 {
26856 rtx addr;
26857 rtx base;
26858 rtx offset;
26859 rtx arg1;
26860 rtx arg2;
26861
26862 gcc_assert (REG_P (operands[0]));
26863 gcc_assert (MEM_P (operands[1]));
26864
26865 /* Get the memory address. */
26866 addr = XEXP (operands[1], 0);
26867
26868 /* Work out how the memory address is computed. */
26869 switch (GET_CODE (addr))
26870 {
26871 case REG:
26872 operands[2] = adjust_address (operands[1], SImode, 4);
26873
26874 if (REGNO (operands[0]) == REGNO (addr))
26875 {
26876 output_asm_insn ("ldr\t%H0, %2", operands);
26877 output_asm_insn ("ldr\t%0, %1", operands);
26878 }
26879 else
26880 {
26881 output_asm_insn ("ldr\t%0, %1", operands);
26882 output_asm_insn ("ldr\t%H0, %2", operands);
26883 }
26884 break;
26885
26886 case CONST:
26887 /* Compute <address> + 4 for the high order load. */
26888 operands[2] = adjust_address (operands[1], SImode, 4);
26889
26890 output_asm_insn ("ldr\t%0, %1", operands);
26891 output_asm_insn ("ldr\t%H0, %2", operands);
26892 break;
26893
26894 case PLUS:
26895 arg1 = XEXP (addr, 0);
26896 arg2 = XEXP (addr, 1);
26897
26898 if (CONSTANT_P (arg1))
26899 base = arg2, offset = arg1;
26900 else
26901 base = arg1, offset = arg2;
26902
26903 gcc_assert (REG_P (base));
26904
26905 /* Catch the case of <address> = <reg> + <reg> */
26906 if (REG_P (offset))
26907 {
26908 int reg_offset = REGNO (offset);
26909 int reg_base = REGNO (base);
26910 int reg_dest = REGNO (operands[0]);
26911
26912 /* Add the base and offset registers together into the
26913 higher destination register. */
26914 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26915 reg_dest + 1, reg_base, reg_offset);
26916
26917 /* Load the lower destination register from the address in
26918 the higher destination register. */
26919 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26920 reg_dest, reg_dest + 1);
26921
26922 /* Load the higher destination register from its own address
26923 plus 4. */
26924 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26925 reg_dest + 1, reg_dest + 1);
26926 }
26927 else
26928 {
26929 /* Compute <address> + 4 for the high order load. */
26930 operands[2] = adjust_address (operands[1], SImode, 4);
26931
26932 /* If the computed address is held in the low order register
26933 then load the high order register first, otherwise always
26934 load the low order register first. */
26935 if (REGNO (operands[0]) == REGNO (base))
26936 {
26937 output_asm_insn ("ldr\t%H0, %2", operands);
26938 output_asm_insn ("ldr\t%0, %1", operands);
26939 }
26940 else
26941 {
26942 output_asm_insn ("ldr\t%0, %1", operands);
26943 output_asm_insn ("ldr\t%H0, %2", operands);
26944 }
26945 }
26946 break;
26947
26948 case LABEL_REF:
26949 /* With no registers to worry about we can just load the value
26950 directly. */
26951 operands[2] = adjust_address (operands[1], SImode, 4);
26952
26953 output_asm_insn ("ldr\t%H0, %2", operands);
26954 output_asm_insn ("ldr\t%0, %1", operands);
26955 break;
26956
26957 default:
26958 gcc_unreachable ();
26959 }
26960
26961 return "";
26962 }
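
/* Illustrative sketch: with operands[0] = r2 (so the destination pair is
   r2/r3, %H0 naming the second register of the pair) and a memory operand
   whose address register is also r2, the REG case above orders the loads
   so that the base register is overwritten last:

	ldr	r3, [r2, #4]
	ldr	r2, [r2]

   For a non-overlapping base the low word is loaded first instead.  */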
26963
26964 const char *
26965 thumb_output_move_mem_multiple (int n, rtx *operands)
26966 {
26967 switch (n)
26968 {
26969 case 2:
26970 if (REGNO (operands[4]) > REGNO (operands[5]))
26971 std::swap (operands[4], operands[5]);
26972
26973 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26974 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26975 break;
26976
26977 case 3:
26978 if (REGNO (operands[4]) > REGNO (operands[5]))
26979 std::swap (operands[4], operands[5]);
26980 if (REGNO (operands[5]) > REGNO (operands[6]))
26981 std::swap (operands[5], operands[6]);
26982 if (REGNO (operands[4]) > REGNO (operands[5]))
26983 std::swap (operands[4], operands[5]);
26984
26985 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26986 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26987 break;
26988
26989 default:
26990 gcc_unreachable ();
26991 }
26992
26993 return "";
26994 }
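
/* Note (illustrative): the conditional swaps above simply sort the two or
   three scratch registers into ascending order so the register lists are
   printed in the conventional increasing order.  For example, with scratch
   registers r5, r3 and r4 the three-register case emits

	ldmia	r1!, {r3, r4, r5}
	stmia	r0!, {r3, r4, r5}
*/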
26995
26996 /* Output a call-via instruction for thumb state. */
26997 const char *
26998 thumb_call_via_reg (rtx reg)
26999 {
27000 int regno = REGNO (reg);
27001 rtx *labelp;
27002
27003 gcc_assert (regno < LR_REGNUM);
27004
27005 /* If we are in the normal text section we can use a single instance
27006 per compilation unit. If we are doing function sections, then we need
27007 an entry per section, since we can't rely on reachability. */
27008 if (in_section == text_section)
27009 {
27010 thumb_call_reg_needed = 1;
27011
27012 if (thumb_call_via_label[regno] == NULL)
27013 thumb_call_via_label[regno] = gen_label_rtx ();
27014 labelp = thumb_call_via_label + regno;
27015 }
27016 else
27017 {
27018 if (cfun->machine->call_via[regno] == NULL)
27019 cfun->machine->call_via[regno] = gen_label_rtx ();
27020 labelp = cfun->machine->call_via + regno;
27021 }
27022
27023 output_asm_insn ("bl\t%a0", labelp);
27024 return "";
27025 }
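
/* Illustrative sketch: on Thumb targets without a register-indirect call
   instruction (typically pre-ARMv5T, which lacks `blx <reg>'), a call
   through, say, r4 is emitted as

	bl	.Lxx

   where .Lxx is a shared per-file (or, with function sections, per-section)
   label; arm_file_end below then emits the matching trampoline

   .Lxx:
	bx	r4

   once for each register that needed one.  The label name here is made up
   for illustration.  */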
27026
27027 /* Routines for generating rtl. */
27028 void
27029 thumb_expand_cpymemqi (rtx *operands)
27030 {
27031 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27032 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27033 HOST_WIDE_INT len = INTVAL (operands[2]);
27034 HOST_WIDE_INT offset = 0;
27035
27036 while (len >= 12)
27037 {
27038 emit_insn (gen_cpymem12b (out, in, out, in));
27039 len -= 12;
27040 }
27041
27042 if (len >= 8)
27043 {
27044 emit_insn (gen_cpymem8b (out, in, out, in));
27045 len -= 8;
27046 }
27047
27048 if (len >= 4)
27049 {
27050 rtx reg = gen_reg_rtx (SImode);
27051 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27052 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27053 len -= 4;
27054 offset += 4;
27055 }
27056
27057 if (len >= 2)
27058 {
27059 rtx reg = gen_reg_rtx (HImode);
27060 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27061 plus_constant (Pmode, in,
27062 offset))));
27063 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27064 offset)),
27065 reg));
27066 len -= 2;
27067 offset += 2;
27068 }
27069
27070 if (len)
27071 {
27072 rtx reg = gen_reg_rtx (QImode);
27073 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27074 plus_constant (Pmode, in,
27075 offset))));
27076 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27077 offset)),
27078 reg));
27079 }
27080 }
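
/* Worked example (illustrative): a 23-byte copy is expanded as one 12-byte
   block move (cpymem12b) and one 8-byte block move (cpymem8b), both of
   which post-increment the pointer registers, followed by a halfword move
   at offset 0 and a byte move at offset 2 of the remaining tail.  */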
27081
27082 void
27083 thumb_reload_out_hi (rtx *operands)
27084 {
27085 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27086 }
27087
27088 /* Return the length of a function name prefix
27089 that starts with the character 'c'. */
27090 static int
27091 arm_get_strip_length (int c)
27092 {
27093 switch (c)
27094 {
27095 ARM_NAME_ENCODING_LENGTHS
27096 default: return 0;
27097 }
27098 }
27099
27100 /* Return a pointer to a function's name with any
27101 and all prefix encodings stripped from it. */
27102 const char *
27103 arm_strip_name_encoding (const char *name)
27104 {
27105 int skip;
27106
27107 while ((skip = arm_get_strip_length (* name)))
27108 name += skip;
27109
27110 return name;
27111 }
27112
27113 /* If there is a '*' anywhere in the name's prefix, then
27114 emit the stripped name verbatim, otherwise prepend an
27115 underscore if leading underscores are being used. */
27116 void
27117 arm_asm_output_labelref (FILE *stream, const char *name)
27118 {
27119 int skip;
27120 int verbatim = 0;
27121
27122 while ((skip = arm_get_strip_length (* name)))
27123 {
27124 verbatim |= (*name == '*');
27125 name += skip;
27126 }
27127
27128 if (verbatim)
27129 fputs (name, stream);
27130 else
27131 asm_fprintf (stream, "%U%s", name);
27132 }
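
/* Illustrative sketch: a name encoded as "*foo" is emitted verbatim as
   "foo" (no user-label prefix applied), while any other prefix recognized
   by ARM_NAME_ENCODING_LENGTHS is stripped and the remainder is printed
   with the usual %U prefix.  */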
27133
27134 /* This function is used to emit an EABI tag and its associated value.
27135 We emit the numerical value of the tag in case the assembler does not
27136 support textual tags (e.g. gas prior to 2.20). If requested we include
27137 the tag name in a comment so that anyone reading the assembler output
27138 will know which tag is being set.
27139
27140 This function is not static because arm-c.c needs it too. */
27141
27142 void
27143 arm_emit_eabi_attribute (const char *name, int num, int val)
27144 {
27145 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27146 if (flag_verbose_asm || flag_debug_asm)
27147 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27148 asm_fprintf (asm_out_file, "\n");
27149 }
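
/* Illustrative sketch: a call such as
   arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) prints roughly

	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args

   where the trailing comment (using the target's comment marker, '@' on
   ELF) only appears under -fverbose-asm or -dA.  */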
27150
27151 /* This function is used to print CPU tuning information as a comment
27152 in the assembler file. Pointers are not printed for now. */
27153
27154 void
27155 arm_print_tune_info (void)
27156 {
27157 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
27158 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
27159 current_tune->constant_limit);
27160 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27161 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
27162 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27163 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
27164 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27165 "prefetch.l1_cache_size:\t%d\n",
27166 current_tune->prefetch.l1_cache_size);
27167 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27168 "prefetch.l1_cache_line_size:\t%d\n",
27169 current_tune->prefetch.l1_cache_line_size);
27170 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27171 "prefer_constant_pool:\t%d\n",
27172 (int) current_tune->prefer_constant_pool);
27173 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27174 "branch_cost:\t(s:speed, p:predictable)\n");
27175 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
27176 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
27177 current_tune->branch_cost (false, false));
27178 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
27179 current_tune->branch_cost (false, true));
27180 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
27181 current_tune->branch_cost (true, false));
27182 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
27183 current_tune->branch_cost (true, true));
27184 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27185 "prefer_ldrd_strd:\t%d\n",
27186 (int) current_tune->prefer_ldrd_strd);
27187 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27188 "logical_op_non_short_circuit:\t[%d,%d]\n",
27189 (int) current_tune->logical_op_non_short_circuit_thumb,
27190 (int) current_tune->logical_op_non_short_circuit_arm);
27191 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27192 "disparage_flag_setting_t16_encodings:\t%d\n",
27193 (int) current_tune->disparage_flag_setting_t16_encodings);
27194 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27195 "string_ops_prefer_neon:\t%d\n",
27196 (int) current_tune->string_ops_prefer_neon);
27197 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27198 "max_insns_inline_memset:\t%d\n",
27199 current_tune->max_insns_inline_memset);
27200 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
27201 current_tune->fusible_ops);
27202 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
27203 (int) current_tune->sched_autopref);
27204 }
27205
27206 /* Print .arch and .arch_extension directives corresponding to the
27207 current architecture configuration. */
27208 static void
27209 arm_print_asm_arch_directives ()
27210 {
27211 const arch_option *arch
27212 = arm_parse_arch_option_name (all_architectures, "-march",
27213 arm_active_target.arch_name);
27214 auto_sbitmap opt_bits (isa_num_bits);
27215
27216 gcc_assert (arch);
27217
27218 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
27219 arm_last_printed_arch_string = arm_active_target.arch_name;
27220 if (!arch->common.extensions)
27221 return;
27222
27223 for (const struct cpu_arch_extension *opt = arch->common.extensions;
27224 opt->name != NULL;
27225 opt++)
27226 {
27227 if (!opt->remove)
27228 {
27229 arm_initialize_isa (opt_bits, opt->isa_bits);
27230
27231 /* If every feature bit of this option is set in the target
27232 ISA specification, print out the option name. However,
27233 don't print anything if all the bits are part of the
27234 FPU specification. */
27235 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
27236 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
27237 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
27238 }
27239 }
27240 }
27241
27242 static void
27243 arm_file_start (void)
27244 {
27245 int val;
27246
27247 if (TARGET_BPABI)
27248 {
27249 /* We don't have a specified CPU. Use the architecture to
27250 generate the tags.
27251
27252 Note: it might be better to do this unconditionally, then the
27253 assembler would not need to know about all new CPU names as
27254 they are added. */
27255 if (!arm_active_target.core_name)
27256 {
27257 /* armv7ve doesn't support any extensions. */
27258 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
27259 {
27260 /* Keep backward compatibility for assemblers
27261 which don't support armv7ve. */
27262 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
27263 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
27264 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
27265 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
27266 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
27267 arm_last_printed_arch_string = "armv7ve";
27268 }
27269 else
27270 arm_print_asm_arch_directives ();
27271 }
27272 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
27273 {
27274 asm_fprintf (asm_out_file, "\t.arch %s\n",
27275 arm_active_target.core_name + 8);
27276 arm_last_printed_arch_string = arm_active_target.core_name + 8;
27277 }
27278 else
27279 {
27280 const char* truncated_name
27281 = arm_rewrite_selected_cpu (arm_active_target.core_name);
27282 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
27283 }
27284
27285 if (print_tune_info)
27286 arm_print_tune_info ();
27287
27288 if (! TARGET_SOFT_FLOAT)
27289 {
27290 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
27291 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
27292
27293 if (TARGET_HARD_FLOAT_ABI)
27294 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27295 }
27296
27297 /* Some of these attributes only apply when the corresponding features
27298 are used. However we don't have any easy way of figuring this out.
27299 Conservatively record the setting that would have been used. */
27300
27301 if (flag_rounding_math)
27302 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27303
27304 if (!flag_unsafe_math_optimizations)
27305 {
27306 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27307 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27308 }
27309 if (flag_signaling_nans)
27310 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27311
27312 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27313 flag_finite_math_only ? 1 : 3);
27314
27315 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27316 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27317 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27318 flag_short_enums ? 1 : 2);
27319
27320 /* Tag_ABI_optimization_goals. */
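/* For illustration, the mapping below means that -Os emits
   .eabi_attribute 30, 4; -O2 and above emit 30, 2; -O1 emits 30, 1;
   and -O0 emits 30, 6.  */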
27321 if (optimize_size)
27322 val = 4;
27323 else if (optimize >= 2)
27324 val = 2;
27325 else if (optimize)
27326 val = 1;
27327 else
27328 val = 6;
27329 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
27330
27331 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27332 unaligned_access);
27333
27334 if (arm_fp16_format)
27335 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27336 (int) arm_fp16_format);
27337
27338 if (arm_lang_output_object_attributes_hook)
27339 arm_lang_output_object_attributes_hook();
27340 }
27341
27342 default_file_start ();
27343 }
27344
27345 static void
27346 arm_file_end (void)
27347 {
27348 int regno;
27349
27350 if (NEED_INDICATE_EXEC_STACK)
27351 /* Add .note.GNU-stack. */
27352 file_end_indicate_exec_stack ();
27353
27354 if (! thumb_call_reg_needed)
27355 return;
27356
27357 switch_to_section (text_section);
27358 asm_fprintf (asm_out_file, "\t.code 16\n");
27359 ASM_OUTPUT_ALIGN (asm_out_file, 1);
27360
27361 for (regno = 0; regno < LR_REGNUM; regno++)
27362 {
27363 rtx label = thumb_call_via_label[regno];
27364
27365 if (label != 0)
27366 {
27367 targetm.asm_out.internal_label (asm_out_file, "L",
27368 CODE_LABEL_NUMBER (label));
27369 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
27370 }
27371 }
27372 }
27373
27374 #ifndef ARM_PE
27375 /* Symbols in the text segment can be accessed without indirecting via the
27376 constant pool; it may take an extra binary operation, but this is still
27377 faster than indirecting via memory. Don't do this when not optimizing,
27378 since we won't be calculating all of the offsets necessary to do this
27379 simplification. */
27380
27381 static void
27382 arm_encode_section_info (tree decl, rtx rtl, int first)
27383 {
27384 if (optimize > 0 && TREE_CONSTANT (decl))
27385 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
27386
27387 default_encode_section_info (decl, rtl, first);
27388 }
27389 #endif /* !ARM_PE */
27390
27391 static void
27392 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
27393 {
27394 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
27395 && !strcmp (prefix, "L"))
27396 {
27397 arm_ccfsm_state = 0;
27398 arm_target_insn = NULL;
27399 }
27400 default_internal_label (stream, prefix, labelno);
27401 }
27402
27403 /* Output code to add DELTA to the first argument, and then jump
27404 to FUNCTION. Used for C++ multiple inheritance. */
27405
27406 static void
27407 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
27408 HOST_WIDE_INT, tree function)
27409 {
27410 static int thunk_label = 0;
27411 char label[256];
27412 char labelpc[256];
27413 int mi_delta = delta;
27414 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
27415 int shift = 0;
27416 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
27417 ? 1 : 0);
27418 if (mi_delta < 0)
27419 mi_delta = - mi_delta;
27420
27421 final_start_function (emit_barrier (), file, 1);
27422
27423 if (TARGET_THUMB1)
27424 {
27425 int labelno = thunk_label++;
27426 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
27427 /* Thunks are entered in ARM mode when available. */
27428 if (TARGET_THUMB1_ONLY)
27429 {
27430 /* push r3 so we can use it as a temporary. */
27431 /* TODO: Omit this save if r3 is not used. */
27432 fputs ("\tpush {r3}\n", file);
27433 fputs ("\tldr\tr3, ", file);
27434 }
27435 else
27436 {
27437 fputs ("\tldr\tr12, ", file);
27438 }
27439 assemble_name (file, label);
27440 fputc ('\n', file);
27441 if (flag_pic)
27442 {
27443 /* If we are generating PIC, the ldr instruction below loads
27444 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
27445 the address of the add + 8, so we have:
27446
27447 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
27448 = target + 1.
27449
27450 Note that we have "+ 1" because some versions of GNU ld
27451 don't set the low bit of the result for R_ARM_REL32
27452 relocations against thumb function symbols.
27453 On ARMv6M this is +4, not +8. */
27454 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
27455 assemble_name (file, labelpc);
27456 fputs (":\n", file);
27457 if (TARGET_THUMB1_ONLY)
27458 {
27459 /* This is 2 insns after the start of the thunk, so we know it
27460 is 4-byte aligned. */
27461 fputs ("\tadd\tr3, pc, r3\n", file);
27462 fputs ("\tmov r12, r3\n", file);
27463 }
27464 else
27465 fputs ("\tadd\tr12, pc, r12\n", file);
27466 }
27467 else if (TARGET_THUMB1_ONLY)
27468 fputs ("\tmov r12, r3\n", file);
27469 }
27470 if (TARGET_THUMB1_ONLY)
27471 {
27472 if (mi_delta > 255)
27473 {
27474 fputs ("\tldr\tr3, ", file);
27475 assemble_name (file, label);
27476 fputs ("+4\n", file);
27477 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
27478 mi_op, this_regno, this_regno);
27479 }
27480 else if (mi_delta != 0)
27481 {
27482 /* Thumb1 unified syntax requires s suffix in instruction name when
27483 one of the operands is immediate. */
27484 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
27485 mi_op, this_regno, this_regno,
27486 mi_delta);
27487 }
27488 }
27489 else
27490 {
27491 /* TODO: Use movw/movt for large constants when available. */
27492 while (mi_delta != 0)
27493 {
27494 if ((mi_delta & (3 << shift)) == 0)
27495 shift += 2;
27496 else
27497 {
27498 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
27499 mi_op, this_regno, this_regno,
27500 mi_delta & (0xff << shift));
27501 mi_delta &= ~(0xff << shift);
27502 shift += 8;
27503 }
27504 }
27505 }
27506 if (TARGET_THUMB1)
27507 {
27508 if (TARGET_THUMB1_ONLY)
27509 fputs ("\tpop\t{r3}\n", file);
27510
27511 fprintf (file, "\tbx\tr12\n");
27512 ASM_OUTPUT_ALIGN (file, 2);
27513 assemble_name (file, label);
27514 fputs (":\n", file);
27515 if (flag_pic)
27516 {
27517 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
27518 rtx tem = XEXP (DECL_RTL (function), 0);
27519 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
27520 pipeline offset is four rather than eight. Adjust the offset
27521 accordingly. */
27522 tem = plus_constant (GET_MODE (tem), tem,
27523 TARGET_THUMB1_ONLY ? -3 : -7);
27524 tem = gen_rtx_MINUS (GET_MODE (tem),
27525 tem,
27526 gen_rtx_SYMBOL_REF (Pmode,
27527 ggc_strdup (labelpc)));
27528 assemble_integer (tem, 4, BITS_PER_WORD, 1);
27529 }
27530 else
27531 /* Output ".word .LTHUNKn". */
27532 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
27533
27534 if (TARGET_THUMB1_ONLY && mi_delta > 255)
27535 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
27536 }
27537 else
27538 {
27539 fputs ("\tb\t", file);
27540 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
27541 if (NEED_PLT_RELOC)
27542 fputs ("(PLT)", file);
27543 fputc ('\n', file);
27544 }
27545
27546 final_end_function ();
27547 }
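
/* Worked example (illustrative) for the add/sub chunking loop above (the
   non-THUMB1_ONLY path): a displacement of 0x1234 is peeled into 8-bit
   chunks aligned to even bit positions, so it is applied as

	add	r0, r0, #564	@ 0x234
	add	r0, r0, #4096	@ 0x1000

   with r0 standing for the `this' register (it would be r1 for functions
   that return an aggregate in memory).  */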
27548
27549 /* MI thunk handling for TARGET_32BIT. */
27550
27551 static void
27552 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
27553 HOST_WIDE_INT vcall_offset, tree function)
27554 {
27555 const bool long_call_p = arm_is_long_call_p (function);
27556
27557 /* On ARM, this_regno is R0 or R1 depending on
27558 whether the function returns an aggregate or not.
27559 */
27560 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
27561 function)
27562 ? R1_REGNUM : R0_REGNUM);
27563
27564 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
27565 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
27566 reload_completed = 1;
27567 emit_note (NOTE_INSN_PROLOGUE_END);
27568
27569 /* Add DELTA to THIS_RTX. */
27570 if (delta != 0)
27571 arm_split_constant (PLUS, Pmode, NULL_RTX,
27572 delta, this_rtx, this_rtx, false);
27573
27574 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
27575 if (vcall_offset != 0)
27576 {
27577 /* Load *THIS_RTX. */
27578 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
27579 /* Compute *THIS_RTX + VCALL_OFFSET. */
27580 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
27581 false);
27582 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
27583 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
27584 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
27585 }
27586
27587 /* Generate a tail call to the target function. */
27588 if (!TREE_USED (function))
27589 {
27590 assemble_external (function);
27591 TREE_USED (function) = 1;
27592 }
27593 rtx funexp = XEXP (DECL_RTL (function), 0);
27594 if (long_call_p)
27595 {
27596 emit_move_insn (temp, funexp);
27597 funexp = temp;
27598 }
27599 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
27600 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
27601 SIBLING_CALL_P (insn) = 1;
27602 emit_barrier ();
27603
27604 /* Indirect calls require a bit of fixup in PIC mode. */
27605 if (long_call_p)
27606 {
27607 split_all_insns_noflow ();
27608 arm_reorg ();
27609 }
27610
27611 insn = get_insns ();
27612 shorten_branches (insn);
27613 final_start_function (insn, file, 1);
27614 final (insn, file, 1);
27615 final_end_function ();
27616
27617 /* Stop pretending this is a post-reload pass. */
27618 reload_completed = 0;
27619 }
27620
27621 /* Output code to add DELTA to the first argument, and then jump
27622 to FUNCTION. Used for C++ multiple inheritance. */
27623
27624 static void
27625 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
27626 HOST_WIDE_INT vcall_offset, tree function)
27627 {
27628 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
27629
27630 assemble_start_function (thunk, fnname);
27631 if (TARGET_32BIT)
27632 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
27633 else
27634 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
27635 assemble_end_function (thunk, fnname);
27636 }
27637
27638 int
27639 arm_emit_vector_const (FILE *file, rtx x)
27640 {
27641 int i;
27642 const char * pattern;
27643
27644 gcc_assert (GET_CODE (x) == CONST_VECTOR);
27645
27646 switch (GET_MODE (x))
27647 {
27648 case E_V2SImode: pattern = "%08x"; break;
27649 case E_V4HImode: pattern = "%04x"; break;
27650 case E_V8QImode: pattern = "%02x"; break;
27651 default: gcc_unreachable ();
27652 }
27653
27654 fprintf (file, "0x");
27655 for (i = CONST_VECTOR_NUNITS (x); i--;)
27656 {
27657 rtx element;
27658
27659 element = CONST_VECTOR_ELT (x, i);
27660 fprintf (file, pattern, INTVAL (element));
27661 }
27662
27663 return 1;
27664 }
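
/* Illustrative sketch: for a V4HImode constant whose elements are 1, 2, 3
   and 4 (element 0 first), the loop above prints the elements from the
   highest-numbered down, giving

	0x0004000300020001

   This only describes how the hex string is assembled; the in-memory
   byte order of the vector is not dealt with here.  */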
27665
27666 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
27667 HFmode constant pool entries are actually loaded with ldr. */
27668 void
27669 arm_emit_fp16_const (rtx c)
27670 {
27671 long bits;
27672
27673 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
27674 if (WORDS_BIG_ENDIAN)
27675 assemble_zeros (2);
27676 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
27677 if (!WORDS_BIG_ENDIAN)
27678 assemble_zeros (2);
27679 }
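
/* Illustrative sketch: the HFmode constant 1.0 has the half-precision bit
   pattern 0x3c00, so on a non-WORDS_BIG_ENDIAN target the two data bytes
   are emitted first, followed by two bytes of zero padding, roughly

	.short	0x3c00
	.short	0
*/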
27680
27681 const char *
27682 arm_output_load_gr (rtx *operands)
27683 {
27684 rtx reg;
27685 rtx offset;
27686 rtx wcgr;
27687 rtx sum;
27688
27689 if (!MEM_P (operands [1])
27690 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
27691 || !REG_P (reg = XEXP (sum, 0))
27692 || !CONST_INT_P (offset = XEXP (sum, 1))
27693 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
27694 return "wldrw%?\t%0, %1";
27695
27696 /* Fix up an out-of-range load of a GR register. */
27697 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
27698 wcgr = operands[0];
27699 operands[0] = reg;
27700 output_asm_insn ("ldr%?\t%0, %1", operands);
27701
27702 operands[0] = wcgr;
27703 operands[1] = reg;
27704 output_asm_insn ("tmcr%?\t%0, %1", operands);
27705 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
27706
27707 return "";
27708 }
27709
27710 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27711
27712 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27713 named arg and all anonymous args onto the stack.
27714 XXX I know the prologue shouldn't be pushing registers, but it is faster
27715 that way. */
27716
27717 static void
27718 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
27719 const function_arg_info &arg,
27720 int *pretend_size,
27721 int second_time ATTRIBUTE_UNUSED)
27722 {
27723 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
27724 int nregs;
27725
27726 cfun->machine->uses_anonymous_args = 1;
27727 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
27728 {
27729 nregs = pcum->aapcs_ncrn;
27730 if (nregs & 1)
27731 {
27732 int res = arm_needs_doubleword_align (arg.mode, arg.type);
27733 if (res < 0 && warn_psabi)
27734 inform (input_location, "parameter passing for argument of "
27735 "type %qT changed in GCC 7.1", arg.type);
27736 else if (res > 0)
27737 {
27738 nregs++;
27739 if (res > 1 && warn_psabi)
27740 inform (input_location,
27741 "parameter passing for argument of type "
27742 "%qT changed in GCC 9.1", arg.type);
27743 }
27744 }
27745 }
27746 else
27747 nregs = pcum->nregs;
27748
27749 if (nregs < NUM_ARG_REGS)
27750 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
27751 }
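
/* Worked example (illustrative): for a function such as
   int f (int a, ...), the named argument occupies r0, so nregs is 1 and
   *pretend_size becomes (4 - 1) * UNITS_PER_WORD = 12; the prologue then
   pushes r1-r3 so that the anonymous arguments form a contiguous block
   with any arguments already on the stack.  */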
27752
27753 /* We can't rely on the caller doing the proper promotion when
27754 using APCS or ATPCS. */
27755
27756 static bool
27757 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
27758 {
27759 return !TARGET_AAPCS_BASED;
27760 }
27761
27762 static machine_mode
27763 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
27764 machine_mode mode,
27765 int *punsignedp ATTRIBUTE_UNUSED,
27766 const_tree fntype ATTRIBUTE_UNUSED,
27767 int for_return ATTRIBUTE_UNUSED)
27768 {
27769 if (GET_MODE_CLASS (mode) == MODE_INT
27770 && GET_MODE_SIZE (mode) < 4)
27771 return SImode;
27772
27773 return mode;
27774 }
27775
27776
27777 static bool
27778 arm_default_short_enums (void)
27779 {
27780 return ARM_DEFAULT_SHORT_ENUMS;
27781 }
27782
27783
27784 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27785
27786 static bool
27787 arm_align_anon_bitfield (void)
27788 {
27789 return TARGET_AAPCS_BASED;
27790 }
27791
27792
27793 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27794
27795 static tree
27796 arm_cxx_guard_type (void)
27797 {
27798 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27799 }
27800
27801
27802 /* The EABI says test the least significant bit of a guard variable. */
27803
27804 static bool
27805 arm_cxx_guard_mask_bit (void)
27806 {
27807 return TARGET_AAPCS_BASED;
27808 }
27809
27810
27811 /* The EABI specifies that all array cookies are 8 bytes long. */
27812
27813 static tree
27814 arm_get_cookie_size (tree type)
27815 {
27816 tree size;
27817
27818 if (!TARGET_AAPCS_BASED)
27819 return default_cxx_get_cookie_size (type);
27820
27821 size = build_int_cst (sizetype, 8);
27822 return size;
27823 }
27824
27825
27826 /* The EABI says that array cookies should also contain the element size. */
27827
27828 static bool
27829 arm_cookie_has_size (void)
27830 {
27831 return TARGET_AAPCS_BASED;
27832 }
27833
27834
27835 /* The EABI says constructors and destructors should return a pointer to
27836 the object constructed/destroyed. */
27837
27838 static bool
27839 arm_cxx_cdtor_returns_this (void)
27840 {
27841 return TARGET_AAPCS_BASED;
27842 }
27843
27844 /* The EABI says that an inline function may never be the key
27845 method. */
27846
27847 static bool
27848 arm_cxx_key_method_may_be_inline (void)
27849 {
27850 return !TARGET_AAPCS_BASED;
27851 }
27852
27853 static void
27854 arm_cxx_determine_class_data_visibility (tree decl)
27855 {
27856 if (!TARGET_AAPCS_BASED
27857 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27858 return;
27859
27860 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27861 is exported. However, on systems without dynamic vague linkage,
27862 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27863 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27864 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27865 else
27866 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27867 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27868 }
27869
27870 static bool
27871 arm_cxx_class_data_always_comdat (void)
27872 {
27873 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27874 vague linkage if the class has no key function. */
27875 return !TARGET_AAPCS_BASED;
27876 }
27877
27878
27879 /* The EABI says __aeabi_atexit should be used to register static
27880 destructors. */
27881
27882 static bool
27883 arm_cxx_use_aeabi_atexit (void)
27884 {
27885 return TARGET_AAPCS_BASED;
27886 }
27887
27888
27889 void
27890 arm_set_return_address (rtx source, rtx scratch)
27891 {
27892 arm_stack_offsets *offsets;
27893 HOST_WIDE_INT delta;
27894 rtx addr, mem;
27895 unsigned long saved_regs;
27896
27897 offsets = arm_get_frame_offsets ();
27898 saved_regs = offsets->saved_regs_mask;
27899
27900 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27901 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27902 else
27903 {
27904 if (frame_pointer_needed)
27905 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27906 else
27907 {
27908 /* LR will be the first saved register. */
27909 delta = offsets->outgoing_args - (offsets->frame + 4);
27910
27911
27912 if (delta >= 4096)
27913 {
27914 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27915 GEN_INT (delta & ~4095)));
27916 addr = scratch;
27917 delta &= 4095;
27918 }
27919 else
27920 addr = stack_pointer_rtx;
27921
27922 addr = plus_constant (Pmode, addr, delta);
27923 }
27924
27925 /* The store needs to be marked to prevent DSE from deleting
27926 it as dead if it is based on fp. */
27927 mem = gen_frame_mem (Pmode, addr);
27928 MEM_VOLATILE_P (mem) = true;
27929 emit_move_insn (mem, source);
27930 }
27931 }
27932
27933
27934 void
27935 thumb_set_return_address (rtx source, rtx scratch)
27936 {
27937 arm_stack_offsets *offsets;
27938 HOST_WIDE_INT delta;
27939 HOST_WIDE_INT limit;
27940 int reg;
27941 rtx addr, mem;
27942 unsigned long mask;
27943
27944 emit_use (source);
27945
27946 offsets = arm_get_frame_offsets ();
27947 mask = offsets->saved_regs_mask;
27948 if (mask & (1 << LR_REGNUM))
27949 {
27950 limit = 1024;
27951 /* Find the saved regs. */
27952 if (frame_pointer_needed)
27953 {
27954 delta = offsets->soft_frame - offsets->saved_args;
27955 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27956 if (TARGET_THUMB1)
27957 limit = 128;
27958 }
27959 else
27960 {
27961 delta = offsets->outgoing_args - offsets->saved_args;
27962 reg = SP_REGNUM;
27963 }
27964 /* Allow for the stack frame. */
27965 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27966 delta -= 16;
27967 /* The link register is always the first saved register. */
27968 delta -= 4;
27969
27970 /* Construct the address. */
27971 addr = gen_rtx_REG (SImode, reg);
27972 if (delta > limit)
27973 {
27974 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27975 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27976 addr = scratch;
27977 }
27978 else
27979 addr = plus_constant (Pmode, addr, delta);
27980
27981 /* The store needs to be marked to prevent DSE from deleting
27982 it as dead if it is based on fp. */
27983 mem = gen_frame_mem (Pmode, addr);
27984 MEM_VOLATILE_P (mem) = true;
27985 emit_move_insn (mem, source);
27986 }
27987 else
27988 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27989 }
27990
27991 /* Implements target hook vector_mode_supported_p. */
27992 bool
27993 arm_vector_mode_supported_p (machine_mode mode)
27994 {
27995 /* Neon also supports V2SImode, etc. listed in the clause below. */
27996 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27997 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27998 || mode == V2DImode || mode == V8HFmode))
27999 return true;
28000
28001 if ((TARGET_NEON || TARGET_IWMMXT)
28002 && ((mode == V2SImode)
28003 || (mode == V4HImode)
28004 || (mode == V8QImode)))
28005 return true;
28006
28007 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28008 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28009 || mode == V2HAmode))
28010 return true;
28011
28012 return false;
28013 }
28014
28015 /* Implements target hook array_mode_supported_p. */
28016
28017 static bool
28018 arm_array_mode_supported_p (machine_mode mode,
28019 unsigned HOST_WIDE_INT nelems)
28020 {
28021 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
28022 for now, as the lane-swapping logic needs to be extended in the expanders.
28023 See PR target/82518. */
28024 if (TARGET_NEON && !BYTES_BIG_ENDIAN
28025 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28026 && (nelems >= 2 && nelems <= 4))
28027 return true;
28028
28029 return false;
28030 }
28031
28032 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28033 registers when autovectorizing for Neon, at least until multiple vector
28034 widths are supported properly by the middle-end. */
28035
28036 static machine_mode
28037 arm_preferred_simd_mode (scalar_mode mode)
28038 {
28039 if (TARGET_NEON)
28040 switch (mode)
28041 {
28042 case E_SFmode:
28043 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28044 case E_SImode:
28045 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28046 case E_HImode:
28047 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28048 case E_QImode:
28049 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28050 case E_DImode:
28051 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28052 return V2DImode;
28053 break;
28054
28055 default:;
28056 }
28057
28058 if (TARGET_REALLY_IWMMXT)
28059 switch (mode)
28060 {
28061 case E_SImode:
28062 return V2SImode;
28063 case E_HImode:
28064 return V4HImode;
28065 case E_QImode:
28066 return V8QImode;
28067
28068 default:;
28069 }
28070
28071 return word_mode;
28072 }
28073
28074 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28075
28076 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28077 using r0-r4 for function arguments, r7 for the stack frame and don't have
28078 enough left over to do doubleword arithmetic. For Thumb-2 all the
28079 potentially problematic instructions accept high registers so this is not
28080 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28081 that require many low registers. */
28082 static bool
28083 arm_class_likely_spilled_p (reg_class_t rclass)
28084 {
28085 if ((TARGET_THUMB1 && rclass == LO_REGS)
28086 || rclass == CC_REG)
28087 return true;
28088
28089 return false;
28090 }
28091
28092 /* Implements target hook small_register_classes_for_mode_p. */
28093 bool
28094 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
28095 {
28096 return TARGET_THUMB1;
28097 }
28098
28099 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28100 ARM insns and therefore guarantee that the shift count is modulo 256.
28101 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28102 guarantee no particular behavior for out-of-range counts. */
28103
28104 static unsigned HOST_WIDE_INT
28105 arm_shift_truncation_mask (machine_mode mode)
28106 {
28107 return mode == SImode ? 255 : 0;
28108 }
28109
28110
28111 /* Map internal gcc register numbers to DWARF2 register numbers. */
28112
28113 unsigned int
28114 arm_dbx_register_number (unsigned int regno)
28115 {
28116 if (regno < 16)
28117 return regno;
28118
28119 if (IS_VFP_REGNUM (regno))
28120 {
28121 /* See comment in arm_dwarf_register_span. */
28122 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28123 return 64 + regno - FIRST_VFP_REGNUM;
28124 else
28125 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28126 }
28127
28128 if (IS_IWMMXT_GR_REGNUM (regno))
28129 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28130
28131 if (IS_IWMMXT_REGNUM (regno))
28132 return 112 + regno - FIRST_IWMMXT_REGNUM;
28133
28134 return DWARF_FRAME_REGISTERS;
28135 }
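
/* Illustrative examples of the mapping above: core registers r0-r15 map to
   DWARF numbers 0-15; VFP registers in the single-precision range use the
   legacy numbering, e.g. s0 -> 64 and s31 -> 95; the upper double-precision
   registers use the 256+ range, e.g. d16 -> 272; iWMMXt GR and data
   registers map into the 104+ and 112+ ranges as coded above; anything else
   falls back to DWARF_FRAME_REGISTERS, meaning no DWARF number is
   available.  */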
28136
28137 /* DWARF models VFPv3 registers as 32 64-bit registers.
28138 GCC models them as 64 32-bit registers, so we need to describe this to
28139 the DWARF generation code. Other registers can use the default. */
28140 static rtx
28141 arm_dwarf_register_span (rtx rtl)
28142 {
28143 machine_mode mode;
28144 unsigned regno;
28145 rtx parts[16];
28146 int nregs;
28147 int i;
28148
28149 regno = REGNO (rtl);
28150 if (!IS_VFP_REGNUM (regno))
28151 return NULL_RTX;
28152
28153 /* XXX FIXME: The EABI defines two VFP register ranges:
28154 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28155 256-287: D0-D31
28156 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28157 corresponding D register. Until GDB supports this, we shall use the
28158 legacy encodings. We also use these encodings for D0-D15 for
28159 compatibility with older debuggers. */
28160 mode = GET_MODE (rtl);
28161 if (GET_MODE_SIZE (mode) < 8)
28162 return NULL_RTX;
28163
28164 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28165 {
28166 nregs = GET_MODE_SIZE (mode) / 4;
28167 for (i = 0; i < nregs; i += 2)
28168 if (TARGET_BIG_END)
28169 {
28170 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28171 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28172 }
28173 else
28174 {
28175 parts[i] = gen_rtx_REG (SImode, regno + i);
28176 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28177 }
28178 }
28179 else
28180 {
28181 nregs = GET_MODE_SIZE (mode) / 8;
28182 for (i = 0; i < nregs; i++)
28183 parts[i] = gen_rtx_REG (DImode, regno + i);
28184 }
28185
28186 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28187 }
28188
28189 #if ARM_UNWIND_INFO
28190 /* Emit unwind directives for a store-multiple instruction or stack pointer
28191 push during alignment.
28192 These should only ever be generated by the function prologue code, so
28193 expect them to have a particular form.
28194 The store-multiple instruction sometimes pushes pc as the last register,
28195 although it should not be tracked into unwind information, or for -Os
28196 sometimes pushes some dummy registers before the first register that needs
28197 to be tracked in unwind information; such dummy registers are there just
28198 to avoid separate stack adjustment, and will not be restored in the
28199 epilogue. */
28200
28201 static void
28202 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28203 {
28204 int i;
28205 HOST_WIDE_INT offset;
28206 HOST_WIDE_INT nregs;
28207 int reg_size;
28208 unsigned reg;
28209 unsigned lastreg;
28210 unsigned padfirst = 0, padlast = 0;
28211 rtx e;
28212
28213 e = XVECEXP (p, 0, 0);
28214 gcc_assert (GET_CODE (e) == SET);
28215
28216 /* First insn will adjust the stack pointer. */
28217 gcc_assert (GET_CODE (e) == SET
28218 && REG_P (SET_DEST (e))
28219 && REGNO (SET_DEST (e)) == SP_REGNUM
28220 && GET_CODE (SET_SRC (e)) == PLUS);
28221
28222 offset = -INTVAL (XEXP (SET_SRC (e), 1));
28223 nregs = XVECLEN (p, 0) - 1;
28224 gcc_assert (nregs);
28225
28226 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
28227 if (reg < 16)
28228 {
28229 /* For -Os dummy registers can be pushed at the beginning to
28230 avoid separate stack pointer adjustment. */
28231 e = XVECEXP (p, 0, 1);
28232 e = XEXP (SET_DEST (e), 0);
28233 if (GET_CODE (e) == PLUS)
28234 padfirst = INTVAL (XEXP (e, 1));
28235 gcc_assert (padfirst == 0 || optimize_size);
28236 /* The function prologue may also push pc, but not annotate it as it is
28237 never restored. We turn this into a stack pointer adjustment. */
28238 e = XVECEXP (p, 0, nregs);
28239 e = XEXP (SET_DEST (e), 0);
28240 if (GET_CODE (e) == PLUS)
28241 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
28242 else
28243 padlast = offset - 4;
28244 gcc_assert (padlast == 0 || padlast == 4);
28245 if (padlast == 4)
28246 fprintf (asm_out_file, "\t.pad #4\n");
28247 reg_size = 4;
28248 fprintf (asm_out_file, "\t.save {");
28249 }
28250 else if (IS_VFP_REGNUM (reg))
28251 {
28252 reg_size = 8;
28253 fprintf (asm_out_file, "\t.vsave {");
28254 }
28255 else
28256 /* Unknown register type. */
28257 gcc_unreachable ();
28258
28259 /* If the stack increment doesn't match the size of the saved registers,
28260 something has gone horribly wrong. */
28261 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
28262
28263 offset = padfirst;
28264 lastreg = 0;
28265 /* The remaining insns will describe the stores. */
28266 for (i = 1; i <= nregs; i++)
28267 {
28268 /* Expect (set (mem <addr>) (reg)).
28269 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28270 e = XVECEXP (p, 0, i);
28271 gcc_assert (GET_CODE (e) == SET
28272 && MEM_P (SET_DEST (e))
28273 && REG_P (SET_SRC (e)));
28274
28275 reg = REGNO (SET_SRC (e));
28276 gcc_assert (reg >= lastreg);
28277
28278 if (i != 1)
28279 fprintf (asm_out_file, ", ");
28280 /* We can't use %r for vfp because we need to use the
28281 double precision register names. */
28282 if (IS_VFP_REGNUM (reg))
28283 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28284 else
28285 asm_fprintf (asm_out_file, "%r", reg);
28286
28287 if (flag_checking)
28288 {
28289 /* Check that the addresses are consecutive. */
28290 e = XEXP (SET_DEST (e), 0);
28291 if (GET_CODE (e) == PLUS)
28292 gcc_assert (REG_P (XEXP (e, 0))
28293 && REGNO (XEXP (e, 0)) == SP_REGNUM
28294 && CONST_INT_P (XEXP (e, 1))
28295 && offset == INTVAL (XEXP (e, 1)));
28296 else
28297 gcc_assert (i == 1
28298 && REG_P (e)
28299 && REGNO (e) == SP_REGNUM);
28300 offset += reg_size;
28301 }
28302 }
28303 fprintf (asm_out_file, "}\n");
28304 if (padfirst)
28305 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
28306 }
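
/* Illustrative sketch of the directives produced above: a prologue
   push {r4, r5, lr} (a stack decrement of 12) yields

	.save {r4, r5, lr}

   a vpush of d8 and d9 yields

	.vsave {d8, d9}

   and when pc was pushed merely to reserve a slot, or dummy low registers
   were pushed for -Os, the extra space is reported with a .pad directive
   rather than being listed as a saved register.  */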
28307
28308 /* Emit unwind directives for a SET. */
28309
28310 static void
28311 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28312 {
28313 rtx e0;
28314 rtx e1;
28315 unsigned reg;
28316
28317 e0 = XEXP (p, 0);
28318 e1 = XEXP (p, 1);
28319 switch (GET_CODE (e0))
28320 {
28321 case MEM:
28322 /* Pushing a single register. */
28323 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
28324 || !REG_P (XEXP (XEXP (e0, 0), 0))
28325 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
28326 abort ();
28327
28328 asm_fprintf (asm_out_file, "\t.save ");
28329 if (IS_VFP_REGNUM (REGNO (e1)))
28330 asm_fprintf(asm_out_file, "{d%d}\n",
28331 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
28332 else
28333 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
28334 break;
28335
28336 case REG:
28337 if (REGNO (e0) == SP_REGNUM)
28338 {
28339 /* A stack increment. */
28340 if (GET_CODE (e1) != PLUS
28341 || !REG_P (XEXP (e1, 0))
28342 || REGNO (XEXP (e1, 0)) != SP_REGNUM
28343 || !CONST_INT_P (XEXP (e1, 1)))
28344 abort ();
28345
28346 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
28347 -INTVAL (XEXP (e1, 1)));
28348 }
28349 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
28350 {
28351 HOST_WIDE_INT offset;
28352
28353 if (GET_CODE (e1) == PLUS)
28354 {
28355 if (!REG_P (XEXP (e1, 0))
28356 || !CONST_INT_P (XEXP (e1, 1)))
28357 abort ();
28358 reg = REGNO (XEXP (e1, 0));
28359 offset = INTVAL (XEXP (e1, 1));
28360 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
28361 HARD_FRAME_POINTER_REGNUM, reg,
28362 offset);
28363 }
28364 else if (REG_P (e1))
28365 {
28366 reg = REGNO (e1);
28367 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28368 HARD_FRAME_POINTER_REGNUM, reg);
28369 }
28370 else
28371 abort ();
28372 }
28373 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28374 {
28375 /* Move from sp to reg. */
28376 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28377 }
28378 else if (GET_CODE (e1) == PLUS
28379 && REG_P (XEXP (e1, 0))
28380 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28381 && CONST_INT_P (XEXP (e1, 1)))
28382 {
28383 /* Set reg to offset from sp. */
28384 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28385 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
28386 }
28387 else
28388 abort ();
28389 break;
28390
28391 default:
28392 abort ();
28393 }
28394 }
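
/* Illustrative sketch: typical prologue SETs map to directives as follows.
   A stack decrement such as sp = sp - 16 yields ".pad #16"; a
   single-register push str r4, [sp, #-4]! yields ".save {r4}"; establishing
   the frame pointer from sp (or sp plus a constant) yields a ".setfp"
   directive naming the frame pointer, the source register and the offset;
   copying sp into another register yields ".movsp <reg>", and setting a
   register to sp plus a constant yields ".movsp <reg>, #<offset>".  */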
28395
28396
28397 /* Emit unwind directives for the given insn. */
28398
28399 static void
28400 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
28401 {
28402 rtx note, pat;
28403 bool handled_one = false;
28404
28405 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28406 return;
28407
28408 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28409 && (TREE_NOTHROW (current_function_decl)
28410 || crtl->all_throwers_are_sibcalls))
28411 return;
28412
28413 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28414 return;
28415
28416 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28417 {
28418 switch (REG_NOTE_KIND (note))
28419 {
28420 case REG_FRAME_RELATED_EXPR:
28421 pat = XEXP (note, 0);
28422 goto found;
28423
28424 case REG_CFA_REGISTER:
28425 pat = XEXP (note, 0);
28426 if (pat == NULL)
28427 {
28428 pat = PATTERN (insn);
28429 if (GET_CODE (pat) == PARALLEL)
28430 pat = XVECEXP (pat, 0, 0);
28431 }
28432
28433 /* Only emitted for IS_STACKALIGN re-alignment. */
28434 {
28435 rtx dest, src;
28436 unsigned reg;
28437
28438 src = SET_SRC (pat);
28439 dest = SET_DEST (pat);
28440
28441 gcc_assert (src == stack_pointer_rtx);
28442 reg = REGNO (dest);
28443 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28444 reg + 0x90, reg);
28445 }
28446 handled_one = true;
28447 break;
28448
28449 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
28450 to get correct DWARF information for shrink-wrapping. We should not
28451 emit unwind information for it because these notes are used either for
28452 pretend arguments or to adjust sp and restore registers from the
28453 stack. */
28454 case REG_CFA_DEF_CFA:
28455 case REG_CFA_ADJUST_CFA:
28456 case REG_CFA_RESTORE:
28457 return;
28458
28459 case REG_CFA_EXPRESSION:
28460 case REG_CFA_OFFSET:
28461 /* ??? Only handling here what we actually emit. */
28462 gcc_unreachable ();
28463
28464 default:
28465 break;
28466 }
28467 }
28468 if (handled_one)
28469 return;
28470 pat = PATTERN (insn);
28471 found:
28472
28473 switch (GET_CODE (pat))
28474 {
28475 case SET:
28476 arm_unwind_emit_set (asm_out_file, pat);
28477 break;
28478
28479 case SEQUENCE:
28480 /* Store multiple. */
28481 arm_unwind_emit_sequence (asm_out_file, pat);
28482 break;
28483
28484 default:
28485 abort();
28486 }
28487 }
28488
28489
28490 /* Output a reference from a function exception table to the type_info
28491 object X. The EABI specifies that the symbol should be relocated by
28492 an R_ARM_TARGET2 relocation. */
28493
28494 static bool
28495 arm_output_ttype (rtx x)
28496 {
28497 fputs ("\t.word\t", asm_out_file);
28498 output_addr_const (asm_out_file, x);
28499 /* Use special relocations for symbol references. */
28500 if (!CONST_INT_P (x))
28501 fputs ("(TARGET2)", asm_out_file);
28502 fputc ('\n', asm_out_file);
28503
28504 return TRUE;
28505 }
28506
28507 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28508
28509 static void
28510 arm_asm_emit_except_personality (rtx personality)
28511 {
28512 fputs ("\t.personality\t", asm_out_file);
28513 output_addr_const (asm_out_file, personality);
28514 fputc ('\n', asm_out_file);
28515 }
28516 #endif /* ARM_UNWIND_INFO */
28517
28518 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28519
28520 static void
28521 arm_asm_init_sections (void)
28522 {
28523 #if ARM_UNWIND_INFO
28524 exception_section = get_unnamed_section (0, output_section_asm_op,
28525 "\t.handlerdata");
28526 #endif /* ARM_UNWIND_INFO */
28527
28528 #ifdef OBJECT_FORMAT_ELF
28529 if (target_pure_code)
28530 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
28531 #endif
28532 }
28533
28534 /* Output unwind directives for the start/end of a function. */
28535
28536 void
28537 arm_output_fn_unwind (FILE * f, bool prologue)
28538 {
28539 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28540 return;
28541
28542 if (prologue)
28543 fputs ("\t.fnstart\n", f);
28544 else
28545 {
28546 /* If this function will never be unwound, then mark it as such.
28547 The same condition is used in arm_unwind_emit to suppress
28548 the frame annotations. */
28549 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28550 && (TREE_NOTHROW (current_function_decl)
28551 || crtl->all_throwers_are_sibcalls))
28552 fputs("\t.cantunwind\n", f);
28553
28554 fputs ("\t.fnend\n", f);
28555 }
28556 }
28557
28558 static bool
28559 arm_emit_tls_decoration (FILE *fp, rtx x)
28560 {
28561 enum tls_reloc reloc;
28562 rtx val;
28563
28564 val = XVECEXP (x, 0, 0);
28565 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
28566
28567 output_addr_const (fp, val);
28568
28569 switch (reloc)
28570 {
28571 case TLS_GD32:
28572 fputs ("(tlsgd)", fp);
28573 break;
28574 case TLS_GD32_FDPIC:
28575 fputs ("(tlsgd_fdpic)", fp);
28576 break;
28577 case TLS_LDM32:
28578 fputs ("(tlsldm)", fp);
28579 break;
28580 case TLS_LDM32_FDPIC:
28581 fputs ("(tlsldm_fdpic)", fp);
28582 break;
28583 case TLS_LDO32:
28584 fputs ("(tlsldo)", fp);
28585 break;
28586 case TLS_IE32:
28587 fputs ("(gottpoff)", fp);
28588 break;
28589 case TLS_IE32_FDPIC:
28590 fputs ("(gottpoff_fdpic)", fp);
28591 break;
28592 case TLS_LE32:
28593 fputs ("(tpoff)", fp);
28594 break;
28595 case TLS_DESCSEQ:
28596 fputs ("(tlsdesc)", fp);
28597 break;
28598 default:
28599 gcc_unreachable ();
28600 }
28601
28602 switch (reloc)
28603 {
28604 case TLS_GD32:
28605 case TLS_LDM32:
28606 case TLS_IE32:
28607 case TLS_DESCSEQ:
28608 fputs (" + (. - ", fp);
28609 output_addr_const (fp, XVECEXP (x, 0, 2));
28610 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
28611 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
28612 output_addr_const (fp, XVECEXP (x, 0, 3));
28613 fputc (')', fp);
28614 break;
28615 default:
28616 break;
28617 }
28618
28619 return TRUE;
28620 }
28621
28622 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
28623
28624 static void
28625 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
28626 {
28627 gcc_assert (size == 4);
28628 fputs ("\t.word\t", file);
28629 output_addr_const (file, x);
28630 fputs ("(tlsldo)", file);
28631 }
28632
28633 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
28634
28635 static bool
28636 arm_output_addr_const_extra (FILE *fp, rtx x)
28637 {
28638 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
28639 return arm_emit_tls_decoration (fp, x);
28640 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
28641 {
28642 char label[256];
28643 int labelno = INTVAL (XVECEXP (x, 0, 0));
28644
28645 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
28646 assemble_name_raw (fp, label);
28647
28648 return TRUE;
28649 }
28650 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
28651 {
28652 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
28653 if (GOT_PCREL)
28654 fputs ("+.", fp);
28655 fputs ("-(", fp);
28656 output_addr_const (fp, XVECEXP (x, 0, 0));
28657 fputc (')', fp);
28658 return TRUE;
28659 }
28660 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
28661 {
28662 output_addr_const (fp, XVECEXP (x, 0, 0));
28663 if (GOT_PCREL)
28664 fputs ("+.", fp);
28665 fputs ("-(", fp);
28666 output_addr_const (fp, XVECEXP (x, 0, 1));
28667 fputc (')', fp);
28668 return TRUE;
28669 }
28670 else if (GET_CODE (x) == CONST_VECTOR)
28671 return arm_emit_vector_const (fp, x);
28672
28673 return FALSE;
28674 }
28675
28676 /* Output assembly for a shift instruction.
28677 SET_FLAGS determines how the instruction modifies the condition codes.
28678 0 - Do not set condition codes.
28679 1 - Set condition codes.
28680 2 - Use smallest instruction. */
28681 const char *
28682 arm_output_shift (rtx *operands, int set_flags)
28683 {
28684 char pattern[100];
28685 static const char flag_chars[3] = {'?', '.', '!'};
28686 const char *shift;
28687 HOST_WIDE_INT val;
28688 char c;
28689
28690 c = flag_chars[set_flags];
28691 shift = shift_op (operands[3], &val);
28692 if (shift)
28693 {
28694 if (val != -1)
28695 operands[2] = GEN_INT (val);
28696 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
28697 }
28698 else
28699 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
28700
28701 output_asm_insn (pattern, operands);
28702 return "";
28703 }
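/* A sketch of the resulting templates (mnemonic assumed for illustration):
   for an arithmetic-right shift with SET_FLAGS == 0 this produces
   "asr%?\t%0, %1, %2", and when shift_op returns NULL it degenerates to
   "mov%?\t%0, %1".  The '?', '.' and '!' punctuation characters are
   expanded later by the ARM operand-printing code.  */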
28704
28705 /* Output assembly for a WMMX immediate shift instruction. */
28706 const char *
28707 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
28708 {
28709 int shift = INTVAL (operands[2]);
28710 char templ[50];
28711 machine_mode opmode = GET_MODE (operands[0]);
28712
28713 gcc_assert (shift >= 0);
28714
28715 /* If the shift value exceeds the range of the register versions (> 63 for
28716 the D qualifier, > 31 for W, > 15 for H), handle it specially below. */
28717 if (((opmode == V4HImode) && (shift > 15))
28718 || ((opmode == V2SImode) && (shift > 31))
28719 || ((opmode == DImode) && (shift > 63)))
28720 {
28721 if (wror_or_wsra)
28722 {
28723 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28724 output_asm_insn (templ, operands);
28725 if (opmode == DImode)
28726 {
28727 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
28728 output_asm_insn (templ, operands);
28729 }
28730 }
28731 else
28732 {
28733 /* The destination register will contain all zeros. */
28734 sprintf (templ, "wzero\t%%0");
28735 output_asm_insn (templ, operands);
28736 }
28737 return "";
28738 }
28739
28740 if ((opmode == DImode) && (shift > 32))
28741 {
28742 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28743 output_asm_insn (templ, operands);
28744 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
28745 output_asm_insn (templ, operands);
28746 }
28747 else
28748 {
28749 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
28750 output_asm_insn (templ, operands);
28751 }
28752 return "";
28753 }
28754
28755 /* Output assembly for a WMMX tinsr instruction. */
28756 const char *
28757 arm_output_iwmmxt_tinsr (rtx *operands)
28758 {
28759 int mask = INTVAL (operands[3]);
28760 int i;
28761 char templ[50];
28762 int units = mode_nunits[GET_MODE (operands[0])];
28763 gcc_assert ((mask & (mask - 1)) == 0);
28764 for (i = 0; i < units; ++i)
28765 {
28766 if ((mask & 0x01) == 1)
28767 {
28768 break;
28769 }
28770 mask >>= 1;
28771 }
28772 gcc_assert (i < units);
28773 {
28774 switch (GET_MODE (operands[0]))
28775 {
28776 case E_V8QImode:
28777 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28778 break;
28779 case E_V4HImode:
28780 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28781 break;
28782 case E_V2SImode:
28783 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28784 break;
28785 default:
28786 gcc_unreachable ();
28787 break;
28788 }
28789 output_asm_insn (templ, operands);
28790 }
28791 return "";
28792 }
28793
28794 /* Output a Thumb-1 casesi dispatch sequence. */
28795 const char *
28796 thumb1_output_casesi (rtx *operands)
28797 {
28798 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
28799
28800 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28801
28802 switch (GET_MODE (diff_vec))
28803 {
28804 case E_QImode:
28805 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28806 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28807 case E_HImode:
28808 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28809 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28810 case E_SImode:
28811 return "bl\t%___gnu_thumb1_case_si";
28812 default:
28813 gcc_unreachable ();
28814 }
28815 }
28816
28817 /* Output a Thumb-2 casesi instruction. */
28818 const char *
28819 thumb2_output_casesi (rtx *operands)
28820 {
28821 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
28822
28823 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28824
28825 output_asm_insn ("cmp\t%0, %1", operands);
28826 output_asm_insn ("bhi\t%l3", operands);
28827 switch (GET_MODE (diff_vec))
28828 {
28829 case E_QImode:
28830 return "tbb\t[%|pc, %0]";
28831 case E_HImode:
28832 return "tbh\t[%|pc, %0, lsl #1]";
28833 case E_SImode:
28834 if (flag_pic)
28835 {
28836 output_asm_insn ("adr\t%4, %l2", operands);
28837 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28838 output_asm_insn ("add\t%4, %4, %5", operands);
28839 return "bx\t%4";
28840 }
28841 else
28842 {
28843 output_asm_insn ("adr\t%4, %l2", operands);
28844 return "ldr\t%|pc, [%4, %0, lsl #2]";
28845 }
28846 default:
28847 gcc_unreachable ();
28848 }
28849 }
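/* For a QImode dispatch table the emitted sequence is therefore roughly
   (register names illustrative only):

	cmp	index, limit
	bhi	default_label
	tbb	[pc, index]

   with the tbh and adr/ldr variants selected above for wider offset
   tables.  */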
28850
28851 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
28852 per-core tuning structs. */
28853 static int
28854 arm_issue_rate (void)
28855 {
28856 return current_tune->issue_rate;
28857 }
28858
28859 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
28860 static int
28861 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
28862 {
28863 if (DEBUG_INSN_P (insn))
28864 return more;
28865
28866 rtx_code code = GET_CODE (PATTERN (insn));
28867 if (code == USE || code == CLOBBER)
28868 return more;
28869
28870 if (get_attr_type (insn) == TYPE_NO_INSN)
28871 return more;
28872
28873 return more - 1;
28874 }
28875
28876 /* Return how many instructions the scheduler should look ahead to choose
28877 the best one. */
28878 static int
28879 arm_first_cycle_multipass_dfa_lookahead (void)
28880 {
28881 int issue_rate = arm_issue_rate ();
28882
28883 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28884 }
28885
28886 /* Enable modeling of L2 auto-prefetcher. */
28887 static int
28888 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28889 {
28890 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28891 }
28892
28893 const char *
28894 arm_mangle_type (const_tree type)
28895 {
28896 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28897 has to be mangled as if it is in the "std" namespace. */
28898 if (TARGET_AAPCS_BASED
28899 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28900 return "St9__va_list";
28901
28902 /* Half-precision float. */
28903 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28904 return "Dh";
28905
28906 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
28907 builtin type. */
28908 if (TYPE_NAME (type) != NULL)
28909 return arm_mangle_builtin_type (type);
28910
28911 /* Use the default mangling. */
28912 return NULL;
28913 }
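/* For example, under AAPCS the "__va_list" type mangles as "St9__va_list"
   and __fp16 (a 16-bit REAL_TYPE) mangles as "Dh"; Neon builtin types are
   handled by arm_mangle_builtin_type, and anything else falls back to the
   default mangling.  */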
28914
28915 /* Order of allocation of core registers for Thumb: this allocation is
28916 written over the corresponding initial entries of the array
28917 initialized with REG_ALLOC_ORDER. We allocate all low registers
28918 first. Saving and restoring a low register is usually cheaper than
28919 using a call-clobbered high register. */
28920
28921 static const int thumb_core_reg_alloc_order[] =
28922 {
28923 3, 2, 1, 0, 4, 5, 6, 7,
28924 12, 14, 8, 9, 10, 11
28925 };
28926
28927 /* Adjust register allocation order when compiling for Thumb. */
28928
28929 void
28930 arm_order_regs_for_local_alloc (void)
28931 {
28932 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28933 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28934 if (TARGET_THUMB)
28935 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28936 sizeof (thumb_core_reg_alloc_order));
28937 }
28938
28939 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28940
28941 bool
28942 arm_frame_pointer_required (void)
28943 {
28944 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28945 return true;
28946
28947 /* If the function receives nonlocal gotos, it needs to save the frame
28948 pointer in the nonlocal_goto_save_area object. */
28949 if (cfun->has_nonlocal_label)
28950 return true;
28951
28952 /* The frame pointer is required for non-leaf APCS frames. */
28953 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28954 return true;
28955
28956 /* If we are probing the stack in the prologue, we will have a faulting
28957 instruction prior to the stack adjustment and this requires a frame
28958 pointer if we want to catch the exception using the EABI unwinder. */
28959 if (!IS_INTERRUPT (arm_current_func_type ())
28960 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28961 || flag_stack_clash_protection)
28962 && arm_except_unwind_info (&global_options) == UI_TARGET
28963 && cfun->can_throw_non_call_exceptions)
28964 {
28965 HOST_WIDE_INT size = get_frame_size ();
28966
28967 /* That's irrelevant if there is no stack adjustment. */
28968 if (size <= 0)
28969 return false;
28970
28971 /* That's relevant only if there is a stack probe. */
28972 if (crtl->is_leaf && !cfun->calls_alloca)
28973 {
28974 /* We don't have the final size of the frame so adjust. */
28975 size += 32 * UNITS_PER_WORD;
28976 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28977 return true;
28978 }
28979 else
28980 return true;
28981 }
28982
28983 return false;
28984 }
28985
28986 /* Only Thumb-1 lacks support for conditional execution, so return true if
28987 the target is not Thumb-1. */
28988 static bool
28989 arm_have_conditional_execution (void)
28990 {
28991 return !TARGET_THUMB1;
28992 }
28993
28994 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28995 static HOST_WIDE_INT
28996 arm_vector_alignment (const_tree type)
28997 {
28998 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28999
29000 if (TARGET_AAPCS_BASED)
29001 align = MIN (align, 64);
29002
29003 return align;
29004 }
29005
29006 static void
29007 arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
29008 {
29009 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29010 {
29011 sizes->safe_push (16);
29012 sizes->safe_push (8);
29013 }
29014 }
29015
29016 static bool
29017 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29018 {
29019 /* Vectors which aren't in packed structures will not be less aligned than
29020 the natural alignment of their element type, so this is safe. */
29021 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29022 return !is_packed;
29023
29024 return default_builtin_vector_alignment_reachable (type, is_packed);
29025 }
29026
29027 static bool
29028 arm_builtin_support_vector_misalignment (machine_mode mode,
29029 const_tree type, int misalignment,
29030 bool is_packed)
29031 {
29032 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29033 {
29034 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29035
29036 if (is_packed)
29037 return align == 1;
29038
29039 /* If the misalignment is unknown, we should be able to handle the access
29040 so long as it is not to a member of a packed data structure. */
29041 if (misalignment == -1)
29042 return true;
29043
29044 /* Return true if the misalignment is a multiple of the natural alignment
29045 of the vector's element type. This is probably always going to be
29046 true in practice, since we've already established that this isn't a
29047 packed access. */
29048 return ((misalignment % align) == 0);
29049 }
29050
29051 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29052 is_packed);
29053 }
29054
29055 static void
29056 arm_conditional_register_usage (void)
29057 {
29058 int regno;
29059
29060 if (TARGET_THUMB1 && optimize_size)
29061 {
29062 /* When optimizing for size on Thumb-1, it's better not
29063 to use the HI regs, because of the overhead of
29064 stacking them. */
29065 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
29066 fixed_regs[regno] = call_used_regs[regno] = 1;
29067 }
29068
29069 /* The link register can be clobbered by any branch insn,
29070 but we have no way to track that at present, so mark
29071 it as unavailable. */
29072 if (TARGET_THUMB1)
29073 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29074
29075 if (TARGET_32BIT && TARGET_HARD_FLOAT)
29076 {
29077 /* VFPv3 registers are disabled when earlier VFP
29078 versions are selected due to the definition of
29079 LAST_VFP_REGNUM. */
29080 for (regno = FIRST_VFP_REGNUM;
29081 regno <= LAST_VFP_REGNUM; ++ regno)
29082 {
29083 fixed_regs[regno] = 0;
29084 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29085 || regno >= FIRST_VFP_REGNUM + 32;
29086 }
29087 }
29088
29089 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
29090 {
29091 regno = FIRST_IWMMXT_GR_REGNUM;
29092 /* The 2002/10/09 revision of the XScale ABI has wCG0
29093 and wCG1 as call-preserved registers. The 2002/11/21
29094 revision changed this so that all wCG registers are
29095 scratch registers. */
29096 for (regno = FIRST_IWMMXT_GR_REGNUM;
29097 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29098 fixed_regs[regno] = 0;
29099 /* The XScale ABI has wR0 - wR9 as scratch registers,
29100 the rest as call-preserved registers. */
29101 for (regno = FIRST_IWMMXT_REGNUM;
29102 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29103 {
29104 fixed_regs[regno] = 0;
29105 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29106 }
29107 }
29108
29109 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29110 {
29111 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29112 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29113 }
29114 else if (TARGET_APCS_STACK)
29115 {
29116 fixed_regs[10] = 1;
29117 call_used_regs[10] = 1;
29118 }
29119 /* -mcaller-super-interworking reserves r11 for calls to
29120 _interwork_r11_call_via_rN(). Making the register global
29121 is an easy way of ensuring that it remains valid for all
29122 calls. */
29123 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29124 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29125 {
29126 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29127 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29128 if (TARGET_CALLER_INTERWORKING)
29129 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29130 }
29131 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29132 }
29133
29134 static reg_class_t
29135 arm_preferred_rename_class (reg_class_t rclass)
29136 {
29137 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29138 using GENERAL_REGS. During the register rename pass, preferring LO_REGS
29139 can therefore reduce code size. */
29140 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29141 return LO_REGS;
29142 else
29143 return NO_REGS;
29144 }
29145
29146 /* Compute the attribute "length" of insn "*push_multi".
29147 So this function MUST be kept in sync with that insn pattern. */
29148 int
29149 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
29150 {
29151 int i, regno, hi_reg;
29152 int num_saves = XVECLEN (parallel_op, 0);
29153
29154 /* ARM mode. */
29155 if (TARGET_ARM)
29156 return 4;
29157 /* Thumb1 mode. */
29158 if (TARGET_THUMB1)
29159 return 2;
29160
29161 /* Thumb2 mode. */
29162 regno = REGNO (first_op);
29163 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
29164 list is 8-bit. Normally this means all registers in the list must be
29165 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
29166 encodings. The one exception is PUSH, where LR (a HI_REG) can still be
29167 used with the 16-bit encoding. */
29168 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29169 for (i = 1; i < num_saves && !hi_reg; i++)
29170 {
29171 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29172 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29173 }
29174
29175 if (!hi_reg)
29176 return 2;
29177 return 4;
29178 }
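/* For instance, "push {r0-r7, lr}" can use the 16-bit encoding and so gets
   length 2, whereas a list that also contains r8 (hypothetical example)
   forces the 32-bit encoding and length 4.  */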
29179
29180 /* Compute the attribute "length" of insn. Currently, this function is used
29181 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
29182 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
29183 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
29184 true if OPERANDS contains an insn which explicitly updates the base register. */
29185
29186 int
29187 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
29188 {
29189 /* ARM mode. */
29190 if (TARGET_ARM)
29191 return 4;
29192 /* Thumb1 mode. */
29193 if (TARGET_THUMB1)
29194 return 2;
29195
29196 rtx parallel_op = operands[0];
29197 /* Initialize to elements number of PARALLEL. */
29198 unsigned indx = XVECLEN (parallel_op, 0) - 1;
29199 /* Initialize the value to base register. */
29200 unsigned regno = REGNO (operands[1]);
29201 /* Skip return and write back pattern.
29202 We only need register pop pattern for later analysis. */
29203 unsigned first_indx = 0;
29204 first_indx += return_pc ? 1 : 0;
29205 first_indx += write_back_p ? 1 : 0;
29206
29207 /* A pop operation can be done through LDM or POP. If the base register is SP
29208 and write back is used, then an LDM is an alias of POP. */
29209 bool pop_p = (regno == SP_REGNUM && write_back_p);
29210 bool ldm_p = !pop_p;
29211
29212 /* Check base register for LDM. */
29213 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
29214 return 4;
29215
29216 /* Check each register in the list. */
29217 for (; indx >= first_indx; indx--)
29218 {
29219 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
29220 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
29221 comment in arm_attr_length_push_multi. */
29222 if (REGNO_REG_CLASS (regno) == HI_REGS
29223 && (regno != PC_REGNUM || ldm_p))
29224 return 4;
29225 }
29226
29227 return 2;
29228 }
29229
29230 /* Compute the number of instructions emitted by output_move_double. */
29231 int
29232 arm_count_output_move_double_insns (rtx *operands)
29233 {
29234 int count;
29235 rtx ops[2];
29236 /* output_move_double may modify the operands array, so call it
29237 here on a copy of the array. */
29238 ops[0] = operands[0];
29239 ops[1] = operands[1];
29240 output_move_double (ops, false, &count);
29241 return count;
29242 }
29243
29244 /* Same as above, but operands are a register/memory pair in SImode.
29245 Assumes OPERANDS has the base register in position 0 and the memory in
29246 position 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
29247 int
29248 arm_count_ldrdstrd_insns (rtx *operands, bool load)
29249 {
29250 int count;
29251 rtx ops[2];
29252 int regnum, memnum;
29253 if (load)
29254 regnum = 0, memnum = 1;
29255 else
29256 regnum = 1, memnum = 0;
29257 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
29258 ops[memnum] = adjust_address (operands[2], DImode, 0);
29259 output_move_double (ops, false, &count);
29260 return count;
29261 }
29262
29263
29264 int
29265 vfp3_const_double_for_fract_bits (rtx operand)
29266 {
29267 REAL_VALUE_TYPE r0;
29268
29269 if (!CONST_DOUBLE_P (operand))
29270 return 0;
29271
29272 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
29273 if (exact_real_inverse (DFmode, &r0)
29274 && !REAL_VALUE_NEGATIVE (r0))
29275 {
29276 if (exact_real_truncate (DFmode, &r0))
29277 {
29278 HOST_WIDE_INT value = real_to_integer (&r0);
29279 value = value & 0xffffffff;
29280 if ((value != 0) && ( (value & (value - 1)) == 0))
29281 {
29282 int ret = exact_log2 (value);
29283 gcc_assert (IN_RANGE (ret, 0, 31));
29284 return ret;
29285 }
29286 }
29287 }
29288 return 0;
29289 }
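/* Worked example: for the constant 0.25 the exact inverse is 4.0, which
   truncates to the integer 4 = 2^2, so 2 is returned; for a value such as
   0.3, whose inverse is not an exact power of two, 0 is returned.  */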
29290
29291 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
29292 log2 is in [1, 32], return that log2. Otherwise return -1.
29293 This is used in the patterns for vcvt.s32.f32 floating-point to
29294 fixed-point conversions. */
29295
29296 int
29297 vfp3_const_double_for_bits (rtx x)
29298 {
29299 const REAL_VALUE_TYPE *r;
29300
29301 if (!CONST_DOUBLE_P (x))
29302 return -1;
29303
29304 r = CONST_DOUBLE_REAL_VALUE (x);
29305
29306 if (REAL_VALUE_NEGATIVE (*r)
29307 || REAL_VALUE_ISNAN (*r)
29308 || REAL_VALUE_ISINF (*r)
29309 || !real_isinteger (r, SFmode))
29310 return -1;
29311
29312 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
29313
29314 /* The exact_log2 above will have returned -1 if this is
29315 not an exact log2. */
29316 if (!IN_RANGE (hwint, 1, 32))
29317 return -1;
29318
29319 return hwint;
29320 }
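/* For example, the constant 65536.0 is a positive integer equal to 2^16,
   so 16 is returned; 3.0 is not a power of two and yields -1.  */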
29321
29322 \f
29323 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29324
29325 static void
29326 arm_pre_atomic_barrier (enum memmodel model)
29327 {
29328 if (need_atomic_barrier_p (model, true))
29329 emit_insn (gen_memory_barrier ());
29330 }
29331
29332 static void
29333 arm_post_atomic_barrier (enum memmodel model)
29334 {
29335 if (need_atomic_barrier_p (model, false))
29336 emit_insn (gen_memory_barrier ());
29337 }
29338
29339 /* Emit the load-exclusive and store-exclusive instructions.
29340 Use acquire and release versions if necessary. */
29341
29342 static void
29343 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
29344 {
29345 rtx (*gen) (rtx, rtx);
29346
29347 if (acq)
29348 {
29349 switch (mode)
29350 {
29351 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29352 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29353 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29354 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29355 default:
29356 gcc_unreachable ();
29357 }
29358 }
29359 else
29360 {
29361 switch (mode)
29362 {
29363 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
29364 case E_HImode: gen = gen_arm_load_exclusivehi; break;
29365 case E_SImode: gen = gen_arm_load_exclusivesi; break;
29366 case E_DImode: gen = gen_arm_load_exclusivedi; break;
29367 default:
29368 gcc_unreachable ();
29369 }
29370 }
29371
29372 emit_insn (gen (rval, mem));
29373 }
29374
29375 static void
29376 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
29377 rtx mem, bool rel)
29378 {
29379 rtx (*gen) (rtx, rtx, rtx);
29380
29381 if (rel)
29382 {
29383 switch (mode)
29384 {
29385 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
29386 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
29387 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
29388 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
29389 default:
29390 gcc_unreachable ();
29391 }
29392 }
29393 else
29394 {
29395 switch (mode)
29396 {
29397 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
29398 case E_HImode: gen = gen_arm_store_exclusivehi; break;
29399 case E_SImode: gen = gen_arm_store_exclusivesi; break;
29400 case E_DImode: gen = gen_arm_store_exclusivedi; break;
29401 default:
29402 gcc_unreachable ();
29403 }
29404 }
29405
29406 emit_insn (gen (bval, rval, mem));
29407 }
29408
29409 /* Mark the previous jump instruction as unlikely. */
29410
29411 static void
29412 emit_unlikely_jump (rtx insn)
29413 {
29414 rtx_insn *jump = emit_jump_insn (insn);
29415 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
29416 }
29417
29418 /* Expand a compare and swap pattern. */
29419
29420 void
29421 arm_expand_compare_and_swap (rtx operands[])
29422 {
29423 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29424 machine_mode mode, cmp_mode;
29425
29426 bval = operands[0];
29427 rval = operands[1];
29428 mem = operands[2];
29429 oldval = operands[3];
29430 newval = operands[4];
29431 is_weak = operands[5];
29432 mod_s = operands[6];
29433 mod_f = operands[7];
29434 mode = GET_MODE (mem);
29435
29436 /* Normally the succ memory model must be stronger than fail, but in the
29437 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29438 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29439
29440 if (TARGET_HAVE_LDACQ
29441 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
29442 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
29443 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29444
29445 switch (mode)
29446 {
29447 case E_QImode:
29448 case E_HImode:
29449 /* For narrow modes, we're going to perform the comparison in SImode,
29450 so do the zero-extension now. */
29451 rval = gen_reg_rtx (SImode);
29452 oldval = convert_modes (SImode, mode, oldval, true);
29453 /* FALLTHRU */
29454
29455 case E_SImode:
29456 /* Force the value into a register if needed. We waited until after
29457 the zero-extension above to do this properly. */
29458 if (!arm_add_operand (oldval, SImode))
29459 oldval = force_reg (SImode, oldval);
29460 break;
29461
29462 case E_DImode:
29463 if (!cmpdi_operand (oldval, mode))
29464 oldval = force_reg (mode, oldval);
29465 break;
29466
29467 default:
29468 gcc_unreachable ();
29469 }
29470
29471 if (TARGET_THUMB1)
29472 cmp_mode = E_SImode;
29473 else
29474 cmp_mode = CC_Zmode;
29475
29476 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
29477 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
29478 oldval, newval, is_weak, mod_s, mod_f));
29479
29480 if (mode == QImode || mode == HImode)
29481 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29482
29483 /* In all cases, we arrange for success to be signaled by Z set.
29484 This arrangement allows for the boolean result to be used directly
29485 in a subsequent branch, post optimization. For Thumb-1 targets, the
29486 boolean negation of the result is also stored in bval because the Thumb-1
29487 backend lacks dependency tracking for the CC flag, as flag-setting is
29488 not represented at the RTL level. */
29489 if (TARGET_THUMB1)
29490 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
29491 else
29492 {
29493 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
29494 emit_insn (gen_rtx_SET (bval, x));
29495 }
29496 }
29497
29498 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29499 another memory store between the load-exclusive and store-exclusive can
29500 reset the monitor from Exclusive to Open state. This means we must wait
29501 until after reload to split the pattern, lest we get a register spill in
29502 the middle of the atomic sequence. Success of the compare and swap is
29503 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
29504 for Thumb-1 targets (i.e. the negation of the boolean value returned by
29505 atomic_compare_and_swapmode standard pattern in operand 0). */
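/* A sketch of the strong CAS loop that this split produces on a 32-bit
   target (register names are illustrative only):

	1:	ldrex	rval, [mem]
		cmp	rval, oldval
		bne	2f
		strex	neg_bval, newval, [mem]
		cmp	neg_bval, #0
		bne	1b
	2:

   with ldaex/stlex substituted when acquire/release semantics are needed,
   and the loop-back branch omitted for a weak CAS.  */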
29506
29507 void
29508 arm_split_compare_and_swap (rtx operands[])
29509 {
29510 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
29511 machine_mode mode;
29512 enum memmodel mod_s, mod_f;
29513 bool is_weak;
29514 rtx_code_label *label1, *label2;
29515 rtx x, cond;
29516
29517 rval = operands[1];
29518 mem = operands[2];
29519 oldval = operands[3];
29520 newval = operands[4];
29521 is_weak = (operands[5] != const0_rtx);
29522 mod_s_rtx = operands[6];
29523 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
29524 mod_f = memmodel_from_int (INTVAL (operands[7]));
29525 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
29526 mode = GET_MODE (mem);
29527
29528 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
29529
29530 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
29531 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
29532
29533 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
29534 a full barrier is emitted after the store-release. */
29535 if (is_armv8_sync)
29536 use_acquire = false;
29537
29538 /* Checks whether a barrier is needed and emits one accordingly. */
29539 if (!(use_acquire || use_release))
29540 arm_pre_atomic_barrier (mod_s);
29541
29542 label1 = NULL;
29543 if (!is_weak)
29544 {
29545 label1 = gen_label_rtx ();
29546 emit_label (label1);
29547 }
29548 label2 = gen_label_rtx ();
29549
29550 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
29551
29552 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
29553 as required to communicate with arm_expand_compare_and_swap. */
29554 if (TARGET_32BIT)
29555 {
29556 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
29557 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29558 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29559 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
29560 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
29561 }
29562 else
29563 {
29564 emit_move_insn (neg_bval, const1_rtx);
29565 cond = gen_rtx_NE (VOIDmode, rval, oldval);
29566 if (thumb1_cmpneg_operand (oldval, SImode))
29567 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
29568 label2, cond));
29569 else
29570 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
29571 }
29572
29573 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
29574
29575 /* Weak or strong, we want EQ to be true for success, so that we
29576 match the flags that we got from the compare above. */
29577 if (TARGET_32BIT)
29578 {
29579 cond = gen_rtx_REG (CCmode, CC_REGNUM);
29580 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
29581 emit_insn (gen_rtx_SET (cond, x));
29582 }
29583
29584 if (!is_weak)
29585 {
29586 /* Z is set to boolean value of !neg_bval, as required to communicate
29587 with arm_expand_compare_and_swap. */
29588 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
29589 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
29590 }
29591
29592 if (!is_mm_relaxed (mod_f))
29593 emit_label (label2);
29594
29595 /* Checks whether a barrier is needed and emits one accordingly. */
29596 if (is_armv8_sync
29597 || !(use_acquire || use_release))
29598 arm_post_atomic_barrier (mod_s);
29599
29600 if (is_mm_relaxed (mod_f))
29601 emit_label (label2);
29602 }
29603
29604 /* Split an atomic operation pattern. Operation is given by CODE and is one
29605 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
29606 operation). Operation is performed on the content at MEM and on VALUE
29607 following the memory model MODEL_RTX. The content at MEM before and after
29608 the operation is returned in OLD_OUT and NEW_OUT respectively while the
29609 success of the operation is returned in COND. Using a scratch register or
29610 an operand register for these determines what result is returned for that
29611 pattern. */
29612
29613 void
29614 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
29615 rtx value, rtx model_rtx, rtx cond)
29616 {
29617 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
29618 machine_mode mode = GET_MODE (mem);
29619 machine_mode wmode = (mode == DImode ? DImode : SImode);
29620 rtx_code_label *label;
29621 bool all_low_regs, bind_old_new;
29622 rtx x;
29623
29624 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
29625
29626 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
29627 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
29628
29629 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
29630 a full barrier is emitted after the store-release. */
29631 if (is_armv8_sync)
29632 use_acquire = false;
29633
29634 /* Checks whether a barrier is needed and emits one accordingly. */
29635 if (!(use_acquire || use_release))
29636 arm_pre_atomic_barrier (model);
29637
29638 label = gen_label_rtx ();
29639 emit_label (label);
29640
29641 if (new_out)
29642 new_out = gen_lowpart (wmode, new_out);
29643 if (old_out)
29644 old_out = gen_lowpart (wmode, old_out);
29645 else
29646 old_out = new_out;
29647 value = simplify_gen_subreg (wmode, value, mode, 0);
29648
29649 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
29650
29651 /* Does the operation require destination and first operand to use the same
29652 register? This is decided by register constraints of relevant insn
29653 patterns in thumb1.md. */
29654 gcc_assert (!new_out || REG_P (new_out));
29655 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
29656 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
29657 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
29658 bind_old_new =
29659 (TARGET_THUMB1
29660 && code != SET
29661 && code != MINUS
29662 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
29663
29664 /* We want to return the old value while putting the result of the operation
29665 in the same register as the old value so copy the old value over to the
29666 destination register and use that register for the operation. */
29667 if (old_out && bind_old_new)
29668 {
29669 emit_move_insn (new_out, old_out);
29670 old_out = new_out;
29671 }
29672
29673 switch (code)
29674 {
29675 case SET:
29676 new_out = value;
29677 break;
29678
29679 case NOT:
29680 x = gen_rtx_AND (wmode, old_out, value);
29681 emit_insn (gen_rtx_SET (new_out, x));
29682 x = gen_rtx_NOT (wmode, new_out);
29683 emit_insn (gen_rtx_SET (new_out, x));
29684 break;
29685
29686 case MINUS:
29687 if (CONST_INT_P (value))
29688 {
29689 value = GEN_INT (-INTVAL (value));
29690 code = PLUS;
29691 }
29692 /* FALLTHRU */
29693
29694 case PLUS:
29695 if (mode == DImode)
29696 {
29697 /* DImode plus/minus need to clobber flags. */
29698 /* The adddi3 and subdi3 patterns are incorrectly written so that
29699 they require matching operands, even when we could easily support
29700 three operands. Thankfully, this can be fixed up post-splitting,
29701 as the individual add+adc patterns do accept three operands and
29702 post-reload cprop can make these moves go away. */
29703 emit_move_insn (new_out, old_out);
29704 if (code == PLUS)
29705 x = gen_adddi3 (new_out, new_out, value);
29706 else
29707 x = gen_subdi3 (new_out, new_out, value);
29708 emit_insn (x);
29709 break;
29710 }
29711 /* FALLTHRU */
29712
29713 default:
29714 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
29715 emit_insn (gen_rtx_SET (new_out, x));
29716 break;
29717 }
29718
29719 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
29720 use_release);
29721
29722 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29723 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
29724
29725 /* Checks whether a barrier is needed and emits one accordingly. */
29726 if (is_armv8_sync
29727 || !(use_acquire || use_release))
29728 arm_post_atomic_barrier (model);
29729 }
29730 \f
29731 #define MAX_VECT_LEN 16
29732
29733 struct expand_vec_perm_d
29734 {
29735 rtx target, op0, op1;
29736 vec_perm_indices perm;
29737 machine_mode vmode;
29738 bool one_vector_p;
29739 bool testing_p;
29740 };
29741
29742 /* Generate a variable permutation. */
29743
29744 static void
29745 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
29746 {
29747 machine_mode vmode = GET_MODE (target);
29748 bool one_vector_p = rtx_equal_p (op0, op1);
29749
29750 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
29751 gcc_checking_assert (GET_MODE (op0) == vmode);
29752 gcc_checking_assert (GET_MODE (op1) == vmode);
29753 gcc_checking_assert (GET_MODE (sel) == vmode);
29754 gcc_checking_assert (TARGET_NEON);
29755
29756 if (one_vector_p)
29757 {
29758 if (vmode == V8QImode)
29759 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
29760 else
29761 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
29762 }
29763 else
29764 {
29765 rtx pair;
29766
29767 if (vmode == V8QImode)
29768 {
29769 pair = gen_reg_rtx (V16QImode);
29770 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
29771 pair = gen_lowpart (TImode, pair);
29772 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
29773 }
29774 else
29775 {
29776 pair = gen_reg_rtx (OImode);
29777 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29778 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29779 }
29780 }
29781 }
29782
29783 void
29784 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29785 {
29786 machine_mode vmode = GET_MODE (target);
29787 unsigned int nelt = GET_MODE_NUNITS (vmode);
29788 bool one_vector_p = rtx_equal_p (op0, op1);
29789 rtx mask;
29790
29791 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29792 numbering of elements for big-endian, we must reverse the order. */
29793 gcc_checking_assert (!BYTES_BIG_ENDIAN);
29794
29795 /* The VTBL instruction does not use a modulo index, so we must take care
29796 of that ourselves. */
29797 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29798 mask = gen_const_vec_duplicate (vmode, mask);
29799 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29800
29801 arm_expand_vec_perm_1 (target, op0, op1, sel);
29802 }
29803
29804 /* Map lane ordering between architectural lane order, and GCC lane order,
29805 taking into account ABI. See comment above output_move_neon for details. */
29806
29807 static int
29808 neon_endian_lane_map (machine_mode mode, int lane)
29809 {
29810 if (BYTES_BIG_ENDIAN)
29811 {
29812 int nelems = GET_MODE_NUNITS (mode);
29813 /* Reverse lane order. */
29814 lane = (nelems - 1 - lane);
29815 /* Reverse D register order, to match ABI. */
29816 if (GET_MODE_SIZE (mode) == 16)
29817 lane = lane ^ (nelems / 2);
29818 }
29819 return lane;
29820 }
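/* Worked example: for V4SImode on a big-endian target, architectural lane 0
   first maps to 4 - 1 - 0 = 3 and, because the mode is 16 bytes wide, is
   then XORed with 2, giving GCC lane 1; on little-endian targets the
   mapping is the identity.  */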
29821
29822 /* Some permutations index into pairs of vectors, this is a helper function
29823 to map indexes into those pairs of vectors. */
29824
29825 static int
29826 neon_pair_endian_lane_map (machine_mode mode, int lane)
29827 {
29828 int nelem = GET_MODE_NUNITS (mode);
29829 if (BYTES_BIG_ENDIAN)
29830 lane =
29831 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
29832 return lane;
29833 }
29834
29835 /* Generate or test for an insn that supports a constant permutation. */
29836
29837 /* Recognize patterns for the VUZP insns. */
29838
29839 static bool
29840 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29841 {
29842 unsigned int i, odd, mask, nelt = d->perm.length ();
29843 rtx out0, out1, in0, in1;
29844 int first_elem;
29845 int swap_nelt;
29846
29847 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29848 return false;
29849
29850 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29851 big endian pattern on 64 bit vectors, so we correct for that. */
29852 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29853 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29854
29855 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29856
29857 if (first_elem == neon_endian_lane_map (d->vmode, 0))
29858 odd = 0;
29859 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29860 odd = 1;
29861 else
29862 return false;
29863 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29864
29865 for (i = 0; i < nelt; i++)
29866 {
29867 unsigned elt =
29868 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29869 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29870 return false;
29871 }
29872
29873 /* Success! */
29874 if (d->testing_p)
29875 return true;
29876
29877 in0 = d->op0;
29878 in1 = d->op1;
29879 if (swap_nelt != 0)
29880 std::swap (in0, in1);
29881
29882 out0 = d->target;
29883 out1 = gen_reg_rtx (d->vmode);
29884 if (odd)
29885 std::swap (out0, out1);
29886
29887 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
29888 return true;
29889 }
29890
29891 /* Recognize patterns for the VZIP insns. */
29892
29893 static bool
29894 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29895 {
29896 unsigned int i, high, mask, nelt = d->perm.length ();
29897 rtx out0, out1, in0, in1;
29898 int first_elem;
29899 bool is_swapped;
29900
29901 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29902 return false;
29903
29904 is_swapped = BYTES_BIG_ENDIAN;
29905
29906 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29907
29908 high = nelt / 2;
29909 if (first_elem == neon_endian_lane_map (d->vmode, high))
29910 ;
29911 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29912 high = 0;
29913 else
29914 return false;
29915 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29916
29917 for (i = 0; i < nelt / 2; i++)
29918 {
29919 unsigned elt =
29920 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29921 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29922 != elt)
29923 return false;
29924 elt =
29925 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29926 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29927 != elt)
29928 return false;
29929 }
29930
29931 /* Success! */
29932 if (d->testing_p)
29933 return true;
29934
29935 in0 = d->op0;
29936 in1 = d->op1;
29937 if (is_swapped)
29938 std::swap (in0, in1);
29939
29940 out0 = d->target;
29941 out1 = gen_reg_rtx (d->vmode);
29942 if (high)
29943 std::swap (out0, out1);
29944
29945 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
29946 return true;
29947 }
29948
29949 /* Recognize patterns for the VREV insns. */
29950 static bool
29951 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29952 {
29953 unsigned int i, j, diff, nelt = d->perm.length ();
29954 rtx (*gen) (machine_mode, rtx, rtx);
29955
29956 if (!d->one_vector_p)
29957 return false;
29958
29959 diff = d->perm[0];
29960 switch (diff)
29961 {
29962 case 7:
29963 switch (d->vmode)
29964 {
29965 case E_V16QImode:
29966 case E_V8QImode:
29967 gen = gen_neon_vrev64;
29968 break;
29969 default:
29970 return false;
29971 }
29972 break;
29973 case 3:
29974 switch (d->vmode)
29975 {
29976 case E_V16QImode:
29977 case E_V8QImode:
29978 gen = gen_neon_vrev32;
29979 break;
29980 case E_V8HImode:
29981 case E_V4HImode:
29982 case E_V8HFmode:
29983 case E_V4HFmode:
29984 gen = gen_neon_vrev64;
29985 break;
29986 default:
29987 return false;
29988 }
29989 break;
29990 case 1:
29991 switch (d->vmode)
29992 {
29993 case E_V16QImode:
29994 case E_V8QImode:
29995 gen = gen_neon_vrev16;
29996 break;
29997 case E_V8HImode:
29998 case E_V4HImode:
29999 gen = gen_neon_vrev32;
30000 break;
30001 case E_V4SImode:
30002 case E_V2SImode:
30003 case E_V4SFmode:
30004 case E_V2SFmode:
30005 gen = gen_neon_vrev64;
30006 break;
30007 default:
30008 return false;
30009 }
30010 break;
30011 default:
30012 return false;
30013 }
30014
30015 for (i = 0; i < nelt ; i += diff + 1)
30016 for (j = 0; j <= diff; j += 1)
30017 {
30018 /* This is guaranteed to be true as the value of diff
30019 is 7, 3, 1 and we should have enough elements in the
30020 queue to generate this. Getting a vector mask with a
30021 value of diff other than these values implies that
30022 something is wrong by the time we get here. */
30023 gcc_assert (i + j < nelt);
30024 if (d->perm[i + j] != i + diff - j)
30025 return false;
30026 }
30027
30028 /* Success! */
30029 if (d->testing_p)
30030 return true;
30031
30032 emit_insn (gen (d->vmode, d->target, d->op0));
30033 return true;
30034 }
30035
30036 /* Recognize patterns for the VTRN insns. */
30037
30038 static bool
30039 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30040 {
30041 unsigned int i, odd, mask, nelt = d->perm.length ();
30042 rtx out0, out1, in0, in1;
30043
30044 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30045 return false;
30046
30047 /* Note that these are little-endian tests. Adjust for big-endian later. */
30048 if (d->perm[0] == 0)
30049 odd = 0;
30050 else if (d->perm[0] == 1)
30051 odd = 1;
30052 else
30053 return false;
30054 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30055
30056 for (i = 0; i < nelt; i += 2)
30057 {
30058 if (d->perm[i] != i + odd)
30059 return false;
30060 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30061 return false;
30062 }
30063
30064 /* Success! */
30065 if (d->testing_p)
30066 return true;
30067
30068 in0 = d->op0;
30069 in1 = d->op1;
30070 if (BYTES_BIG_ENDIAN)
30071 {
30072 std::swap (in0, in1);
30073 odd = !odd;
30074 }
30075
30076 out0 = d->target;
30077 out1 = gen_reg_rtx (d->vmode);
30078 if (odd)
30079 std::swap (out0, out1);
30080
30081 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
30082 return true;
30083 }
30084
30085 /* Recognize patterns for the VEXT insns. */
30086
30087 static bool
30088 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30089 {
30090 unsigned int i, nelt = d->perm.length ();
30091 rtx offset;
30092
30093 unsigned int location;
30094
30095 unsigned int next = d->perm[0] + 1;
30096
30097 /* TODO: Handle GCC's numbering of elements for big-endian. */
30098 if (BYTES_BIG_ENDIAN)
30099 return false;
30100
30101 /* Check if the extracted indexes are increasing by one. */
30102 for (i = 1; i < nelt; next++, i++)
30103 {
30104 /* If we hit the most significant element of the 2nd vector in
30105 the previous iteration, no need to test further. */
30106 if (next == 2 * nelt)
30107 return false;
30108
30109 /* If we are operating on only one vector: it could be a
30110 rotation. If there are only two elements of size < 64, let
30111 arm_evpc_neon_vrev catch it. */
30112 if (d->one_vector_p && (next == nelt))
30113 {
30114 if ((nelt == 2) && (d->vmode != V2DImode))
30115 return false;
30116 else
30117 next = 0;
30118 }
30119
30120 if (d->perm[i] != next)
30121 return false;
30122 }
30123
30124 location = d->perm[0];
30125
30126 /* Success! */
30127 if (d->testing_p)
30128 return true;
30129
30130 offset = GEN_INT (location);
30131
30132 if (d->vmode == E_DImode)
30133 return false;
30134
30135 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
30136 return true;
30137 }
30138
30139 /* The NEON VTBL instruction is a fully variable permutation that's even
30140 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30141 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30142 can do slightly better by expanding this as a constant where we don't
30143 have to apply a mask. */
30144
30145 static bool
30146 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30147 {
30148 rtx rperm[MAX_VECT_LEN], sel;
30149 machine_mode vmode = d->vmode;
30150 unsigned int i, nelt = d->perm.length ();
30151
30152 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30153 numbering of elements for big-endian, we must reverse the order. */
30154 if (BYTES_BIG_ENDIAN)
30155 return false;
30156
30157 if (d->testing_p)
30158 return true;
30159
30160 /* Generic code will try constant permutation twice. Once with the
30161 original mode and again with the elements lowered to QImode.
30162 So wait and don't do the selector expansion ourselves. */
30163 if (vmode != V8QImode && vmode != V16QImode)
30164 return false;
30165
30166 for (i = 0; i < nelt; ++i)
30167 rperm[i] = GEN_INT (d->perm[i]);
30168 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30169 sel = force_reg (vmode, sel);
30170
30171 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30172 return true;
30173 }
30174
30175 static bool
30176 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30177 {
30178 /* Check if the input mask matches vext before reordering the
30179 operands. */
30180 if (TARGET_NEON)
30181 if (arm_evpc_neon_vext (d))
30182 return true;
30183
30184 /* The pattern matching functions above are written to look for a small
30185 number to begin the sequence (0, 1, N/2). If we begin with an index
30186 from the second operand, we can swap the operands. */
30187 unsigned int nelt = d->perm.length ();
30188 if (d->perm[0] >= nelt)
30189 {
30190 d->perm.rotate_inputs (1);
30191 std::swap (d->op0, d->op1);
30192 }
30193
30194 if (TARGET_NEON)
30195 {
30196 if (arm_evpc_neon_vuzp (d))
30197 return true;
30198 if (arm_evpc_neon_vzip (d))
30199 return true;
30200 if (arm_evpc_neon_vrev (d))
30201 return true;
30202 if (arm_evpc_neon_vtrn (d))
30203 return true;
30204 return arm_evpc_neon_vtbl (d);
30205 }
30206 return false;
30207 }
30208
30209 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
30210
30211 static bool
30212 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
30213 const vec_perm_indices &sel)
30214 {
30215 struct expand_vec_perm_d d;
30216 int i, nelt, which;
30217
30218 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
30219 return false;
30220
30221 d.target = target;
30222 d.op0 = op0;
30223 d.op1 = op1;
30224
30225 d.vmode = vmode;
30226 gcc_assert (VECTOR_MODE_P (d.vmode));
30227 d.testing_p = !target;
30228
30229 nelt = GET_MODE_NUNITS (d.vmode);
30230 for (i = which = 0; i < nelt; ++i)
30231 {
30232 int ei = sel[i] & (2 * nelt - 1);
30233 which |= (ei < nelt ? 1 : 2);
30234 }
30235
30236 switch (which)
30237 {
30238 default:
30239 gcc_unreachable ();
30240
30241 case 3:
30242 d.one_vector_p = false;
30243 if (d.testing_p || !rtx_equal_p (op0, op1))
30244 break;
30245
30246 /* The elements of PERM do not suggest that only the first operand
30247 is used, but both operands are identical. Allow easier matching
30248 of the permutation by folding the permutation into the single
30249 input vector. */
30250 /* FALLTHRU */
30251 case 2:
30252 d.op0 = op1;
30253 d.one_vector_p = true;
30254 break;
30255
30256 case 1:
30257 d.op1 = op0;
30258 d.one_vector_p = true;
30259 break;
30260 }
30261
30262 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
30263
30264 if (!d.testing_p)
30265 return arm_expand_vec_perm_const_1 (&d);
30266
30267 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30268 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30269 if (!d.one_vector_p)
30270 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30271
30272 start_sequence ();
30273 bool ret = arm_expand_vec_perm_const_1 (&d);
30274 end_sequence ();
30275
30276 return ret;
30277 }
30278
30279 bool
30280 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
30281 {
30282 /* If we are soft float and we do not have ldrd
30283 then all auto increment forms are ok. */
30284 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30285 return true;
30286
30287 switch (code)
30288 {
30289 /* Post increment and Pre Decrement are supported for all
30290 instruction forms except for vector forms. */
30291 case ARM_POST_INC:
30292 case ARM_PRE_DEC:
30293 if (VECTOR_MODE_P (mode))
30294 {
30295 if (code != ARM_PRE_DEC)
30296 return true;
30297 else
30298 return false;
30299 }
30300
30301 return true;
30302
30303 case ARM_POST_DEC:
30304 case ARM_PRE_INC:
30305 /* Without LDRD and mode size greater than
30306 word size, there is no point in auto-incrementing
30307 because ldm and stm will not have these forms. */
30308 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30309 return false;
30310
30311 /* Vector and floating point modes do not support
30312 these auto increment forms. */
30313 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30314 return false;
30315
30316 return true;
30317
30318 default:
30319 return false;
30320
30321 }
30322
30323 return false;
30324 }
30325
30326 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
30327 on ARM, since we know that shifts by negative amounts are no-ops.
30328 Additionally, the default expansion code is not available or suitable
30329 for post-reload insn splits (this can occur when the register allocator
30330 chooses not to do a shift in NEON).
30331
30332 This function is used in both initial expand and post-reload splits, and
30333 handles all kinds of 64-bit shifts.
30334
30335 Input requirements:
30336 - It is safe for the input and output to be the same register, but
30337 early-clobber rules apply for the shift amount and scratch registers.
30338 - Shift by register requires both scratch registers. In all other cases
30339 the scratch registers may be NULL.
30340 - Ashiftrt by a register also clobbers the CC register. */
30341 void
30342 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
30343 rtx amount, rtx scratch1, rtx scratch2)
30344 {
30345 rtx out_high = gen_highpart (SImode, out);
30346 rtx out_low = gen_lowpart (SImode, out);
30347 rtx in_high = gen_highpart (SImode, in);
30348 rtx in_low = gen_lowpart (SImode, in);
30349
30350 /* Terminology:
30351 in = the register pair containing the input value.
30352 out = the destination register pair.
30353 up = the high- or low-part of each pair.
30354 down = the opposite part to "up".
30355 In a shift, we can consider bits to shift from "up"-stream to
30356 "down"-stream, so in a left-shift "up" is the low-part and "down"
30357 is the high-part of each register pair. */
30358
30359 rtx out_up = code == ASHIFT ? out_low : out_high;
30360 rtx out_down = code == ASHIFT ? out_high : out_low;
30361 rtx in_up = code == ASHIFT ? in_low : in_high;
30362 rtx in_down = code == ASHIFT ? in_high : in_low;
30363
30364 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
30365 gcc_assert (out
30366 && (REG_P (out) || GET_CODE (out) == SUBREG)
30367 && GET_MODE (out) == DImode);
30368 gcc_assert (in
30369 && (REG_P (in) || GET_CODE (in) == SUBREG)
30370 && GET_MODE (in) == DImode);
30371 gcc_assert (amount
30372 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
30373 && GET_MODE (amount) == SImode)
30374 || CONST_INT_P (amount)));
30375 gcc_assert (scratch1 == NULL
30376 || (GET_CODE (scratch1) == SCRATCH)
30377 || (GET_MODE (scratch1) == SImode
30378 && REG_P (scratch1)));
30379 gcc_assert (scratch2 == NULL
30380 || (GET_CODE (scratch2) == SCRATCH)
30381 || (GET_MODE (scratch2) == SImode
30382 && REG_P (scratch2)));
30383 gcc_assert (!REG_P (out) || !REG_P (amount)
30384 || !HARD_REGISTER_P (out)
30385 || (REGNO (out) != REGNO (amount)
30386 && REGNO (out) + 1 != REGNO (amount)));
30387
30388 /* Macros to make following code more readable. */
30389 #define SUB_32(DEST,SRC) \
30390 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30391 #define RSB_32(DEST,SRC) \
30392 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30393 #define SUB_S_32(DEST,SRC) \
30394 gen_addsi3_compare0 ((DEST), (SRC), \
30395 GEN_INT (-32))
30396 #define SET(DEST,SRC) \
30397 gen_rtx_SET ((DEST), (SRC))
30398 #define SHIFT(CODE,SRC,AMOUNT) \
30399 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30400 #define LSHIFT(CODE,SRC,AMOUNT) \
30401 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30402 SImode, (SRC), (AMOUNT))
30403 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30404 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30405 SImode, (SRC), (AMOUNT))
30406 #define ORR(A,B) \
30407 gen_rtx_IOR (SImode, (A), (B))
30408 #define BRANCH(COND,LABEL) \
30409 gen_arm_cond_branch ((LABEL), \
30410 gen_rtx_ ## COND (CCmode, cc_reg, \
30411 const0_rtx), \
30412 cc_reg)
30413
30414 /* Shifts by register and shifts by constant are handled separately. */
30415 if (CONST_INT_P (amount))
30416 {
30417 /* We have a shift-by-constant. */
30418
30419 /* First, handle out-of-range shift amounts.
30420 In both cases we try to match the result that an ARM instruction in a
30421 shift-by-register would give. This helps reduce execution
30422 differences between optimization levels, but it won't stop other
30423 parts of the compiler doing different things. This is "undefined"
30424 behavior, in any case. */
30425 if (INTVAL (amount) <= 0)
30426 emit_insn (gen_movdi (out, in));
30427 else if (INTVAL (amount) >= 64)
30428 {
30429 if (code == ASHIFTRT)
30430 {
30431 rtx const31_rtx = GEN_INT (31);
30432 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
30433 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
30434 }
30435 else
30436 emit_insn (gen_movdi (out, const0_rtx));
30437 }
30438
30439 /* Now handle valid shifts. */
30440 else if (INTVAL (amount) < 32)
30441 {
30442 /* Shifts by a constant less than 32. */
30443 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
30444
30445 /* Clearing the out register in DImode first avoids lots
30446 of spilling and results in less stack usage.
30447 Later this redundant insn is completely removed.
30448 Do that only if "in" and "out" are different registers. */
30449 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
30450 emit_insn (SET (out, const0_rtx));
30451 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30452 emit_insn (SET (out_down,
30453 ORR (REV_LSHIFT (code, in_up, reverse_amount),
30454 out_down)));
30455 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30456 }
30457 else
30458 {
30459 /* Shifts by a constant greater than 31. */
30460 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
30461
30462 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
30463 emit_insn (SET (out, const0_rtx));
30464 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
30465 if (code == ASHIFTRT)
30466 emit_insn (gen_ashrsi3 (out_up, in_up,
30467 GEN_INT (31)));
30468 else
30469 emit_insn (SET (out_up, const0_rtx));
30470 }
30471 }
30472 else
30473 {
30474 /* We have a shift-by-register. */
30475 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
30476
30477 /* This alternative requires the scratch registers. */
30478 gcc_assert (scratch1 && REG_P (scratch1));
30479 gcc_assert (scratch2 && REG_P (scratch2));
30480
30481 /* We will need the values "amount-32" and "32-amount" later.
30482 Swapping them around now allows the later code to be more general. */
30483 switch (code)
30484 {
30485 case ASHIFT:
30486 emit_insn (SUB_32 (scratch1, amount));
30487 emit_insn (RSB_32 (scratch2, amount));
30488 break;
30489 case ASHIFTRT:
30490 emit_insn (RSB_32 (scratch1, amount));
30491 /* Also set CC = amount > 32. */
30492 emit_insn (SUB_S_32 (scratch2, amount));
30493 break;
30494 case LSHIFTRT:
30495 emit_insn (RSB_32 (scratch1, amount));
30496 emit_insn (SUB_32 (scratch2, amount));
30497 break;
30498 default:
30499 gcc_unreachable ();
30500 }
30501
30502 /* Emit code like this:
30503
30504 arithmetic-left:
30505 out_down = in_down << amount;
30506 out_down = (in_up << (amount - 32)) | out_down;
30507 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30508 out_up = in_up << amount;
30509
30510 arithmetic-right:
30511 out_down = in_down >> amount;
30512 out_down = (in_up << (32 - amount)) | out_down;
30513 if (amount < 32)
30514 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30515 out_up = in_up >> amount;
30516
30517 logical-right:
30518 out_down = in_down >> amount;
30519 out_down = (in_up << (32 - amount)) | out_down;
30520 if (amount < 32)
30521 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30522 out_up = in_up >> amount;
30523
30524 The ARM and Thumb2 variants are the same but implemented slightly
30525 differently. If this were only called during expand we could just
30526 use the Thumb2 case and let combine do the right thing, but this
30527 can also be called from post-reload splitters. */
30528
30529 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30530
30531 if (!TARGET_THUMB2)
30532 {
30533 /* Emit code for ARM mode. */
30534 emit_insn (SET (out_down,
30535 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
30536 if (code == ASHIFTRT)
30537 {
30538 rtx_code_label *done_label = gen_label_rtx ();
30539 emit_jump_insn (BRANCH (LT, done_label));
30540 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
30541 out_down)));
30542 emit_label (done_label);
30543 }
30544 else
30545 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
30546 out_down)));
30547 }
30548 else
30549 {
30550 /* Emit code for Thumb2 mode.
30551 Thumb2 can't do shift and or in one insn. */
30552 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
30553 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
30554
30555 if (code == ASHIFTRT)
30556 {
30557 rtx_code_label *done_label = gen_label_rtx ();
30558 emit_jump_insn (BRANCH (LT, done_label));
30559 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
30560 emit_insn (SET (out_down, ORR (out_down, scratch2)));
30561 emit_label (done_label);
30562 }
30563 else
30564 {
30565 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
30566 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
30567 }
30568 }
30569
30570 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30571 }
30572
30573 #undef SUB_32
30574 #undef RSB_32
30575 #undef SUB_S_32
30576 #undef SET
30577 #undef SHIFT
30578 #undef LSHIFT
30579 #undef REV_LSHIFT
30580 #undef ORR
30581 #undef BRANCH
30582 }
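
/* Illustrative only: a minimal C model of the decomposition emitted by
   the shift-by-constant path above, operating on explicit 32-bit
   halves.  The helper is a hypothetical sketch (not part of the
   compiler) for a logical right shift and assumes 0 < amount < 32:

       #include <stdint.h>

       static void
       lshrdi_model (uint32_t in_down, uint32_t in_up, unsigned amount,
                     uint32_t *out_down, uint32_t *out_up)
       {
         *out_down = in_down >> amount;
         *out_down |= in_up << (32 - amount);   // REV_LSHIFT contribution
         *out_up = in_up >> amount;
       }
*/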
30583
30584 /* Returns true if the pattern is a valid symbolic address, which is either a
30585 symbol_ref or (symbol_ref + addend).
30586
30587 According to the ARM ELF ABI, the initial addend of REL-type relocations
30588 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
30589 literal field of the instruction as a 16-bit signed value in the range
30590 -32768 <= A < 32768. */
30591
30592 bool
30593 arm_valid_symbolic_address_p (rtx addr)
30594 {
30595 rtx xop0, xop1 = NULL_RTX;
30596 rtx tmp = addr;
30597
30598 if (target_word_relocations)
30599 return false;
30600
30601 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
30602 return true;
30603
30604 /* (const (plus: symbol_ref const_int)) */
30605 if (GET_CODE (addr) == CONST)
30606 tmp = XEXP (addr, 0);
30607
30608 if (GET_CODE (tmp) == PLUS)
30609 {
30610 xop0 = XEXP (tmp, 0);
30611 xop1 = XEXP (tmp, 1);
30612
30613 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
30614 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
30615 }
30616
30617 return false;
30618 }
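
/* For example (illustrative, not exhaustive), arm_valid_symbolic_address_p
   accepts address expressions such as

       (symbol_ref "foo")
       (const (plus (symbol_ref "foo") (const_int 4)))
       (const (plus (symbol_ref "foo") (const_int -32768)))

   while an addend outside the signed 16-bit range is rejected:

       (const (plus (symbol_ref "foo") (const_int 32768)))

   ("foo" is a hypothetical symbol name).  */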
30619
30620 /* Returns true if COMPARISON is a valid comparison operation, and
30621 puts the operands OP1 and OP2 into a form that is valid for it. */
30622 bool
30623 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
30624 {
30625 enum rtx_code code = GET_CODE (*comparison);
30626 int code_int;
30627 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
30628 ? GET_MODE (*op2) : GET_MODE (*op1);
30629
30630 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
30631
30632 if (code == UNEQ || code == LTGT)
30633 return false;
30634
30635 code_int = (int)code;
30636 arm_canonicalize_comparison (&code_int, op1, op2, 0);
30637 PUT_CODE (*comparison, (enum rtx_code)code_int);
30638
30639 switch (mode)
30640 {
30641 case E_SImode:
30642 if (!arm_add_operand (*op1, mode))
30643 *op1 = force_reg (mode, *op1);
30644 if (!arm_add_operand (*op2, mode))
30645 *op2 = force_reg (mode, *op2);
30646 return true;
30647
30648 case E_DImode:
30649 /* gen_compare_reg() will sort out any invalid operands. */
30650 return true;
30651
30652 case E_HFmode:
30653 if (!TARGET_VFP_FP16INST)
30654 break;
30655 /* FP16 comparisons are done in SF mode. */
30656 mode = SFmode;
30657 *op1 = convert_to_mode (mode, *op1, 1);
30658 *op2 = convert_to_mode (mode, *op2, 1);
30659 /* Fall through. */
30660 case E_SFmode:
30661 case E_DFmode:
30662 if (!vfp_compare_operand (*op1, mode))
30663 *op1 = force_reg (mode, *op1);
30664 if (!vfp_compare_operand (*op2, mode))
30665 *op2 = force_reg (mode, *op2);
30666 return true;
30667 default:
30668 break;
30669 }
30670
30671 return false;
30672
30673 }
30674
30675 /* Maximum number of instructions to set block of memory. */
30676 static int
30677 arm_block_set_max_insns (void)
30678 {
30679 if (optimize_function_for_size_p (cfun))
30680 return 4;
30681 else
30682 return current_tune->max_insns_inline_memset;
30683 }
30684
30685 /* Return TRUE if it's profitable to set a block of memory for the
30686 non-vectorized case. VAL is the value to set the memory
30687 with. LENGTH is the number of bytes to set. ALIGN is the
30688 alignment of the destination memory in bytes. UNALIGNED_P
30689 is TRUE if we can only set the memory with instructions
30690 meeting alignment requirements. USE_STRD_P is TRUE if we
30691 can use strd to set the memory. */
30692 static bool
30693 arm_block_set_non_vect_profit_p (rtx val,
30694 unsigned HOST_WIDE_INT length,
30695 unsigned HOST_WIDE_INT align,
30696 bool unaligned_p, bool use_strd_p)
30697 {
30698 int num = 0;
30699 /* For leftovers of 0-7 bytes, we can set the memory block using
30700 strb/strh/str with the minimum number of instructions. */
30701 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
30702
30703 if (unaligned_p)
30704 {
30705 num = arm_const_inline_cost (SET, val);
30706 num += length / align + length % align;
30707 }
30708 else if (use_strd_p)
30709 {
30710 num = arm_const_double_inline_cost (val);
30711 num += (length >> 3) + leftover[length & 7];
30712 }
30713 else
30714 {
30715 num = arm_const_inline_cost (SET, val);
30716 num += (length >> 2) + leftover[length & 3];
30717 }
30718
30719 /* We may be able to combine last pair STRH/STRB into a single STR
30720 by shifting one byte back. */
30721 if (unaligned_access && length > 3 && (length & 3) == 3)
30722 num--;
30723
30724 return (num <= arm_block_set_max_insns ());
30725 }
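
/* Worked example (illustrative, and assuming the constant can be loaded
   in a single instruction so that arm_const_inline_cost returns 1): for
   a word-aligned destination with LENGTH == 15 the estimate is

       num = 1                  constant load
           + (15 >> 2)          three word stores
           + leftover[15 & 3]   strh + strb for the trailing 3 bytes
           = 6

   and, when unaligned access is available, the trailing strh/strb pair
   is counted as a single overlapping str, giving num == 5.  */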
30726
30727 /* Return TRUE if it's profitable to set a block of memory for the
30728 vectorized case. LENGTH is the number of bytes to set.
30729 ALIGN is the alignment of destination memory in bytes.
30730 MODE is the vector mode used to set the memory. */
30731 static bool
30732 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
30733 unsigned HOST_WIDE_INT align,
30734 machine_mode mode)
30735 {
30736 int num;
30737 bool unaligned_p = ((align & 3) != 0);
30738 unsigned int nelt = GET_MODE_NUNITS (mode);
30739
30740 /* Instruction loading constant value. */
30741 num = 1;
30742 /* Instructions storing the memory. */
30743 num += (length + nelt - 1) / nelt;
30744 /* Instructions adjusting the address expression. We only need to
30745 adjust it if the destination is 4-byte aligned and the leftover
30746 bytes can only be stored by a misaligned store instruction. */
30747 if (!unaligned_p && (length & 3) != 0)
30748 num++;
30749
30750 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
30751 if (!unaligned_p && mode == V16QImode)
30752 num--;
30753
30754 return (num <= arm_block_set_max_insns ());
30755 }
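
/* Worked example (illustrative): for LENGTH == 20 with a 4-byte aligned
   destination the V16QImode path estimates

       num = 1 + (20 + 15) / 16 = 3

   no extra address adjustment is added because (20 & 3) == 0, and one
   instruction is credited back for the aligned vst1:v16qi first store,
   so num == 2 is compared against arm_block_set_max_insns ().  */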
30756
30757 /* Set a block of memory using vectorization instructions for the
30758 unaligned case. We fill the first LENGTH bytes of the memory
30759 area starting from DSTBASE with byte constant VALUE. ALIGN is
30760 the alignment requirement of memory. Return TRUE if succeeded. */
30761 static bool
30762 arm_block_set_unaligned_vect (rtx dstbase,
30763 unsigned HOST_WIDE_INT length,
30764 unsigned HOST_WIDE_INT value,
30765 unsigned HOST_WIDE_INT align)
30766 {
30767 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
30768 rtx dst, mem;
30769 rtx val_vec, reg;
30770 rtx (*gen_func) (rtx, rtx);
30771 machine_mode mode;
30772 unsigned HOST_WIDE_INT v = value;
30773 unsigned int offset = 0;
30774 gcc_assert ((align & 0x3) != 0);
30775 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30776 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30777 if (length >= nelt_v16)
30778 {
30779 mode = V16QImode;
30780 gen_func = gen_movmisalignv16qi;
30781 }
30782 else
30783 {
30784 mode = V8QImode;
30785 gen_func = gen_movmisalignv8qi;
30786 }
30787 nelt_mode = GET_MODE_NUNITS (mode);
30788 gcc_assert (length >= nelt_mode);
30789 /* Skip if it isn't profitable. */
30790 if (!arm_block_set_vect_profit_p (length, align, mode))
30791 return false;
30792
30793 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30794 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30795
30796 v = sext_hwi (v, BITS_PER_WORD);
30797
30798 reg = gen_reg_rtx (mode);
30799 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30800 /* Emit instruction loading the constant value. */
30801 emit_move_insn (reg, val_vec);
30802
30803 /* Handle nelt_mode bytes in a vector. */
30804 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30805 {
30806 emit_insn ((*gen_func) (mem, reg));
30807 if (i + 2 * nelt_mode <= length)
30808 {
30809 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30810 offset += nelt_mode;
30811 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30812 }
30813 }
30814
30815 /* If at least nelt_v8 bytes are left over, we must be in
30816 V16QImode. */
30817 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30818
30819 /* Handle (8, 16) bytes leftover. */
30820 if (i + nelt_v8 < length)
30821 {
30822 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30823 offset += length - i;
30824 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30825
30826 /* We are shifting bytes back, set the alignment accordingly. */
30827 if ((length & 1) != 0 && align >= 2)
30828 set_mem_align (mem, BITS_PER_UNIT);
30829
30830 emit_insn (gen_movmisalignv16qi (mem, reg));
30831 }
30832 /* Handle (0, 8] bytes leftover. */
30833 else if (i < length && i + nelt_v8 >= length)
30834 {
30835 if (mode == V16QImode)
30836 reg = gen_lowpart (V8QImode, reg);
30837
30838 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30839 + (nelt_mode - nelt_v8))));
30840 offset += (length - i) + (nelt_mode - nelt_v8);
30841 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30842
30843 /* We are shifting bytes back, set the alignment accordingly. */
30844 if ((length & 1) != 0 && align >= 2)
30845 set_mem_align (mem, BITS_PER_UNIT);
30846
30847 emit_insn (gen_movmisalignv8qi (mem, reg));
30848 }
30849
30850 return true;
30851 }
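
/* The tail handling above uses the usual overlapping-store trick: the
   final vector store is moved back so that it ends exactly at
   DSTBASE + LENGTH.  A hypothetical scalar sketch of the same idea
   (illustrative only, not the code the compiler emits):

       #include <stdint.h>
       #include <string.h>

       static void
       set_block_16 (uint8_t *dst, uint8_t val, size_t len)  // len >= 16
       {
         uint8_t chunk[16];
         memset (chunk, val, sizeof chunk);
         size_t i;
         for (i = 0; i + 16 <= len; i += 16)
           memcpy (dst + i, chunk, 16);
         if (i < len)
           memcpy (dst + len - 16, chunk, 16);  // overlapping final store
       }
*/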
30852
30853 /* Set a block of memory using vectorization instructions for the
30854 aligned case. We fill the first LENGTH bytes of the memory area
30855 starting from DSTBASE with byte constant VALUE. ALIGN is the
30856 alignment requirement of memory. Return TRUE if succeeded. */
30857 static bool
30858 arm_block_set_aligned_vect (rtx dstbase,
30859 unsigned HOST_WIDE_INT length,
30860 unsigned HOST_WIDE_INT value,
30861 unsigned HOST_WIDE_INT align)
30862 {
30863 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30864 rtx dst, addr, mem;
30865 rtx val_vec, reg;
30866 machine_mode mode;
30867 unsigned int offset = 0;
30868
30869 gcc_assert ((align & 0x3) == 0);
30870 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30871 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30872 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30873 mode = V16QImode;
30874 else
30875 mode = V8QImode;
30876
30877 nelt_mode = GET_MODE_NUNITS (mode);
30878 gcc_assert (length >= nelt_mode);
30879 /* Skip if it isn't profitable. */
30880 if (!arm_block_set_vect_profit_p (length, align, mode))
30881 return false;
30882
30883 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30884
30885 reg = gen_reg_rtx (mode);
30886 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30887 /* Emit instruction loading the constant value. */
30888 emit_move_insn (reg, val_vec);
30889
30890 i = 0;
30891 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30892 if (mode == V16QImode)
30893 {
30894 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30895 emit_insn (gen_movmisalignv16qi (mem, reg));
30896 i += nelt_mode;
30897 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30898 if (i + nelt_v8 < length && i + nelt_v16 > length)
30899 {
30900 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30901 offset += length - nelt_mode;
30902 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30903 /* We are shifting bytes back, set the alignment accordingly. */
30904 if ((length & 0x3) == 0)
30905 set_mem_align (mem, BITS_PER_UNIT * 4);
30906 else if ((length & 0x1) == 0)
30907 set_mem_align (mem, BITS_PER_UNIT * 2);
30908 else
30909 set_mem_align (mem, BITS_PER_UNIT);
30910
30911 emit_insn (gen_movmisalignv16qi (mem, reg));
30912 return true;
30913 }
30914 /* Fall through for bytes leftover. */
30915 mode = V8QImode;
30916 nelt_mode = GET_MODE_NUNITS (mode);
30917 reg = gen_lowpart (V8QImode, reg);
30918 }
30919
30920 /* Handle 8 bytes in a vector. */
30921 for (; (i + nelt_mode <= length); i += nelt_mode)
30922 {
30923 addr = plus_constant (Pmode, dst, i);
30924 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30925 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
30926 emit_move_insn (mem, reg);
30927 else
30928 emit_insn (gen_unaligned_storev8qi (mem, reg));
30929 }
30930
30931 /* Handle single word leftover by shifting 4 bytes back. We can
30932 use aligned access for this case. */
30933 if (i + UNITS_PER_WORD == length)
30934 {
30935 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30936 offset += i - UNITS_PER_WORD;
30937 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30938 /* We are shifting 4 bytes back, set the alignment accordingly. */
30939 if (align > UNITS_PER_WORD)
30940 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30941
30942 emit_insn (gen_unaligned_storev8qi (mem, reg));
30943 }
30944 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30945 We have to use unaligned access for this case. */
30946 else if (i < length)
30947 {
30948 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30949 offset += length - nelt_mode;
30950 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30951 /* We are shifting bytes back, set the alignment accordingly. */
30952 if ((length & 1) == 0)
30953 set_mem_align (mem, BITS_PER_UNIT * 2);
30954 else
30955 set_mem_align (mem, BITS_PER_UNIT);
30956
30957 emit_insn (gen_movmisalignv8qi (mem, reg));
30958 }
30959
30960 return true;
30961 }
30962
30963 /* Set a block of memory using plain strh/strb instructions, using
30964 only instructions permitted by the alignment ALIGN. We fill the
30965 first LENGTH bytes of the memory area starting from DSTBASE
30966 with byte constant VALUE. ALIGN is the alignment requirement
30967 of memory. */
30968 static bool
30969 arm_block_set_unaligned_non_vect (rtx dstbase,
30970 unsigned HOST_WIDE_INT length,
30971 unsigned HOST_WIDE_INT value,
30972 unsigned HOST_WIDE_INT align)
30973 {
30974 unsigned int i;
30975 rtx dst, addr, mem;
30976 rtx val_exp, val_reg, reg;
30977 machine_mode mode;
30978 HOST_WIDE_INT v = value;
30979
30980 gcc_assert (align == 1 || align == 2);
30981
30982 if (align == 2)
30983 v |= (value << BITS_PER_UNIT);
30984
30985 v = sext_hwi (v, BITS_PER_WORD);
30986 val_exp = GEN_INT (v);
30987 /* Skip if it isn't profitable. */
30988 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30989 align, true, false))
30990 return false;
30991
30992 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30993 mode = (align == 2 ? HImode : QImode);
30994 val_reg = force_reg (SImode, val_exp);
30995 reg = gen_lowpart (mode, val_reg);
30996
30997 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30998 {
30999 addr = plus_constant (Pmode, dst, i);
31000 mem = adjust_automodify_address (dstbase, mode, addr, i);
31001 emit_move_insn (mem, reg);
31002 }
31003
31004 /* Handle single byte leftover. */
31005 if (i + 1 == length)
31006 {
31007 reg = gen_lowpart (QImode, val_reg);
31008 addr = plus_constant (Pmode, dst, i);
31009 mem = adjust_automodify_address (dstbase, QImode, addr, i);
31010 emit_move_insn (mem, reg);
31011 i++;
31012 }
31013
31014 gcc_assert (i == length);
31015 return true;
31016 }
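
/* For example (illustrative), with ALIGN == 2, VALUE == 0x55 and
   LENGTH == 5, and with the replicated constant 0x5555 already
   materialized in r1, the loop above emits roughly:

       strh  r1, [r0]
       strh  r1, [r0, #2]
       strb  r1, [r0, #4]

   (register numbers are hypothetical).  */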
31017
31018 /* Set a block of memory using plain strd/str/strh/strb instructions,
31019 to permit unaligned copies on processors which support unaligned
31020 semantics for those instructions. We fill the first LENGTH bytes
31021 of the memory area starting from DSTBASE with byte constant VALUE.
31022 ALIGN is the alignment requirement of memory. */
31023 static bool
31024 arm_block_set_aligned_non_vect (rtx dstbase,
31025 unsigned HOST_WIDE_INT length,
31026 unsigned HOST_WIDE_INT value,
31027 unsigned HOST_WIDE_INT align)
31028 {
31029 unsigned int i;
31030 rtx dst, addr, mem;
31031 rtx val_exp, val_reg, reg;
31032 unsigned HOST_WIDE_INT v;
31033 bool use_strd_p;
31034
31035 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
31036 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
31037
31038 v = (value | (value << 8) | (value << 16) | (value << 24));
31039 if (length < UNITS_PER_WORD)
31040 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
31041
31042 if (use_strd_p)
31043 v |= (v << BITS_PER_WORD);
31044 else
31045 v = sext_hwi (v, BITS_PER_WORD);
31046
31047 val_exp = GEN_INT (v);
31048 /* Skip if it isn't profitable. */
31049 if (!arm_block_set_non_vect_profit_p (val_exp, length,
31050 align, false, use_strd_p))
31051 {
31052 if (!use_strd_p)
31053 return false;
31054
31055 /* Try without strd. */
31056 v = (v >> BITS_PER_WORD);
31057 v = sext_hwi (v, BITS_PER_WORD);
31058 val_exp = GEN_INT (v);
31059 use_strd_p = false;
31060 if (!arm_block_set_non_vect_profit_p (val_exp, length,
31061 align, false, use_strd_p))
31062 return false;
31063 }
31064
31065 i = 0;
31066 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31067 /* Handle double words using strd if possible. */
31068 if (use_strd_p)
31069 {
31070 val_reg = force_reg (DImode, val_exp);
31071 reg = val_reg;
31072 for (; (i + 8 <= length); i += 8)
31073 {
31074 addr = plus_constant (Pmode, dst, i);
31075 mem = adjust_automodify_address (dstbase, DImode, addr, i);
31076 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
31077 emit_move_insn (mem, reg);
31078 else
31079 emit_insn (gen_unaligned_storedi (mem, reg));
31080 }
31081 }
31082 else
31083 val_reg = force_reg (SImode, val_exp);
31084
31085 /* Handle words. */
31086 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
31087 for (; (i + 4 <= length); i += 4)
31088 {
31089 addr = plus_constant (Pmode, dst, i);
31090 mem = adjust_automodify_address (dstbase, SImode, addr, i);
31091 if ((align & 3) == 0)
31092 emit_move_insn (mem, reg);
31093 else
31094 emit_insn (gen_unaligned_storesi (mem, reg));
31095 }
31096
31097 /* Merge last pair of STRH and STRB into a STR if possible. */
31098 if (unaligned_access && i > 0 && (i + 3) == length)
31099 {
31100 addr = plus_constant (Pmode, dst, i - 1);
31101 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
31102 /* We are shifting one byte back, set the alignment accordingly. */
31103 if ((align & 1) == 0)
31104 set_mem_align (mem, BITS_PER_UNIT);
31105
31106 /* Most likely this is an unaligned access, and we can't tell at
31107 compilation time. */
31108 emit_insn (gen_unaligned_storesi (mem, reg));
31109 return true;
31110 }
31111
31112 /* Handle half word leftover. */
31113 if (i + 2 <= length)
31114 {
31115 reg = gen_lowpart (HImode, val_reg);
31116 addr = plus_constant (Pmode, dst, i);
31117 mem = adjust_automodify_address (dstbase, HImode, addr, i);
31118 if ((align & 1) == 0)
31119 emit_move_insn (mem, reg);
31120 else
31121 emit_insn (gen_unaligned_storehi (mem, reg));
31122
31123 i += 2;
31124 }
31125
31126 /* Handle single byte leftover. */
31127 if (i + 1 == length)
31128 {
31129 reg = gen_lowpart (QImode, val_reg);
31130 addr = plus_constant (Pmode, dst, i);
31131 mem = adjust_automodify_address (dstbase, QImode, addr, i);
31132 emit_move_insn (mem, reg);
31133 }
31134
31135 return true;
31136 }
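
/* Example of the final STR merge above (illustrative): for a
   word-aligned destination with LENGTH == 7 and the replicated value in
   r1, the stores emitted are roughly

       str  r1, [r0]          @ bytes 0-3
       str  r1, [r0, #3]      @ bytes 3-6, one overlapping unaligned
                              @ store instead of an strh/strb pair

   (register numbers are hypothetical).  */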
31137
31138 /* Set a block of memory using vectorization instructions for both
31139 aligned and unaligned cases. We fill the first LENGTH bytes of
31140 the memory area starting from DSTBASE with byte constant VALUE.
31141 ALIGN is the alignment requirement of memory. */
31142 static bool
31143 arm_block_set_vect (rtx dstbase,
31144 unsigned HOST_WIDE_INT length,
31145 unsigned HOST_WIDE_INT value,
31146 unsigned HOST_WIDE_INT align)
31147 {
31148 /* Check whether we need to use unaligned store instruction. */
31149 if (((align & 3) != 0 || (length & 3) != 0)
31150 /* Check whether unaligned store instruction is available. */
31151 && (!unaligned_access || BYTES_BIG_ENDIAN))
31152 return false;
31153
31154 if ((align & 3) == 0)
31155 return arm_block_set_aligned_vect (dstbase, length, value, align);
31156 else
31157 return arm_block_set_unaligned_vect (dstbase, length, value, align);
31158 }
31159
31160 /* Expand string store operation. Firstly we try to do that by using
31161 vectorization instructions, then try with ARM unaligned access and
31162 double-word store if profitable. OPERANDS[0] is the destination,
31163 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
31164 initialize the memory, OPERANDS[3] is the known alignment of the
31165 destination. */
31166 bool
31167 arm_gen_setmem (rtx *operands)
31168 {
31169 rtx dstbase = operands[0];
31170 unsigned HOST_WIDE_INT length;
31171 unsigned HOST_WIDE_INT value;
31172 unsigned HOST_WIDE_INT align;
31173
31174 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
31175 return false;
31176
31177 length = UINTVAL (operands[1]);
31178 if (length > 64)
31179 return false;
31180
31181 value = (UINTVAL (operands[2]) & 0xFF);
31182 align = UINTVAL (operands[3]);
31183 if (TARGET_NEON && length >= 8
31184 && current_tune->string_ops_prefer_neon
31185 && arm_block_set_vect (dstbase, length, value, align))
31186 return true;
31187
31188 if (!unaligned_access && (align & 3) != 0)
31189 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
31190
31191 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
31192 }
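
/* As a usage sketch (illustrative): a call such as

       memset (p, 0xab, 15);

   reaches this function through the backend's setmem expansion pattern
   with OPERANDS[1] == 15, OPERANDS[2] == 0xab and OPERANDS[3] set to
   whatever alignment GCC can prove for P; the helpers above then choose
   between NEON stores, strd/str sequences, or strh/strb sequences.  */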
31193
31194
31195 static bool
31196 arm_macro_fusion_p (void)
31197 {
31198 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
31199 }
31200
31201 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
31202 for MOVW / MOVT macro fusion. */
31203
31204 static bool
31205 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
31206 {
31207 /* We are trying to fuse
31208 movw imm / movt imm
31209 instructions as a group that gets scheduled together. */
31210
31211 rtx set_dest = SET_DEST (curr_set);
31212
31213 if (GET_MODE (set_dest) != SImode)
31214 return false;
31215
31216 /* We are trying to match:
31217 prev (movw) == (set (reg r0) (const_int imm16))
31218 curr (movt) == (set (zero_extract (reg r0)
31219 (const_int 16)
31220 (const_int 16))
31221 (const_int imm16_1))
31222 or
31223 prev (movw) == (set (reg r1)
31224 (high (symbol_ref ("SYM"))))
31225 curr (movt) == (set (reg r0)
31226 (lo_sum (reg r1)
31227 (symbol_ref ("SYM")))) */
31228
31229 if (GET_CODE (set_dest) == ZERO_EXTRACT)
31230 {
31231 if (CONST_INT_P (SET_SRC (curr_set))
31232 && CONST_INT_P (SET_SRC (prev_set))
31233 && REG_P (XEXP (set_dest, 0))
31234 && REG_P (SET_DEST (prev_set))
31235 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
31236 return true;
31237
31238 }
31239 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
31240 && REG_P (SET_DEST (curr_set))
31241 && REG_P (SET_DEST (prev_set))
31242 && GET_CODE (SET_SRC (prev_set)) == HIGH
31243 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
31244 return true;
31245
31246 return false;
31247 }
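
/* Concretely (illustrative), the pair being kept adjacent is the usual
   two-instruction immediate or address materialization, e.g.

       movw  r0, #:lower16:sym
       movt  r0, #:upper16:sym

   (register and symbol names are hypothetical).  */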
31248
31249 static bool
31250 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
31251 {
31252 rtx prev_set = single_set (prev);
31253 rtx curr_set = single_set (curr);
31254
31255 if (!prev_set
31256 || !curr_set)
31257 return false;
31258
31259 if (any_condjump_p (curr))
31260 return false;
31261
31262 if (!arm_macro_fusion_p ())
31263 return false;
31264
31265 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
31266 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
31267 return true;
31268
31269 return false;
31270 }
31271
31272 /* Return true iff the instruction fusion described by OP is enabled. */
31273 bool
31274 arm_fusion_enabled_p (tune_params::fuse_ops op)
31275 {
31276 return current_tune->fusible_ops & op;
31277 }
31278
31279 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
31280 scheduled for speculative execution. Reject the long-running division
31281 and square-root instructions. */
31282
31283 static bool
31284 arm_sched_can_speculate_insn (rtx_insn *insn)
31285 {
31286 switch (get_attr_type (insn))
31287 {
31288 case TYPE_SDIV:
31289 case TYPE_UDIV:
31290 case TYPE_FDIVS:
31291 case TYPE_FDIVD:
31292 case TYPE_FSQRTS:
31293 case TYPE_FSQRTD:
31294 case TYPE_NEON_FP_SQRT_S:
31295 case TYPE_NEON_FP_SQRT_D:
31296 case TYPE_NEON_FP_SQRT_S_Q:
31297 case TYPE_NEON_FP_SQRT_D_Q:
31298 case TYPE_NEON_FP_DIV_S:
31299 case TYPE_NEON_FP_DIV_D:
31300 case TYPE_NEON_FP_DIV_S_Q:
31301 case TYPE_NEON_FP_DIV_D_Q:
31302 return false;
31303 default:
31304 return true;
31305 }
31306 }
31307
31308 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31309
31310 static unsigned HOST_WIDE_INT
31311 arm_asan_shadow_offset (void)
31312 {
31313 return HOST_WIDE_INT_1U << 29;
31314 }
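
/* A hypothetical sketch of the resulting mapping, assuming ASan's
   default 1/8 shadow scale (which is not defined in this file):

       shadow_addr = (addr >> 3) + (1 << 29);

   i.e. the shadow region starts at 0x20000000.  */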
31315
31316
31317 /* This is a temporary fix for PR60655. Ideally we need
31318 to handle most of these cases in the generic part but
31319 currently we reject minus (..) (sym_ref). We try to
31320 ameliorate the case with minus (sym_ref1) (sym_ref2)
31321 where they are in the same section. */
31322
31323 static bool
31324 arm_const_not_ok_for_debug_p (rtx p)
31325 {
31326 tree decl_op0 = NULL;
31327 tree decl_op1 = NULL;
31328
31329 if (GET_CODE (p) == UNSPEC)
31330 return true;
31331 if (GET_CODE (p) == MINUS)
31332 {
31333 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31334 {
31335 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31336 if (decl_op1
31337 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31338 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31339 {
31340 if ((VAR_P (decl_op1)
31341 || TREE_CODE (decl_op1) == CONST_DECL)
31342 && (VAR_P (decl_op0)
31343 || TREE_CODE (decl_op0) == CONST_DECL))
31344 return (get_variable_section (decl_op1, false)
31345 != get_variable_section (decl_op0, false));
31346
31347 if (TREE_CODE (decl_op1) == LABEL_DECL
31348 && TREE_CODE (decl_op0) == LABEL_DECL)
31349 return (DECL_CONTEXT (decl_op1)
31350 != DECL_CONTEXT (decl_op0));
31351 }
31352
31353 return true;
31354 }
31355 }
31356
31357 return false;
31358 }
31359
31360 /* Return TRUE if X is a reference to a value in a constant pool. */
31361 extern bool
31362 arm_is_constant_pool_ref (rtx x)
31363 {
31364 return (MEM_P (x)
31365 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
31366 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
31367 }
31368
31369 /* Remember the last target of arm_set_current_function. */
31370 static GTY(()) tree arm_previous_fndecl;
31371
31372 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
31373
31374 void
31375 save_restore_target_globals (tree new_tree)
31376 {
31377 /* If we have a previous state, use it. */
31378 if (TREE_TARGET_GLOBALS (new_tree))
31379 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
31380 else if (new_tree == target_option_default_node)
31381 restore_target_globals (&default_target_globals);
31382 else
31383 {
31384 /* Call target_reinit and save the state for TARGET_GLOBALS. */
31385 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
31386 }
31387
31388 arm_option_params_internal ();
31389 }
31390
31391 /* Invalidate arm_previous_fndecl. */
31392
31393 void
31394 arm_reset_previous_fndecl (void)
31395 {
31396 arm_previous_fndecl = NULL_TREE;
31397 }
31398
31399 /* Establish appropriate back-end context for processing the function
31400 FNDECL. The argument might be NULL to indicate processing at top
31401 level, outside of any function scope. */
31402
31403 static void
31404 arm_set_current_function (tree fndecl)
31405 {
31406 if (!fndecl || fndecl == arm_previous_fndecl)
31407 return;
31408
31409 tree old_tree = (arm_previous_fndecl
31410 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
31411 : NULL_TREE);
31412
31413 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31414
31415 /* If current function has no attributes but previous one did,
31416 use the default node. */
31417 if (! new_tree && old_tree)
31418 new_tree = target_option_default_node;
31419
31420 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
31421 the default have been handled by save_restore_target_globals from
31422 arm_pragma_target_parse. */
31423 if (old_tree == new_tree)
31424 return;
31425
31426 arm_previous_fndecl = fndecl;
31427
31428 /* First set the target options. */
31429 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
31430
31431 save_restore_target_globals (new_tree);
31432 }
31433
31434 /* Implement TARGET_OPTION_PRINT. */
31435
31436 static void
31437 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
31438 {
31439 int flags = ptr->x_target_flags;
31440 const char *fpu_name;
31441
31442 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
31443 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
31444
31445 fprintf (file, "%*sselected isa %s\n", indent, "",
31446 TARGET_THUMB2_P (flags) ? "thumb2" :
31447 TARGET_THUMB_P (flags) ? "thumb1" :
31448 "arm");
31449
31450 if (ptr->x_arm_arch_string)
31451 fprintf (file, "%*sselected architecture %s\n", indent, "",
31452 ptr->x_arm_arch_string);
31453
31454 if (ptr->x_arm_cpu_string)
31455 fprintf (file, "%*sselected CPU %s\n", indent, "",
31456 ptr->x_arm_cpu_string);
31457
31458 if (ptr->x_arm_tune_string)
31459 fprintf (file, "%*sselected tune %s\n", indent, "",
31460 ptr->x_arm_tune_string);
31461
31462 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
31463 }
31464
31465 /* Hook to determine if one function can safely inline another. */
31466
31467 static bool
31468 arm_can_inline_p (tree caller, tree callee)
31469 {
31470 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
31471 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
31472 bool can_inline = true;
31473
31474 struct cl_target_option *caller_opts
31475 = TREE_TARGET_OPTION (caller_tree ? caller_tree
31476 : target_option_default_node);
31477
31478 struct cl_target_option *callee_opts
31479 = TREE_TARGET_OPTION (callee_tree ? callee_tree
31480 : target_option_default_node);
31481
31482 if (callee_opts == caller_opts)
31483 return true;
31484
31485 /* Callee's ISA features should be a subset of the caller's. */
31486 struct arm_build_target caller_target;
31487 struct arm_build_target callee_target;
31488 caller_target.isa = sbitmap_alloc (isa_num_bits);
31489 callee_target.isa = sbitmap_alloc (isa_num_bits);
31490
31491 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
31492 false);
31493 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
31494 false);
31495 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
31496 can_inline = false;
31497
31498 sbitmap_free (caller_target.isa);
31499 sbitmap_free (callee_target.isa);
31500
31501 /* OK to inline between different modes.
31502 Function with mode specific instructions, e.g using asm,
31503 must be explicitly protected with noinline. */
31504 return can_inline;
31505 }
31506
31507 /* Hook to fix function's alignment affected by target attribute. */
31508
31509 static void
31510 arm_relayout_function (tree fndecl)
31511 {
31512 if (DECL_USER_ALIGN (fndecl))
31513 return;
31514
31515 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31516
31517 if (!callee_tree)
31518 callee_tree = target_option_default_node;
31519
31520 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
31521 SET_DECL_ALIGN
31522 (fndecl,
31523 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
31524 }
31525
31526 /* Inner function to process the attribute((target(...))), take an argument and
31527 set the current options from the argument. If we have a list, recursively
31528 go over the list. */
31529
31530 static bool
31531 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
31532 {
31533 if (TREE_CODE (args) == TREE_LIST)
31534 {
31535 bool ret = true;
31536
31537 for (; args; args = TREE_CHAIN (args))
31538 if (TREE_VALUE (args)
31539 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
31540 ret = false;
31541 return ret;
31542 }
31543
31544 else if (TREE_CODE (args) != STRING_CST)
31545 {
31546 error ("attribute %<target%> argument not a string");
31547 return false;
31548 }
31549
31550 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
31551 char *q;
31552
31553 while ((q = strtok (argstr, ",")) != NULL)
31554 {
31555 argstr = NULL;
31556 if (!strcmp (q, "thumb"))
31557 {
31558 opts->x_target_flags |= MASK_THUMB;
31559 if (TARGET_FDPIC && !arm_arch_thumb2)
31560 sorry ("FDPIC mode is not supported in Thumb-1 mode");
31561 }
31562
31563 else if (!strcmp (q, "arm"))
31564 opts->x_target_flags &= ~MASK_THUMB;
31565
31566 else if (!strcmp (q, "general-regs-only"))
31567 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
31568
31569 else if (!strncmp (q, "fpu=", 4))
31570 {
31571 int fpu_index;
31572 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
31573 &fpu_index, CL_TARGET))
31574 {
31575 error ("invalid fpu for target attribute or pragma %qs", q);
31576 return false;
31577 }
31578 if (fpu_index == TARGET_FPU_auto)
31579 {
31580 /* This doesn't really make sense until we support
31581 general dynamic selection of the architecture and all
31582 sub-features. */
31583 sorry ("auto fpu selection not currently permitted here");
31584 return false;
31585 }
31586 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
31587 }
31588 else if (!strncmp (q, "arch=", 5))
31589 {
31590 char *arch = q + 5;
31591 const arch_option *arm_selected_arch
31592 = arm_parse_arch_option_name (all_architectures, "arch", arch);
31593
31594 if (!arm_selected_arch)
31595 {
31596 error ("invalid architecture for target attribute or pragma %qs",
31597 q);
31598 return false;
31599 }
31600
31601 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
31602 }
31603 else if (q[0] == '+')
31604 {
31605 opts->x_arm_arch_string
31606 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
31607 }
31608 else
31609 {
31610 error ("unknown target attribute or pragma %qs", q);
31611 return false;
31612 }
31613 }
31614
31615 return true;
31616 }
31617
31618 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
31619
31620 tree
31621 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
31622 struct gcc_options *opts_set)
31623 {
31624 struct cl_target_option cl_opts;
31625
31626 if (!arm_valid_target_attribute_rec (args, opts))
31627 return NULL_TREE;
31628
31629 cl_target_option_save (&cl_opts, opts);
31630 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
31631 arm_option_check_internal (opts);
31632 /* Do any overrides, such as global options arch=xxx.
31633 We do this since arm_active_target was overridden. */
31634 arm_option_reconfigure_globals ();
31635 arm_options_perform_arch_sanity_checks ();
31636 arm_option_override_internal (opts, opts_set);
31637
31638 return build_target_option_node (opts);
31639 }
31640
31641 static void
31642 add_attribute (const char * mode, tree *attributes)
31643 {
31644 size_t len = strlen (mode);
31645 tree value = build_string (len, mode);
31646
31647 TREE_TYPE (value) = build_array_type (char_type_node,
31648 build_index_type (size_int (len)));
31649
31650 *attributes = tree_cons (get_identifier ("target"),
31651 build_tree_list (NULL_TREE, value),
31652 *attributes);
31653 }
31654
31655 /* For testing. Insert thumb or arm modes alternately on functions. */
31656
31657 static void
31658 arm_insert_attributes (tree fndecl, tree * attributes)
31659 {
31660 const char *mode;
31661
31662 if (! TARGET_FLIP_THUMB)
31663 return;
31664
31665 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
31666 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
31667 return;
31668
31669 /* Nested definitions must inherit mode. */
31670 if (current_function_decl)
31671 {
31672 mode = TARGET_THUMB ? "thumb" : "arm";
31673 add_attribute (mode, attributes);
31674 return;
31675 }
31676
31677 /* If there is already a setting don't change it. */
31678 if (lookup_attribute ("target", *attributes) != NULL)
31679 return;
31680
31681 mode = thumb_flipper ? "thumb" : "arm";
31682 add_attribute (mode, attributes);
31683
31684 thumb_flipper = !thumb_flipper;
31685 }
31686
31687 /* Hook to validate attribute((target("string"))). */
31688
31689 static bool
31690 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
31691 tree args, int ARG_UNUSED (flags))
31692 {
31693 bool ret = true;
31694 struct gcc_options func_options;
31695 tree cur_tree, new_optimize;
31696 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
31697
31698 /* Get the optimization options of the current function. */
31699 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31700
31701 /* If the function changed the optimization levels as well as setting target
31702 options, start with the optimizations specified. */
31703 if (!func_optimize)
31704 func_optimize = optimization_default_node;
31705
31706 /* Init func_options. */
31707 memset (&func_options, 0, sizeof (func_options));
31708 init_options_struct (&func_options, NULL);
31709 lang_hooks.init_options_struct (&func_options);
31710
31711 /* Initialize func_options to the defaults. */
31712 cl_optimization_restore (&func_options,
31713 TREE_OPTIMIZATION (func_optimize));
31714
31715 cl_target_option_restore (&func_options,
31716 TREE_TARGET_OPTION (target_option_default_node));
31717
31718 /* Set func_options flags with new target mode. */
31719 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
31720 &global_options_set);
31721
31722 if (cur_tree == NULL_TREE)
31723 ret = false;
31724
31725 new_optimize = build_optimization_node (&func_options);
31726
31727 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
31728
31729 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31730
31731 finalize_options_struct (&func_options);
31732
31733 return ret;
31734 }
31735
31736 /* Match an ISA feature bitmap to a named FPU. We always use the
31737 first entry that exactly matches the feature set, so that we
31738 effectively canonicalize the FPU name for the assembler. */
31739 static const char*
31740 arm_identify_fpu_from_isa (sbitmap isa)
31741 {
31742 auto_sbitmap fpubits (isa_num_bits);
31743 auto_sbitmap cand_fpubits (isa_num_bits);
31744
31745 bitmap_and (fpubits, isa, isa_all_fpubits);
31746
31747 /* If there are no ISA feature bits relating to the FPU, we must be
31748 doing soft-float. */
31749 if (bitmap_empty_p (fpubits))
31750 return "softvfp";
31751
31752 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31753 {
31754 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
31755 if (bitmap_equal_p (fpubits, cand_fpubits))
31756 return all_fpus[i].name;
31757 }
31758 /* We must find an entry, or things have gone wrong. */
31759 gcc_unreachable ();
31760 }
31761
31762 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
31763 by the function fndecl. */
31764 void
31765 arm_declare_function_name (FILE *stream, const char *name, tree decl)
31766 {
31767 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
31768
31769 struct cl_target_option *targ_options;
31770 if (target_parts)
31771 targ_options = TREE_TARGET_OPTION (target_parts);
31772 else
31773 targ_options = TREE_TARGET_OPTION (target_option_current_node);
31774 gcc_assert (targ_options);
31775
31776 /* Only update the assembler .arch string if it is distinct from the last
31777 such string we printed. arch_to_print is set conditionally in case
31778 targ_options->x_arm_arch_string is NULL which can be the case
31779 when cc1 is invoked directly without passing -march option. */
31780 std::string arch_to_print;
31781 if (targ_options->x_arm_arch_string)
31782 arch_to_print = targ_options->x_arm_arch_string;
31783
31784 if (arch_to_print != arm_last_printed_arch_string)
31785 {
31786 std::string arch_name
31787 = arch_to_print.substr (0, arch_to_print.find ("+"));
31788 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
31789 const arch_option *arch
31790 = arm_parse_arch_option_name (all_architectures, "-march",
31791 targ_options->x_arm_arch_string);
31792 auto_sbitmap opt_bits (isa_num_bits);
31793
31794 gcc_assert (arch);
31795 if (arch->common.extensions)
31796 {
31797 for (const struct cpu_arch_extension *opt = arch->common.extensions;
31798 opt->name != NULL;
31799 opt++)
31800 {
31801 if (!opt->remove)
31802 {
31803 arm_initialize_isa (opt_bits, opt->isa_bits);
31804 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31805 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31806 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31807 opt->name);
31808 }
31809 }
31810 }
31811
31812 arm_last_printed_arch_string = arch_to_print;
31813 }
31814
31815 fprintf (stream, "\t.syntax unified\n");
31816
31817 if (TARGET_THUMB)
31818 {
31819 if (is_called_in_ARM_mode (decl)
31820 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31821 && cfun->is_thunk))
31822 fprintf (stream, "\t.code 32\n");
31823 else if (TARGET_THUMB1)
31824 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31825 else
31826 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31827 }
31828 else
31829 fprintf (stream, "\t.arm\n");
31830
31831 std::string fpu_to_print
31832 = TARGET_SOFT_FLOAT
31833 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31834
31835 if (fpu_to_print != arm_last_printed_arch_string)
31836 {
31837 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31838 arm_last_printed_fpu_string = fpu_to_print;
31839 }
31840
31841 if (TARGET_POKE_FUNCTION_NAME)
31842 arm_poke_function_name (stream, (const char *) name);
31843 }
31844
31845 /* If MEM is in the form of [base+offset], extract the two parts
31846 of the address and store them in BASE and OFFSET; otherwise return false
31847 after clearing BASE and OFFSET. */
31848
31849 static bool
31850 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31851 {
31852 rtx addr;
31853
31854 gcc_assert (MEM_P (mem));
31855
31856 addr = XEXP (mem, 0);
31857
31858 /* Strip off const from addresses like (const (addr)). */
31859 if (GET_CODE (addr) == CONST)
31860 addr = XEXP (addr, 0);
31861
31862 if (GET_CODE (addr) == REG)
31863 {
31864 *base = addr;
31865 *offset = const0_rtx;
31866 return true;
31867 }
31868
31869 if (GET_CODE (addr) == PLUS
31870 && GET_CODE (XEXP (addr, 0)) == REG
31871 && CONST_INT_P (XEXP (addr, 1)))
31872 {
31873 *base = XEXP (addr, 0);
31874 *offset = XEXP (addr, 1);
31875 return true;
31876 }
31877
31878 *base = NULL_RTX;
31879 *offset = NULL_RTX;
31880
31881 return false;
31882 }
31883
31884 /* If INSN is a load or store of address in the form of [base+offset],
31885 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
31886 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
31887 otherwise return FALSE. */
31888
31889 static bool
31890 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31891 {
31892 rtx x, dest, src;
31893
31894 gcc_assert (INSN_P (insn));
31895 x = PATTERN (insn);
31896 if (GET_CODE (x) != SET)
31897 return false;
31898
31899 src = SET_SRC (x);
31900 dest = SET_DEST (x);
31901 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31902 {
31903 *is_load = false;
31904 extract_base_offset_in_addr (dest, base, offset);
31905 }
31906 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31907 {
31908 *is_load = true;
31909 extract_base_offset_in_addr (src, base, offset);
31910 }
31911 else
31912 return false;
31913
31914 return (*base != NULL_RTX && *offset != NULL_RTX);
31915 }
31916
31917 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31918
31919 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31920 and PRI are only calculated for these instructions. For other instructions,
31921 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
31922 of instruction fusion can be supported by returning different priorities.
31923
31924 It's important that irrelevant instructions get the largest FUSION_PRI. */
31925
31926 static void
31927 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31928 int *fusion_pri, int *pri)
31929 {
31930 int tmp, off_val;
31931 bool is_load;
31932 rtx base, offset;
31933
31934 gcc_assert (INSN_P (insn));
31935
31936 tmp = max_pri - 1;
31937 if (!fusion_load_store (insn, &base, &offset, &is_load))
31938 {
31939 *pri = tmp;
31940 *fusion_pri = tmp;
31941 return;
31942 }
31943
31944 /* Load goes first. */
31945 if (is_load)
31946 *fusion_pri = tmp - 1;
31947 else
31948 *fusion_pri = tmp - 2;
31949
31950 tmp /= 2;
31951
31952 /* INSN with smaller base register goes first. */
31953 tmp -= ((REGNO (base) & 0xff) << 20);
31954
31955 /* INSN with smaller offset goes first. */
31956 off_val = (int)(INTVAL (offset));
31957 if (off_val >= 0)
31958 tmp -= (off_val & 0xfffff);
31959 else
31960 tmp += ((- off_val) & 0xfffff);
31961
31962 *pri = tmp;
31963 return;
31964 }
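
/* For example (illustrative), given the two loads

       ldr  r1, [r4]
       ldr  r2, [r4, #4]

   both receive the same FUSION_PRI, while the second gets a PRI smaller
   by 4, so the scheduler tends to keep them adjacent and in increasing
   offset order, making it easier for later passes to turn them into
   ldrd or ldm forms (register numbers are hypothetical).  */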
31965
31966
31967 /* Construct and return a PARALLEL RTX vector with elements numbering the
31968 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31969 the vector - from the perspective of the architecture. This does not
31970 line up with GCC's perspective on lane numbers, so we end up with
31971 different masks depending on our target endian-ness. The diagram
31972 below may help. We must draw the distinction when building masks
31973 which select one half of the vector. An instruction selecting
31974 architectural low-lanes for a big-endian target must be described using
31975 a mask selecting GCC high-lanes.
31976
31977 Big-Endian Little-Endian
31978
31979 GCC 0 1 2 3 3 2 1 0
31980 | x | x | x | x | | x | x | x | x |
31981 Architecture 3 2 1 0 3 2 1 0
31982
31983 Low Mask: { 2, 3 } { 0, 1 }
31984 High Mask: { 0, 1 } { 2, 3 }
31985 */
31986
31987 rtx
31988 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31989 {
31990 int nunits = GET_MODE_NUNITS (mode);
31991 rtvec v = rtvec_alloc (nunits / 2);
31992 int high_base = nunits / 2;
31993 int low_base = 0;
31994 int base;
31995 rtx t1;
31996 int i;
31997
31998 if (BYTES_BIG_ENDIAN)
31999 base = high ? low_base : high_base;
32000 else
32001 base = high ? high_base : low_base;
32002
32003 for (i = 0; i < nunits / 2; i++)
32004 RTVEC_ELT (v, i) = GEN_INT (base + i);
32005
32006 t1 = gen_rtx_PARALLEL (mode, v);
32007 return t1;
32008 }
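
/* For example (illustrative), for V4SImode with HIGH == true this
   returns

       little-endian:  (parallel [ (const_int 2) (const_int 3) ])
       big-endian:     (parallel [ (const_int 0) (const_int 1) ])

   matching the "High Mask" row of the diagram above.  */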
32009
32010 /* Check OP for validity as a PARALLEL RTX vector with elements
32011 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
32012 from the perspective of the architecture. See the diagram above
32013 arm_simd_vect_par_cnst_half for more details. */
32014
32015 bool
32016 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
32017 bool high)
32018 {
32019 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
32020 HOST_WIDE_INT count_op = XVECLEN (op, 0);
32021 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
32022 int i = 0;
32023
32024 if (!VECTOR_MODE_P (mode))
32025 return false;
32026
32027 if (count_op != count_ideal)
32028 return false;
32029
32030 for (i = 0; i < count_ideal; i++)
32031 {
32032 rtx elt_op = XVECEXP (op, 0, i);
32033 rtx elt_ideal = XVECEXP (ideal, 0, i);
32034
32035 if (!CONST_INT_P (elt_op)
32036 || INTVAL (elt_ideal) != INTVAL (elt_op))
32037 return false;
32038 }
32039 return true;
32040 }
32041
32042 /* Can output mi_thunk for all cases except for non-zero vcall_offset
32043 in Thumb1. */
32044 static bool
32045 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
32046 const_tree)
32047 {
32048 /* For now, we punt and do not handle this for TARGET_THUMB1. */
32049 if (vcall_offset && TARGET_THUMB1)
32050 return false;
32051
32052 /* Otherwise ok. */
32053 return true;
32054 }
32055
32056 /* Generate RTL for a conditional branch with rtx comparison CODE in
32057 mode CC_MODE. The destination of the unlikely conditional branch
32058 is LABEL_REF. */
32059
32060 void
32061 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
32062 rtx label_ref)
32063 {
32064 rtx x;
32065 x = gen_rtx_fmt_ee (code, VOIDmode,
32066 gen_rtx_REG (cc_mode, CC_REGNUM),
32067 const0_rtx);
32068
32069 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
32070 gen_rtx_LABEL_REF (VOIDmode, label_ref),
32071 pc_rtx);
32072 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
32073 }
32074
32075 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
32076
32077 For pure-code sections there is no letter code for this attribute, so
32078 output all the section flags numerically when this is needed. */
32079
32080 static bool
32081 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
32082 {
32083
32084 if (flags & SECTION_ARM_PURECODE)
32085 {
32086 *num = 0x20000000;
32087
32088 if (!(flags & SECTION_DEBUG))
32089 *num |= 0x2;
32090 if (flags & SECTION_EXCLUDE)
32091 *num |= 0x80000000;
32092 if (flags & SECTION_WRITE)
32093 *num |= 0x1;
32094 if (flags & SECTION_CODE)
32095 *num |= 0x4;
32096 if (flags & SECTION_MERGE)
32097 *num |= 0x10;
32098 if (flags & SECTION_STRINGS)
32099 *num |= 0x20;
32100 if (flags & SECTION_TLS)
32101 *num |= 0x400;
32102 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
32103 *num |= 0x200;
32104
32105 return true;
32106 }
32107
32108 return false;
32109 }
32110
32111 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
32112
32113 If pure-code is passed as an option, make sure all functions are in
32114 sections that have the SHF_ARM_PURECODE attribute. */
32115
32116 static section *
32117 arm_function_section (tree decl, enum node_frequency freq,
32118 bool startup, bool exit)
32119 {
32120 const char * section_name;
32121 section * sec;
32122
32123 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
32124 return default_function_section (decl, freq, startup, exit);
32125
32126 if (!target_pure_code)
32127 return default_function_section (decl, freq, startup, exit);
32128
32129
32130 section_name = DECL_SECTION_NAME (decl);
32131
32132 /* If a function is not in a named section then it falls under the 'default'
32133 text section, also known as '.text'. We can preserve previous behavior as
32134 the default text section already has the SHF_ARM_PURECODE section
32135 attribute. */
32136 if (!section_name)
32137 {
32138 section *default_sec = default_function_section (decl, freq, startup,
32139 exit);
32140
32141 /* If default_sec is not null, then it must be a special section like for
32142 example .text.startup. We set the pure-code attribute and return the
32143 same section to preserve existing behavior. */
32144 if (default_sec)
32145 default_sec->common.flags |= SECTION_ARM_PURECODE;
32146 return default_sec;
32147 }
32148
32149 /* Otherwise look whether a section has already been created with
32150 'section_name'. */
32151 sec = get_named_section (decl, section_name, 0);
32152 if (!sec)
32153 /* If that is not the case passing NULL as the section's name to
32154 'get_named_section' will create a section with the declaration's
32155 section name. */
32156 sec = get_named_section (decl, NULL, 0);
32157
32158 /* Set the SHF_ARM_PURECODE attribute. */
32159 sec->common.flags |= SECTION_ARM_PURECODE;
32160
32161 return sec;
32162 }
32163
32164 /* Implements the TARGET_SECTION_FLAGS hook.
32165
32166 If DECL is a function declaration and pure-code is passed as an option
32167 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
32168 section's name and RELOC indicates whether the declaration's initializer may
32169 contain runtime relocations. */
32170
32171 static unsigned int
32172 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
32173 {
32174 unsigned int flags = default_section_type_flags (decl, name, reloc);
32175
32176 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
32177 flags |= SECTION_ARM_PURECODE;
32178
32179 return flags;
32180 }
32181
32182 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
32183
32184 static void
32185 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
32186 rtx op0, rtx op1,
32187 rtx *quot_p, rtx *rem_p)
32188 {
32189 if (mode == SImode)
32190 gcc_assert (!TARGET_IDIV);
32191
32192 scalar_int_mode libval_mode
32193 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
32194
32195 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
32196 libval_mode,
32197 op0, GET_MODE (op0),
32198 op1, GET_MODE (op1));
32199
32200 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
32201 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
32202 GET_MODE_SIZE (mode));
32203
32204 gcc_assert (quotient);
32205 gcc_assert (remainder);
32206
32207 *quot_p = quotient;
32208 *rem_p = remainder;
32209 }
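
/* For SImode (illustrative), LIBFUNC is __aeabi_idivmod or
   __aeabi_uidivmod, LIBVAL_MODE is DImode, and under the AEABI the
   routine returns the quotient in r0 and the remainder in r1; the two
   simplify_gen_subreg calls above extract those two halves from
   LIBVAL.  */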
32210
32211 /* This function checks for the availability of the coprocessor builtin passed
32212 in BUILTIN for the current target. Returns true if it is available and
32213 false otherwise. If a BUILTIN is passed for which this function has not
32214 been implemented, it will trigger gcc_unreachable. */
32215
32216 bool
32217 arm_coproc_builtin_available (enum unspecv builtin)
32218 {
32219 /* None of these builtins are available in Thumb mode if the target only
32220 supports Thumb-1. */
32221 if (TARGET_THUMB1)
32222 return false;
32223
32224 switch (builtin)
32225 {
32226 case VUNSPEC_CDP:
32227 case VUNSPEC_LDC:
32228 case VUNSPEC_LDCL:
32229 case VUNSPEC_STC:
32230 case VUNSPEC_STCL:
32231 case VUNSPEC_MCR:
32232 case VUNSPEC_MRC:
32233 if (arm_arch4)
32234 return true;
32235 break;
32236 case VUNSPEC_CDP2:
32237 case VUNSPEC_LDC2:
32238 case VUNSPEC_LDC2L:
32239 case VUNSPEC_STC2:
32240 case VUNSPEC_STC2L:
32241 case VUNSPEC_MCR2:
32242 case VUNSPEC_MRC2:
32243 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
32244 ARMv8-{A,M}. */
32245 if (arm_arch5t)
32246 return true;
32247 break;
32248 case VUNSPEC_MCRR:
32249 case VUNSPEC_MRRC:
32250 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
32251 ARMv8-{A,M}. */
32252 if (arm_arch6 || arm_arch5te)
32253 return true;
32254 break;
32255 case VUNSPEC_MCRR2:
32256 case VUNSPEC_MRRC2:
32257 if (arm_arch6)
32258 return true;
32259 break;
32260 default:
32261 gcc_unreachable ();
32262 }
32263 return false;
32264 }
32265
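/* Example (illustrative; the intrinsic-to-unspec mapping is an assumption):
   the ACLE coprocessor intrinsics map onto these unspecs, so e.g. __arm_cdp
   (expanding through VUNSPEC_CDP) is only accepted from ARMv4 onwards, while
   __arm_cdp2 (VUNSPEC_CDP2) additionally requires ARMv5T or later, and none
   of them are available in Thumb-1-only code.  */
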
32266 /* This function returns true if OP is a valid memory operand for the ldc and
32267 stc coprocessor instructions and false otherwise. */
32268
32269 bool
32270 arm_coproc_ldc_stc_legitimate_address (rtx op)
32271 {
32272 HOST_WIDE_INT range;
32273 /* Has to be a memory operand. */
32274 if (!MEM_P (op))
32275 return false;
32276
32277 op = XEXP (op, 0);
32278
32279 /* We accept registers. */
32280 if (REG_P (op))
32281 return true;
32282
32283 switch (GET_CODE (op))
32284 {
32285 case PLUS:
32286 {
32287 /* Or registers with an offset. */
32288 if (!REG_P (XEXP (op, 0)))
32289 return false;
32290
32291 op = XEXP (op, 1);
32292
32293 /* The offset must be an immediate though. */
32294 if (!CONST_INT_P (op))
32295 return false;
32296
32297 range = INTVAL (op);
32298
32299 /* Within the range of [-1020,1020]. */
32300 if (!IN_RANGE (range, -1020, 1020))
32301 return false;
32302
32303 /* And a multiple of 4. */
32304 return (range % 4) == 0;
32305 }
32306 case PRE_INC:
32307 case POST_INC:
32308 case PRE_DEC:
32309 case POST_DEC:
32310 return REG_P (XEXP (op, 0));
32311 default:
32312 gcc_unreachable ();
32313 }
32314 return false;
32315 }
32316
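/* Illustrative examples (an assumption, not from the original sources) of
   the address forms accepted above, in simplified RTL:

     (mem (reg Rn))                             accepted
     (mem (plus (reg Rn) (const_int 8)))        accepted (multiple of 4,
                                                 within [-1020, 1020])
     (mem (post_inc (reg Rn)))                  accepted (writeback form)
     (mem (plus (reg Rn) (const_int 2)))        rejected (offset not a
                                                 multiple of 4)
     (mem (plus (reg Rn) (const_int 4096)))     rejected (out of range).  */
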
32317 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
32318
32319 In VFPv1, VFP registers could only be accessed in the mode they were
32320 set, so subregs would be invalid there. However, we don't support
32321 VFPv1 at the moment, and the restriction was lifted in VFPv2.
32322
32323 In big-endian mode, modes greater than word size (e.g. DFmode) are stored in
32324 VFP registers in little-endian order. We can't describe that accurately to
32325 GCC, so avoid taking subregs of such values.
32326
32327 The only exception is going from a 128-bit to a 64-bit type. In that
32328 case the data layout happens to be consistent for big-endian, so we
32329 explicitly allow that case. */
32330
32331 static bool
32332 arm_can_change_mode_class (machine_mode from, machine_mode to,
32333 reg_class_t rclass)
32334 {
32335 if (TARGET_BIG_END
32336 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
32337 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
32338 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
32339 && reg_classes_intersect_p (VFP_REGS, rclass))
32340 return false;
32341 return true;
32342 }
32343
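/* Worked example (illustrative): with TARGET_BIG_END and a class that
   intersects VFP_REGS,

     arm_can_change_mode_class (DFmode, SImode, VFP_REGS)     => false
         (DFmode is wider than a word and this is not the 128->64 case)
     arm_can_change_mode_class (V4SImode, V2SImode, VFP_REGS) => true
         (the explicit 128-bit to 64-bit exception)
     arm_can_change_mode_class (SFmode, SImode, VFP_REGS)     => true
         (both modes fit in a single word).  */
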
32344 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
32345 strcpy from constants will be faster. */
32346
32347 static HOST_WIDE_INT
32348 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
32349 {
32350 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
32351 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
32352 return MAX (align, BITS_PER_WORD * factor);
32353 return align;
32354 }
32355
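/* Example (illustrative): for a STRING_CST compiled without -Os, an
   incoming alignment of 8 bits is raised to BITS_PER_WORD (32), or to 64
   when compiling for ARM state and tuning for XScale (factor == 2); with
   -Os, or for non-string constants, ALIGN is returned unchanged.  */
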
32356 /* Emit a speculation barrier on target architectures that do not have
32357 DSB/ISB directly.  Such systems probably don't need a barrier
32358 themselves, but if the code is ever run on a later architecture the
32359 missing barrier might become a problem, so emit a library call instead. */
32360 void
32361 arm_emit_speculation_barrier_function ()
32362 {
32363 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
32364 }
32365
32366 #if CHECKING_P
32367 namespace selftest {
32368
32369 /* Scan the static data tables generated by parsecpu.awk looking for
32370 potential issues with the data. We primarily check for
32371 inconsistencies in the option extensions at present (extensions
32372 that duplicate others but aren't marked as aliases). Furthermore,
32373 for correct canonicalization later options must never be a subset
32374 of an earlier option. Any extension should also only specify other
32375 feature bits and never an architecture bit. The architecture is inferred
32376 from the declaration of the extension. */
32377 static void
32378 arm_test_cpu_arch_data (void)
32379 {
32380 const arch_option *arch;
32381 const cpu_option *cpu;
32382 auto_sbitmap target_isa (isa_num_bits);
32383 auto_sbitmap isa1 (isa_num_bits);
32384 auto_sbitmap isa2 (isa_num_bits);
32385
32386 for (arch = all_architectures; arch->common.name != NULL; ++arch)
32387 {
32388 const cpu_arch_extension *ext1, *ext2;
32389
32390 if (arch->common.extensions == NULL)
32391 continue;
32392
32393 arm_initialize_isa (target_isa, arch->common.isa_bits);
32394
32395 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
32396 {
32397 if (ext1->alias)
32398 continue;
32399
32400 arm_initialize_isa (isa1, ext1->isa_bits);
32401 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
32402 {
32403 if (ext2->alias || ext1->remove != ext2->remove)
32404 continue;
32405
32406 arm_initialize_isa (isa2, ext2->isa_bits);
32407 /* If the option is a subset of an earlier option, it doesn't
32408 add anything and so isn't useful. */
32409 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
32410
32411 /* If the extension specifies any architectural bits then
32412 disallow it. Extensions should only specify feature bits. */
32413 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
32414 }
32415 }
32416 }
32417
32418 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
32419 {
32420 const cpu_arch_extension *ext1, *ext2;
32421
32422 if (cpu->common.extensions == NULL)
32423 continue;
32424
32425 arm_initialize_isa (target_isa, cpu->common.isa_bits);
32426
32427 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
32428 {
32429 if (ext1->alias)
32430 continue;
32431
32432 arm_initialize_isa (isa1, ext1->isa_bits);
32433 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
32434 {
32435 if (ext2->alias || ext1->remove != ext2->remove)
32436 continue;
32437
32438 arm_initialize_isa (isa2, ext2->isa_bits);
32439 /* If the option is a subset of an earlier option, it doesn't
32440 add anything and so isn't useful. */
32441 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
32442
32443 /* If the extension specifies any architectural bits then
32444 disallow it. Extensions should only specify feature bits. */
32445 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
32446 }
32447 }
32448 }
32449 }
32450
32451 /* Scan the static data tables generated by parsecpu.awk looking for
32452 potential issues with the data.  Here we check the FPU feature bits for
32453 consistency; in particular that ISA_ALL_FPU_INTERNAL does not contain
32454 a feature bit that is not provided by any of the defined FPUs. */
32455 static void
32456 arm_test_fpu_data (void)
32457 {
32458 auto_sbitmap isa_all_fpubits (isa_num_bits);
32459 auto_sbitmap fpubits (isa_num_bits);
32460 auto_sbitmap tmpset (isa_num_bits);
32461
32462 static const enum isa_feature fpu_bitlist[]
32463 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
32464 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
32465
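  /* Successively remove from ISA_ALL_FPUBITS every feature bit provided by
     some FPU; any bit still set afterwards is not covered by any FPU.  */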
32466 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
32467 {
32468 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
32469 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
32470 bitmap_clear (isa_all_fpubits);
32471 bitmap_copy (isa_all_fpubits, tmpset);
32472 }
32473
32474 if (!bitmap_empty_p (isa_all_fpubits))
32475 {
32476 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
32477 " group that are not defined by any FPU.\n"
32478 " Check your arm-cpus.in.\n");
32479 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
32480 }
32481 }
32482
32483 static void
32484 arm_run_selftests (void)
32485 {
32486 arm_test_cpu_arch_data ();
32487 arm_test_fpu_data ();
32488 }
32489 } /* Namespace selftest. */
32490
32491 #undef TARGET_RUN_TARGET_SELFTESTS
32492 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
32493 #endif /* CHECKING_P */
32494
32495 struct gcc_target targetm = TARGET_INITIALIZER;
32496
32497 #include "gt-arm.h"