1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
66 #include "gimplify.h"
67 #include "gimple.h"
68
69 /* This file should be included last. */
70 #include "target-def.h"
71
72 /* Forward definitions of types. */
73 typedef struct minipool_node Mnode;
74 typedef struct minipool_fixup Mfix;
75
76 void (*arm_lang_output_object_attributes_hook)(void);
77
78 struct four_ints
79 {
80 int i[4];
81 };
82
83 /* Forward function declarations. */
84 static bool arm_const_not_ok_for_debug_p (rtx);
85 static int arm_needs_doubleword_align (machine_mode, const_tree);
86 static int arm_compute_static_chain_stack_bytes (void);
87 static arm_stack_offsets *arm_get_frame_offsets (void);
88 static void arm_compute_frame_layout (void);
89 static void arm_add_gc_roots (void);
90 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
91 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
92 static unsigned bit_count (unsigned long);
93 static unsigned bitmap_popcount (const sbitmap);
94 static int arm_address_register_rtx_p (rtx, int);
95 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
96 static bool is_called_in_ARM_mode (tree);
97 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
98 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
99 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
100 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
101 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
102 inline static int thumb1_index_register_rtx_p (rtx, int);
103 static int thumb_far_jump_used_p (void);
104 static bool thumb_force_lr_save (void);
105 static unsigned arm_size_return_regs (void);
106 static bool arm_assemble_integer (rtx, unsigned int, int);
107 static void arm_print_operand (FILE *, rtx, int);
108 static void arm_print_operand_address (FILE *, machine_mode, rtx);
109 static bool arm_print_operand_punct_valid_p (unsigned char code);
110 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
111 static arm_cc get_arm_condition_code (rtx);
112 static const char *output_multi_immediate (rtx *, const char *, const char *,
113 int, HOST_WIDE_INT);
114 static const char *shift_op (rtx, HOST_WIDE_INT *);
115 static struct machine_function *arm_init_machine_status (void);
116 static void thumb_exit (FILE *, int);
117 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
118 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
119 static Mnode *add_minipool_forward_ref (Mfix *);
120 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
121 static Mnode *add_minipool_backward_ref (Mfix *);
122 static void assign_minipool_offsets (Mfix *);
123 static void arm_print_value (FILE *, rtx);
124 static void dump_minipool (rtx_insn *);
125 static int arm_barrier_cost (rtx_insn *);
126 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
127 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
128 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
129 machine_mode, rtx);
130 static void arm_reorg (void);
131 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
132 static unsigned long arm_compute_save_reg0_reg12_mask (void);
133 static unsigned long arm_compute_save_core_reg_mask (void);
134 static unsigned long arm_isr_value (tree);
135 static unsigned long arm_compute_func_type (void);
136 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
137 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
138 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
139 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
140 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
141 #endif
142 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
143 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
144 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
145 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
146 static int arm_comp_type_attributes (const_tree, const_tree);
147 static void arm_set_default_type_attributes (tree);
148 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
149 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
150 static int optimal_immediate_sequence (enum rtx_code code,
151 unsigned HOST_WIDE_INT val,
152 struct four_ints *return_sequence);
153 static int optimal_immediate_sequence_1 (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence,
156 int i);
157 static int arm_get_strip_length (int);
158 static bool arm_function_ok_for_sibcall (tree, tree);
159 static machine_mode arm_promote_function_mode (const_tree,
160 machine_mode, int *,
161 const_tree, int);
162 static bool arm_return_in_memory (const_tree, const_tree);
163 static rtx arm_function_value (const_tree, const_tree, bool);
164 static rtx arm_libcall_value_1 (machine_mode);
165 static rtx arm_libcall_value (machine_mode, const_rtx);
166 static bool arm_function_value_regno_p (const unsigned int);
167 static void arm_internal_label (FILE *, const char *, unsigned long);
168 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
169 tree);
170 static bool arm_have_conditional_execution (void);
171 static bool arm_cannot_force_const_mem (machine_mode, rtx);
172 static bool arm_legitimate_constant_p (machine_mode, rtx);
173 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
174 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
175 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
176 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
177 static void emit_constant_insn (rtx cond, rtx pattern);
178 static rtx_insn *emit_set_insn (rtx, rtx);
179 static rtx emit_multi_reg_push (unsigned long, unsigned long);
180 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
181 tree, bool);
182 static rtx arm_function_arg (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
185 const_tree, bool);
186 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
187 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
188 const_tree);
189 static rtx aapcs_libcall_value (machine_mode);
190 static int aapcs_select_return_coproc (const_tree, const_tree);
191
192 #ifdef OBJECT_FORMAT_ELF
193 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
194 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
195 #endif
196 #ifndef ARM_PE
197 static void arm_encode_section_info (tree, rtx, int);
198 #endif
199
200 static void arm_file_end (void);
201 static void arm_file_start (void);
202 static void arm_insert_attributes (tree, tree *);
203
204 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx_insn *);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 #endif
219 static void arm_asm_init_sections (void);
220 static rtx arm_dwarf_register_span (rtx);
221
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static void arm_option_restore (struct gcc_options *,
237 struct cl_target_option *);
238 static void arm_override_options_after_change (void);
239 static void arm_option_print (FILE *, int, struct cl_target_option *);
240 static void arm_set_current_function (tree);
241 static bool arm_can_inline_p (tree, tree);
242 static void arm_relayout_function (tree);
243 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
244 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
245 static bool arm_sched_can_speculate_insn (rtx_insn *);
246 static bool arm_macro_fusion_p (void);
247 static bool arm_cannot_copy_insn_p (rtx_insn *);
248 static int arm_issue_rate (void);
249 static int arm_first_cycle_multipass_dfa_lookahead (void);
250 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
251 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
252 static bool arm_output_addr_const_extra (FILE *, rtx);
253 static bool arm_allocate_stack_slots_for_args (void);
254 static bool arm_warn_func_return (tree);
255 static tree arm_promoted_type (const_tree t);
256 static bool arm_scalar_mode_supported_p (machine_mode);
257 static bool arm_frame_pointer_required (void);
258 static bool arm_can_eliminate (const int, const int);
259 static void arm_asm_trampoline_template (FILE *);
260 static void arm_trampoline_init (rtx, tree, rtx);
261 static rtx arm_trampoline_adjust_address (rtx);
262 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
263 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
264 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
265 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
266 static bool arm_array_mode_supported_p (machine_mode,
267 unsigned HOST_WIDE_INT);
268 static machine_mode arm_preferred_simd_mode (machine_mode);
269 static bool arm_class_likely_spilled_p (reg_class_t);
270 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
271 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
272 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
273 const_tree type,
274 int misalignment,
275 bool is_packed);
276 static void arm_conditional_register_usage (void);
277 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
278 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
279 static unsigned int arm_autovectorize_vector_sizes (void);
280 static int arm_default_branch_cost (bool, bool);
281 static int arm_cortex_a5_branch_cost (bool, bool);
282 static int arm_cortex_m_branch_cost (bool, bool);
283 static int arm_cortex_m7_branch_cost (bool, bool);
284
285 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
286 const unsigned char *sel);
287
288 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
289
290 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
291 tree vectype,
292 int misalign ATTRIBUTE_UNUSED);
293 static unsigned arm_add_stmt_cost (void *data, int count,
294 enum vect_cost_for_stmt kind,
295 struct _stmt_vec_info *stmt_info,
296 int misalign,
297 enum vect_cost_model_location where);
298
299 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
300 bool op0_preserve_value);
301 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
302
303 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
304 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
305 const_tree);
306 static section *arm_function_section (tree, enum node_frequency, bool, bool);
307 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
308 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
309 int reloc);
310 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
311 static machine_mode arm_floatn_mode (int, bool);
312 \f
313 /* Table of machine attributes. */
314 static const struct attribute_spec arm_attribute_table[] =
315 {
316 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
317 affects_type_identity } */
318 /* Function calls made to this symbol must be done indirectly, because
319 it may lie outside of the 26 bit addressing range of a normal function
320 call. */
321 { "long_call", 0, 0, false, true, true, NULL, false },
322 /* Whereas these functions are always known to reside within the 26 bit
323 addressing range. */
324 { "short_call", 0, 0, false, true, true, NULL, false },
325 /* Specify the procedure call conventions for a function. */
326 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
327 false },
328 /* Interrupt Service Routines have special prologue and epilogue requirements. */
329 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
330 false },
331 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
332 false },
333 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
334 false },
335 #ifdef ARM_PE
336 /* ARM/PE has three new attributes:
337 interfacearm - ?
338 dllexport - for exporting a function/variable that will live in a dll
339 dllimport - for importing a function/variable from a dll
340
341 Microsoft allows multiple declspecs in one __declspec, separating
342 them with spaces. We do NOT support this. Instead, use __declspec
343 multiple times.
344 */
345 { "dllimport", 0, 0, true, false, false, NULL, false },
346 { "dllexport", 0, 0, true, false, false, NULL, false },
347 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
348 false },
349 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
350 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
351 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
352 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
353 false },
354 #endif
355 /* ARMv8-M Security Extensions support. */
356 { "cmse_nonsecure_entry", 0, 0, true, false, false,
357 arm_handle_cmse_nonsecure_entry, false },
358 { "cmse_nonsecure_call", 0, 0, true, false, false,
359 arm_handle_cmse_nonsecure_call, true },
360 { NULL, 0, 0, false, false, false, NULL, false }
361 };
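/* Illustrative usage sketch (not part of the original arm.c): how the
   attributes registered in the table above appear in user code.  The
   function names here are made up; the attribute spellings and arguments
   are the documented GCC ones.  */

/* Forced indirect call vs. call known to be within range.  */
extern void far_helper (void) __attribute__ ((long_call));
extern void near_helper (void) __attribute__ ((short_call));

/* Interrupt service routine; the optional argument is processed by
   arm_handle_isr_attribute.  */
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

/* ARMv8-M Security Extensions entry point (requires -mcmse).  */
int secure_gateway (int x) __attribute__ ((cmse_nonsecure_entry));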
362 \f
363 /* Initialize the GCC target structure. */
364 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
365 #undef TARGET_MERGE_DECL_ATTRIBUTES
366 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
367 #endif
368
369 #undef TARGET_LEGITIMIZE_ADDRESS
370 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
371
372 #undef TARGET_ATTRIBUTE_TABLE
373 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
374
375 #undef TARGET_INSERT_ATTRIBUTES
376 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
377
378 #undef TARGET_ASM_FILE_START
379 #define TARGET_ASM_FILE_START arm_file_start
380 #undef TARGET_ASM_FILE_END
381 #define TARGET_ASM_FILE_END arm_file_end
382
383 #undef TARGET_ASM_ALIGNED_SI_OP
384 #define TARGET_ASM_ALIGNED_SI_OP NULL
385 #undef TARGET_ASM_INTEGER
386 #define TARGET_ASM_INTEGER arm_assemble_integer
387
388 #undef TARGET_PRINT_OPERAND
389 #define TARGET_PRINT_OPERAND arm_print_operand
390 #undef TARGET_PRINT_OPERAND_ADDRESS
391 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
392 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
393 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
394
395 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
396 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
397
398 #undef TARGET_ASM_FUNCTION_PROLOGUE
399 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
400
401 #undef TARGET_ASM_FUNCTION_EPILOGUE
402 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
403
404 #undef TARGET_CAN_INLINE_P
405 #define TARGET_CAN_INLINE_P arm_can_inline_p
406
407 #undef TARGET_RELAYOUT_FUNCTION
408 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
409
410 #undef TARGET_OPTION_OVERRIDE
411 #define TARGET_OPTION_OVERRIDE arm_option_override
412
413 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
414 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
415
416 #undef TARGET_OPTION_RESTORE
417 #define TARGET_OPTION_RESTORE arm_option_restore
418
419 #undef TARGET_OPTION_PRINT
420 #define TARGET_OPTION_PRINT arm_option_print
421
422 #undef TARGET_COMP_TYPE_ATTRIBUTES
423 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
424
425 #undef TARGET_SCHED_CAN_SPECULATE_INSN
426 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
427
428 #undef TARGET_SCHED_MACRO_FUSION_P
429 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
430
431 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
432 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
433
434 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
435 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
436
437 #undef TARGET_SCHED_ADJUST_COST
438 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
439
440 #undef TARGET_SET_CURRENT_FUNCTION
441 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
442
443 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
444 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
445
446 #undef TARGET_SCHED_REORDER
447 #define TARGET_SCHED_REORDER arm_sched_reorder
448
449 #undef TARGET_REGISTER_MOVE_COST
450 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
451
452 #undef TARGET_MEMORY_MOVE_COST
453 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
454
455 #undef TARGET_ENCODE_SECTION_INFO
456 #ifdef ARM_PE
457 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
458 #else
459 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
460 #endif
461
462 #undef TARGET_STRIP_NAME_ENCODING
463 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
464
465 #undef TARGET_ASM_INTERNAL_LABEL
466 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
467
468 #undef TARGET_FLOATN_MODE
469 #define TARGET_FLOATN_MODE arm_floatn_mode
470
471 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
472 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
473
474 #undef TARGET_FUNCTION_VALUE
475 #define TARGET_FUNCTION_VALUE arm_function_value
476
477 #undef TARGET_LIBCALL_VALUE
478 #define TARGET_LIBCALL_VALUE arm_libcall_value
479
480 #undef TARGET_FUNCTION_VALUE_REGNO_P
481 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
482
483 #undef TARGET_ASM_OUTPUT_MI_THUNK
484 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
485 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
486 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
487
488 #undef TARGET_RTX_COSTS
489 #define TARGET_RTX_COSTS arm_rtx_costs
490 #undef TARGET_ADDRESS_COST
491 #define TARGET_ADDRESS_COST arm_address_cost
492
493 #undef TARGET_SHIFT_TRUNCATION_MASK
494 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
495 #undef TARGET_VECTOR_MODE_SUPPORTED_P
496 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
497 #undef TARGET_ARRAY_MODE_SUPPORTED_P
498 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
499 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
500 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
501 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
502 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
503 arm_autovectorize_vector_sizes
504
505 #undef TARGET_MACHINE_DEPENDENT_REORG
506 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
507
508 #undef TARGET_INIT_BUILTINS
509 #define TARGET_INIT_BUILTINS arm_init_builtins
510 #undef TARGET_EXPAND_BUILTIN
511 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
512 #undef TARGET_BUILTIN_DECL
513 #define TARGET_BUILTIN_DECL arm_builtin_decl
514
515 #undef TARGET_INIT_LIBFUNCS
516 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
517
518 #undef TARGET_PROMOTE_FUNCTION_MODE
519 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
520 #undef TARGET_PROMOTE_PROTOTYPES
521 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
522 #undef TARGET_PASS_BY_REFERENCE
523 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
524 #undef TARGET_ARG_PARTIAL_BYTES
525 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
526 #undef TARGET_FUNCTION_ARG
527 #define TARGET_FUNCTION_ARG arm_function_arg
528 #undef TARGET_FUNCTION_ARG_ADVANCE
529 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
530 #undef TARGET_FUNCTION_ARG_BOUNDARY
531 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
532
533 #undef TARGET_SETUP_INCOMING_VARARGS
534 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
535
536 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
537 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
538
539 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
540 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
541 #undef TARGET_TRAMPOLINE_INIT
542 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
543 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
544 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
545
546 #undef TARGET_WARN_FUNC_RETURN
547 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
548
549 #undef TARGET_DEFAULT_SHORT_ENUMS
550 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
551
552 #undef TARGET_ALIGN_ANON_BITFIELD
553 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
554
555 #undef TARGET_NARROW_VOLATILE_BITFIELD
556 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
557
558 #undef TARGET_CXX_GUARD_TYPE
559 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
560
561 #undef TARGET_CXX_GUARD_MASK_BIT
562 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
563
564 #undef TARGET_CXX_GET_COOKIE_SIZE
565 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
566
567 #undef TARGET_CXX_COOKIE_HAS_SIZE
568 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
569
570 #undef TARGET_CXX_CDTOR_RETURNS_THIS
571 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
572
573 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
574 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
575
576 #undef TARGET_CXX_USE_AEABI_ATEXIT
577 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
578
579 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
580 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
581 arm_cxx_determine_class_data_visibility
582
583 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
584 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
585
586 #undef TARGET_RETURN_IN_MSB
587 #define TARGET_RETURN_IN_MSB arm_return_in_msb
588
589 #undef TARGET_RETURN_IN_MEMORY
590 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
591
592 #undef TARGET_MUST_PASS_IN_STACK
593 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
594
595 #if ARM_UNWIND_INFO
596 #undef TARGET_ASM_UNWIND_EMIT
597 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
598
599 /* EABI unwinding tables use a different format for the typeinfo tables. */
600 #undef TARGET_ASM_TTYPE
601 #define TARGET_ASM_TTYPE arm_output_ttype
602
603 #undef TARGET_ARM_EABI_UNWINDER
604 #define TARGET_ARM_EABI_UNWINDER true
605
606 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
607 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
608
609 #endif /* ARM_UNWIND_INFO */
610
611 #undef TARGET_ASM_INIT_SECTIONS
612 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
613
614 #undef TARGET_DWARF_REGISTER_SPAN
615 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
616
617 #undef TARGET_CANNOT_COPY_INSN_P
618 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
619
620 #ifdef HAVE_AS_TLS
621 #undef TARGET_HAVE_TLS
622 #define TARGET_HAVE_TLS true
623 #endif
624
625 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
626 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
627
628 #undef TARGET_LEGITIMATE_CONSTANT_P
629 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
630
631 #undef TARGET_CANNOT_FORCE_CONST_MEM
632 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
633
634 #undef TARGET_MAX_ANCHOR_OFFSET
635 #define TARGET_MAX_ANCHOR_OFFSET 4095
636
637 /* The minimum is set such that the total size of the block
638 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
639 divisible by eight, ensuring natural spacing of anchors. */
640 #undef TARGET_MIN_ANCHOR_OFFSET
641 #define TARGET_MIN_ANCHOR_OFFSET -4088
642
643 #undef TARGET_SCHED_ISSUE_RATE
644 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
645
646 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
647 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
648 arm_first_cycle_multipass_dfa_lookahead
649
650 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
651 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
652 arm_first_cycle_multipass_dfa_lookahead_guard
653
654 #undef TARGET_MANGLE_TYPE
655 #define TARGET_MANGLE_TYPE arm_mangle_type
656
657 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
658 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
659
660 #undef TARGET_BUILD_BUILTIN_VA_LIST
661 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
662 #undef TARGET_EXPAND_BUILTIN_VA_START
663 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
664 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
665 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
666
667 #ifdef HAVE_AS_TLS
668 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
669 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
670 #endif
671
672 #undef TARGET_LEGITIMATE_ADDRESS_P
673 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
674
675 #undef TARGET_PREFERRED_RELOAD_CLASS
676 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
677
678 #undef TARGET_PROMOTED_TYPE
679 #define TARGET_PROMOTED_TYPE arm_promoted_type
680
681 #undef TARGET_SCALAR_MODE_SUPPORTED_P
682 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
683
684 #undef TARGET_COMPUTE_FRAME_LAYOUT
685 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
686
687 #undef TARGET_FRAME_POINTER_REQUIRED
688 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
689
690 #undef TARGET_CAN_ELIMINATE
691 #define TARGET_CAN_ELIMINATE arm_can_eliminate
692
693 #undef TARGET_CONDITIONAL_REGISTER_USAGE
694 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
695
696 #undef TARGET_CLASS_LIKELY_SPILLED_P
697 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
698
699 #undef TARGET_VECTORIZE_BUILTINS
700 #define TARGET_VECTORIZE_BUILTINS
701
702 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
703 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
704 arm_builtin_vectorized_function
705
706 #undef TARGET_VECTOR_ALIGNMENT
707 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
708
709 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
710 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
711 arm_vector_alignment_reachable
712
713 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
714 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
715 arm_builtin_support_vector_misalignment
716
717 #undef TARGET_PREFERRED_RENAME_CLASS
718 #define TARGET_PREFERRED_RENAME_CLASS \
719 arm_preferred_rename_class
720
721 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
722 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
723 arm_vectorize_vec_perm_const_ok
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
727 arm_builtin_vectorization_cost
728 #undef TARGET_VECTORIZE_ADD_STMT_COST
729 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
730
731 #undef TARGET_CANONICALIZE_COMPARISON
732 #define TARGET_CANONICALIZE_COMPARISON \
733 arm_canonicalize_comparison
734
735 #undef TARGET_ASAN_SHADOW_OFFSET
736 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
737
738 #undef MAX_INSN_PER_IT_BLOCK
739 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
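/* Illustrative sketch (not part of the original source) of what this
   limit means for Thumb-2 code generation.  With -mrestrict-it an IT
   block covers a single 16-bit instruction, e.g.

       it      eq
       moveq   r0, #1

   whereas the unrestricted encoding allows up to four conditional
   instructions under one IT, e.g.

       itttt   eq
       moveq   r0, #1
       addeq   r1, r1, #1
       ldreq   r2, [r3]
       streq   r2, [r4]
*/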
740
741 #undef TARGET_CAN_USE_DOLOOP_P
742 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
743
744 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
745 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
746
747 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
748 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
749
750 #undef TARGET_SCHED_FUSION_PRIORITY
751 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
752
753 #undef TARGET_ASM_FUNCTION_SECTION
754 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
755
756 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
757 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
758
759 #undef TARGET_SECTION_TYPE_FLAGS
760 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
761
762 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
763 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
764
765 #undef TARGET_C_EXCESS_PRECISION
766 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
767
768 /* Although the architecture reserves bits 0 and 1, only the former is
769 used for ARM/Thumb ISA selection in v7 and earlier versions. */
770 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
771 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
772
773 struct gcc_target targetm = TARGET_INITIALIZER;
774 \f
775 /* Obstack for minipool constant handling. */
776 static struct obstack minipool_obstack;
777 static char * minipool_startobj;
778
779 /* The maximum number of insns skipped which
780 will be conditionalised if possible. */
781 static int max_insns_skipped = 5;
782
783 extern FILE * asm_out_file;
784
785 /* True if we are currently building a constant table. */
786 int making_const_table;
787
788 /* The processor for which instructions should be scheduled. */
789 enum processor_type arm_tune = TARGET_CPU_arm_none;
790
791 /* The current tuning set. */
792 const struct tune_params *current_tune;
793
794 /* Which floating point hardware to schedule for. */
795 int arm_fpu_attr;
796
797 /* Used for Thumb call_via trampolines. */
798 rtx thumb_call_via_label[14];
799 static int thumb_call_reg_needed;
800
801 /* The bits in this mask specify which instruction scheduling options should
802 be used. */
803 unsigned int tune_flags = 0;
804
805 /* The highest ARM architecture version supported by the
806 target. */
807 enum base_architecture arm_base_arch = BASE_ARCH_0;
808
809 /* Active target architecture and tuning. */
810
811 struct arm_build_target arm_active_target;
812
813 /* The following are used in the arm.md file as equivalents to bits
814 in the above two flag variables. */
815
816 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
817 int arm_arch3m = 0;
818
819 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
820 int arm_arch4 = 0;
821
822 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
823 int arm_arch4t = 0;
824
825 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
826 int arm_arch5 = 0;
827
828 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
829 int arm_arch5e = 0;
830
831 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
832 int arm_arch5te = 0;
833
834 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
835 int arm_arch6 = 0;
836
837 /* Nonzero if this chip supports the ARM 6K extensions. */
838 int arm_arch6k = 0;
839
840 /* Nonzero if this chip supports the ARM 6KZ extensions. */
841 int arm_arch6kz = 0;
842
843 /* Nonzero if instructions present in ARMv6-M can be used. */
844 int arm_arch6m = 0;
845
846 /* Nonzero if this chip supports the ARM 7 extensions. */
847 int arm_arch7 = 0;
848
849 /* Nonzero if this chip supports the Large Physical Address Extension. */
850 int arm_arch_lpae = 0;
851
852 /* Nonzero if instructions not present in the 'M' profile can be used. */
853 int arm_arch_notm = 0;
854
855 /* Nonzero if instructions present in ARMv7E-M can be used. */
856 int arm_arch7em = 0;
857
858 /* Nonzero if instructions present in ARMv8 can be used. */
859 int arm_arch8 = 0;
860
861 /* Nonzero if this chip supports the ARMv8.1 extensions. */
862 int arm_arch8_1 = 0;
863
864 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
865 int arm_arch8_2 = 0;
866
867 /* Nonzero if this chip supports the FP16 instructions extension of ARM
868 Architecture 8.2. */
869 int arm_fp16_inst = 0;
870
871 /* Nonzero if this chip can benefit from load scheduling. */
872 int arm_ld_sched = 0;
873
874 /* Nonzero if this chip is a StrongARM. */
875 int arm_tune_strongarm = 0;
876
877 /* Nonzero if this chip supports Intel Wireless MMX technology. */
878 int arm_arch_iwmmxt = 0;
879
880 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
881 int arm_arch_iwmmxt2 = 0;
882
883 /* Nonzero if this chip is an XScale. */
884 int arm_arch_xscale = 0;
885
886 /* Nonzero if tuning for XScale. */
887 int arm_tune_xscale = 0;
888
889 /* Nonzero if we want to tune for stores that access the write-buffer.
890 This typically means an ARM6 or ARM7 with MMU or MPU. */
891 int arm_tune_wbuf = 0;
892
893 /* Nonzero if tuning for Cortex-A9. */
894 int arm_tune_cortex_a9 = 0;
895
896 /* Nonzero if we should define __THUMB_INTERWORK__ in the
897 preprocessor.
898 XXX This is a bit of a hack; it's intended to help work around
899 problems in GLD, which doesn't understand that armv5t code is
900 interworking clean. */
901 int arm_cpp_interwork = 0;
902
903 /* Nonzero if chip supports Thumb 1. */
904 int arm_arch_thumb1;
905
906 /* Nonzero if chip supports Thumb 2. */
907 int arm_arch_thumb2;
908
909 /* Nonzero if chip supports integer division instruction. */
910 int arm_arch_arm_hwdiv;
911 int arm_arch_thumb_hwdiv;
912
913 /* Nonzero if chip disallows volatile memory access in IT block. */
914 int arm_arch_no_volatile_ce;
915
916 /* Nonzero if we should use Neon to handle 64-bit operations rather
917 than core registers. */
918 int prefer_neon_for_64bits = 0;
919
920 /* Nonzero if we shouldn't use literal pools. */
921 bool arm_disable_literal_pool = false;
922
923 /* The register number to be used for the PIC offset register. */
924 unsigned arm_pic_register = INVALID_REGNUM;
925
926 enum arm_pcs arm_pcs_default;
927
928 /* For an explanation of these variables, see final_prescan_insn below. */
929 int arm_ccfsm_state;
930 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
931 enum arm_cond_code arm_current_cc;
932
933 rtx arm_target_insn;
934 int arm_target_label;
935 /* The number of conditionally executed insns, including the current insn. */
936 int arm_condexec_count = 0;
937 /* A bitmask specifying the patterns for the IT block.
938 Zero means do not output an IT block before this insn. */
939 int arm_condexec_mask = 0;
940 /* The number of bits used in arm_condexec_mask. */
941 int arm_condexec_masklen = 0;
942
943 /* Nonzero if chip supports the ARMv8 CRC instructions. */
944 int arm_arch_crc = 0;
945
946 /* Nonzero if chip supports the ARMv8-M security extensions. */
947 int arm_arch_cmse = 0;
948
949 /* Nonzero if the core has a very small, high-latency multiply unit. */
950 int arm_m_profile_small_mul = 0;
951
952 /* The condition codes of the ARM, and the inverse function. */
953 static const char * const arm_condition_codes[] =
954 {
955 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
956 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
957 };
958
959 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
960 int arm_regs_in_sequence[] =
961 {
962 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
963 };
964
965 #define ARM_LSL_NAME "lsl"
966 #define streq(string1, string2) (strcmp (string1, string2) == 0)
967
968 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
969 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
970 | (1 << PIC_OFFSET_TABLE_REGNUM)))
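/* Illustrative note (an assumption based on the usual ARM register
   numbering, where SP is r13 and PC is r15): those two registers already
   lie outside the low byte selected by 0xff, so THUMB2_WORK_REGS
   effectively yields the low registers r0-r7 minus the Thumb hard frame
   pointer and, when one is in use, the PIC offset table register.  */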
971 \f
972 /* Initialization code. */
973
974 struct processors
975 {
976 const char *const name;
977 enum processor_type core;
978 unsigned int tune_flags;
979 const char *arch;
980 enum base_architecture base_arch;
981 enum isa_feature isa_bits[isa_num_bits];
982 const struct tune_params *const tune;
983 };
984
985
986 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
987 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
988 { \
989 num_slots, \
990 l1_size, \
991 l1_line_size \
992 }
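/* Illustrative only, with hypothetical values: a tuning that does treat
   prefetching as beneficial would use something like
       ARM_PREFETCH_BENEFICIAL (4, 32, 32)
   -- number of prefetch slots, L1 cache size, L1 cache line size -- which
   expands to the initializer { 4, 32, 32 }, while
   ARM_PREFETCH_NOT_BENEFICIAL supplies { 0, -1, -1 }.  */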
993
994 /* arm generic vectorizer costs. */
995 static const
996 struct cpu_vec_costs arm_default_vec_cost = {
997 1, /* scalar_stmt_cost. */
998 1, /* scalar_load_cost. */
999 1, /* scalar_store_cost. */
1000 1, /* vec_stmt_cost. */
1001 1, /* vec_to_scalar_cost. */
1002 1, /* scalar_to_vec_cost. */
1003 1, /* vec_align_load_cost. */
1004 1, /* vec_unalign_load_cost. */
1005 1, /* vec_unalign_store_cost. */
1006 1, /* vec_store_cost. */
1007 3, /* cond_taken_branch_cost. */
1008 1, /* cond_not_taken_branch_cost. */
1009 };
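/* These unit costs are only meaningful relative to one another; they are
   consumed by the vectorizer cost hooks declared earlier
   (arm_builtin_vectorization_cost, arm_add_stmt_cost).  For example, a
   taken conditional branch (3) is modelled as three times as expensive
   as a simple scalar statement (1).  */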
1010
1011 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1012 #include "aarch-cost-tables.h"
1013
1014
1015
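/* For scale: COSTS_N_INSNS (N) (defined in rtl.h) represents N times the
   cost of one simple instruction, so the COSTS_N_INSNS (30) "idiv" entry
   below models integer division as roughly thirty instructions' worth of
   work on a core without hardware divide.  */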
1016 const struct cpu_cost_table cortexa9_extra_costs =
1017 {
1018 /* ALU */
1019 {
1020 0, /* arith. */
1021 0, /* logical. */
1022 0, /* shift. */
1023 COSTS_N_INSNS (1), /* shift_reg. */
1024 COSTS_N_INSNS (1), /* arith_shift. */
1025 COSTS_N_INSNS (2), /* arith_shift_reg. */
1026 0, /* log_shift. */
1027 COSTS_N_INSNS (1), /* log_shift_reg. */
1028 COSTS_N_INSNS (1), /* extend. */
1029 COSTS_N_INSNS (2), /* extend_arith. */
1030 COSTS_N_INSNS (1), /* bfi. */
1031 COSTS_N_INSNS (1), /* bfx. */
1032 0, /* clz. */
1033 0, /* rev. */
1034 0, /* non_exec. */
1035 true /* non_exec_costs_exec. */
1036 },
1037 {
1038 /* MULT SImode */
1039 {
1040 COSTS_N_INSNS (3), /* simple. */
1041 COSTS_N_INSNS (3), /* flag_setting. */
1042 COSTS_N_INSNS (2), /* extend. */
1043 COSTS_N_INSNS (3), /* add. */
1044 COSTS_N_INSNS (2), /* extend_add. */
1045 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1046 },
1047 /* MULT DImode */
1048 {
1049 0, /* simple (N/A). */
1050 0, /* flag_setting (N/A). */
1051 COSTS_N_INSNS (4), /* extend. */
1052 0, /* add (N/A). */
1053 COSTS_N_INSNS (4), /* extend_add. */
1054 0 /* idiv (N/A). */
1055 }
1056 },
1057 /* LD/ST */
1058 {
1059 COSTS_N_INSNS (2), /* load. */
1060 COSTS_N_INSNS (2), /* load_sign_extend. */
1061 COSTS_N_INSNS (2), /* ldrd. */
1062 COSTS_N_INSNS (2), /* ldm_1st. */
1063 1, /* ldm_regs_per_insn_1st. */
1064 2, /* ldm_regs_per_insn_subsequent. */
1065 COSTS_N_INSNS (5), /* loadf. */
1066 COSTS_N_INSNS (5), /* loadd. */
1067 COSTS_N_INSNS (1), /* load_unaligned. */
1068 COSTS_N_INSNS (2), /* store. */
1069 COSTS_N_INSNS (2), /* strd. */
1070 COSTS_N_INSNS (2), /* stm_1st. */
1071 1, /* stm_regs_per_insn_1st. */
1072 2, /* stm_regs_per_insn_subsequent. */
1073 COSTS_N_INSNS (1), /* storef. */
1074 COSTS_N_INSNS (1), /* stored. */
1075 COSTS_N_INSNS (1), /* store_unaligned. */
1076 COSTS_N_INSNS (1), /* loadv. */
1077 COSTS_N_INSNS (1) /* storev. */
1078 },
1079 {
1080 /* FP SFmode */
1081 {
1082 COSTS_N_INSNS (14), /* div. */
1083 COSTS_N_INSNS (4), /* mult. */
1084 COSTS_N_INSNS (7), /* mult_addsub. */
1085 COSTS_N_INSNS (30), /* fma. */
1086 COSTS_N_INSNS (3), /* addsub. */
1087 COSTS_N_INSNS (1), /* fpconst. */
1088 COSTS_N_INSNS (1), /* neg. */
1089 COSTS_N_INSNS (3), /* compare. */
1090 COSTS_N_INSNS (3), /* widen. */
1091 COSTS_N_INSNS (3), /* narrow. */
1092 COSTS_N_INSNS (3), /* toint. */
1093 COSTS_N_INSNS (3), /* fromint. */
1094 COSTS_N_INSNS (3) /* roundint. */
1095 },
1096 /* FP DFmode */
1097 {
1098 COSTS_N_INSNS (24), /* div. */
1099 COSTS_N_INSNS (5), /* mult. */
1100 COSTS_N_INSNS (8), /* mult_addsub. */
1101 COSTS_N_INSNS (30), /* fma. */
1102 COSTS_N_INSNS (3), /* addsub. */
1103 COSTS_N_INSNS (1), /* fpconst. */
1104 COSTS_N_INSNS (1), /* neg. */
1105 COSTS_N_INSNS (3), /* compare. */
1106 COSTS_N_INSNS (3), /* widen. */
1107 COSTS_N_INSNS (3), /* narrow. */
1108 COSTS_N_INSNS (3), /* toint. */
1109 COSTS_N_INSNS (3), /* fromint. */
1110 COSTS_N_INSNS (3) /* roundint. */
1111 }
1112 },
1113 /* Vector */
1114 {
1115 COSTS_N_INSNS (1) /* alu. */
1116 }
1117 };
1118
1119 const struct cpu_cost_table cortexa8_extra_costs =
1120 {
1121 /* ALU */
1122 {
1123 0, /* arith. */
1124 0, /* logical. */
1125 COSTS_N_INSNS (1), /* shift. */
1126 0, /* shift_reg. */
1127 COSTS_N_INSNS (1), /* arith_shift. */
1128 0, /* arith_shift_reg. */
1129 COSTS_N_INSNS (1), /* log_shift. */
1130 0, /* log_shift_reg. */
1131 0, /* extend. */
1132 0, /* extend_arith. */
1133 0, /* bfi. */
1134 0, /* bfx. */
1135 0, /* clz. */
1136 0, /* rev. */
1137 0, /* non_exec. */
1138 true /* non_exec_costs_exec. */
1139 },
1140 {
1141 /* MULT SImode */
1142 {
1143 COSTS_N_INSNS (1), /* simple. */
1144 COSTS_N_INSNS (1), /* flag_setting. */
1145 COSTS_N_INSNS (1), /* extend. */
1146 COSTS_N_INSNS (1), /* add. */
1147 COSTS_N_INSNS (1), /* extend_add. */
1148 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1149 },
1150 /* MULT DImode */
1151 {
1152 0, /* simple (N/A). */
1153 0, /* flag_setting (N/A). */
1154 COSTS_N_INSNS (2), /* extend. */
1155 0, /* add (N/A). */
1156 COSTS_N_INSNS (2), /* extend_add. */
1157 0 /* idiv (N/A). */
1158 }
1159 },
1160 /* LD/ST */
1161 {
1162 COSTS_N_INSNS (1), /* load. */
1163 COSTS_N_INSNS (1), /* load_sign_extend. */
1164 COSTS_N_INSNS (1), /* ldrd. */
1165 COSTS_N_INSNS (1), /* ldm_1st. */
1166 1, /* ldm_regs_per_insn_1st. */
1167 2, /* ldm_regs_per_insn_subsequent. */
1168 COSTS_N_INSNS (1), /* loadf. */
1169 COSTS_N_INSNS (1), /* loadd. */
1170 COSTS_N_INSNS (1), /* load_unaligned. */
1171 COSTS_N_INSNS (1), /* store. */
1172 COSTS_N_INSNS (1), /* strd. */
1173 COSTS_N_INSNS (1), /* stm_1st. */
1174 1, /* stm_regs_per_insn_1st. */
1175 2, /* stm_regs_per_insn_subsequent. */
1176 COSTS_N_INSNS (1), /* storef. */
1177 COSTS_N_INSNS (1), /* stored. */
1178 COSTS_N_INSNS (1), /* store_unaligned. */
1179 COSTS_N_INSNS (1), /* loadv. */
1180 COSTS_N_INSNS (1) /* storev. */
1181 },
1182 {
1183 /* FP SFmode */
1184 {
1185 COSTS_N_INSNS (36), /* div. */
1186 COSTS_N_INSNS (11), /* mult. */
1187 COSTS_N_INSNS (20), /* mult_addsub. */
1188 COSTS_N_INSNS (30), /* fma. */
1189 COSTS_N_INSNS (9), /* addsub. */
1190 COSTS_N_INSNS (3), /* fpconst. */
1191 COSTS_N_INSNS (3), /* neg. */
1192 COSTS_N_INSNS (6), /* compare. */
1193 COSTS_N_INSNS (4), /* widen. */
1194 COSTS_N_INSNS (4), /* narrow. */
1195 COSTS_N_INSNS (8), /* toint. */
1196 COSTS_N_INSNS (8), /* fromint. */
1197 COSTS_N_INSNS (8) /* roundint. */
1198 },
1199 /* FP DFmode */
1200 {
1201 COSTS_N_INSNS (64), /* div. */
1202 COSTS_N_INSNS (16), /* mult. */
1203 COSTS_N_INSNS (25), /* mult_addsub. */
1204 COSTS_N_INSNS (30), /* fma. */
1205 COSTS_N_INSNS (9), /* addsub. */
1206 COSTS_N_INSNS (3), /* fpconst. */
1207 COSTS_N_INSNS (3), /* neg. */
1208 COSTS_N_INSNS (6), /* compare. */
1209 COSTS_N_INSNS (6), /* widen. */
1210 COSTS_N_INSNS (6), /* narrow. */
1211 COSTS_N_INSNS (8), /* toint. */
1212 COSTS_N_INSNS (8), /* fromint. */
1213 COSTS_N_INSNS (8) /* roundint. */
1214 }
1215 },
1216 /* Vector */
1217 {
1218 COSTS_N_INSNS (1) /* alu. */
1219 }
1220 };
1221
1222 const struct cpu_cost_table cortexa5_extra_costs =
1223 {
1224 /* ALU */
1225 {
1226 0, /* arith. */
1227 0, /* logical. */
1228 COSTS_N_INSNS (1), /* shift. */
1229 COSTS_N_INSNS (1), /* shift_reg. */
1230 COSTS_N_INSNS (1), /* arith_shift. */
1231 COSTS_N_INSNS (1), /* arith_shift_reg. */
1232 COSTS_N_INSNS (1), /* log_shift. */
1233 COSTS_N_INSNS (1), /* log_shift_reg. */
1234 COSTS_N_INSNS (1), /* extend. */
1235 COSTS_N_INSNS (1), /* extend_arith. */
1236 COSTS_N_INSNS (1), /* bfi. */
1237 COSTS_N_INSNS (1), /* bfx. */
1238 COSTS_N_INSNS (1), /* clz. */
1239 COSTS_N_INSNS (1), /* rev. */
1240 0, /* non_exec. */
1241 true /* non_exec_costs_exec. */
1242 },
1243
1244 {
1245 /* MULT SImode */
1246 {
1247 0, /* simple. */
1248 COSTS_N_INSNS (1), /* flag_setting. */
1249 COSTS_N_INSNS (1), /* extend. */
1250 COSTS_N_INSNS (1), /* add. */
1251 COSTS_N_INSNS (1), /* extend_add. */
1252 COSTS_N_INSNS (7) /* idiv. */
1253 },
1254 /* MULT DImode */
1255 {
1256 0, /* simple (N/A). */
1257 0, /* flag_setting (N/A). */
1258 COSTS_N_INSNS (1), /* extend. */
1259 0, /* add. */
1260 COSTS_N_INSNS (2), /* extend_add. */
1261 0 /* idiv (N/A). */
1262 }
1263 },
1264 /* LD/ST */
1265 {
1266 COSTS_N_INSNS (1), /* load. */
1267 COSTS_N_INSNS (1), /* load_sign_extend. */
1268 COSTS_N_INSNS (6), /* ldrd. */
1269 COSTS_N_INSNS (1), /* ldm_1st. */
1270 1, /* ldm_regs_per_insn_1st. */
1271 2, /* ldm_regs_per_insn_subsequent. */
1272 COSTS_N_INSNS (2), /* loadf. */
1273 COSTS_N_INSNS (4), /* loadd. */
1274 COSTS_N_INSNS (1), /* load_unaligned. */
1275 COSTS_N_INSNS (1), /* store. */
1276 COSTS_N_INSNS (3), /* strd. */
1277 COSTS_N_INSNS (1), /* stm_1st. */
1278 1, /* stm_regs_per_insn_1st. */
1279 2, /* stm_regs_per_insn_subsequent. */
1280 COSTS_N_INSNS (2), /* storef. */
1281 COSTS_N_INSNS (2), /* stored. */
1282 COSTS_N_INSNS (1), /* store_unaligned. */
1283 COSTS_N_INSNS (1), /* loadv. */
1284 COSTS_N_INSNS (1) /* storev. */
1285 },
1286 {
1287 /* FP SFmode */
1288 {
1289 COSTS_N_INSNS (15), /* div. */
1290 COSTS_N_INSNS (3), /* mult. */
1291 COSTS_N_INSNS (7), /* mult_addsub. */
1292 COSTS_N_INSNS (7), /* fma. */
1293 COSTS_N_INSNS (3), /* addsub. */
1294 COSTS_N_INSNS (3), /* fpconst. */
1295 COSTS_N_INSNS (3), /* neg. */
1296 COSTS_N_INSNS (3), /* compare. */
1297 COSTS_N_INSNS (3), /* widen. */
1298 COSTS_N_INSNS (3), /* narrow. */
1299 COSTS_N_INSNS (3), /* toint. */
1300 COSTS_N_INSNS (3), /* fromint. */
1301 COSTS_N_INSNS (3) /* roundint. */
1302 },
1303 /* FP DFmode */
1304 {
1305 COSTS_N_INSNS (30), /* div. */
1306 COSTS_N_INSNS (6), /* mult. */
1307 COSTS_N_INSNS (10), /* mult_addsub. */
1308 COSTS_N_INSNS (7), /* fma. */
1309 COSTS_N_INSNS (3), /* addsub. */
1310 COSTS_N_INSNS (3), /* fpconst. */
1311 COSTS_N_INSNS (3), /* neg. */
1312 COSTS_N_INSNS (3), /* compare. */
1313 COSTS_N_INSNS (3), /* widen. */
1314 COSTS_N_INSNS (3), /* narrow. */
1315 COSTS_N_INSNS (3), /* toint. */
1316 COSTS_N_INSNS (3), /* fromint. */
1317 COSTS_N_INSNS (3) /* roundint. */
1318 }
1319 },
1320 /* Vector */
1321 {
1322 COSTS_N_INSNS (1) /* alu. */
1323 }
1324 };
1325
1326
1327 const struct cpu_cost_table cortexa7_extra_costs =
1328 {
1329 /* ALU */
1330 {
1331 0, /* arith. */
1332 0, /* logical. */
1333 COSTS_N_INSNS (1), /* shift. */
1334 COSTS_N_INSNS (1), /* shift_reg. */
1335 COSTS_N_INSNS (1), /* arith_shift. */
1336 COSTS_N_INSNS (1), /* arith_shift_reg. */
1337 COSTS_N_INSNS (1), /* log_shift. */
1338 COSTS_N_INSNS (1), /* log_shift_reg. */
1339 COSTS_N_INSNS (1), /* extend. */
1340 COSTS_N_INSNS (1), /* extend_arith. */
1341 COSTS_N_INSNS (1), /* bfi. */
1342 COSTS_N_INSNS (1), /* bfx. */
1343 COSTS_N_INSNS (1), /* clz. */
1344 COSTS_N_INSNS (1), /* rev. */
1345 0, /* non_exec. */
1346 true /* non_exec_costs_exec. */
1347 },
1348
1349 {
1350 /* MULT SImode */
1351 {
1352 0, /* simple. */
1353 COSTS_N_INSNS (1), /* flag_setting. */
1354 COSTS_N_INSNS (1), /* extend. */
1355 COSTS_N_INSNS (1), /* add. */
1356 COSTS_N_INSNS (1), /* extend_add. */
1357 COSTS_N_INSNS (7) /* idiv. */
1358 },
1359 /* MULT DImode */
1360 {
1361 0, /* simple (N/A). */
1362 0, /* flag_setting (N/A). */
1363 COSTS_N_INSNS (1), /* extend. */
1364 0, /* add. */
1365 COSTS_N_INSNS (2), /* extend_add. */
1366 0 /* idiv (N/A). */
1367 }
1368 },
1369 /* LD/ST */
1370 {
1371 COSTS_N_INSNS (1), /* load. */
1372 COSTS_N_INSNS (1), /* load_sign_extend. */
1373 COSTS_N_INSNS (3), /* ldrd. */
1374 COSTS_N_INSNS (1), /* ldm_1st. */
1375 1, /* ldm_regs_per_insn_1st. */
1376 2, /* ldm_regs_per_insn_subsequent. */
1377 COSTS_N_INSNS (2), /* loadf. */
1378 COSTS_N_INSNS (2), /* loadd. */
1379 COSTS_N_INSNS (1), /* load_unaligned. */
1380 COSTS_N_INSNS (1), /* store. */
1381 COSTS_N_INSNS (3), /* strd. */
1382 COSTS_N_INSNS (1), /* stm_1st. */
1383 1, /* stm_regs_per_insn_1st. */
1384 2, /* stm_regs_per_insn_subsequent. */
1385 COSTS_N_INSNS (2), /* storef. */
1386 COSTS_N_INSNS (2), /* stored. */
1387 COSTS_N_INSNS (1), /* store_unaligned. */
1388 COSTS_N_INSNS (1), /* loadv. */
1389 COSTS_N_INSNS (1) /* storev. */
1390 },
1391 {
1392 /* FP SFmode */
1393 {
1394 COSTS_N_INSNS (15), /* div. */
1395 COSTS_N_INSNS (3), /* mult. */
1396 COSTS_N_INSNS (7), /* mult_addsub. */
1397 COSTS_N_INSNS (7), /* fma. */
1398 COSTS_N_INSNS (3), /* addsub. */
1399 COSTS_N_INSNS (3), /* fpconst. */
1400 COSTS_N_INSNS (3), /* neg. */
1401 COSTS_N_INSNS (3), /* compare. */
1402 COSTS_N_INSNS (3), /* widen. */
1403 COSTS_N_INSNS (3), /* narrow. */
1404 COSTS_N_INSNS (3), /* toint. */
1405 COSTS_N_INSNS (3), /* fromint. */
1406 COSTS_N_INSNS (3) /* roundint. */
1407 },
1408 /* FP DFmode */
1409 {
1410 COSTS_N_INSNS (30), /* div. */
1411 COSTS_N_INSNS (6), /* mult. */
1412 COSTS_N_INSNS (10), /* mult_addsub. */
1413 COSTS_N_INSNS (7), /* fma. */
1414 COSTS_N_INSNS (3), /* addsub. */
1415 COSTS_N_INSNS (3), /* fpconst. */
1416 COSTS_N_INSNS (3), /* neg. */
1417 COSTS_N_INSNS (3), /* compare. */
1418 COSTS_N_INSNS (3), /* widen. */
1419 COSTS_N_INSNS (3), /* narrow. */
1420 COSTS_N_INSNS (3), /* toint. */
1421 COSTS_N_INSNS (3), /* fromint. */
1422 COSTS_N_INSNS (3) /* roundint. */
1423 }
1424 },
1425 /* Vector */
1426 {
1427 COSTS_N_INSNS (1) /* alu. */
1428 }
1429 };
1430
1431 const struct cpu_cost_table cortexa12_extra_costs =
1432 {
1433 /* ALU */
1434 {
1435 0, /* arith. */
1436 0, /* logical. */
1437 0, /* shift. */
1438 COSTS_N_INSNS (1), /* shift_reg. */
1439 COSTS_N_INSNS (1), /* arith_shift. */
1440 COSTS_N_INSNS (1), /* arith_shift_reg. */
1441 COSTS_N_INSNS (1), /* log_shift. */
1442 COSTS_N_INSNS (1), /* log_shift_reg. */
1443 0, /* extend. */
1444 COSTS_N_INSNS (1), /* extend_arith. */
1445 0, /* bfi. */
1446 COSTS_N_INSNS (1), /* bfx. */
1447 COSTS_N_INSNS (1), /* clz. */
1448 COSTS_N_INSNS (1), /* rev. */
1449 0, /* non_exec. */
1450 true /* non_exec_costs_exec. */
1451 },
1452 /* MULT SImode */
1453 {
1454 {
1455 COSTS_N_INSNS (2), /* simple. */
1456 COSTS_N_INSNS (3), /* flag_setting. */
1457 COSTS_N_INSNS (2), /* extend. */
1458 COSTS_N_INSNS (3), /* add. */
1459 COSTS_N_INSNS (2), /* extend_add. */
1460 COSTS_N_INSNS (18) /* idiv. */
1461 },
1462 /* MULT DImode */
1463 {
1464 0, /* simple (N/A). */
1465 0, /* flag_setting (N/A). */
1466 COSTS_N_INSNS (3), /* extend. */
1467 0, /* add (N/A). */
1468 COSTS_N_INSNS (3), /* extend_add. */
1469 0 /* idiv (N/A). */
1470 }
1471 },
1472 /* LD/ST */
1473 {
1474 COSTS_N_INSNS (3), /* load. */
1475 COSTS_N_INSNS (3), /* load_sign_extend. */
1476 COSTS_N_INSNS (3), /* ldrd. */
1477 COSTS_N_INSNS (3), /* ldm_1st. */
1478 1, /* ldm_regs_per_insn_1st. */
1479 2, /* ldm_regs_per_insn_subsequent. */
1480 COSTS_N_INSNS (3), /* loadf. */
1481 COSTS_N_INSNS (3), /* loadd. */
1482 0, /* load_unaligned. */
1483 0, /* store. */
1484 0, /* strd. */
1485 0, /* stm_1st. */
1486 1, /* stm_regs_per_insn_1st. */
1487 2, /* stm_regs_per_insn_subsequent. */
1488 COSTS_N_INSNS (2), /* storef. */
1489 COSTS_N_INSNS (2), /* stored. */
1490 0, /* store_unaligned. */
1491 COSTS_N_INSNS (1), /* loadv. */
1492 COSTS_N_INSNS (1) /* storev. */
1493 },
1494 {
1495 /* FP SFmode */
1496 {
1497 COSTS_N_INSNS (17), /* div. */
1498 COSTS_N_INSNS (4), /* mult. */
1499 COSTS_N_INSNS (8), /* mult_addsub. */
1500 COSTS_N_INSNS (8), /* fma. */
1501 COSTS_N_INSNS (4), /* addsub. */
1502 COSTS_N_INSNS (2), /* fpconst. */
1503 COSTS_N_INSNS (2), /* neg. */
1504 COSTS_N_INSNS (2), /* compare. */
1505 COSTS_N_INSNS (4), /* widen. */
1506 COSTS_N_INSNS (4), /* narrow. */
1507 COSTS_N_INSNS (4), /* toint. */
1508 COSTS_N_INSNS (4), /* fromint. */
1509 COSTS_N_INSNS (4) /* roundint. */
1510 },
1511 /* FP DFmode */
1512 {
1513 COSTS_N_INSNS (31), /* div. */
1514 COSTS_N_INSNS (4), /* mult. */
1515 COSTS_N_INSNS (8), /* mult_addsub. */
1516 COSTS_N_INSNS (8), /* fma. */
1517 COSTS_N_INSNS (4), /* addsub. */
1518 COSTS_N_INSNS (2), /* fpconst. */
1519 COSTS_N_INSNS (2), /* neg. */
1520 COSTS_N_INSNS (2), /* compare. */
1521 COSTS_N_INSNS (4), /* widen. */
1522 COSTS_N_INSNS (4), /* narrow. */
1523 COSTS_N_INSNS (4), /* toint. */
1524 COSTS_N_INSNS (4), /* fromint. */
1525 COSTS_N_INSNS (4) /* roundint. */
1526 }
1527 },
1528 /* Vector */
1529 {
1530 COSTS_N_INSNS (1) /* alu. */
1531 }
1532 };
1533
1534 const struct cpu_cost_table cortexa15_extra_costs =
1535 {
1536 /* ALU */
1537 {
1538 0, /* arith. */
1539 0, /* logical. */
1540 0, /* shift. */
1541 0, /* shift_reg. */
1542 COSTS_N_INSNS (1), /* arith_shift. */
1543 COSTS_N_INSNS (1), /* arith_shift_reg. */
1544 COSTS_N_INSNS (1), /* log_shift. */
1545 COSTS_N_INSNS (1), /* log_shift_reg. */
1546 0, /* extend. */
1547 COSTS_N_INSNS (1), /* extend_arith. */
1548 COSTS_N_INSNS (1), /* bfi. */
1549 0, /* bfx. */
1550 0, /* clz. */
1551 0, /* rev. */
1552 0, /* non_exec. */
1553 true /* non_exec_costs_exec. */
1554 },
1555 /* MULT SImode */
1556 {
1557 {
1558 COSTS_N_INSNS (2), /* simple. */
1559 COSTS_N_INSNS (3), /* flag_setting. */
1560 COSTS_N_INSNS (2), /* extend. */
1561 COSTS_N_INSNS (2), /* add. */
1562 COSTS_N_INSNS (2), /* extend_add. */
1563 COSTS_N_INSNS (18) /* idiv. */
1564 },
1565 /* MULT DImode */
1566 {
1567 0, /* simple (N/A). */
1568 0, /* flag_setting (N/A). */
1569 COSTS_N_INSNS (3), /* extend. */
1570 0, /* add (N/A). */
1571 COSTS_N_INSNS (3), /* extend_add. */
1572 0 /* idiv (N/A). */
1573 }
1574 },
1575 /* LD/ST */
1576 {
1577 COSTS_N_INSNS (3), /* load. */
1578 COSTS_N_INSNS (3), /* load_sign_extend. */
1579 COSTS_N_INSNS (3), /* ldrd. */
1580 COSTS_N_INSNS (4), /* ldm_1st. */
1581 1, /* ldm_regs_per_insn_1st. */
1582 2, /* ldm_regs_per_insn_subsequent. */
1583 COSTS_N_INSNS (4), /* loadf. */
1584 COSTS_N_INSNS (4), /* loadd. */
1585 0, /* load_unaligned. */
1586 0, /* store. */
1587 0, /* strd. */
1588 COSTS_N_INSNS (1), /* stm_1st. */
1589 1, /* stm_regs_per_insn_1st. */
1590 2, /* stm_regs_per_insn_subsequent. */
1591 0, /* storef. */
1592 0, /* stored. */
1593 0, /* store_unaligned. */
1594 COSTS_N_INSNS (1), /* loadv. */
1595 COSTS_N_INSNS (1) /* storev. */
1596 },
1597 {
1598 /* FP SFmode */
1599 {
1600 COSTS_N_INSNS (17), /* div. */
1601 COSTS_N_INSNS (4), /* mult. */
1602 COSTS_N_INSNS (8), /* mult_addsub. */
1603 COSTS_N_INSNS (8), /* fma. */
1604 COSTS_N_INSNS (4), /* addsub. */
1605 COSTS_N_INSNS (2), /* fpconst. */
1606 COSTS_N_INSNS (2), /* neg. */
1607 COSTS_N_INSNS (5), /* compare. */
1608 COSTS_N_INSNS (4), /* widen. */
1609 COSTS_N_INSNS (4), /* narrow. */
1610 COSTS_N_INSNS (4), /* toint. */
1611 COSTS_N_INSNS (4), /* fromint. */
1612 COSTS_N_INSNS (4) /* roundint. */
1613 },
1614 /* FP DFmode */
1615 {
1616 COSTS_N_INSNS (31), /* div. */
1617 COSTS_N_INSNS (4), /* mult. */
1618 COSTS_N_INSNS (8), /* mult_addsub. */
1619 COSTS_N_INSNS (8), /* fma. */
1620 COSTS_N_INSNS (4), /* addsub. */
1621 COSTS_N_INSNS (2), /* fpconst. */
1622 COSTS_N_INSNS (2), /* neg. */
1623 COSTS_N_INSNS (2), /* compare. */
1624 COSTS_N_INSNS (4), /* widen. */
1625 COSTS_N_INSNS (4), /* narrow. */
1626 COSTS_N_INSNS (4), /* toint. */
1627 COSTS_N_INSNS (4), /* fromint. */
1628 COSTS_N_INSNS (4) /* roundint. */
1629 }
1630 },
1631 /* Vector */
1632 {
1633 COSTS_N_INSNS (1) /* alu. */
1634 }
1635 };
1636
1637 const struct cpu_cost_table v7m_extra_costs =
1638 {
1639 /* ALU */
1640 {
1641 0, /* arith. */
1642 0, /* logical. */
1643 0, /* shift. */
1644 0, /* shift_reg. */
1645 0, /* arith_shift. */
1646 COSTS_N_INSNS (1), /* arith_shift_reg. */
1647 0, /* log_shift. */
1648 COSTS_N_INSNS (1), /* log_shift_reg. */
1649 0, /* extend. */
1650 COSTS_N_INSNS (1), /* extend_arith. */
1651 0, /* bfi. */
1652 0, /* bfx. */
1653 0, /* clz. */
1654 0, /* rev. */
1655 COSTS_N_INSNS (1), /* non_exec. */
1656 false /* non_exec_costs_exec. */
1657 },
1658 {
1659 /* MULT SImode */
1660 {
1661 COSTS_N_INSNS (1), /* simple. */
1662 COSTS_N_INSNS (1), /* flag_setting. */
1663 COSTS_N_INSNS (2), /* extend. */
1664 COSTS_N_INSNS (1), /* add. */
1665 COSTS_N_INSNS (3), /* extend_add. */
1666 COSTS_N_INSNS (8) /* idiv. */
1667 },
1668 /* MULT DImode */
1669 {
1670 0, /* simple (N/A). */
1671 0, /* flag_setting (N/A). */
1672 COSTS_N_INSNS (2), /* extend. */
1673 0, /* add (N/A). */
1674 COSTS_N_INSNS (3), /* extend_add. */
1675 0 /* idiv (N/A). */
1676 }
1677 },
1678 /* LD/ST */
1679 {
1680 COSTS_N_INSNS (2), /* load. */
1681 0, /* load_sign_extend. */
1682 COSTS_N_INSNS (3), /* ldrd. */
1683 COSTS_N_INSNS (2), /* ldm_1st. */
1684 1, /* ldm_regs_per_insn_1st. */
1685 1, /* ldm_regs_per_insn_subsequent. */
1686 COSTS_N_INSNS (2), /* loadf. */
1687 COSTS_N_INSNS (3), /* loadd. */
1688 COSTS_N_INSNS (1), /* load_unaligned. */
1689 COSTS_N_INSNS (2), /* store. */
1690 COSTS_N_INSNS (3), /* strd. */
1691 COSTS_N_INSNS (2), /* stm_1st. */
1692 1, /* stm_regs_per_insn_1st. */
1693 1, /* stm_regs_per_insn_subsequent. */
1694 COSTS_N_INSNS (2), /* storef. */
1695 COSTS_N_INSNS (3), /* stored. */
1696 COSTS_N_INSNS (1), /* store_unaligned. */
1697 COSTS_N_INSNS (1), /* loadv. */
1698 COSTS_N_INSNS (1) /* storev. */
1699 },
1700 {
1701 /* FP SFmode */
1702 {
1703 COSTS_N_INSNS (7), /* div. */
1704 COSTS_N_INSNS (2), /* mult. */
1705 COSTS_N_INSNS (5), /* mult_addsub. */
1706 COSTS_N_INSNS (3), /* fma. */
1707 COSTS_N_INSNS (1), /* addsub. */
1708 0, /* fpconst. */
1709 0, /* neg. */
1710 0, /* compare. */
1711 0, /* widen. */
1712 0, /* narrow. */
1713 0, /* toint. */
1714 0, /* fromint. */
1715 0 /* roundint. */
1716 },
1717 /* FP DFmode */
1718 {
1719 COSTS_N_INSNS (15), /* div. */
1720 COSTS_N_INSNS (5), /* mult. */
1721 COSTS_N_INSNS (7), /* mult_addsub. */
1722 COSTS_N_INSNS (7), /* fma. */
1723 COSTS_N_INSNS (3), /* addsub. */
1724 0, /* fpconst. */
1725 0, /* neg. */
1726 0, /* compare. */
1727 0, /* widen. */
1728 0, /* narrow. */
1729 0, /* toint. */
1730 0, /* fromint. */
1731 0 /* roundint. */
1732 }
1733 },
1734 /* Vector */
1735 {
1736 COSTS_N_INSNS (1) /* alu. */
1737 }
1738 };
1739
1740 const struct tune_params arm_slowmul_tune =
1741 {
1742 &generic_extra_costs, /* Insn extra costs. */
1743 NULL, /* Sched adj cost. */
1744 arm_default_branch_cost,
1745 &arm_default_vec_cost,
1746 3, /* Constant limit. */
1747 5, /* Max cond insns. */
1748 8, /* Memset max inline. */
1749 1, /* Issue rate. */
1750 ARM_PREFETCH_NOT_BENEFICIAL,
1751 tune_params::PREF_CONST_POOL_TRUE,
1752 tune_params::PREF_LDRD_FALSE,
1753 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1754 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1755 tune_params::DISPARAGE_FLAGS_NEITHER,
1756 tune_params::PREF_NEON_64_FALSE,
1757 tune_params::PREF_NEON_STRINGOPS_FALSE,
1758 tune_params::FUSE_NOTHING,
1759 tune_params::SCHED_AUTOPREF_OFF
1760 };
1761
1762 const struct tune_params arm_fastmul_tune =
1763 {
1764 &generic_extra_costs, /* Insn extra costs. */
1765 NULL, /* Sched adj cost. */
1766 arm_default_branch_cost,
1767 &arm_default_vec_cost,
1768 1, /* Constant limit. */
1769 5, /* Max cond insns. */
1770 8, /* Memset max inline. */
1771 1, /* Issue rate. */
1772 ARM_PREFETCH_NOT_BENEFICIAL,
1773 tune_params::PREF_CONST_POOL_TRUE,
1774 tune_params::PREF_LDRD_FALSE,
1775 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1777 tune_params::DISPARAGE_FLAGS_NEITHER,
1778 tune_params::PREF_NEON_64_FALSE,
1779 tune_params::PREF_NEON_STRINGOPS_FALSE,
1780 tune_params::FUSE_NOTHING,
1781 tune_params::SCHED_AUTOPREF_OFF
1782 };
1783
1784 /* StrongARM has early execution of branches, so a sequence that is worth
1785 skipping is shorter. Set max_insns_skipped to a lower value. */
1786
1787 const struct tune_params arm_strongarm_tune =
1788 {
1789 &generic_extra_costs, /* Insn extra costs. */
1790 NULL, /* Sched adj cost. */
1791 arm_default_branch_cost,
1792 &arm_default_vec_cost,
1793 1, /* Constant limit. */
1794 3, /* Max cond insns. */
1795 8, /* Memset max inline. */
1796 1, /* Issue rate. */
1797 ARM_PREFETCH_NOT_BENEFICIAL,
1798 tune_params::PREF_CONST_POOL_TRUE,
1799 tune_params::PREF_LDRD_FALSE,
1800 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1801 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1802 tune_params::DISPARAGE_FLAGS_NEITHER,
1803 tune_params::PREF_NEON_64_FALSE,
1804 tune_params::PREF_NEON_STRINGOPS_FALSE,
1805 tune_params::FUSE_NOTHING,
1806 tune_params::SCHED_AUTOPREF_OFF
1807 };
1808
1809 const struct tune_params arm_xscale_tune =
1810 {
1811 &generic_extra_costs, /* Insn extra costs. */
1812 xscale_sched_adjust_cost,
1813 arm_default_branch_cost,
1814 &arm_default_vec_cost,
1815 2, /* Constant limit. */
1816 3, /* Max cond insns. */
1817 8, /* Memset max inline. */
1818 1, /* Issue rate. */
1819 ARM_PREFETCH_NOT_BENEFICIAL,
1820 tune_params::PREF_CONST_POOL_TRUE,
1821 tune_params::PREF_LDRD_FALSE,
1822 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1824 tune_params::DISPARAGE_FLAGS_NEITHER,
1825 tune_params::PREF_NEON_64_FALSE,
1826 tune_params::PREF_NEON_STRINGOPS_FALSE,
1827 tune_params::FUSE_NOTHING,
1828 tune_params::SCHED_AUTOPREF_OFF
1829 };
1830
1831 const struct tune_params arm_9e_tune =
1832 {
1833 &generic_extra_costs, /* Insn extra costs. */
1834 NULL, /* Sched adj cost. */
1835 arm_default_branch_cost,
1836 &arm_default_vec_cost,
1837 1, /* Constant limit. */
1838 5, /* Max cond insns. */
1839 8, /* Memset max inline. */
1840 1, /* Issue rate. */
1841 ARM_PREFETCH_NOT_BENEFICIAL,
1842 tune_params::PREF_CONST_POOL_TRUE,
1843 tune_params::PREF_LDRD_FALSE,
1844 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1846 tune_params::DISPARAGE_FLAGS_NEITHER,
1847 tune_params::PREF_NEON_64_FALSE,
1848 tune_params::PREF_NEON_STRINGOPS_FALSE,
1849 tune_params::FUSE_NOTHING,
1850 tune_params::SCHED_AUTOPREF_OFF
1851 };
1852
1853 const struct tune_params arm_marvell_pj4_tune =
1854 {
1855 &generic_extra_costs, /* Insn extra costs. */
1856 NULL, /* Sched adj cost. */
1857 arm_default_branch_cost,
1858 &arm_default_vec_cost,
1859 1, /* Constant limit. */
1860 5, /* Max cond insns. */
1861 8, /* Memset max inline. */
1862 2, /* Issue rate. */
1863 ARM_PREFETCH_NOT_BENEFICIAL,
1864 tune_params::PREF_CONST_POOL_TRUE,
1865 tune_params::PREF_LDRD_FALSE,
1866 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1867 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1868 tune_params::DISPARAGE_FLAGS_NEITHER,
1869 tune_params::PREF_NEON_64_FALSE,
1870 tune_params::PREF_NEON_STRINGOPS_FALSE,
1871 tune_params::FUSE_NOTHING,
1872 tune_params::SCHED_AUTOPREF_OFF
1873 };
1874
1875 const struct tune_params arm_v6t2_tune =
1876 {
1877 &generic_extra_costs, /* Insn extra costs. */
1878 NULL, /* Sched adj cost. */
1879 arm_default_branch_cost,
1880 &arm_default_vec_cost,
1881 1, /* Constant limit. */
1882 5, /* Max cond insns. */
1883 8, /* Memset max inline. */
1884 1, /* Issue rate. */
1885 ARM_PREFETCH_NOT_BENEFICIAL,
1886 tune_params::PREF_CONST_POOL_FALSE,
1887 tune_params::PREF_LDRD_FALSE,
1888 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1890 tune_params::DISPARAGE_FLAGS_NEITHER,
1891 tune_params::PREF_NEON_64_FALSE,
1892 tune_params::PREF_NEON_STRINGOPS_FALSE,
1893 tune_params::FUSE_NOTHING,
1894 tune_params::SCHED_AUTOPREF_OFF
1895 };
1896
1897
1898 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1899 const struct tune_params arm_cortex_tune =
1900 {
1901 &generic_extra_costs,
1902 NULL, /* Sched adj cost. */
1903 arm_default_branch_cost,
1904 &arm_default_vec_cost,
1905 1, /* Constant limit. */
1906 5, /* Max cond insns. */
1907 8, /* Memset max inline. */
1908 2, /* Issue rate. */
1909 ARM_PREFETCH_NOT_BENEFICIAL,
1910 tune_params::PREF_CONST_POOL_FALSE,
1911 tune_params::PREF_LDRD_FALSE,
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1913 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1914 tune_params::DISPARAGE_FLAGS_NEITHER,
1915 tune_params::PREF_NEON_64_FALSE,
1916 tune_params::PREF_NEON_STRINGOPS_FALSE,
1917 tune_params::FUSE_NOTHING,
1918 tune_params::SCHED_AUTOPREF_OFF
1919 };
1920
1921 const struct tune_params arm_cortex_a8_tune =
1922 {
1923 &cortexa8_extra_costs,
1924 NULL, /* Sched adj cost. */
1925 arm_default_branch_cost,
1926 &arm_default_vec_cost,
1927 1, /* Constant limit. */
1928 5, /* Max cond insns. */
1929 8, /* Memset max inline. */
1930 2, /* Issue rate. */
1931 ARM_PREFETCH_NOT_BENEFICIAL,
1932 tune_params::PREF_CONST_POOL_FALSE,
1933 tune_params::PREF_LDRD_FALSE,
1934 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1935 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1936 tune_params::DISPARAGE_FLAGS_NEITHER,
1937 tune_params::PREF_NEON_64_FALSE,
1938 tune_params::PREF_NEON_STRINGOPS_TRUE,
1939 tune_params::FUSE_NOTHING,
1940 tune_params::SCHED_AUTOPREF_OFF
1941 };
1942
1943 const struct tune_params arm_cortex_a7_tune =
1944 {
1945 &cortexa7_extra_costs,
1946 NULL, /* Sched adj cost. */
1947 arm_default_branch_cost,
1948 &arm_default_vec_cost,
1949 1, /* Constant limit. */
1950 5, /* Max cond insns. */
1951 8, /* Memset max inline. */
1952 2, /* Issue rate. */
1953 ARM_PREFETCH_NOT_BENEFICIAL,
1954 tune_params::PREF_CONST_POOL_FALSE,
1955 tune_params::PREF_LDRD_FALSE,
1956 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1957 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1958 tune_params::DISPARAGE_FLAGS_NEITHER,
1959 tune_params::PREF_NEON_64_FALSE,
1960 tune_params::PREF_NEON_STRINGOPS_TRUE,
1961 tune_params::FUSE_NOTHING,
1962 tune_params::SCHED_AUTOPREF_OFF
1963 };
1964
1965 const struct tune_params arm_cortex_a15_tune =
1966 {
1967 &cortexa15_extra_costs,
1968 NULL, /* Sched adj cost. */
1969 arm_default_branch_cost,
1970 &arm_default_vec_cost,
1971 1, /* Constant limit. */
1972 2, /* Max cond insns. */
1973 8, /* Memset max inline. */
1974 3, /* Issue rate. */
1975 ARM_PREFETCH_NOT_BENEFICIAL,
1976 tune_params::PREF_CONST_POOL_FALSE,
1977 tune_params::PREF_LDRD_TRUE,
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1979 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1980 tune_params::DISPARAGE_FLAGS_ALL,
1981 tune_params::PREF_NEON_64_FALSE,
1982 tune_params::PREF_NEON_STRINGOPS_TRUE,
1983 tune_params::FUSE_NOTHING,
1984 tune_params::SCHED_AUTOPREF_FULL
1985 };
1986
1987 const struct tune_params arm_cortex_a35_tune =
1988 {
1989 &cortexa53_extra_costs,
1990 NULL, /* Sched adj cost. */
1991 arm_default_branch_cost,
1992 &arm_default_vec_cost,
1993 1, /* Constant limit. */
1994 5, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 1, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL,
1998 tune_params::PREF_CONST_POOL_FALSE,
1999 tune_params::PREF_LDRD_FALSE,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_NEITHER,
2003 tune_params::PREF_NEON_64_FALSE,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE,
2005 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2006 tune_params::SCHED_AUTOPREF_OFF
2007 };
2008
2009 const struct tune_params arm_cortex_a53_tune =
2010 {
2011 &cortexa53_extra_costs,
2012 NULL, /* Sched adj cost. */
2013 arm_default_branch_cost,
2014 &arm_default_vec_cost,
2015 1, /* Constant limit. */
2016 5, /* Max cond insns. */
2017 8, /* Memset max inline. */
2018 2, /* Issue rate. */
2019 ARM_PREFETCH_NOT_BENEFICIAL,
2020 tune_params::PREF_CONST_POOL_FALSE,
2021 tune_params::PREF_LDRD_FALSE,
2022 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2024 tune_params::DISPARAGE_FLAGS_NEITHER,
2025 tune_params::PREF_NEON_64_FALSE,
2026 tune_params::PREF_NEON_STRINGOPS_TRUE,
2027 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2028 tune_params::SCHED_AUTOPREF_OFF
2029 };
2030
2031 const struct tune_params arm_cortex_a57_tune =
2032 {
2033 &cortexa57_extra_costs,
2034 NULL, /* Sched adj cost. */
2035 arm_default_branch_cost,
2036 &arm_default_vec_cost,
2037 1, /* Constant limit. */
2038 2, /* Max cond insns. */
2039 8, /* Memset max inline. */
2040 3, /* Issue rate. */
2041 ARM_PREFETCH_NOT_BENEFICIAL,
2042 tune_params::PREF_CONST_POOL_FALSE,
2043 tune_params::PREF_LDRD_TRUE,
2044 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2046 tune_params::DISPARAGE_FLAGS_ALL,
2047 tune_params::PREF_NEON_64_FALSE,
2048 tune_params::PREF_NEON_STRINGOPS_TRUE,
2049 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2050 tune_params::SCHED_AUTOPREF_FULL
2051 };
2052
2053 const struct tune_params arm_exynosm1_tune =
2054 {
2055 &exynosm1_extra_costs,
2056 NULL, /* Sched adj cost. */
2057 arm_default_branch_cost,
2058 &arm_default_vec_cost,
2059 1, /* Constant limit. */
2060 2, /* Max cond insns. */
2061 8, /* Memset max inline. */
2062 3, /* Issue rate. */
2063 ARM_PREFETCH_NOT_BENEFICIAL,
2064 tune_params::PREF_CONST_POOL_FALSE,
2065 tune_params::PREF_LDRD_TRUE,
2066 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2067 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2068 tune_params::DISPARAGE_FLAGS_ALL,
2069 tune_params::PREF_NEON_64_FALSE,
2070 tune_params::PREF_NEON_STRINGOPS_TRUE,
2071 tune_params::FUSE_NOTHING,
2072 tune_params::SCHED_AUTOPREF_OFF
2073 };
2074
2075 const struct tune_params arm_xgene1_tune =
2076 {
2077 &xgene1_extra_costs,
2078 NULL, /* Sched adj cost. */
2079 arm_default_branch_cost,
2080 &arm_default_vec_cost,
2081 1, /* Constant limit. */
2082 2, /* Max cond insns. */
2083 32, /* Memset max inline. */
2084 4, /* Issue rate. */
2085 ARM_PREFETCH_NOT_BENEFICIAL,
2086 tune_params::PREF_CONST_POOL_FALSE,
2087 tune_params::PREF_LDRD_TRUE,
2088 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2090 tune_params::DISPARAGE_FLAGS_ALL,
2091 tune_params::PREF_NEON_64_FALSE,
2092 tune_params::PREF_NEON_STRINGOPS_FALSE,
2093 tune_params::FUSE_NOTHING,
2094 tune_params::SCHED_AUTOPREF_OFF
2095 };
2096
2097 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2098 less appealing. Set max_insns_skipped to a low value. */
2099
2100 const struct tune_params arm_cortex_a5_tune =
2101 {
2102 &cortexa5_extra_costs,
2103 NULL, /* Sched adj cost. */
2104 arm_cortex_a5_branch_cost,
2105 &arm_default_vec_cost,
2106 1, /* Constant limit. */
2107 1, /* Max cond insns. */
2108 8, /* Memset max inline. */
2109 2, /* Issue rate. */
2110 ARM_PREFETCH_NOT_BENEFICIAL,
2111 tune_params::PREF_CONST_POOL_FALSE,
2112 tune_params::PREF_LDRD_FALSE,
2113 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2115 tune_params::DISPARAGE_FLAGS_NEITHER,
2116 tune_params::PREF_NEON_64_FALSE,
2117 tune_params::PREF_NEON_STRINGOPS_TRUE,
2118 tune_params::FUSE_NOTHING,
2119 tune_params::SCHED_AUTOPREF_OFF
2120 };
2121
2122 const struct tune_params arm_cortex_a9_tune =
2123 {
2124 &cortexa9_extra_costs,
2125 cortex_a9_sched_adjust_cost,
2126 arm_default_branch_cost,
2127 &arm_default_vec_cost,
2128 1, /* Constant limit. */
2129 5, /* Max cond insns. */
2130 8, /* Memset max inline. */
2131 2, /* Issue rate. */
2132 ARM_PREFETCH_BENEFICIAL(4,32,32),
2133 tune_params::PREF_CONST_POOL_FALSE,
2134 tune_params::PREF_LDRD_FALSE,
2135 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2137 tune_params::DISPARAGE_FLAGS_NEITHER,
2138 tune_params::PREF_NEON_64_FALSE,
2139 tune_params::PREF_NEON_STRINGOPS_FALSE,
2140 tune_params::FUSE_NOTHING,
2141 tune_params::SCHED_AUTOPREF_OFF
2142 };
2143
2144 const struct tune_params arm_cortex_a12_tune =
2145 {
2146 &cortexa12_extra_costs,
2147 NULL, /* Sched adj cost. */
2148 arm_default_branch_cost,
2149 &arm_default_vec_cost, /* Vectorizer costs. */
2150 1, /* Constant limit. */
2151 2, /* Max cond insns. */
2152 8, /* Memset max inline. */
2153 2, /* Issue rate. */
2154 ARM_PREFETCH_NOT_BENEFICIAL,
2155 tune_params::PREF_CONST_POOL_FALSE,
2156 tune_params::PREF_LDRD_TRUE,
2157 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2159 tune_params::DISPARAGE_FLAGS_ALL,
2160 tune_params::PREF_NEON_64_FALSE,
2161 tune_params::PREF_NEON_STRINGOPS_TRUE,
2162 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2163 tune_params::SCHED_AUTOPREF_OFF
2164 };
2165
2166 const struct tune_params arm_cortex_a73_tune =
2167 {
2168 &cortexa57_extra_costs,
2169 NULL, /* Sched adj cost. */
2170 arm_default_branch_cost,
2171 &arm_default_vec_cost, /* Vectorizer costs. */
2172 1, /* Constant limit. */
2173 2, /* Max cond insns. */
2174 8, /* Memset max inline. */
2175 2, /* Issue rate. */
2176 ARM_PREFETCH_NOT_BENEFICIAL,
2177 tune_params::PREF_CONST_POOL_FALSE,
2178 tune_params::PREF_LDRD_TRUE,
2179 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2181 tune_params::DISPARAGE_FLAGS_ALL,
2182 tune_params::PREF_NEON_64_FALSE,
2183 tune_params::PREF_NEON_STRINGOPS_TRUE,
2184 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2185 tune_params::SCHED_AUTOPREF_FULL
2186 };
2187
2188 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2189 cycle to execute each. An LDR from the constant pool also takes two cycles
2190 to execute, but mildly increases pipelining opportunity (consecutive
2191 loads/stores can be pipelined together, saving one cycle), and may also
2192 improve icache utilisation. Hence we prefer the constant pool for such
2193 processors. */
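/* An illustrative sketch of the trade-off described above (the assembly and
   timings are assumptions for illustration, not taken from this file):

       movw  r0, #0x5678        @ 1 cycle, 4 bytes
       movt  r0, #0x1234        @ 1 cycle, 4 bytes; r0 = 0x12345678

   versus a literal-pool load:

       ldr   r0, .LC0           @ 2 cycles, 2-4 bytes of code
       ...
     .LC0:
       .word 0x12345678         @ plus 4 bytes of pool data

   Both forms take two cycles here, which is why the PREF_CONST_POOL_TRUE
   setting below rests on the pipelining and icache arguments above.  */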
2194
2195 const struct tune_params arm_v7m_tune =
2196 {
2197 &v7m_extra_costs,
2198 NULL, /* Sched adj cost. */
2199 arm_cortex_m_branch_cost,
2200 &arm_default_vec_cost,
2201 1, /* Constant limit. */
2202 2, /* Max cond insns. */
2203 8, /* Memset max inline. */
2204 1, /* Issue rate. */
2205 ARM_PREFETCH_NOT_BENEFICIAL,
2206 tune_params::PREF_CONST_POOL_TRUE,
2207 tune_params::PREF_LDRD_FALSE,
2208 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2209 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2210 tune_params::DISPARAGE_FLAGS_NEITHER,
2211 tune_params::PREF_NEON_64_FALSE,
2212 tune_params::PREF_NEON_STRINGOPS_FALSE,
2213 tune_params::FUSE_NOTHING,
2214 tune_params::SCHED_AUTOPREF_OFF
2215 };
2216
2217 /* Cortex-M7 tuning. */
2218
2219 const struct tune_params arm_cortex_m7_tune =
2220 {
2221 &v7m_extra_costs,
2222 NULL, /* Sched adj cost. */
2223 arm_cortex_m7_branch_cost,
2224 &arm_default_vec_cost,
2225 0, /* Constant limit. */
2226 1, /* Max cond insns. */
2227 8, /* Memset max inline. */
2228 2, /* Issue rate. */
2229 ARM_PREFETCH_NOT_BENEFICIAL,
2230 tune_params::PREF_CONST_POOL_TRUE,
2231 tune_params::PREF_LDRD_FALSE,
2232 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2233 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2234 tune_params::DISPARAGE_FLAGS_NEITHER,
2235 tune_params::PREF_NEON_64_FALSE,
2236 tune_params::PREF_NEON_STRINGOPS_FALSE,
2237 tune_params::FUSE_NOTHING,
2238 tune_params::SCHED_AUTOPREF_OFF
2239 };
2240
2241 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2242 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2243 cortex-m23. */
2244 const struct tune_params arm_v6m_tune =
2245 {
2246 &generic_extra_costs, /* Insn extra costs. */
2247 NULL, /* Sched adj cost. */
2248 arm_default_branch_cost,
2249 &arm_default_vec_cost, /* Vectorizer costs. */
2250 1, /* Constant limit. */
2251 5, /* Max cond insns. */
2252 8, /* Memset max inline. */
2253 1, /* Issue rate. */
2254 ARM_PREFETCH_NOT_BENEFICIAL,
2255 tune_params::PREF_CONST_POOL_FALSE,
2256 tune_params::PREF_LDRD_FALSE,
2257 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2258 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2259 tune_params::DISPARAGE_FLAGS_NEITHER,
2260 tune_params::PREF_NEON_64_FALSE,
2261 tune_params::PREF_NEON_STRINGOPS_FALSE,
2262 tune_params::FUSE_NOTHING,
2263 tune_params::SCHED_AUTOPREF_OFF
2264 };
2265
2266 const struct tune_params arm_fa726te_tune =
2267 {
2268 &generic_extra_costs, /* Insn extra costs. */
2269 fa726te_sched_adjust_cost,
2270 arm_default_branch_cost,
2271 &arm_default_vec_cost,
2272 1, /* Constant limit. */
2273 5, /* Max cond insns. */
2274 8, /* Memset max inline. */
2275 2, /* Issue rate. */
2276 ARM_PREFETCH_NOT_BENEFICIAL,
2277 tune_params::PREF_CONST_POOL_TRUE,
2278 tune_params::PREF_LDRD_FALSE,
2279 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2281 tune_params::DISPARAGE_FLAGS_NEITHER,
2282 tune_params::PREF_NEON_64_FALSE,
2283 tune_params::PREF_NEON_STRINGOPS_FALSE,
2284 tune_params::FUSE_NOTHING,
2285 tune_params::SCHED_AUTOPREF_OFF
2286 };
2287
2288 /* Auto-generated CPU, FPU and architecture tables. */
2289 #include "arm-cpu-data.h"
2290
2291 /* The name of the preprocessor macro to define for this architecture. PROFILE
2292 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2293 is thus chosen to be big enough to hold the longest architecture name. */
2294
2295 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
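/* For example, for an -march=armv7-a target the architecture's preprocessor
   name is "7A", so arm_option_override () rewrites the buffer above to
   "__ARM_ARCH_7A__".  */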
2296
2297 /* Supported TLS relocations. */
2298
2299 enum tls_reloc {
2300 TLS_GD32,
2301 TLS_LDM32,
2302 TLS_LDO32,
2303 TLS_IE32,
2304 TLS_LE32,
2305 TLS_DESCSEQ /* GNU scheme */
2306 };
2307
2308 /* The maximum number of insns to be used when loading a constant. */
2309 inline static int
2310 arm_constant_limit (bool size_p)
2311 {
2312 return size_p ? 1 : current_tune->constant_limit;
2313 }
2314
2315 /* Emit an insn that's a simple single-set. Both the operands must be known
2316 to be valid. */
2317 inline static rtx_insn *
2318 emit_set_insn (rtx x, rtx y)
2319 {
2320 return emit_insn (gen_rtx_SET (x, y));
2321 }
2322
2323 /* Return the number of bits set in VALUE. */
2324 static unsigned
2325 bit_count (unsigned long value)
2326 {
2327 unsigned long count = 0;
2328
2329 while (value)
2330 {
2331 count++;
2332 value &= value - 1; /* Clear the least-significant set bit. */
2333 }
2334
2335 return count;
2336 }
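/* Worked example of the loop above: for VALUE = 0b101100, "value &= value - 1"
   clears one set bit per iteration, 0b101100 -> 0b101000 -> 0b100000 -> 0,
   so bit_count returns 3.  */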
2337
2338 /* Return the number of bits set in BMAP. */
2339 static unsigned
2340 bitmap_popcount (const sbitmap bmap)
2341 {
2342 unsigned int count = 0;
2343 unsigned int n = 0;
2344 sbitmap_iterator sbi;
2345
2346 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2347 count++;
2348 return count;
2349 }
2350
2351 typedef struct
2352 {
2353 machine_mode mode;
2354 const char *name;
2355 } arm_fixed_mode_set;
2356
2357 /* A small helper for setting fixed-point libfuncs. */
2358
2359 static void
2360 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2361 const char *funcname, const char *modename,
2362 int num_suffix)
2363 {
2364 char buffer[50];
2365
2366 if (num_suffix == 0)
2367 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2368 else
2369 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2370
2371 set_optab_libfunc (optable, mode, buffer);
2372 }
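/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers the libcall name "__gnu_addqq3" for QQmode addition.  */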
2373
2374 static void
2375 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2376 machine_mode from, const char *funcname,
2377 const char *toname, const char *fromname)
2378 {
2379 char buffer[50];
2380 const char *maybe_suffix_2 = "";
2381
2382 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2383 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2384 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2385 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2386 maybe_suffix_2 = "2";
2387
2388 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2389 maybe_suffix_2);
2390
2391 set_conv_libfunc (optable, to, from, buffer);
2392 }
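/* For example, a conversion from SQmode to DQmode (both signed fract modes)
   picks up the "2" suffix and registers "__gnu_fractsqdq2", whereas a
   conversion from SQmode to SImode does not and registers "__gnu_fractsqsi".  */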
2393
2394 /* Set up library functions unique to ARM. */
2395
2396 static void
2397 arm_init_libfuncs (void)
2398 {
2399 /* For Linux, we have access to kernel support for atomic operations. */
2400 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2401 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2402
2403 /* There are no special library functions unless we are using the
2404 ARM BPABI. */
2405 if (!TARGET_BPABI)
2406 return;
2407
2408 /* The functions below are described in Section 4 of the "Run-Time
2409 ABI for the ARM architecture", Version 1.0. */
2410
2411 /* Double-precision floating-point arithmetic. Table 2. */
2412 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2413 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2414 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2415 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2416 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2417
2418 /* Double-precision comparisons. Table 3. */
2419 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2420 set_optab_libfunc (ne_optab, DFmode, NULL);
2421 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2422 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2423 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2424 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2425 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2426
2427 /* Single-precision floating-point arithmetic. Table 4. */
2428 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2429 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2430 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2431 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2432 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2433
2434 /* Single-precision comparisons. Table 5. */
2435 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2436 set_optab_libfunc (ne_optab, SFmode, NULL);
2437 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2438 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2439 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2440 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2441 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2442
2443 /* Floating-point to integer conversions. Table 6. */
2444 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2445 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2446 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2447 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2448 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2449 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2450 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2451 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2452
2453 /* Conversions between floating types. Table 7. */
2454 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2455 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2456
2457 /* Integer to floating-point conversions. Table 8. */
2458 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2459 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2460 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2461 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2462 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2463 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2464 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2465 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2466
2467 /* Long long. Table 9. */
2468 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2469 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2470 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2471 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2472 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2473 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2474 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2475 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2476
2477 /* Integer (32/32->32) division. \S 4.3.1. */
2478 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2479 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2480
2481 /* The divmod functions are designed so that they can be used for
2482 plain division, even though they return both the quotient and the
2483 remainder. The quotient is returned in the usual location (i.e.,
2484 r0 for SImode, {r0, r1} for DImode), just as would be expected
2485 for an ordinary division routine. Because the AAPCS calling
2486 conventions specify that all of { r0, r1, r2, r3 } are
2487 call-clobbered registers, there is no need to tell the compiler
2488 explicitly that those registers are clobbered by these
2489 routines. */
2490 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2491 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
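  /* Illustrative sketch of the convention described above (the prototype
     below follows the RTABI description and is shown for illustration only;
     it is not part of this file):

	 typedef struct { int quot; int rem; } idiv_return;
	 extern idiv_return __aeabi_idivmod (int numerator, int denominator);

     The quotient comes back in r0 and the remainder in r1 (r0-r1 and r2-r3
     for the 64-bit variants), so using the divmod routines through
     sdiv_optab and udiv_optab simply ignores the remainder half of the
     return value.  */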
2492
2493 /* For SImode division the ABI provides div-without-mod routines,
2494 which are faster. */
2495 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2496 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2497
2498 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2499 divmod libcalls instead. */
2500 set_optab_libfunc (smod_optab, DImode, NULL);
2501 set_optab_libfunc (umod_optab, DImode, NULL);
2502 set_optab_libfunc (smod_optab, SImode, NULL);
2503 set_optab_libfunc (umod_optab, SImode, NULL);
2504
2505 /* Half-precision float operations. The compiler handles all operations
2506 with NULL libfuncs by converting to SFmode. */
2507 switch (arm_fp16_format)
2508 {
2509 case ARM_FP16_FORMAT_IEEE:
2510 case ARM_FP16_FORMAT_ALTERNATIVE:
2511
2512 /* Conversions. */
2513 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2514 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2515 ? "__gnu_f2h_ieee"
2516 : "__gnu_f2h_alternative"));
2517 set_conv_libfunc (sext_optab, SFmode, HFmode,
2518 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2519 ? "__gnu_h2f_ieee"
2520 : "__gnu_h2f_alternative"));
2521
2522 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2523 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2524 ? "__gnu_d2h_ieee"
2525 : "__gnu_d2h_alternative"));
2526
2527 /* Arithmetic. */
2528 set_optab_libfunc (add_optab, HFmode, NULL);
2529 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2530 set_optab_libfunc (smul_optab, HFmode, NULL);
2531 set_optab_libfunc (neg_optab, HFmode, NULL);
2532 set_optab_libfunc (sub_optab, HFmode, NULL);
2533
2534 /* Comparisons. */
2535 set_optab_libfunc (eq_optab, HFmode, NULL);
2536 set_optab_libfunc (ne_optab, HFmode, NULL);
2537 set_optab_libfunc (lt_optab, HFmode, NULL);
2538 set_optab_libfunc (le_optab, HFmode, NULL);
2539 set_optab_libfunc (ge_optab, HFmode, NULL);
2540 set_optab_libfunc (gt_optab, HFmode, NULL);
2541 set_optab_libfunc (unord_optab, HFmode, NULL);
2542 break;
2543
2544 default:
2545 break;
2546 }
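  /* Illustrative sketch (not part of this file): with the arithmetic and
     comparison libfuncs cleared above, an __fp16 expression such as

	 __fp16 a, b, c;
	 c = a + b;

     is expanded roughly as

	 c = (__fp16) ((float) a + (float) b);

     so the only libcalls ever needed are the HFmode conversions registered
     just above.  */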
2547
2548 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2549 {
2550 const arm_fixed_mode_set fixed_arith_modes[] =
2551 {
2552 { QQmode, "qq" },
2553 { UQQmode, "uqq" },
2554 { HQmode, "hq" },
2555 { UHQmode, "uhq" },
2556 { SQmode, "sq" },
2557 { USQmode, "usq" },
2558 { DQmode, "dq" },
2559 { UDQmode, "udq" },
2560 { TQmode, "tq" },
2561 { UTQmode, "utq" },
2562 { HAmode, "ha" },
2563 { UHAmode, "uha" },
2564 { SAmode, "sa" },
2565 { USAmode, "usa" },
2566 { DAmode, "da" },
2567 { UDAmode, "uda" },
2568 { TAmode, "ta" },
2569 { UTAmode, "uta" }
2570 };
2571 const arm_fixed_mode_set fixed_conv_modes[] =
2572 {
2573 { QQmode, "qq" },
2574 { UQQmode, "uqq" },
2575 { HQmode, "hq" },
2576 { UHQmode, "uhq" },
2577 { SQmode, "sq" },
2578 { USQmode, "usq" },
2579 { DQmode, "dq" },
2580 { UDQmode, "udq" },
2581 { TQmode, "tq" },
2582 { UTQmode, "utq" },
2583 { HAmode, "ha" },
2584 { UHAmode, "uha" },
2585 { SAmode, "sa" },
2586 { USAmode, "usa" },
2587 { DAmode, "da" },
2588 { UDAmode, "uda" },
2589 { TAmode, "ta" },
2590 { UTAmode, "uta" },
2591 { QImode, "qi" },
2592 { HImode, "hi" },
2593 { SImode, "si" },
2594 { DImode, "di" },
2595 { TImode, "ti" },
2596 { SFmode, "sf" },
2597 { DFmode, "df" }
2598 };
2599 unsigned int i, j;
2600
2601 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2602 {
2603 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2604 "add", fixed_arith_modes[i].name, 3);
2605 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2606 "ssadd", fixed_arith_modes[i].name, 3);
2607 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2608 "usadd", fixed_arith_modes[i].name, 3);
2609 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2610 "sub", fixed_arith_modes[i].name, 3);
2611 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2612 "sssub", fixed_arith_modes[i].name, 3);
2613 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2614 "ussub", fixed_arith_modes[i].name, 3);
2615 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2616 "mul", fixed_arith_modes[i].name, 3);
2617 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2618 "ssmul", fixed_arith_modes[i].name, 3);
2619 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2620 "usmul", fixed_arith_modes[i].name, 3);
2621 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2622 "div", fixed_arith_modes[i].name, 3);
2623 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2624 "udiv", fixed_arith_modes[i].name, 3);
2625 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2626 "ssdiv", fixed_arith_modes[i].name, 3);
2627 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2628 "usdiv", fixed_arith_modes[i].name, 3);
2629 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2630 "neg", fixed_arith_modes[i].name, 2);
2631 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2632 "ssneg", fixed_arith_modes[i].name, 2);
2633 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2634 "usneg", fixed_arith_modes[i].name, 2);
2635 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2636 "ashl", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2638 "ashr", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2640 "lshr", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2642 "ssashl", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2644 "usashl", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2646 "cmp", fixed_arith_modes[i].name, 2);
2647 }
2648
2649 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2650 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2651 {
2652 if (i == j
2653 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2654 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2655 continue;
2656
2657 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2658 fixed_conv_modes[j].mode, "fract",
2659 fixed_conv_modes[i].name,
2660 fixed_conv_modes[j].name);
2661 arm_set_fixed_conv_libfunc (satfract_optab,
2662 fixed_conv_modes[i].mode,
2663 fixed_conv_modes[j].mode, "satfract",
2664 fixed_conv_modes[i].name,
2665 fixed_conv_modes[j].name);
2666 arm_set_fixed_conv_libfunc (fractuns_optab,
2667 fixed_conv_modes[i].mode,
2668 fixed_conv_modes[j].mode, "fractuns",
2669 fixed_conv_modes[i].name,
2670 fixed_conv_modes[j].name);
2671 arm_set_fixed_conv_libfunc (satfractuns_optab,
2672 fixed_conv_modes[i].mode,
2673 fixed_conv_modes[j].mode, "satfractuns",
2674 fixed_conv_modes[i].name,
2675 fixed_conv_modes[j].name);
2676 }
2677 }
2678
2679 if (TARGET_AAPCS_BASED)
2680 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2681 }
2682
2683 /* On AAPCS systems, this is the "struct __va_list". */
2684 static GTY(()) tree va_list_type;
2685
2686 /* Return the type to use as __builtin_va_list. */
2687 static tree
2688 arm_build_builtin_va_list (void)
2689 {
2690 tree va_list_name;
2691 tree ap_field;
2692
2693 if (!TARGET_AAPCS_BASED)
2694 return std_build_builtin_va_list ();
2695
2696 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2697 defined as:
2698
2699 struct __va_list
2700 {
2701 void *__ap;
2702 };
2703
2704 The C Library ABI further reinforces this definition in \S
2705 4.1.
2706
2707 We must follow this definition exactly. The structure tag
2708 name is visible in C++ mangled names, and thus forms a part
2709 of the ABI. The field name may be used by people who
2710 #include <stdarg.h>. */
2711 /* Create the type. */
2712 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2713 /* Give it the required name. */
2714 va_list_name = build_decl (BUILTINS_LOCATION,
2715 TYPE_DECL,
2716 get_identifier ("__va_list"),
2717 va_list_type);
2718 DECL_ARTIFICIAL (va_list_name) = 1;
2719 TYPE_NAME (va_list_type) = va_list_name;
2720 TYPE_STUB_DECL (va_list_type) = va_list_name;
2721 /* Create the __ap field. */
2722 ap_field = build_decl (BUILTINS_LOCATION,
2723 FIELD_DECL,
2724 get_identifier ("__ap"),
2725 ptr_type_node);
2726 DECL_ARTIFICIAL (ap_field) = 1;
2727 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2728 TYPE_FIELDS (va_list_type) = ap_field;
2729 /* Compute its layout. */
2730 layout_type (va_list_type);
2731
2732 return va_list_type;
2733 }
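/* In C terms, the type built above is simply (a sketch mirroring the AAPCS
   definition quoted in the comment):

       struct __va_list { void *__ap; };

   with __builtin_va_list being that structure type on AAPCS targets.  */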
2734
2735 /* Return an expression of type "void *" pointing to the next
2736 available argument in a variable-argument list. VALIST is the
2737 user-level va_list object, of type __builtin_va_list. */
2738 static tree
2739 arm_extract_valist_ptr (tree valist)
2740 {
2741 if (TREE_TYPE (valist) == error_mark_node)
2742 return error_mark_node;
2743
2744 /* On an AAPCS target, the pointer is stored within "struct
2745 __va_list". */
2746 if (TARGET_AAPCS_BASED)
2747 {
2748 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2749 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2750 valist, ap_field, NULL_TREE);
2751 }
2752
2753 return valist;
2754 }
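/* For example, given "va_list ap;" on an AAPCS target, the COMPONENT_REF
   built above corresponds to the C expression "ap.__ap".  */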
2755
2756 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2757 static void
2758 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2759 {
2760 valist = arm_extract_valist_ptr (valist);
2761 std_expand_builtin_va_start (valist, nextarg);
2762 }
2763
2764 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2765 static tree
2766 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2767 gimple_seq *post_p)
2768 {
2769 valist = arm_extract_valist_ptr (valist);
2770 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2771 }
2772
2773 /* Check any incompatible options that the user has specified. */
2774 static void
2775 arm_option_check_internal (struct gcc_options *opts)
2776 {
2777 int flags = opts->x_target_flags;
2778
2779 /* iWMMXt and NEON are incompatible. */
2780 if (TARGET_IWMMXT
2781 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2782 error ("iWMMXt and NEON are incompatible");
2783
2784 /* Make sure that the processor choice does not conflict with any of the
2785 other command line choices. */
2786 if (TARGET_ARM_P (flags)
2787 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2788 error ("target CPU does not support ARM mode");
2789
2790 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2791 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2792 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2793
2794 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2795 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2796
2797 /* If this target is normally configured to use APCS frames, warn if they
2798 are turned off and debugging is turned on. */
2799 if (TARGET_ARM_P (flags)
2800 && write_symbols != NO_DEBUG
2801 && !TARGET_APCS_FRAME
2802 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2803 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2804
2805 /* iWMMXt unsupported under Thumb mode. */
2806 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2807 error ("iWMMXt unsupported under Thumb mode");
2808
2809 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2810 error ("can not use -mtp=cp15 with 16-bit Thumb");
2811
2812 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2813 {
2814 error ("RTP PIC is incompatible with Thumb");
2815 flag_pic = 0;
2816 }
2817
2818 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2819 with MOVT. */
2820 if ((target_pure_code || target_slow_flash_data)
2821 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2822 {
2823 const char *flag = (target_pure_code ? "-mpure-code" :
2824 "-mslow-flash-data");
2825 error ("%s only supports non-pic code on M-profile targets with the "
2826 "MOVT instruction", flag);
2827 }
2828
2829 }
2830
2831 /* Recompute the global settings depending on target attribute options. */
2832
2833 static void
2834 arm_option_params_internal (void)
2835 {
2836 /* If we are not using the default (ARM mode) section anchor offset
2837 ranges, then set the correct ranges now. */
2838 if (TARGET_THUMB1)
2839 {
2840 /* Thumb-1 LDR instructions cannot have negative offsets.
2841 Permissible positive offset ranges are 5-bit (for byte loads),
2842 6-bit (for halfword loads), or 7-bit (for word loads).
2843 Empirical results suggest a 7-bit anchor range gives the best
2844 overall code size. */
2845 targetm.min_anchor_offset = 0;
2846 targetm.max_anchor_offset = 127;
2847 }
2848 else if (TARGET_THUMB2)
2849 {
2850 /* The minimum is set such that the total size of the block
2851 for a particular anchor is 248 + 1 + 4095 bytes, which is
2852 divisible by eight, ensuring natural spacing of anchors. */
2853 targetm.min_anchor_offset = -248;
2854 targetm.max_anchor_offset = 4095;
2855 }
2856 else
2857 {
2858 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2859 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2860 }
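  /* Worked example of the ranges above (an illustrative note, not taken from
     this file): Thumb-1 immediate offsets are 5 bits scaled by the access
     size, i.e. 0-31 bytes for LDRB, 0-62 for LDRH and 0-124 for LDR, so an
     anchor range of [0, 127] covers the word-load case.  For Thumb-2 the
     block size is 248 + 1 + 4095 = 4344 bytes, which is 8 * 543 and hence
     keeps anchors naturally spaced by eight.  */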
2861
2862 if (optimize_size)
2863 {
2864 /* If optimizing for size, bump the number of instructions that we
2865 are prepared to conditionally execute (even on a StrongARM). */
2866 max_insns_skipped = 6;
2867
2868 /* For THUMB2, we limit the conditional sequence to one IT block. */
2869 if (TARGET_THUMB2)
2870 max_insns_skipped = arm_restrict_it ? 1 : 4;
2871 }
2872 else
2873 /* When -mrestrict-it is in use, tone down the if-conversion. */
2874 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2875 ? 1 : current_tune->max_insns_skipped;
2876 }
2877
2878 /* True if -mflip-thumb should next add an attribute for the default
2879 mode, false if it should next add an attribute for the opposite mode. */
2880 static GTY(()) bool thumb_flipper;
2881
2882 /* Options after initial target override. */
2883 static GTY(()) tree init_optimize;
2884
2885 static void
2886 arm_override_options_after_change_1 (struct gcc_options *opts)
2887 {
2888 if (opts->x_align_functions <= 0)
2889 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2890 && opts->x_optimize_size ? 2 : 4;
2891 }
2892
2893 /* Implement targetm.override_options_after_change. */
2894
2895 static void
2896 arm_override_options_after_change (void)
2897 {
2898 arm_configure_build_target (&arm_active_target,
2899 TREE_TARGET_OPTION (target_option_default_node),
2900 &global_options_set, false);
2901
2902 arm_override_options_after_change_1 (&global_options);
2903 }
2904
2905 static void
2906 arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
2907 {
2908 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2909 false);
2910 }
2911
2912 /* Reset options between modes that the user has specified. */
2913 static void
2914 arm_option_override_internal (struct gcc_options *opts,
2915 struct gcc_options *opts_set)
2916 {
2917 arm_override_options_after_change_1 (opts);
2918
2919 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2920 {
2921 /* The default is to enable interworking, so this warning message would
2922 be confusing to users who have just compiled with, eg, -march=armv3. */
2923 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2924 opts->x_target_flags &= ~MASK_INTERWORK;
2925 }
2926
2927 if (TARGET_THUMB_P (opts->x_target_flags)
2928 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2929 {
2930 warning (0, "target CPU does not support THUMB instructions");
2931 opts->x_target_flags &= ~MASK_THUMB;
2932 }
2933
2934 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2935 {
2936 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2937 opts->x_target_flags &= ~MASK_APCS_FRAME;
2938 }
2939
2940 /* Callee super interworking implies thumb interworking. Adding
2941 this to the flags here simplifies the logic elsewhere. */
2942 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2943 opts->x_target_flags |= MASK_INTERWORK;
2944
2945 /* Need to remember initial values so combinations of options like
2946 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2947 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2948
2949 if (! opts_set->x_arm_restrict_it)
2950 opts->x_arm_restrict_it = arm_arch8;
2951
2952 /* ARM execution state and M profile don't have [restrict] IT. */
2953 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2954 opts->x_arm_restrict_it = 0;
2955
2956 /* Enable -munaligned-access by default for
2957 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2958 i.e. Thumb2 and ARM state only.
2959 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2960 - ARMv8 architecture-based processors.
2961
2962 Disable -munaligned-access by default for
2963 - all pre-ARMv6 architecture-based processors
2964 - ARMv6-M architecture-based processors
2965 - ARMv8-M Baseline processors. */
2966
2967 if (! opts_set->x_unaligned_access)
2968 {
2969 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2970 && arm_arch6 && (arm_arch_notm || arm_arch7));
2971 }
2972 else if (opts->x_unaligned_access == 1
2973 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2974 {
2975 warning (0, "target CPU does not support unaligned accesses");
2976 opts->x_unaligned_access = 0;
2977 }
2978
2979 /* Scheduling is on by default at -O2, so don't warn when disabling it for Thumb-1. */
2980 if (TARGET_THUMB1_P (opts->x_target_flags))
2981 opts->x_flag_schedule_insns = 0;
2982 else
2983 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2984
2985 /* Disable shrink-wrap when optimizing function for size, since it tends to
2986 generate additional returns. */
2987 if (optimize_function_for_size_p (cfun)
2988 && TARGET_THUMB2_P (opts->x_target_flags))
2989 opts->x_flag_shrink_wrap = false;
2990 else
2991 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2992
2993 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2994 - epilogue_insns - does not accurately model the corresponding insns
2995 emitted in the asm file. In particular, see the comment in thumb_exit
2996 'Find out how many of the (return) argument registers we can corrupt'.
2997 As a consequence, the epilogue may clobber registers without fipa-ra
2998 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2999 TODO: Accurately model clobbers for epilogue_insns and reenable
3000 fipa-ra. */
3001 if (TARGET_THUMB1_P (opts->x_target_flags))
3002 opts->x_flag_ipa_ra = 0;
3003 else
3004 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3005
3006 /* Thumb2 inline assembly code should always use unified syntax.
3007 This will apply to ARM and Thumb1 eventually. */
3008 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3009
3010 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3011 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3012 #endif
3013 }
3014
3015 /* Convert a static initializer array of feature bits to sbitmap
3016 representation. */
3017 static void
3018 arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
3019 {
3020 bitmap_clear (isa);
3021 while (*isa_bits != isa_nobit)
3022 bitmap_set_bit (isa, *(isa_bits++));
3023 }
3024
3025 static sbitmap isa_all_fpubits;
3026 static sbitmap isa_quirkbits;
3027
3028 /* Configure a build target TARGET from the user-specified options OPTS and
3029 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3030 architecture have been specified, but the two are not identical. */
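/* For example (an illustrative scenario, not taken from this file):
   -march=armv7-a -mcpu=cortex-a8 agree on their ISA bits, so the CPU choice
   is kept.  -march=armv7-a -mcpu=cortex-m4 do not, so with WARN_COMPATIBLE
   a "switch -mcpu=... conflicts with -march=..." warning is issued and, as
   noted below, -march wins for code generation while -mcpu still selects
   the default tuning.  */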
3031 void
3032 arm_configure_build_target (struct arm_build_target *target,
3033 struct cl_target_option *opts,
3034 struct gcc_options *opts_set,
3035 bool warn_compatible)
3036 {
3037 const struct processors *arm_selected_tune = NULL;
3038 const struct processors *arm_selected_arch = NULL;
3039 const struct processors *arm_selected_cpu = NULL;
3040 const struct arm_fpu_desc *arm_selected_fpu = NULL;
3041
3042 bitmap_clear (target->isa);
3043 target->core_name = NULL;
3044 target->arch_name = NULL;
3045
3046 if (opts_set->x_arm_arch_option)
3047 arm_selected_arch = &all_architectures[opts->x_arm_arch_option];
3048
3049 if (opts_set->x_arm_cpu_option)
3050 {
3051 arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
3052 arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
3053 }
3054
3055 if (opts_set->x_arm_tune_option)
3056 arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];
3057
3058 if (arm_selected_arch)
3059 {
3060 arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);
3061
3062 if (arm_selected_cpu)
3063 {
3064 auto_sbitmap cpu_isa (isa_num_bits);
3065
3066 arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
3067 bitmap_xor (cpu_isa, cpu_isa, target->isa);
3068 /* Ignore any bits that are quirk bits. */
3069 bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
3070 /* Ignore (for now) any bits that might be set by -mfpu. */
3071 bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);
3072
3073 if (!bitmap_empty_p (cpu_isa))
3074 {
3075 if (warn_compatible)
3076 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3077 arm_selected_cpu->name, arm_selected_arch->name);
3078 /* -march wins for code generation.
3079 -mcpu wins for default tuning. */
3080 if (!arm_selected_tune)
3081 arm_selected_tune = arm_selected_cpu;
3082
3083 arm_selected_cpu = arm_selected_arch;
3084 target->arch_name = arm_selected_arch->name;
3085 }
3086 else
3087 {
3088 /* Architecture and CPU are essentially the same.
3089 Prefer the CPU setting. */
3090 arm_selected_arch = NULL;
3091 target->core_name = arm_selected_cpu->name;
3092 }
3093 }
3094 else
3095 {
3096 /* Pick a CPU based on the architecture. */
3097 arm_selected_cpu = arm_selected_arch;
3098 target->arch_name = arm_selected_arch->name;
3099 /* Note: target->core_name is left unset in this path. */
3100 }
3101 }
3102 else if (arm_selected_cpu)
3103 {
3104 target->core_name = arm_selected_cpu->name;
3105 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3106 }
3107 /* If the user did not specify a processor, choose one for them. */
3108 else
3109 {
3110 const struct processors * sel;
3111 auto_sbitmap sought_isa (isa_num_bits);
3112 bitmap_clear (sought_isa);
3113 auto_sbitmap default_isa (isa_num_bits);
3114
3115 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3116 gcc_assert (arm_selected_cpu->name);
3117
3118 /* RWE: All of the selection logic below (to the end of this
3119 'if' clause) looks somewhat suspect. It appears to be mostly
3120 there to support forcing thumb support when the default CPU
3121 does not have thumb (somewhat dubious in terms of what the
3122 user might be expecting). I think it should be removed once
3123 support for the pre-thumb era cores is removed. */
3124 sel = arm_selected_cpu;
3125 arm_initialize_isa (default_isa, sel->isa_bits);
3126
3127 /* Now check to see if the user has specified any command line
3128 switches that require certain abilities from the cpu. */
3129
3130 if (TARGET_INTERWORK || TARGET_THUMB)
3131 {
3132 bitmap_set_bit (sought_isa, isa_bit_thumb);
3133 bitmap_set_bit (sought_isa, isa_bit_mode32);
3134
3135 /* There are no ARM processors that support both APCS-26 and
3136 interworking. Therefore we forcibly remove MODE26 from
3137 the isa features here (if it was set), so that the
3138 search below will always be able to find a compatible
3139 processor. */
3140 bitmap_clear_bit (default_isa, isa_bit_mode26);
3141 }
3142
3143 /* If there are such requirements and the default CPU does not
3144 satisfy them, we need to run over the complete list of
3145 cores looking for one that is satisfactory. */
3146 if (!bitmap_empty_p (sought_isa)
3147 && !bitmap_subset_p (sought_isa, default_isa))
3148 {
3149 auto_sbitmap candidate_isa (isa_num_bits);
3150 /* We're only interested in a CPU with at least the
3151 capabilities of the default CPU and the required
3152 additional features. */
3153 bitmap_ior (default_isa, default_isa, sought_isa);
3154
3155 /* Try to locate a CPU type that supports all of the abilities
3156 of the default CPU, plus the extra abilities requested by
3157 the user. */
3158 for (sel = all_cores; sel->name != NULL; sel++)
3159 {
3160 arm_initialize_isa (candidate_isa, sel->isa_bits);
3161 /* An exact match? */
3162 if (bitmap_equal_p (default_isa, candidate_isa))
3163 break;
3164 }
3165
3166 if (sel->name == NULL)
3167 {
3168 unsigned current_bit_count = isa_num_bits;
3169 const struct processors * best_fit = NULL;
3170
3171 /* Ideally we would like to issue an error message here
3172 saying that it was not possible to find a CPU compatible
3173 with the default CPU, but which also supports the command
3174 line options specified by the programmer, and so they
3175 ought to use the -mcpu=<name> command line option to
3176 override the default CPU type.
3177
3178 If we cannot find a CPU that has exactly the
3179 characteristics of the default CPU and the given
3180 command line options we scan the array again looking
3181 for a best match. The best match must have at least
3182 the capabilities of the perfect match. */
3183 for (sel = all_cores; sel->name != NULL; sel++)
3184 {
3185 arm_initialize_isa (candidate_isa, sel->isa_bits);
3186
3187 if (bitmap_subset_p (default_isa, candidate_isa))
3188 {
3189 unsigned count;
3190
3191 bitmap_and_compl (candidate_isa, candidate_isa,
3192 default_isa);
3193 count = bitmap_popcount (candidate_isa);
3194
3195 if (count < current_bit_count)
3196 {
3197 best_fit = sel;
3198 current_bit_count = count;
3199 }
3200 }
3201
3202 gcc_assert (best_fit);
3203 sel = best_fit;
3204 }
3205 }
3206 arm_selected_cpu = sel;
3207 }
3208
3209 /* Now we know the CPU, we can finally initialize the target
3210 structure. */
3211 target->core_name = arm_selected_cpu->name;
3212 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3213 }
3214
3215 gcc_assert (arm_selected_cpu);
3216
3217 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3218 {
3219 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3220 auto_sbitmap fpu_bits (isa_num_bits);
3221
3222 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3223 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3224 bitmap_ior (target->isa, target->isa, fpu_bits);
3225 }
3226 else if (target->core_name == NULL)
3227 /* To support this we need to be able to parse FPU feature options
3228 from the architecture string. */
3229 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3230
3231 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3232 if (!arm_selected_tune)
3233 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3234
3235 /* Finish initializing the target structure. */
3236 target->arch_pp_name = arm_selected_cpu->arch;
3237 target->base_arch = arm_selected_cpu->base_arch;
3238 target->arch_core = arm_selected_cpu->core;
3239
3240 target->tune_flags = arm_selected_tune->tune_flags;
3241 target->tune = arm_selected_tune->tune;
3242 target->tune_core = arm_selected_tune->core;
3243 }
3244
3245 /* Fix up any incompatible options that the user has specified. */
3246 static void
3247 arm_option_override (void)
3248 {
3249 static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
3250 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3251 cl_target_option opts;
3252
3253 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3254 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3255
3256 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3257 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3258
3259 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3260
3261 if (!global_options_set.x_arm_fpu_index)
3262 {
3263 const char *target_fpu_name;
3264 bool ok;
3265 int fpu_index;
3266
3267 #ifdef FPUTYPE_DEFAULT
3268 target_fpu_name = FPUTYPE_DEFAULT;
3269 #else
3270 target_fpu_name = "vfp";
3271 #endif
3272
3273 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
3274 CL_TARGET);
3275 gcc_assert (ok);
3276 arm_fpu_index = (enum fpu_type) fpu_index;
3277 }
3278
3279 cl_target_option_save (&opts, &global_options);
3280 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3281 true);
3282
3283 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3284 SUBTARGET_OVERRIDE_OPTIONS;
3285 #endif
3286
3287 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3288 arm_base_arch = arm_active_target.base_arch;
3289
3290 arm_tune = arm_active_target.tune_core;
3291 tune_flags = arm_active_target.tune_flags;
3292 current_tune = arm_active_target.tune;
3293
3294 /* TBD: Dwarf info for apcs frame is not handled yet. */
3295 if (TARGET_APCS_FRAME)
3296 flag_shrink_wrap = false;
3297
3298 /* BPABI targets use linker tricks to allow interworking on cores
3299 without thumb support. */
3300 if (TARGET_INTERWORK
3301 && !TARGET_BPABI
3302 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3303 {
3304 warning (0, "target CPU does not support interworking");
3305 target_flags &= ~MASK_INTERWORK;
3306 }
3307
3308 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3309 {
3310 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3311 target_flags |= MASK_APCS_FRAME;
3312 }
3313
3314 if (TARGET_POKE_FUNCTION_NAME)
3315 target_flags |= MASK_APCS_FRAME;
3316
3317 if (TARGET_APCS_REENT && flag_pic)
3318 error ("-fpic and -mapcs-reent are incompatible");
3319
3320 if (TARGET_APCS_REENT)
3321 warning (0, "APCS reentrant code not supported. Ignored");
3322
3323 /* Initialize boolean versions of the architectural flags, for use
3324 in the arm.md file. */
3325 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3326 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3327 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3328 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3329 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3330 arm_arch5te = arm_arch5e
3331 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3332 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3333 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3334 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3335 arm_arch6m = arm_arch6 && !arm_arch_notm;
3336 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3337 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3338 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3339 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3340 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3341 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3342 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3343 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3344 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3345 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3346 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3347 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3348 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3349 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3350 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3351 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3352 if (arm_fp16_inst)
3353 {
3354 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3355 error ("selected fp16 options are incompatible");
3356 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3357 }
3358
3359
3360 /* Set up some tuning parameters. */
3361 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3362 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3363 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3364 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3365 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3366 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3367
3368 /* And finally, set up some quirks. */
3369 arm_arch_no_volatile_ce
3370 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3371 arm_arch6kz
3372 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3373
3374 /* V5 code we generate is completely interworking capable, so we turn off
3375 TARGET_INTERWORK here to avoid many tests later on. */
3376
3377 /* XXX However, we must pass the right pre-processor defines to CPP
3378 or GLD can get confused. This is a hack. */
3379 if (TARGET_INTERWORK)
3380 arm_cpp_interwork = 1;
3381
3382 if (arm_arch5)
3383 target_flags &= ~MASK_INTERWORK;
3384
3385 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3386 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3387
3388 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3389 error ("iwmmxt abi requires an iwmmxt capable cpu");
3390
3391 /* If soft-float is specified then don't use FPU. */
3392 if (TARGET_SOFT_FLOAT)
3393 arm_fpu_attr = FPU_NONE;
3394 else
3395 arm_fpu_attr = FPU_VFP;
3396
3397 if (TARGET_AAPCS_BASED)
3398 {
3399 if (TARGET_CALLER_INTERWORKING)
3400 error ("AAPCS does not support -mcaller-super-interworking");
3401 else
3402 if (TARGET_CALLEE_INTERWORKING)
3403 error ("AAPCS does not support -mcallee-super-interworking");
3404 }
3405
3406 /* __fp16 support currently assumes the core has ldrh. */
3407 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3408 sorry ("__fp16 and no ldrh");
3409
3410 if (TARGET_AAPCS_BASED)
3411 {
3412 if (arm_abi == ARM_ABI_IWMMXT)
3413 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3414 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3415 && TARGET_HARD_FLOAT)
3416 {
3417 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3418 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3419 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3420 }
3421 else
3422 arm_pcs_default = ARM_PCS_AAPCS;
3423 }
3424 else
3425 {
3426 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3427 sorry ("-mfloat-abi=hard and VFP");
3428
3429 if (arm_abi == ARM_ABI_APCS)
3430 arm_pcs_default = ARM_PCS_APCS;
3431 else
3432 arm_pcs_default = ARM_PCS_ATPCS;
3433 }
3434
3435 /* For arm2/3 there is no need to do any scheduling if we are doing
3436 software floating-point. */
3437 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3438 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3439
3440 /* Use the cp15 method if it is available. */
3441 if (target_thread_pointer == TP_AUTO)
3442 {
3443 if (arm_arch6k && !TARGET_THUMB1)
3444 target_thread_pointer = TP_CP15;
3445 else
3446 target_thread_pointer = TP_SOFT;
3447 }
3448
3449 /* Override the default structure alignment for AAPCS ABI. */
3450 if (!global_options_set.x_arm_structure_size_boundary)
3451 {
3452 if (TARGET_AAPCS_BASED)
3453 arm_structure_size_boundary = 8;
3454 }
3455 else
3456 {
3457 if (arm_structure_size_boundary != 8
3458 && arm_structure_size_boundary != 32
3459 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3460 {
3461 if (ARM_DOUBLEWORD_ALIGN)
3462 warning (0,
3463 "structure size boundary can only be set to 8, 32 or 64");
3464 else
3465 warning (0, "structure size boundary can only be set to 8 or 32");
3466 arm_structure_size_boundary
3467 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3468 }
3469 }
3470
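/* Illustrative example (editor's sketch, hypothetical user code): with the
   AAPCS default boundary of 8, a structure such as

       struct s { char c; };

   has sizeof (struct s) == 1, whereas under the old 32-bit APCS default the
   same structure is padded out to sizeof (struct s) == 4.  */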
3471 if (TARGET_VXWORKS_RTP)
3472 {
3473 if (!global_options_set.x_arm_pic_data_is_text_relative)
3474 arm_pic_data_is_text_relative = 0;
3475 }
3476 else if (flag_pic
3477 && !arm_pic_data_is_text_relative
3478 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3479 /* When text & data segments don't have a fixed displacement, the
3480 intended use is with a single, read only, pic base register.
3481 Unless the user explicitly requested not to do that, set
3482 it. */
3483 target_flags |= MASK_SINGLE_PIC_BASE;
3484
3485 /* If stack checking is disabled, we can use r10 as the PIC register,
3486 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3487 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3488 {
3489 if (TARGET_VXWORKS_RTP)
3490 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3491 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3492 }
3493
3494 if (flag_pic && TARGET_VXWORKS_RTP)
3495 arm_pic_register = 9;
3496
3497 if (arm_pic_register_string != NULL)
3498 {
3499 int pic_register = decode_reg_name (arm_pic_register_string);
3500
3501 if (!flag_pic)
3502 warning (0, "-mpic-register= is useless without -fpic");
3503
3504 /* Prevent the user from choosing an obviously stupid PIC register. */
3505 else if (pic_register < 0 || call_used_regs[pic_register]
3506 || pic_register == HARD_FRAME_POINTER_REGNUM
3507 || pic_register == STACK_POINTER_REGNUM
3508 || pic_register >= PC_REGNUM
3509 || (TARGET_VXWORKS_RTP
3510 && (unsigned int) pic_register != arm_pic_register))
3511 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3512 else
3513 arm_pic_register = pic_register;
3514 }
3515
3516 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3517 if (fix_cm3_ldrd == 2)
3518 {
3519 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3520 fix_cm3_ldrd = 1;
3521 else
3522 fix_cm3_ldrd = 0;
3523 }
3524
3525 /* Hot/Cold partitioning is not currently supported, since we can't
3526 handle literal pool placement in that case. */
3527 if (flag_reorder_blocks_and_partition)
3528 {
3529 inform (input_location,
3530 "-freorder-blocks-and-partition not supported on this architecture");
3531 flag_reorder_blocks_and_partition = 0;
3532 flag_reorder_blocks = 1;
3533 }
3534
3535 if (flag_pic)
3536 /* Hoisting PIC address calculations more aggressively provides a small,
3537 but measurable, size reduction for PIC code. Therefore, we decrease
3538 the bar for unrestricted expression hoisting to the cost of PIC address
3539 calculation, which is 2 instructions. */
3540 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3541 global_options.x_param_values,
3542 global_options_set.x_param_values);
3543
3544 /* ARM EABI defaults to strict volatile bitfields. */
3545 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3546 && abi_version_at_least(2))
3547 flag_strict_volatile_bitfields = 1;
3548
3549 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3550 have deemed it beneficial (signified by setting
3551 prefetch.num_slots to 1 or more). */
3552 if (flag_prefetch_loop_arrays < 0
3553 && HAVE_prefetch
3554 && optimize >= 3
3555 && current_tune->prefetch.num_slots > 0)
3556 flag_prefetch_loop_arrays = 1;
3557
3558 /* Set up parameters to be used in prefetching algorithm. Do not
3559 override the defaults unless we are tuning for a core we have
3560 researched values for. */
3561 if (current_tune->prefetch.num_slots > 0)
3562 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3563 current_tune->prefetch.num_slots,
3564 global_options.x_param_values,
3565 global_options_set.x_param_values);
3566 if (current_tune->prefetch.l1_cache_line_size >= 0)
3567 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3568 current_tune->prefetch.l1_cache_line_size,
3569 global_options.x_param_values,
3570 global_options_set.x_param_values);
3571 if (current_tune->prefetch.l1_cache_size >= 0)
3572 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3573 current_tune->prefetch.l1_cache_size,
3574 global_options.x_param_values,
3575 global_options_set.x_param_values);
3576
3577 /* Use Neon to perform 64-bit operations rather than core
3578 registers. */
3579 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3580 if (use_neon_for_64bits == 1)
3581 prefer_neon_for_64bits = true;
3582
3583 /* Use the alternative scheduling-pressure algorithm by default. */
3584 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3585 global_options.x_param_values,
3586 global_options_set.x_param_values);
3587
3588 /* Look through ready list and all of queue for instructions
3589 relevant for L2 auto-prefetcher. */
3590 int param_sched_autopref_queue_depth;
3591
3592 switch (current_tune->sched_autopref)
3593 {
3594 case tune_params::SCHED_AUTOPREF_OFF:
3595 param_sched_autopref_queue_depth = -1;
3596 break;
3597
3598 case tune_params::SCHED_AUTOPREF_RANK:
3599 param_sched_autopref_queue_depth = 0;
3600 break;
3601
3602 case tune_params::SCHED_AUTOPREF_FULL:
3603 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3604 break;
3605
3606 default:
3607 gcc_unreachable ();
3608 }
3609
3610 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3611 param_sched_autopref_queue_depth,
3612 global_options.x_param_values,
3613 global_options_set.x_param_values);
3614
3615 /* Currently, for slow flash data, we just disable literal pools. We also
3616 disable them for pure-code. */
3617 if (target_slow_flash_data || target_pure_code)
3618 arm_disable_literal_pool = true;
3619
3620 if (use_cmse && !arm_arch_cmse)
3621 error ("target CPU does not support ARMv8-M Security Extensions");
3622
3623 /* Disable scheduling fusion by default if it's not an armv7 processor
3624 or it doesn't prefer ldrd/strd. */
3625 if (flag_schedule_fusion == 2
3626 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3627 flag_schedule_fusion = 0;
3628
3629 /* Need to remember initial options before they are overridden. */
3630 init_optimize = build_optimization_node (&global_options);
3631
3632 arm_option_override_internal (&global_options, &global_options_set);
3633 arm_option_check_internal (&global_options);
3634 arm_option_params_internal ();
3635
3636 /* Create the default target_options structure. */
3637 target_option_default_node = target_option_current_node
3638 = build_target_option_node (&global_options);
3639
3640 /* Register global variables with the garbage collector. */
3641 arm_add_gc_roots ();
3642
3643 /* Init initial mode for testing. */
3644 thumb_flipper = TARGET_THUMB;
3645 }
3646
3647 static void
3648 arm_add_gc_roots (void)
3649 {
3650 gcc_obstack_init(&minipool_obstack);
3651 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3652 }
3653 \f
3654 /* A table of known ARM exception types.
3655 For use with the interrupt function attribute. */
3656
3657 typedef struct
3658 {
3659 const char *const arg;
3660 const unsigned long return_value;
3661 }
3662 isr_attribute_arg;
3663
3664 static const isr_attribute_arg isr_attribute_args [] =
3665 {
3666 { "IRQ", ARM_FT_ISR },
3667 { "irq", ARM_FT_ISR },
3668 { "FIQ", ARM_FT_FIQ },
3669 { "fiq", ARM_FT_FIQ },
3670 { "ABORT", ARM_FT_ISR },
3671 { "abort", ARM_FT_ISR },
3672 { "ABORT", ARM_FT_ISR },
3673 { "abort", ARM_FT_ISR },
3674 { "UNDEF", ARM_FT_EXCEPTION },
3675 { "undef", ARM_FT_EXCEPTION },
3676 { "SWI", ARM_FT_EXCEPTION },
3677 { "swi", ARM_FT_EXCEPTION },
3678 { NULL, ARM_FT_NORMAL }
3679 };
3680
3681 /* Returns the (interrupt) function type of the current
3682 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3683
3684 static unsigned long
3685 arm_isr_value (tree argument)
3686 {
3687 const isr_attribute_arg * ptr;
3688 const char * arg;
3689
3690 if (!arm_arch_notm)
3691 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3692
3693 /* No argument - default to IRQ. */
3694 if (argument == NULL_TREE)
3695 return ARM_FT_ISR;
3696
3697 /* Get the value of the argument. */
3698 if (TREE_VALUE (argument) == NULL_TREE
3699 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3700 return ARM_FT_UNKNOWN;
3701
3702 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3703
3704 /* Check it against the list of known arguments. */
3705 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3706 if (streq (arg, ptr->arg))
3707 return ptr->return_value;
3708
3709 /* An unrecognized interrupt type. */
3710 return ARM_FT_UNKNOWN;
3711 }
3712
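/* Illustrative example (hypothetical user code): the strings in
   isr_attribute_args come from declarations such as

       void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));

   arm_isr_value maps "FIQ" to ARM_FT_FIQ, which makes the prologue and
   epilogue preserve every register the handler touches (not just the ABI's
   callee-saved set) and return with an exception-return sequence instead of
   a plain function return.  */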
3713 /* Computes the type of the current function. */
3714
3715 static unsigned long
3716 arm_compute_func_type (void)
3717 {
3718 unsigned long type = ARM_FT_UNKNOWN;
3719 tree a;
3720 tree attr;
3721
3722 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3723
3724 /* Decide if the current function is volatile. Such functions
3725 never return, and many memory cycles can be saved by not storing
3726 register values that will never be needed again. This optimization
3727 was added to speed up context switching in a kernel application. */
3728 if (optimize > 0
3729 && (TREE_NOTHROW (current_function_decl)
3730 || !(flag_unwind_tables
3731 || (flag_exceptions
3732 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3733 && TREE_THIS_VOLATILE (current_function_decl))
3734 type |= ARM_FT_VOLATILE;
3735
3736 if (cfun->static_chain_decl != NULL)
3737 type |= ARM_FT_NESTED;
3738
3739 attr = DECL_ATTRIBUTES (current_function_decl);
3740
3741 a = lookup_attribute ("naked", attr);
3742 if (a != NULL_TREE)
3743 type |= ARM_FT_NAKED;
3744
3745 a = lookup_attribute ("isr", attr);
3746 if (a == NULL_TREE)
3747 a = lookup_attribute ("interrupt", attr);
3748
3749 if (a == NULL_TREE)
3750 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3751 else
3752 type |= arm_isr_value (TREE_VALUE (a));
3753
3754 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3755 type |= ARM_FT_CMSE_ENTRY;
3756
3757 return type;
3758 }
3759
3760 /* Returns the type of the current function. */
3761
3762 unsigned long
3763 arm_current_func_type (void)
3764 {
3765 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3766 cfun->machine->func_type = arm_compute_func_type ();
3767
3768 return cfun->machine->func_type;
3769 }
3770
3771 bool
3772 arm_allocate_stack_slots_for_args (void)
3773 {
3774 /* Naked functions should not allocate stack slots for arguments. */
3775 return !IS_NAKED (arm_current_func_type ());
3776 }
3777
3778 static bool
3779 arm_warn_func_return (tree decl)
3780 {
3781 /* Naked functions are implemented entirely in assembly, including the
3782 return sequence, so suppress warnings about this. */
3783 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3784 }
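/* Illustrative example (hypothetical user code): a naked function provides
   its own prologue, epilogue and return sequence, which is why no argument
   stack slots are allocated for it and why the missing-return warning is
   suppressed above.

       void __attribute__ ((naked))
       reset_handler (void)
       {
         __asm__ ("ldr  sp, =0x20001000\n\t"
                  "b    main");
       }

   The body is expected to contain basic asm only; the handler name and the
   initial stack value are made up for the example.  */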
3785
3786 \f
3787 /* Output assembler code for a block containing the constant parts
3788 of a trampoline, leaving space for the variable parts.
3789
3790 On the ARM, (if r8 is the static chain regnum, and remembering that
3791 referencing pc adds an offset of 8) the trampoline looks like:
3792 ldr r8, [pc, #0]
3793 ldr pc, [pc]
3794 .word static chain value
3795 .word function's address
3796 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3797
3798 static void
3799 arm_asm_trampoline_template (FILE *f)
3800 {
3801 fprintf (f, "\t.syntax unified\n");
3802
3803 if (TARGET_ARM)
3804 {
3805 fprintf (f, "\t.arm\n");
3806 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3807 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3808 }
3809 else if (TARGET_THUMB2)
3810 {
3811 fprintf (f, "\t.thumb\n");
3812 /* The Thumb-2 trampoline is similar to the arm implementation.
3813 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3814 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3815 STATIC_CHAIN_REGNUM, PC_REGNUM);
3816 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3817 }
3818 else
3819 {
3820 ASM_OUTPUT_ALIGN (f, 2);
3821 fprintf (f, "\t.code\t16\n");
3822 fprintf (f, ".Ltrampoline_start:\n");
3823 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3824 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3825 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3826 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3827 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3828 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3829 }
3830 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3831 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3832 }
3833
3834 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3835
3836 static void
3837 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3838 {
3839 rtx fnaddr, mem, a_tramp;
3840
3841 emit_block_move (m_tramp, assemble_trampoline_template (),
3842 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3843
3844 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3845 emit_move_insn (mem, chain_value);
3846
3847 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3848 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3849 emit_move_insn (mem, fnaddr);
3850
3851 a_tramp = XEXP (m_tramp, 0);
3852 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3853 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3854 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3855 }
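/* Illustrative example (hypothetical user code): a trampoline is only built
   when the address of a nested function (a GNU C extension) that uses its
   enclosing frame escapes, e.g.

       extern int apply (int (*fn) (int));

       int outer (int x)
       {
         int inner (int y) { return x + y; }
         return apply (inner);
       }

   Passing inner's address makes GCC materialize the template above on the
   stack, fill in the static chain value and inner's address, and call
   __clear_cache over the region so the freshly written code is executable.  */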
3856
3857 /* Thumb trampolines should be entered in thumb mode, so set
3858 the bottom bit of the address. */
3859
3860 static rtx
3861 arm_trampoline_adjust_address (rtx addr)
3862 {
3863 if (TARGET_THUMB)
3864 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3865 NULL, 0, OPTAB_LIB_WIDEN);
3866 return addr;
3867 }
3868 \f
3869 /* Return 1 if it is possible to return using a single instruction.
3870 If SIBLING is non-null, this is a test for a return before a sibling
3871 call. SIBLING is the call insn, so we can examine its register usage. */
3872
3873 int
3874 use_return_insn (int iscond, rtx sibling)
3875 {
3876 int regno;
3877 unsigned int func_type;
3878 unsigned long saved_int_regs;
3879 unsigned HOST_WIDE_INT stack_adjust;
3880 arm_stack_offsets *offsets;
3881
3882 /* Never use a return instruction before reload has run. */
3883 if (!reload_completed)
3884 return 0;
3885
3886 func_type = arm_current_func_type ();
3887
3888 /* Naked, volatile and stack alignment functions need special
3889 consideration. */
3890 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3891 return 0;
3892
3893 /* So do interrupt functions that use the frame pointer and Thumb
3894 interrupt functions. */
3895 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3896 return 0;
3897
3898 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3899 && !optimize_function_for_size_p (cfun))
3900 return 0;
3901
3902 offsets = arm_get_frame_offsets ();
3903 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3904
3905 /* As do variadic functions. */
3906 if (crtl->args.pretend_args_size
3907 || cfun->machine->uses_anonymous_args
3908 /* Or if the function calls __builtin_eh_return () */
3909 || crtl->calls_eh_return
3910 /* Or if the function calls alloca */
3911 || cfun->calls_alloca
3912 /* Or if there is a stack adjustment. However, if the stack pointer
3913 is saved on the stack, we can use a pre-incrementing stack load. */
3914 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3915 && stack_adjust == 4))
3916 /* Or if the static chain register was saved above the frame, under the
3917 assumption that the stack pointer isn't saved on the stack. */
3918 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3919 && arm_compute_static_chain_stack_bytes() != 0))
3920 return 0;
3921
3922 saved_int_regs = offsets->saved_regs_mask;
3923
3924 /* Unfortunately, the insn
3925
3926 ldmib sp, {..., sp, ...}
3927
3928 triggers a bug on most SA-110 based devices, such that the stack
3929 pointer won't be correctly restored if the instruction takes a
3930 page fault. We work around this problem by popping r3 along with
3931 the other registers, since that is never slower than executing
3932 another instruction.
3933
3934 We test for !arm_arch5 here, because code for any architecture
3935 less than this could potentially be run on one of the buggy
3936 chips. */
3937 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3938 {
3939 /* Validate that r3 is a call-clobbered register (always true in
3940 the default abi) ... */
3941 if (!call_used_regs[3])
3942 return 0;
3943
3944 /* ... that it isn't being used for a return value ... */
3945 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3946 return 0;
3947
3948 /* ... or for a tail-call argument ... */
3949 if (sibling)
3950 {
3951 gcc_assert (CALL_P (sibling));
3952
3953 if (find_regno_fusage (sibling, USE, 3))
3954 return 0;
3955 }
3956
3957 /* ... and that there are no call-saved registers in r0-r2
3958 (always true in the default ABI). */
3959 if (saved_int_regs & 0x7)
3960 return 0;
3961 }
3962
3963 /* Can't be done if interworking with Thumb, and any registers have been
3964 stacked. */
3965 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3966 return 0;
3967
3968 /* On StrongARM, conditional returns are expensive if they aren't
3969 taken and multiple registers have been stacked. */
3970 if (iscond && arm_tune_strongarm)
3971 {
3972 /* Conditional return when just the LR is stored is a simple
3973 conditional-load instruction, that's not expensive. */
3974 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3975 return 0;
3976
3977 if (flag_pic
3978 && arm_pic_register != INVALID_REGNUM
3979 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3980 return 0;
3981 }
3982
3983 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
3984 several instructions if anything needs to be popped. */
3985 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
3986 return 0;
3987
3988 /* If there are saved registers but the LR isn't saved, then we need
3989 two instructions for the return. */
3990 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3991 return 0;
3992
3993 /* Can't be done if any of the VFP regs are pushed,
3994 since this also requires an insn. */
3995 if (TARGET_HARD_FLOAT)
3996 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3997 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3998 return 0;
3999
4000 if (TARGET_REALLY_IWMMXT)
4001 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4002 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4003 return 0;
4004
4005 return 1;
4006 }
4007
4008 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4009 shrink-wrapping if possible. This is the case if we need to emit a
4010 prologue, which we can test by looking at the offsets. */
4011 bool
4012 use_simple_return_p (void)
4013 {
4014 arm_stack_offsets *offsets;
4015
4016 /* Note this function can be called before or after reload. */
4017 if (!reload_completed)
4018 arm_compute_frame_layout ();
4019
4020 offsets = arm_get_frame_offsets ();
4021 return offsets->outgoing_args != 0;
4022 }
4023
4024 /* Return TRUE if int I is a valid immediate ARM constant. */
4025
4026 int
4027 const_ok_for_arm (HOST_WIDE_INT i)
4028 {
4029 int lowbit;
4030
4031 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4032 be all zero, or all one. */
4033 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4034 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4035 != ((~(unsigned HOST_WIDE_INT) 0)
4036 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4037 return FALSE;
4038
4039 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4040
4041 /* Fast return for 0 and small values. We must do this for zero, since
4042 the code below can't handle that one case. */
4043 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4044 return TRUE;
4045
4046 /* Get the number of trailing zeros. */
4047 lowbit = ffs((int) i) - 1;
4048
4049 /* Only even shifts are allowed in ARM mode so round down to the
4050 nearest even number. */
4051 if (TARGET_ARM)
4052 lowbit &= ~1;
4053
4054 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4055 return TRUE;
4056
4057 if (TARGET_ARM)
4058 {
4059 /* Allow rotated constants in ARM mode. */
4060 if (lowbit <= 4
4061 && ((i & ~0xc000003f) == 0
4062 || (i & ~0xf000000f) == 0
4063 || (i & ~0xfc000003) == 0))
4064 return TRUE;
4065 }
4066 else if (TARGET_THUMB2)
4067 {
4068 HOST_WIDE_INT v;
4069
4070 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4071 v = i & 0xff;
4072 v |= v << 16;
4073 if (i == v || i == (v | (v << 8)))
4074 return TRUE;
4075
4076 /* Allow repeated pattern 0xXY00XY00. */
4077 v = i & 0xff00;
4078 v |= v << 16;
4079 if (i == v)
4080 return TRUE;
4081 }
4082 else if (TARGET_HAVE_MOVT)
4083 {
4084 /* Thumb-1 Targets with MOVT. */
4085 if (i > 0xffff)
4086 return FALSE;
4087 else
4088 return TRUE;
4089 }
4090
4091 return FALSE;
4092 }
4093
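/* Worked examples (editor's illustration) of the ARM-mode rule above, i.e.
   an 8-bit value rotated right by an even amount:

       0x000000ff   valid    (8-bit value, no rotation)
       0x000003fc   valid    (0xff << 2, even rotation)
       0xff000000   valid    (0xff rotated right by 8)
       0xf000000f   valid    (0xff rotated right by 4, wrapping around)
       0x00000101   invalid  (the set bits span 9 bit positions)
       0x01020304   invalid  (cannot fit in one rotated byte)

   Thumb-2 additionally accepts replicated patterns such as 0x00ff00ff or
   0xab00ab00, as handled above.  Constants that are not encodable must be
   synthesized by arm_split_constant / arm_gen_constant below, or loaded
   from the literal pool.  */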
4094 /* Return true if I is a valid constant for the operation CODE. */
4095 int
4096 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4097 {
4098 if (const_ok_for_arm (i))
4099 return 1;
4100
4101 switch (code)
4102 {
4103 case SET:
4104 /* See if we can use movw. */
4105 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4106 return 1;
4107 else
4108 /* Otherwise, try mvn. */
4109 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4110
4111 case PLUS:
4112 /* See if we can use addw or subw. */
4113 if (TARGET_THUMB2
4114 && ((i & 0xfffff000) == 0
4115 || ((-i) & 0xfffff000) == 0))
4116 return 1;
4117 /* Fall through. */
4118 case COMPARE:
4119 case EQ:
4120 case NE:
4121 case GT:
4122 case LE:
4123 case LT:
4124 case GE:
4125 case GEU:
4126 case LTU:
4127 case GTU:
4128 case LEU:
4129 case UNORDERED:
4130 case ORDERED:
4131 case UNEQ:
4132 case UNGE:
4133 case UNLT:
4134 case UNGT:
4135 case UNLE:
4136 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4137
4138 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4139 case XOR:
4140 return 0;
4141
4142 case IOR:
4143 if (TARGET_THUMB2)
4144 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4145 return 0;
4146
4147 case AND:
4148 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4149
4150 default:
4151 gcc_unreachable ();
4152 }
4153 }
4154
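/* Illustrative examples (editor's sketch) of the per-operation tweaks above,
   written as hypothetical user expressions together with the single
   instruction one would typically expect in ARM mode:

       x + 0xffffff01u      add of -255, negated to     sub  rD, rN, #255
       x & 0xffffff00u      AND, inverted to            bic  rD, rN, #255
       x = 0xffffff00u      SET, inverted to            mvn  rD, #255

   In each case the raw constant is not a valid immediate, but its negation
   or complement is, so one instruction still suffices.  */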
4155 /* Return true if I is a valid di mode constant for the operation CODE. */
4156 int
4157 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4158 {
4159 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4160 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4161 rtx hi = GEN_INT (hi_val);
4162 rtx lo = GEN_INT (lo_val);
4163
4164 if (TARGET_THUMB1)
4165 return 0;
4166
4167 switch (code)
4168 {
4169 case AND:
4170 case IOR:
4171 case XOR:
4172 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4173 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4174 case PLUS:
4175 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4176
4177 default:
4178 return 0;
4179 }
4180 }
4181
4182 /* Emit a sequence of insns to handle a large constant.
4183 CODE is the code of the operation required, it can be any of SET, PLUS,
4184 IOR, AND, XOR, MINUS;
4185 MODE is the mode in which the operation is being performed;
4186 VAL is the integer to operate on;
4187 SOURCE is the other operand (a register, or a null-pointer for SET);
4188 SUBTARGETS means it is safe to create scratch registers if that will
4189 either produce a simpler sequence, or we will want to cse the values.
4190 Return value is the number of insns emitted. */
4191
4192 /* ??? Tweak this for thumb2. */
4193 int
4194 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4195 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4196 {
4197 rtx cond;
4198
4199 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4200 cond = COND_EXEC_TEST (PATTERN (insn));
4201 else
4202 cond = NULL_RTX;
4203
4204 if (subtargets || code == SET
4205 || (REG_P (target) && REG_P (source)
4206 && REGNO (target) != REGNO (source)))
4207 {
4208 /* After arm_reorg has been called, we can't fix up expensive
4209 constants by pushing them into memory so we must synthesize
4210 them in-line, regardless of the cost. This is only likely to
4211 be more costly on chips that have load delay slots and we are
4212 compiling without running the scheduler (so no splitting
4213 occurred before the final instruction emission).
4214
4215 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4216 */
4217 if (!cfun->machine->after_arm_reorg
4218 && !cond
4219 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4220 1, 0)
4221 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4222 + (code != SET))))
4223 {
4224 if (code == SET)
4225 {
4226 /* Currently SET is the only monadic value for CODE, all
4227 the rest are dyadic. */
4228 if (TARGET_USE_MOVT)
4229 arm_emit_movpair (target, GEN_INT (val));
4230 else
4231 emit_set_insn (target, GEN_INT (val));
4232
4233 return 1;
4234 }
4235 else
4236 {
4237 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4238
4239 if (TARGET_USE_MOVT)
4240 arm_emit_movpair (temp, GEN_INT (val));
4241 else
4242 emit_set_insn (temp, GEN_INT (val));
4243
4244 /* For MINUS, the value is subtracted from, since we never
4245 have subtraction of a constant. */
4246 if (code == MINUS)
4247 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4248 else
4249 emit_set_insn (target,
4250 gen_rtx_fmt_ee (code, mode, source, temp));
4251 return 2;
4252 }
4253 }
4254 }
4255
4256 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4257 1);
4258 }
4259
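/* Illustrative example (editor's sketch): for a plain assignment such as

       unsigned int f (void) { return 0xe0000100u; }

   0xe0000100 is not a single valid immediate, so on a target with MOVW/MOVT
   one would typically expect

       movw    r0, #0x0100
       movt    r0, #0xe000

   while older ARM cores get a two-instruction synthesis along the lines of

       mov     r0, #0xe0000000
       orr     r0, r0, #0x100

   which is the shape the comment in optimal_immediate_sequence below aims
   for.  */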
4260 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4261 ARM/THUMB2 immediates and add up to VAL.
4262 The function return value gives the number of insns required. */
4263 static int
4264 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4265 struct four_ints *return_sequence)
4266 {
4267 int best_consecutive_zeros = 0;
4268 int i;
4269 int best_start = 0;
4270 int insns1, insns2;
4271 struct four_ints tmp_sequence;
4272
4273 /* If we aren't targeting ARM, the best place to start is always at
4274 the bottom, otherwise look more closely. */
4275 if (TARGET_ARM)
4276 {
4277 for (i = 0; i < 32; i += 2)
4278 {
4279 int consecutive_zeros = 0;
4280
4281 if (!(val & (3 << i)))
4282 {
4283 while ((i < 32) && !(val & (3 << i)))
4284 {
4285 consecutive_zeros += 2;
4286 i += 2;
4287 }
4288 if (consecutive_zeros > best_consecutive_zeros)
4289 {
4290 best_consecutive_zeros = consecutive_zeros;
4291 best_start = i - consecutive_zeros;
4292 }
4293 i -= 2;
4294 }
4295 }
4296 }
4297
4298 /* So long as it won't require any more insns to do so, it's
4299 desirable to emit a small constant (in bits 0...9) in the last
4300 insn. This way there is more chance that it can be combined with
4301 a later addressing insn to form a pre-indexed load or store
4302 operation. Consider:
4303
4304 *((volatile int *)0xe0000100) = 1;
4305 *((volatile int *)0xe0000110) = 2;
4306
4307 We want this to wind up as:
4308
4309 mov rA, #0xe0000000
4310 mov rB, #1
4311 str rB, [rA, #0x100]
4312 mov rB, #2
4313 str rB, [rA, #0x110]
4314
4315 rather than having to synthesize both large constants from scratch.
4316
4317 Therefore, we calculate how many insns would be required to emit
4318 the constant starting from `best_start', and also starting from
4319 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4320 yield a shorter sequence, we may as well use zero. */
4321 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4322 if (best_start != 0
4323 && ((HOST_WIDE_INT_1U << best_start) < val))
4324 {
4325 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4326 if (insns2 <= insns1)
4327 {
4328 *return_sequence = tmp_sequence;
4329 insns1 = insns2;
4330 }
4331 }
4332
4333 return insns1;
4334 }
4335
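/* Editor's sketch (simplified, ARM mode only, ignoring the best-start search
   above and the wrap-around / replicated-constant handling in
   optimal_immediate_sequence_1 below): the heart of the split is a greedy
   walk that peels off one 8-bit, evenly rotated chunk at a time.

       int count_chunks (unsigned int val)
       {
         int n = 0;
         while (val)
           {
             int low = __builtin_ctz (val) & ~1;   // even rotations only
             val &= ~(0xffu << low);               // clear one 8-bit chunk
             n++;
           }
         return n;
       }

   For example 0x00450078 peels into 0x450000 and 0x78, i.e. a MOV plus an
   ORR for a SET, and the caller (arm_gen_constant, below) also evaluates the
   negated and inverted forms before committing to a sequence.  */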
4336 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4337 static int
4338 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4339 struct four_ints *return_sequence, int i)
4340 {
4341 int remainder = val & 0xffffffff;
4342 int insns = 0;
4343
4344 /* Try and find a way of doing the job in either two or three
4345 instructions.
4346
4347 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4348 location. We start at position I. This may be the MSB, or
4349 optimal_immediate_sequence may have positioned it at the largest block
4350 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4351 wrapping around to the top of the word when we drop off the bottom.
4352 In the worst case this code should produce no more than four insns.
4353
4354 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4355 constants, shifted to any arbitrary location. We should always start
4356 at the MSB. */
4357 do
4358 {
4359 int end;
4360 unsigned int b1, b2, b3, b4;
4361 unsigned HOST_WIDE_INT result;
4362 int loc;
4363
4364 gcc_assert (insns < 4);
4365
4366 if (i <= 0)
4367 i += 32;
4368
4369 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4370 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4371 {
4372 loc = i;
4373 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4374 /* We can use addw/subw for the last 12 bits. */
4375 result = remainder;
4376 else
4377 {
4378 /* Use an 8-bit shifted/rotated immediate. */
4379 end = i - 8;
4380 if (end < 0)
4381 end += 32;
4382 result = remainder & ((0x0ff << end)
4383 | ((i < end) ? (0xff >> (32 - end))
4384 : 0));
4385 i -= 8;
4386 }
4387 }
4388 else
4389 {
4390 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4391 arbitrary shifts. */
4392 i -= TARGET_ARM ? 2 : 1;
4393 continue;
4394 }
4395
4396 /* Next, see if we can do a better job with a thumb2 replicated
4397 constant.
4398
4399 We do it this way around to catch the cases like 0x01F001E0 where
4400 two 8-bit immediates would work, but a replicated constant would
4401 make it worse.
4402
4403 TODO: 16-bit constants that don't clear all the bits, but still win.
4404 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4405 if (TARGET_THUMB2)
4406 {
4407 b1 = (remainder & 0xff000000) >> 24;
4408 b2 = (remainder & 0x00ff0000) >> 16;
4409 b3 = (remainder & 0x0000ff00) >> 8;
4410 b4 = remainder & 0xff;
4411
4412 if (loc > 24)
4413 {
4414 /* The 8-bit immediate already found clears b1 (and maybe b2),
4415 but must leave b3 and b4 alone. */
4416
4417 /* First try to find a 32-bit replicated constant that clears
4418 almost everything. We can assume that we can't do it in one,
4419 or else we wouldn't be here. */
4420 unsigned int tmp = b1 & b2 & b3 & b4;
4421 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4422 + (tmp << 24);
4423 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4424 + (tmp == b3) + (tmp == b4);
4425 if (tmp
4426 && (matching_bytes >= 3
4427 || (matching_bytes == 2
4428 && const_ok_for_op (remainder & ~tmp2, code))))
4429 {
4430 /* At least 3 of the bytes match, and the fourth has at
4431 least as many bits set, or two of the bytes match
4432 and it will only require one more insn to finish. */
4433 result = tmp2;
4434 i = tmp != b1 ? 32
4435 : tmp != b2 ? 24
4436 : tmp != b3 ? 16
4437 : 8;
4438 }
4439
4440 /* Second, try to find a 16-bit replicated constant that can
4441 leave three of the bytes clear. If b2 or b4 is already
4442 zero, then we can. If the 8-bit from above would not
4443 clear b2 anyway, then we still win. */
4444 else if (b1 == b3 && (!b2 || !b4
4445 || (remainder & 0x00ff0000 & ~result)))
4446 {
4447 result = remainder & 0xff00ff00;
4448 i = 24;
4449 }
4450 }
4451 else if (loc > 16)
4452 {
4453 /* The 8-bit immediate already found clears b2 (and maybe b3)
4454 and we don't get here unless b1 is already clear, but it will
4455 leave b4 unchanged. */
4456
4457 /* If we can clear b2 and b4 at once, then we win, since the
4458 8-bits couldn't possibly reach that far. */
4459 if (b2 == b4)
4460 {
4461 result = remainder & 0x00ff00ff;
4462 i = 16;
4463 }
4464 }
4465 }
4466
4467 return_sequence->i[insns++] = result;
4468 remainder &= ~result;
4469
4470 if (code == SET || code == MINUS)
4471 code = PLUS;
4472 }
4473 while (remainder);
4474
4475 return insns;
4476 }
4477
4478 /* Emit an instruction with the indicated PATTERN. If COND is
4479 non-NULL, conditionalize the execution of the instruction on COND
4480 being true. */
4481
4482 static void
4483 emit_constant_insn (rtx cond, rtx pattern)
4484 {
4485 if (cond)
4486 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4487 emit_insn (pattern);
4488 }
4489
4490 /* As above, but extra parameter GENERATE which, if clear, suppresses
4491 RTL generation. */
4492
4493 static int
4494 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4495 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4496 int subtargets, int generate)
4497 {
4498 int can_invert = 0;
4499 int can_negate = 0;
4500 int final_invert = 0;
4501 int i;
4502 int set_sign_bit_copies = 0;
4503 int clear_sign_bit_copies = 0;
4504 int clear_zero_bit_copies = 0;
4505 int set_zero_bit_copies = 0;
4506 int insns = 0, neg_insns, inv_insns;
4507 unsigned HOST_WIDE_INT temp1, temp2;
4508 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4509 struct four_ints *immediates;
4510 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4511
4512 /* Find out which operations are safe for a given CODE. Also do a quick
4513 check for degenerate cases; these can occur when DImode operations
4514 are split. */
4515 switch (code)
4516 {
4517 case SET:
4518 can_invert = 1;
4519 break;
4520
4521 case PLUS:
4522 can_negate = 1;
4523 break;
4524
4525 case IOR:
4526 if (remainder == 0xffffffff)
4527 {
4528 if (generate)
4529 emit_constant_insn (cond,
4530 gen_rtx_SET (target,
4531 GEN_INT (ARM_SIGN_EXTEND (val))));
4532 return 1;
4533 }
4534
4535 if (remainder == 0)
4536 {
4537 if (reload_completed && rtx_equal_p (target, source))
4538 return 0;
4539
4540 if (generate)
4541 emit_constant_insn (cond, gen_rtx_SET (target, source));
4542 return 1;
4543 }
4544 break;
4545
4546 case AND:
4547 if (remainder == 0)
4548 {
4549 if (generate)
4550 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4551 return 1;
4552 }
4553 if (remainder == 0xffffffff)
4554 {
4555 if (reload_completed && rtx_equal_p (target, source))
4556 return 0;
4557 if (generate)
4558 emit_constant_insn (cond, gen_rtx_SET (target, source));
4559 return 1;
4560 }
4561 can_invert = 1;
4562 break;
4563
4564 case XOR:
4565 if (remainder == 0)
4566 {
4567 if (reload_completed && rtx_equal_p (target, source))
4568 return 0;
4569 if (generate)
4570 emit_constant_insn (cond, gen_rtx_SET (target, source));
4571 return 1;
4572 }
4573
4574 if (remainder == 0xffffffff)
4575 {
4576 if (generate)
4577 emit_constant_insn (cond,
4578 gen_rtx_SET (target,
4579 gen_rtx_NOT (mode, source)));
4580 return 1;
4581 }
4582 final_invert = 1;
4583 break;
4584
4585 case MINUS:
4586 /* We treat MINUS as (val - source), since (source - val) is always
4587 passed as (source + (-val)). */
4588 if (remainder == 0)
4589 {
4590 if (generate)
4591 emit_constant_insn (cond,
4592 gen_rtx_SET (target,
4593 gen_rtx_NEG (mode, source)));
4594 return 1;
4595 }
4596 if (const_ok_for_arm (val))
4597 {
4598 if (generate)
4599 emit_constant_insn (cond,
4600 gen_rtx_SET (target,
4601 gen_rtx_MINUS (mode, GEN_INT (val),
4602 source)));
4603 return 1;
4604 }
4605
4606 break;
4607
4608 default:
4609 gcc_unreachable ();
4610 }
4611
4612 /* If we can do it in one insn get out quickly. */
4613 if (const_ok_for_op (val, code))
4614 {
4615 if (generate)
4616 emit_constant_insn (cond,
4617 gen_rtx_SET (target,
4618 (source
4619 ? gen_rtx_fmt_ee (code, mode, source,
4620 GEN_INT (val))
4621 : GEN_INT (val))));
4622 return 1;
4623 }
4624
4625 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4626 insn. */
4627 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4628 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4629 {
4630 if (generate)
4631 {
4632 if (mode == SImode && i == 16)
4633 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4634 smaller insn. */
4635 emit_constant_insn (cond,
4636 gen_zero_extendhisi2
4637 (target, gen_lowpart (HImode, source)));
4638 else
4639 /* Extz only supports SImode, but we can coerce the operands
4640 into that mode. */
4641 emit_constant_insn (cond,
4642 gen_extzv_t2 (gen_lowpart (SImode, target),
4643 gen_lowpart (SImode, source),
4644 GEN_INT (i), const0_rtx));
4645 }
4646
4647 return 1;
4648 }
4649
4650 /* Calculate a few attributes that may be useful for specific
4651 optimizations. */
4652 /* Count number of leading zeros. */
4653 for (i = 31; i >= 0; i--)
4654 {
4655 if ((remainder & (1 << i)) == 0)
4656 clear_sign_bit_copies++;
4657 else
4658 break;
4659 }
4660
4661 /* Count number of leading 1's. */
4662 for (i = 31; i >= 0; i--)
4663 {
4664 if ((remainder & (1 << i)) != 0)
4665 set_sign_bit_copies++;
4666 else
4667 break;
4668 }
4669
4670 /* Count number of trailing zero's. */
4671 for (i = 0; i <= 31; i++)
4672 {
4673 if ((remainder & (1 << i)) == 0)
4674 clear_zero_bit_copies++;
4675 else
4676 break;
4677 }
4678
4679 /* Count number of trailing 1's. */
4680 for (i = 0; i <= 31; i++)
4681 {
4682 if ((remainder & (1 << i)) != 0)
4683 set_zero_bit_copies++;
4684 else
4685 break;
4686 }
4687
4688 switch (code)
4689 {
4690 case SET:
4691 /* See if we can do this by sign_extending a constant that is known
4692 to be negative. This is a good way of doing it, since the shift
4693 may well merge into a subsequent insn. */
4694 if (set_sign_bit_copies > 1)
4695 {
4696 if (const_ok_for_arm
4697 (temp1 = ARM_SIGN_EXTEND (remainder
4698 << (set_sign_bit_copies - 1))))
4699 {
4700 if (generate)
4701 {
4702 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4703 emit_constant_insn (cond,
4704 gen_rtx_SET (new_src, GEN_INT (temp1)));
4705 emit_constant_insn (cond,
4706 gen_ashrsi3 (target, new_src,
4707 GEN_INT (set_sign_bit_copies - 1)));
4708 }
4709 return 2;
4710 }
4711 /* For an inverted constant, we will need to set the low bits,
4712 these will be shifted out of harm's way. */
4713 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4714 if (const_ok_for_arm (~temp1))
4715 {
4716 if (generate)
4717 {
4718 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4719 emit_constant_insn (cond,
4720 gen_rtx_SET (new_src, GEN_INT (temp1)));
4721 emit_constant_insn (cond,
4722 gen_ashrsi3 (target, new_src,
4723 GEN_INT (set_sign_bit_copies - 1)));
4724 }
4725 return 2;
4726 }
4727 }
4728
4729 /* See if we can calculate the value as the difference between two
4730 valid immediates. */
4731 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4732 {
4733 int topshift = clear_sign_bit_copies & ~1;
4734
4735 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4736 & (0xff000000 >> topshift));
4737
4738 /* If temp1 is zero, then that means the 9 most significant
4739 bits of remainder were 1 and we've caused it to overflow.
4740 When topshift is 0 we don't need to do anything since we
4741 can borrow from 'bit 32'. */
4742 if (temp1 == 0 && topshift != 0)
4743 temp1 = 0x80000000 >> (topshift - 1);
4744
4745 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4746
4747 if (const_ok_for_arm (temp2))
4748 {
4749 if (generate)
4750 {
4751 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4752 emit_constant_insn (cond,
4753 gen_rtx_SET (new_src, GEN_INT (temp1)));
4754 emit_constant_insn (cond,
4755 gen_addsi3 (target, new_src,
4756 GEN_INT (-temp2)));
4757 }
4758
4759 return 2;
4760 }
4761 }
4762
4763 /* See if we can generate this by setting the bottom (or the top)
4764 16 bits, and then shifting these into the other half of the
4765 word. We only look for the simplest cases, to do more would cost
4766 too much. Be careful, however, not to generate this when the
4767 alternative would take fewer insns. */
4768 if (val & 0xffff0000)
4769 {
4770 temp1 = remainder & 0xffff0000;
4771 temp2 = remainder & 0x0000ffff;
4772
4773 /* Overlaps outside this range are best done using other methods. */
4774 for (i = 9; i < 24; i++)
4775 {
4776 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4777 && !const_ok_for_arm (temp2))
4778 {
4779 rtx new_src = (subtargets
4780 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4781 : target);
4782 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4783 source, subtargets, generate);
4784 source = new_src;
4785 if (generate)
4786 emit_constant_insn
4787 (cond,
4788 gen_rtx_SET
4789 (target,
4790 gen_rtx_IOR (mode,
4791 gen_rtx_ASHIFT (mode, source,
4792 GEN_INT (i)),
4793 source)));
4794 return insns + 1;
4795 }
4796 }
4797
4798 /* Don't duplicate cases already considered. */
4799 for (i = 17; i < 24; i++)
4800 {
4801 if (((temp1 | (temp1 >> i)) == remainder)
4802 && !const_ok_for_arm (temp1))
4803 {
4804 rtx new_src = (subtargets
4805 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4806 : target);
4807 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4808 source, subtargets, generate);
4809 source = new_src;
4810 if (generate)
4811 emit_constant_insn
4812 (cond,
4813 gen_rtx_SET (target,
4814 gen_rtx_IOR
4815 (mode,
4816 gen_rtx_LSHIFTRT (mode, source,
4817 GEN_INT (i)),
4818 source)));
4819 return insns + 1;
4820 }
4821 }
4822 }
4823 break;
4824
4825 case IOR:
4826 case XOR:
4827 /* If we have IOR or XOR, and the constant can be loaded in a
4828 single instruction, and we can find a temporary to put it in,
4829 then this can be done in two instructions instead of 3-4. */
4830 if (subtargets
4831 /* TARGET can't be NULL if SUBTARGETS is 0 */
4832 || (reload_completed && !reg_mentioned_p (target, source)))
4833 {
4834 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4835 {
4836 if (generate)
4837 {
4838 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4839
4840 emit_constant_insn (cond,
4841 gen_rtx_SET (sub, GEN_INT (val)));
4842 emit_constant_insn (cond,
4843 gen_rtx_SET (target,
4844 gen_rtx_fmt_ee (code, mode,
4845 source, sub)));
4846 }
4847 return 2;
4848 }
4849 }
4850
4851 if (code == XOR)
4852 break;
4853
4854 /* Convert.
4855 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4856 and the remainder 0s, e.g. 0xfff00000)
4857 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4858
4859 This can be done in 2 instructions by using shifts with mov or mvn.
4860 e.g. for
4861 x = x | 0xfff00000;
4862 we generate.
4863 mvn r0, r0, asl #12
4864 mvn r0, r0, lsr #12 */
4865 if (set_sign_bit_copies > 8
4866 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4867 {
4868 if (generate)
4869 {
4870 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4871 rtx shift = GEN_INT (set_sign_bit_copies);
4872
4873 emit_constant_insn
4874 (cond,
4875 gen_rtx_SET (sub,
4876 gen_rtx_NOT (mode,
4877 gen_rtx_ASHIFT (mode,
4878 source,
4879 shift))));
4880 emit_constant_insn
4881 (cond,
4882 gen_rtx_SET (target,
4883 gen_rtx_NOT (mode,
4884 gen_rtx_LSHIFTRT (mode, sub,
4885 shift))));
4886 }
4887 return 2;
4888 }
4889
4890 /* Convert
4891 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4892 to
4893 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4894
4895 E.g. for r0 = r0 | 0xfff
4896 mvn r0, r0, lsr #12
4897 mvn r0, r0, asl #12
4898
4899 */
4900 if (set_zero_bit_copies > 8
4901 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4902 {
4903 if (generate)
4904 {
4905 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4906 rtx shift = GEN_INT (set_zero_bit_copies);
4907
4908 emit_constant_insn
4909 (cond,
4910 gen_rtx_SET (sub,
4911 gen_rtx_NOT (mode,
4912 gen_rtx_LSHIFTRT (mode,
4913 source,
4914 shift))));
4915 emit_constant_insn
4916 (cond,
4917 gen_rtx_SET (target,
4918 gen_rtx_NOT (mode,
4919 gen_rtx_ASHIFT (mode, sub,
4920 shift))));
4921 }
4922 return 2;
4923 }
4924
4925 /* This will never be reached for Thumb2 because orn is a valid
4926 instruction. This is for Thumb1 and the ARM 32 bit cases.
4927
4928 x = y | constant (such that ~constant is a valid constant)
4929 Transform this to
4930 x = ~(~y & ~constant).
4931 */
4932 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4933 {
4934 if (generate)
4935 {
4936 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4937 emit_constant_insn (cond,
4938 gen_rtx_SET (sub,
4939 gen_rtx_NOT (mode, source)));
4940 source = sub;
4941 if (subtargets)
4942 sub = gen_reg_rtx (mode);
4943 emit_constant_insn (cond,
4944 gen_rtx_SET (sub,
4945 gen_rtx_AND (mode, source,
4946 GEN_INT (temp1))));
4947 emit_constant_insn (cond,
4948 gen_rtx_SET (target,
4949 gen_rtx_NOT (mode, sub)));
4950 }
4951 return 3;
4952 }
4953 break;
4954
4955 case AND:
4956 /* See if two shifts will do 2 or more insn's worth of work. */
4957 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4958 {
4959 HOST_WIDE_INT shift_mask = ((0xffffffff
4960 << (32 - clear_sign_bit_copies))
4961 & 0xffffffff);
4962
4963 if ((remainder | shift_mask) != 0xffffffff)
4964 {
4965 HOST_WIDE_INT new_val
4966 = ARM_SIGN_EXTEND (remainder | shift_mask);
4967
4968 if (generate)
4969 {
4970 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4971 insns = arm_gen_constant (AND, SImode, cond, new_val,
4972 new_src, source, subtargets, 1);
4973 source = new_src;
4974 }
4975 else
4976 {
4977 rtx targ = subtargets ? NULL_RTX : target;
4978 insns = arm_gen_constant (AND, mode, cond, new_val,
4979 targ, source, subtargets, 0);
4980 }
4981 }
4982
4983 if (generate)
4984 {
4985 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4986 rtx shift = GEN_INT (clear_sign_bit_copies);
4987
4988 emit_insn (gen_ashlsi3 (new_src, source, shift));
4989 emit_insn (gen_lshrsi3 (target, new_src, shift));
4990 }
4991
4992 return insns + 2;
4993 }
4994
4995 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4996 {
4997 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4998
4999 if ((remainder | shift_mask) != 0xffffffff)
5000 {
5001 HOST_WIDE_INT new_val
5002 = ARM_SIGN_EXTEND (remainder | shift_mask);
5003 if (generate)
5004 {
5005 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5006
5007 insns = arm_gen_constant (AND, mode, cond, new_val,
5008 new_src, source, subtargets, 1);
5009 source = new_src;
5010 }
5011 else
5012 {
5013 rtx targ = subtargets ? NULL_RTX : target;
5014
5015 insns = arm_gen_constant (AND, mode, cond, new_val,
5016 targ, source, subtargets, 0);
5017 }
5018 }
5019
5020 if (generate)
5021 {
5022 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5023 rtx shift = GEN_INT (clear_zero_bit_copies);
5024
5025 emit_insn (gen_lshrsi3 (new_src, source, shift));
5026 emit_insn (gen_ashlsi3 (target, new_src, shift));
5027 }
5028
5029 return insns + 2;
5030 }
5031
5032 break;
5033
5034 default:
5035 break;
5036 }
5037
5038 /* Calculate what the instruction sequences would be if we generated it
5039 normally, negated, or inverted. */
5040 if (code == AND)
5041 /* AND cannot be split into multiple insns, so invert and use BIC. */
5042 insns = 99;
5043 else
5044 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5045
5046 if (can_negate)
5047 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5048 &neg_immediates);
5049 else
5050 neg_insns = 99;
5051
5052 if (can_invert || final_invert)
5053 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5054 &inv_immediates);
5055 else
5056 inv_insns = 99;
5057
5058 immediates = &pos_immediates;
5059
5060 /* Is the negated immediate sequence more efficient? */
5061 if (neg_insns < insns && neg_insns <= inv_insns)
5062 {
5063 insns = neg_insns;
5064 immediates = &neg_immediates;
5065 }
5066 else
5067 can_negate = 0;
5068
5069 /* Is the inverted immediate sequence more efficient?
5070 We must allow for an extra NOT instruction for XOR operations, although
5071 there is some chance that the final 'mvn' will get optimized later. */
5072 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5073 {
5074 insns = inv_insns;
5075 immediates = &inv_immediates;
5076 }
5077 else
5078 {
5079 can_invert = 0;
5080 final_invert = 0;
5081 }
5082
5083 /* Now output the chosen sequence as instructions. */
5084 if (generate)
5085 {
5086 for (i = 0; i < insns; i++)
5087 {
5088 rtx new_src, temp1_rtx;
5089
5090 temp1 = immediates->i[i];
5091
5092 if (code == SET || code == MINUS)
5093 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5094 else if ((final_invert || i < (insns - 1)) && subtargets)
5095 new_src = gen_reg_rtx (mode);
5096 else
5097 new_src = target;
5098
5099 if (can_invert)
5100 temp1 = ~temp1;
5101 else if (can_negate)
5102 temp1 = -temp1;
5103
5104 temp1 = trunc_int_for_mode (temp1, mode);
5105 temp1_rtx = GEN_INT (temp1);
5106
5107 if (code == SET)
5108 ;
5109 else if (code == MINUS)
5110 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5111 else
5112 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5113
5114 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5115 source = new_src;
5116
5117 if (code == SET)
5118 {
5119 can_negate = can_invert;
5120 can_invert = 0;
5121 code = PLUS;
5122 }
5123 else if (code == MINUS)
5124 code = PLUS;
5125 }
5126 }
5127
5128 if (final_invert)
5129 {
5130 if (generate)
5131 emit_constant_insn (cond, gen_rtx_SET (target,
5132 gen_rtx_NOT (mode, source)));
5133 insns++;
5134 }
5135
5136 return insns;
5137 }
5138
5139 /* Canonicalize a comparison so that we are more likely to recognize it.
5140 This can be done for a few constant compares, where we can make the
5141 immediate value easier to load. */
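/* For instance (a sketch): (x > 511) needs the constant 511, which is
   not a valid ARM immediate, but it is equivalent to (x >= 512), and
   512 is encodable; the GT/LE and GTU/LEU cases below apply exactly
   this kind of +/-1 adjustment.  */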
5142
5143 static void
5144 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5145 bool op0_preserve_value)
5146 {
5147 machine_mode mode;
5148 unsigned HOST_WIDE_INT i, maxval;
5149
5150 mode = GET_MODE (*op0);
5151 if (mode == VOIDmode)
5152 mode = GET_MODE (*op1);
5153
5154 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5155
5156 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5157 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5158 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5159 for GTU/LEU in Thumb mode. */
5160 if (mode == DImode)
5161 {
5162
5163 if (*code == GT || *code == LE
5164 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5165 {
5166 /* Missing comparison. First try to use an available
5167 comparison. */
5168 if (CONST_INT_P (*op1))
5169 {
5170 i = INTVAL (*op1);
5171 switch (*code)
5172 {
5173 case GT:
5174 case LE:
5175 if (i != maxval
5176 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5177 {
5178 *op1 = GEN_INT (i + 1);
5179 *code = *code == GT ? GE : LT;
5180 return;
5181 }
5182 break;
5183 case GTU:
5184 case LEU:
5185 if (i != ~((unsigned HOST_WIDE_INT) 0)
5186 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5187 {
5188 *op1 = GEN_INT (i + 1);
5189 *code = *code == GTU ? GEU : LTU;
5190 return;
5191 }
5192 break;
5193 default:
5194 gcc_unreachable ();
5195 }
5196 }
5197
5198 /* If that did not work, reverse the condition. */
5199 if (!op0_preserve_value)
5200 {
5201 std::swap (*op0, *op1);
5202 *code = (int)swap_condition ((enum rtx_code)*code);
5203 }
5204 }
5205 return;
5206 }
5207
5208 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5209 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5210 to facilitate possible combining with a cmp into 'ands'. */
5211 if (mode == SImode
5212 && GET_CODE (*op0) == ZERO_EXTEND
5213 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5214 && GET_MODE (XEXP (*op0, 0)) == QImode
5215 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5216 && subreg_lowpart_p (XEXP (*op0, 0))
5217 && *op1 == const0_rtx)
5218 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5219 GEN_INT (255));
5220
5221 /* Comparisons smaller than DImode. Only adjust comparisons against
5222 an out-of-range constant. */
5223 if (!CONST_INT_P (*op1)
5224 || const_ok_for_arm (INTVAL (*op1))
5225 || const_ok_for_arm (- INTVAL (*op1)))
5226 return;
5227
5228 i = INTVAL (*op1);
5229
5230 switch (*code)
5231 {
5232 case EQ:
5233 case NE:
5234 return;
5235
5236 case GT:
5237 case LE:
5238 if (i != maxval
5239 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5240 {
5241 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5242 *code = *code == GT ? GE : LT;
5243 return;
5244 }
5245 break;
5246
5247 case GE:
5248 case LT:
5249 if (i != ~maxval
5250 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5251 {
5252 *op1 = GEN_INT (i - 1);
5253 *code = *code == GE ? GT : LE;
5254 return;
5255 }
5256 break;
5257
5258 case GTU:
5259 case LEU:
5260 if (i != ~((unsigned HOST_WIDE_INT) 0)
5261 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5262 {
5263 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5264 *code = *code == GTU ? GEU : LTU;
5265 return;
5266 }
5267 break;
5268
5269 case GEU:
5270 case LTU:
5271 if (i != 0
5272 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5273 {
5274 *op1 = GEN_INT (i - 1);
5275 *code = *code == GEU ? GTU : LEU;
5276 return;
5277 }
5278 break;
5279
5280 default:
5281 gcc_unreachable ();
5282 }
5283 }
5284
5285
5286 /* Define how to find the value returned by a function. */
5287
5288 static rtx
5289 arm_function_value(const_tree type, const_tree func,
5290 bool outgoing ATTRIBUTE_UNUSED)
5291 {
5292 machine_mode mode;
5293 int unsignedp ATTRIBUTE_UNUSED;
5294 rtx r ATTRIBUTE_UNUSED;
5295
5296 mode = TYPE_MODE (type);
5297
5298 if (TARGET_AAPCS_BASED)
5299 return aapcs_allocate_return_reg (mode, type, func);
5300
5301 /* Promote integer types. */
5302 if (INTEGRAL_TYPE_P (type))
5303 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5304
5305 /* Promote small structs returned in a register to full-word size
5306 for big-endian AAPCS. */
5307 if (arm_return_in_msb (type))
5308 {
5309 HOST_WIDE_INT size = int_size_in_bytes (type);
5310 if (size % UNITS_PER_WORD != 0)
5311 {
5312 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5313 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5314 }
5315 }
5316
5317 return arm_libcall_value_1 (mode);
5318 }
5319
5320 /* libcall hashtable helpers. */
5321
5322 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5323 {
5324 static inline hashval_t hash (const rtx_def *);
5325 static inline bool equal (const rtx_def *, const rtx_def *);
5326 static inline void remove (rtx_def *);
5327 };
5328
5329 inline bool
5330 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5331 {
5332 return rtx_equal_p (p1, p2);
5333 }
5334
5335 inline hashval_t
5336 libcall_hasher::hash (const rtx_def *p1)
5337 {
5338 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5339 }
5340
5341 typedef hash_table<libcall_hasher> libcall_table_type;
5342
5343 static void
5344 add_libcall (libcall_table_type *htab, rtx libcall)
5345 {
5346 *htab->find_slot (libcall, INSERT) = libcall;
5347 }
5348
5349 static bool
5350 arm_libcall_uses_aapcs_base (const_rtx libcall)
5351 {
5352 static bool init_done = false;
5353 static libcall_table_type *libcall_htab = NULL;
5354
5355 if (!init_done)
5356 {
5357 init_done = true;
5358
5359 libcall_htab = new libcall_table_type (31);
5360 add_libcall (libcall_htab,
5361 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5362 add_libcall (libcall_htab,
5363 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5364 add_libcall (libcall_htab,
5365 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5366 add_libcall (libcall_htab,
5367 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5368
5369 add_libcall (libcall_htab,
5370 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5371 add_libcall (libcall_htab,
5372 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5373 add_libcall (libcall_htab,
5374 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5375 add_libcall (libcall_htab,
5376 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5377
5378 add_libcall (libcall_htab,
5379 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5380 add_libcall (libcall_htab,
5381 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5382 add_libcall (libcall_htab,
5383 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5384 add_libcall (libcall_htab,
5385 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5386 add_libcall (libcall_htab,
5387 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5388 add_libcall (libcall_htab,
5389 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5390 add_libcall (libcall_htab,
5391 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5392 add_libcall (libcall_htab,
5393 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5394
5395 /* Values from double-precision helper functions are returned in core
5396 registers if the selected core only supports single-precision
5397 arithmetic, even if we are using the hard-float ABI. The same is
5398 true for single-precision helpers, but we will never be using the
5399 hard-float ABI on a CPU which doesn't support single-precision
5400 operations in hardware. */
5401 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5402 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5403 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5404 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5405 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5406 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5407 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5408 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5409 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5410 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5411 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5412 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5413 SFmode));
5414 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5415 DFmode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5418 }
5419
5420 return libcall && libcall_htab->find (libcall) != NULL;
5421 }
5422
5423 static rtx
5424 arm_libcall_value_1 (machine_mode mode)
5425 {
5426 if (TARGET_AAPCS_BASED)
5427 return aapcs_libcall_value (mode);
5428 else if (TARGET_IWMMXT_ABI
5429 && arm_vector_mode_supported_p (mode))
5430 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5431 else
5432 return gen_rtx_REG (mode, ARG_REGISTER (1));
5433 }
5434
5435 /* Define how to find the value returned by a library function
5436 assuming the value has mode MODE. */
5437
5438 static rtx
5439 arm_libcall_value (machine_mode mode, const_rtx libcall)
5440 {
5441 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5442 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5443 {
5444 /* The following libcalls return their result in integer registers,
5445 even though they return a floating point value. */
5446 if (arm_libcall_uses_aapcs_base (libcall))
5447 return gen_rtx_REG (mode, ARG_REGISTER (1));
5448
5449 }
5450
5451 return arm_libcall_value_1 (mode);
5452 }
5453
5454 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5455
5456 static bool
5457 arm_function_value_regno_p (const unsigned int regno)
5458 {
5459 if (regno == ARG_REGISTER (1)
5460 || (TARGET_32BIT
5461 && TARGET_AAPCS_BASED
5462 && TARGET_HARD_FLOAT
5463 && regno == FIRST_VFP_REGNUM)
5464 || (TARGET_IWMMXT_ABI
5465 && regno == FIRST_IWMMXT_REGNUM))
5466 return true;
5467
5468 return false;
5469 }
5470
5471 /* Determine the amount of memory needed to store the possible return
5472 registers of an untyped call. */
5473 int
5474 arm_apply_result_size (void)
5475 {
5476 int size = 16;
5477
5478 if (TARGET_32BIT)
5479 {
5480 if (TARGET_HARD_FLOAT_ABI)
5481 size += 32;
5482 if (TARGET_IWMMXT_ABI)
5483 size += 8;
5484 }
5485
5486 return size;
5487 }
5488
5489 /* Decide whether TYPE should be returned in memory (true)
5490 or in a register (false). FNTYPE is the type of the function making
5491 the call. */
5492 static bool
5493 arm_return_in_memory (const_tree type, const_tree fntype)
5494 {
5495 HOST_WIDE_INT size;
5496
5497 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5498
5499 if (TARGET_AAPCS_BASED)
5500 {
5501 /* Simple, non-aggregate types (i.e. not including vectors and
5502 complex) are always returned in a register (or registers).
5503 We don't care about which register here, so we can short-cut
5504 some of the detail. */
5505 if (!AGGREGATE_TYPE_P (type)
5506 && TREE_CODE (type) != VECTOR_TYPE
5507 && TREE_CODE (type) != COMPLEX_TYPE)
5508 return false;
5509
5510 /* Any return value that is no larger than one word can be
5511 returned in r0. */
5512 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5513 return false;
5514
5515 /* Check any available co-processors to see if they accept the
5516 type as a register candidate (VFP, for example, can return
5517 some aggregates in consecutive registers). These aren't
5518 available if the call is variadic. */
5519 if (aapcs_select_return_coproc (type, fntype) >= 0)
5520 return false;
5521
5522 /* Vector values should be returned using ARM registers, not
5523 memory (unless they're over 16 bytes, which will break since
5524 we only have four call-clobbered registers to play with). */
5525 if (TREE_CODE (type) == VECTOR_TYPE)
5526 return (size < 0 || size > (4 * UNITS_PER_WORD));
5527
5528 /* The rest go in memory. */
5529 return true;
5530 }
5531
5532 if (TREE_CODE (type) == VECTOR_TYPE)
5533 return (size < 0 || size > (4 * UNITS_PER_WORD));
5534
5535 if (!AGGREGATE_TYPE_P (type)
5536 && TREE_CODE (type) != VECTOR_TYPE)
5537 /* All simple types are returned in registers. */
5538 return false;
5539
5540 if (arm_abi != ARM_ABI_APCS)
5541 {
5542 /* ATPCS and later return aggregate types in memory only if they are
5543 larger than a word (or are variable size). */
5544 return (size < 0 || size > UNITS_PER_WORD);
5545 }
5546
5547 /* For the arm-wince targets we choose to be compatible with Microsoft's
5548 ARM and Thumb compilers, which always return aggregates in memory. */
5549 #ifndef ARM_WINCE
5550 /* All structures/unions bigger than one word are returned in memory.
5551 Also catch the case where int_size_in_bytes returns -1. In this case
5552 the aggregate is either huge or of variable size, and in either case
5553 we will want to return it via memory and not in a register. */
5554 if (size < 0 || size > UNITS_PER_WORD)
5555 return true;
5556
5557 if (TREE_CODE (type) == RECORD_TYPE)
5558 {
5559 tree field;
5560
5561 /* For a struct the APCS says that we only return in a register
5562 if the type is 'integer like' and every addressable element
5563 has an offset of zero. For practical purposes this means
5564 that the structure can have at most one non bit-field element
5565 and that this element must be the first one in the structure. */
5566
5567 /* Find the first field, ignoring non FIELD_DECL things which will
5568 have been created by C++. */
5569 for (field = TYPE_FIELDS (type);
5570 field && TREE_CODE (field) != FIELD_DECL;
5571 field = DECL_CHAIN (field))
5572 continue;
5573
5574 if (field == NULL)
5575 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5576
5577 /* Check that the first field is valid for returning in a register. */
5578
5579 /* ... Floats are not allowed */
5580 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5581 return true;
5582
5583 /* ... Aggregates that are not themselves valid for returning in
5584 a register are not allowed. */
5585 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5586 return true;
5587
5588 /* Now check the remaining fields, if any. Only bitfields are allowed,
5589 since they are not addressable. */
5590 for (field = DECL_CHAIN (field);
5591 field;
5592 field = DECL_CHAIN (field))
5593 {
5594 if (TREE_CODE (field) != FIELD_DECL)
5595 continue;
5596
5597 if (!DECL_BIT_FIELD_TYPE (field))
5598 return true;
5599 }
5600
5601 return false;
5602 }
5603
5604 if (TREE_CODE (type) == UNION_TYPE)
5605 {
5606 tree field;
5607
5608 /* Unions can be returned in registers if every element is
5609 integral, or can be returned in an integer register. */
5610 for (field = TYPE_FIELDS (type);
5611 field;
5612 field = DECL_CHAIN (field))
5613 {
5614 if (TREE_CODE (field) != FIELD_DECL)
5615 continue;
5616
5617 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5618 return true;
5619
5620 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5621 return true;
5622 }
5623
5624 return false;
5625 }
5626 #endif /* not ARM_WINCE */
5627
5628 /* Return all other types in memory. */
5629 return true;
5630 }
5631
5632 const struct pcs_attribute_arg
5633 {
5634 const char *arg;
5635 enum arm_pcs value;
5636 } pcs_attribute_args[] =
5637 {
5638 {"aapcs", ARM_PCS_AAPCS},
5639 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5640 #if 0
5641 /* We could recognize these, but changes would be needed elsewhere
5642 * to implement them. */
5643 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5644 {"atpcs", ARM_PCS_ATPCS},
5645 {"apcs", ARM_PCS_APCS},
5646 #endif
5647 {NULL, ARM_PCS_UNKNOWN}
5648 };
5649
5650 static enum arm_pcs
5651 arm_pcs_from_attribute (tree attr)
5652 {
5653 const struct pcs_attribute_arg *ptr;
5654 const char *arg;
5655
5656 /* Get the value of the argument. */
5657 if (TREE_VALUE (attr) == NULL_TREE
5658 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5659 return ARM_PCS_UNKNOWN;
5660
5661 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5662
5663 /* Check it against the list of known arguments. */
5664 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5665 if (streq (arg, ptr->arg))
5666 return ptr->value;
5667
5668 /* An unrecognized PCS variant. */
5669 return ARM_PCS_UNKNOWN;
5670 }
5671
5672 /* Get the PCS variant to use for this call. TYPE is the function's type
5673 specification, DECL is the specific declaration. DECL may be null if
5674 the call could be indirect or if this is a library call. */
5675 static enum arm_pcs
5676 arm_get_pcs_model (const_tree type, const_tree decl)
5677 {
5678 bool user_convention = false;
5679 enum arm_pcs user_pcs = arm_pcs_default;
5680 tree attr;
5681
5682 gcc_assert (type);
5683
5684 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5685 if (attr)
5686 {
5687 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5688 user_convention = true;
5689 }
5690
5691 if (TARGET_AAPCS_BASED)
5692 {
5693 /* Detect varargs functions. These always use the base rules
5694 (no argument is ever a candidate for a co-processor
5695 register). */
5696 bool base_rules = stdarg_p (type);
5697
5698 if (user_convention)
5699 {
5700 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5701 sorry ("non-AAPCS derived PCS variant");
5702 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5703 error ("variadic functions must use the base AAPCS variant");
5704 }
5705
5706 if (base_rules)
5707 return ARM_PCS_AAPCS;
5708 else if (user_convention)
5709 return user_pcs;
5710 else if (decl && flag_unit_at_a_time)
5711 {
5712 /* Local functions never leak outside this compilation unit,
5713 so we are free to use whatever conventions are
5714 appropriate. */
5715 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5716 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5717 if (i && i->local)
5718 return ARM_PCS_AAPCS_LOCAL;
5719 }
5720 }
5721 else if (user_convention && user_pcs != arm_pcs_default)
5722 sorry ("PCS variant");
5723
5724 /* For everything else we use the target's default. */
5725 return arm_pcs_default;
5726 }
5727
5728
5729 static void
5730 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
5731 const_tree fntype ATTRIBUTE_UNUSED,
5732 rtx libcall ATTRIBUTE_UNUSED,
5733 const_tree fndecl ATTRIBUTE_UNUSED)
5734 {
5735 /* Record the unallocated VFP registers. */
5736 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5737 pcum->aapcs_vfp_reg_alloc = 0;
5738 }
5739
5740 /* Walk down the type tree of TYPE counting consecutive base elements.
5741 If *MODEP is VOIDmode, then set it to the first valid floating point
5742 type. If a non-floating point type is found, or if a floating point
5743 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5744 otherwise return the count in the sub-tree. */
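/* Illustrative examples: struct { double re, im; } c[2] gives
   *MODEP == DFmode and a count of 4 (a homogeneous floating-point
   aggregate), whereas struct { float f; double d; } returns -1
   because its element modes differ.  */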
5745 static int
5746 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5747 {
5748 machine_mode mode;
5749 HOST_WIDE_INT size;
5750
5751 switch (TREE_CODE (type))
5752 {
5753 case REAL_TYPE:
5754 mode = TYPE_MODE (type);
5755 if (mode != DFmode && mode != SFmode && mode != HFmode)
5756 return -1;
5757
5758 if (*modep == VOIDmode)
5759 *modep = mode;
5760
5761 if (*modep == mode)
5762 return 1;
5763
5764 break;
5765
5766 case COMPLEX_TYPE:
5767 mode = TYPE_MODE (TREE_TYPE (type));
5768 if (mode != DFmode && mode != SFmode)
5769 return -1;
5770
5771 if (*modep == VOIDmode)
5772 *modep = mode;
5773
5774 if (*modep == mode)
5775 return 2;
5776
5777 break;
5778
5779 case VECTOR_TYPE:
5780 /* Use V2SImode and V4SImode as representatives of all 64-bit
5781 and 128-bit vector types, whether or not those modes are
5782 supported with the present options. */
5783 size = int_size_in_bytes (type);
5784 switch (size)
5785 {
5786 case 8:
5787 mode = V2SImode;
5788 break;
5789 case 16:
5790 mode = V4SImode;
5791 break;
5792 default:
5793 return -1;
5794 }
5795
5796 if (*modep == VOIDmode)
5797 *modep = mode;
5798
5799 /* Vector modes are considered to be opaque: two vectors are
5800 equivalent for the purposes of being homogeneous aggregates
5801 if they are the same size. */
5802 if (*modep == mode)
5803 return 1;
5804
5805 break;
5806
5807 case ARRAY_TYPE:
5808 {
5809 int count;
5810 tree index = TYPE_DOMAIN (type);
5811
5812 /* Can't handle incomplete types nor sizes that are not
5813 fixed. */
5814 if (!COMPLETE_TYPE_P (type)
5815 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5816 return -1;
5817
5818 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5819 if (count == -1
5820 || !index
5821 || !TYPE_MAX_VALUE (index)
5822 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5823 || !TYPE_MIN_VALUE (index)
5824 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5825 || count < 0)
5826 return -1;
5827
5828 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5829 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5830
5831 /* There must be no padding. */
5832 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5833 return -1;
5834
5835 return count;
5836 }
5837
5838 case RECORD_TYPE:
5839 {
5840 int count = 0;
5841 int sub_count;
5842 tree field;
5843
5844 /* Can't handle incomplete types nor sizes that are not
5845 fixed. */
5846 if (!COMPLETE_TYPE_P (type)
5847 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5848 return -1;
5849
5850 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5851 {
5852 if (TREE_CODE (field) != FIELD_DECL)
5853 continue;
5854
5855 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5856 if (sub_count < 0)
5857 return -1;
5858 count += sub_count;
5859 }
5860
5861 /* There must be no padding. */
5862 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5863 return -1;
5864
5865 return count;
5866 }
5867
5868 case UNION_TYPE:
5869 case QUAL_UNION_TYPE:
5870 {
5871 /* These aren't very interesting except in a degenerate case. */
5872 int count = 0;
5873 int sub_count;
5874 tree field;
5875
5876 /* Can't handle incomplete types nor sizes that are not
5877 fixed. */
5878 if (!COMPLETE_TYPE_P (type)
5879 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5880 return -1;
5881
5882 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5883 {
5884 if (TREE_CODE (field) != FIELD_DECL)
5885 continue;
5886
5887 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5888 if (sub_count < 0)
5889 return -1;
5890 count = count > sub_count ? count : sub_count;
5891 }
5892
5893 /* There must be no padding. */
5894 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5895 return -1;
5896
5897 return count;
5898 }
5899
5900 default:
5901 break;
5902 }
5903
5904 return -1;
5905 }
5906
5907 /* Return true if PCS_VARIANT should use VFP registers. */
5908 static bool
5909 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5910 {
5911 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5912 {
5913 static bool seen_thumb1_vfp = false;
5914
5915 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5916 {
5917 sorry ("Thumb-1 hard-float VFP ABI");
5918 /* sorry() is not immediately fatal, so only display this once. */
5919 seen_thumb1_vfp = true;
5920 }
5921
5922 return true;
5923 }
5924
5925 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5926 return false;
5927
5928 return (TARGET_32BIT && TARGET_HARD_FLOAT
5929 && (TARGET_VFP_DOUBLE || !is_double));
5930 }
5931
5932 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5933 suitable for passing or returning in VFP registers for the PCS
5934 variant selected. If it is, then *BASE_MODE is updated to contain
5935 a machine mode describing each element of the argument's type and
5936 *COUNT to hold the number of such elements. */
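/* Sketch of typical results: a double gives *BASE_MODE == DFmode with
   *COUNT == 1, a _Complex double gives DFmode with *COUNT == 2, and a
   homogeneous aggregate of four floats gives SFmode with *COUNT == 4.  */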
5937 static bool
5938 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5939 machine_mode mode, const_tree type,
5940 machine_mode *base_mode, int *count)
5941 {
5942 machine_mode new_mode = VOIDmode;
5943
5944 /* If we have the type information, prefer that to working things
5945 out from the mode. */
5946 if (type)
5947 {
5948 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5949
5950 if (ag_count > 0 && ag_count <= 4)
5951 *count = ag_count;
5952 else
5953 return false;
5954 }
5955 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5956 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5957 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5958 {
5959 *count = 1;
5960 new_mode = mode;
5961 }
5962 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5963 {
5964 *count = 2;
5965 new_mode = (mode == DCmode ? DFmode : SFmode);
5966 }
5967 else
5968 return false;
5969
5970
5971 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5972 return false;
5973
5974 *base_mode = new_mode;
5975 return true;
5976 }
5977
5978 static bool
5979 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5980 machine_mode mode, const_tree type)
5981 {
5982 int count ATTRIBUTE_UNUSED;
5983 machine_mode ag_mode ATTRIBUTE_UNUSED;
5984
5985 if (!use_vfp_abi (pcs_variant, false))
5986 return false;
5987 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5988 &ag_mode, &count);
5989 }
5990
5991 static bool
5992 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5993 const_tree type)
5994 {
5995 if (!use_vfp_abi (pcum->pcs_variant, false))
5996 return false;
5997
5998 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5999 &pcum->aapcs_vfp_rmode,
6000 &pcum->aapcs_vfp_rcount);
6001 }
6002
6003 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6004 for the behaviour of this function. */
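/* Sketch of the allocation below: for two doubles (aapcs_vfp_rmode ==
   DFmode, aapcs_vfp_rcount == 2) the shift is 2 and the mask is 0xf,
   so the loop searches for four consecutive free single-precision
   slots starting at an even register (s0, s2, s4, ...), i.e. an
   adjacent d-register pair.  */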
6005
6006 static bool
6007 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6008 const_tree type ATTRIBUTE_UNUSED)
6009 {
6010 int rmode_size
6011 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6012 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6013 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6014 int regno;
6015
6016 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6017 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6018 {
6019 pcum->aapcs_vfp_reg_alloc = mask << regno;
6020 if (mode == BLKmode
6021 || (mode == TImode && ! TARGET_NEON)
6022 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6023 {
6024 int i;
6025 int rcount = pcum->aapcs_vfp_rcount;
6026 int rshift = shift;
6027 machine_mode rmode = pcum->aapcs_vfp_rmode;
6028 rtx par;
6029 if (!TARGET_NEON)
6030 {
6031 /* Avoid using unsupported vector modes. */
6032 if (rmode == V2SImode)
6033 rmode = DImode;
6034 else if (rmode == V4SImode)
6035 {
6036 rmode = DImode;
6037 rcount *= 2;
6038 rshift /= 2;
6039 }
6040 }
6041 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6042 for (i = 0; i < rcount; i++)
6043 {
6044 rtx tmp = gen_rtx_REG (rmode,
6045 FIRST_VFP_REGNUM + regno + i * rshift);
6046 tmp = gen_rtx_EXPR_LIST
6047 (VOIDmode, tmp,
6048 GEN_INT (i * GET_MODE_SIZE (rmode)));
6049 XVECEXP (par, 0, i) = tmp;
6050 }
6051
6052 pcum->aapcs_reg = par;
6053 }
6054 else
6055 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6056 return true;
6057 }
6058 return false;
6059 }
6060
6061 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6062 comment there for the behaviour of this function. */
6063
6064 static rtx
6065 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6066 machine_mode mode,
6067 const_tree type ATTRIBUTE_UNUSED)
6068 {
6069 if (!use_vfp_abi (pcs_variant, false))
6070 return NULL;
6071
6072 if (mode == BLKmode
6073 || (GET_MODE_CLASS (mode) == MODE_INT
6074 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6075 && !TARGET_NEON))
6076 {
6077 int count;
6078 machine_mode ag_mode;
6079 int i;
6080 rtx par;
6081 int shift;
6082
6083 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6084 &ag_mode, &count);
6085
6086 if (!TARGET_NEON)
6087 {
6088 if (ag_mode == V2SImode)
6089 ag_mode = DImode;
6090 else if (ag_mode == V4SImode)
6091 {
6092 ag_mode = DImode;
6093 count *= 2;
6094 }
6095 }
6096 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6097 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6098 for (i = 0; i < count; i++)
6099 {
6100 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6101 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6102 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6103 XVECEXP (par, 0, i) = tmp;
6104 }
6105
6106 return par;
6107 }
6108
6109 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6110 }
6111
6112 static void
6113 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
6114 machine_mode mode ATTRIBUTE_UNUSED,
6115 const_tree type ATTRIBUTE_UNUSED)
6116 {
6117 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6118 pcum->aapcs_vfp_reg_alloc = 0;
6119 return;
6120 }
6121
6122 #define AAPCS_CP(X) \
6123 { \
6124 aapcs_ ## X ## _cum_init, \
6125 aapcs_ ## X ## _is_call_candidate, \
6126 aapcs_ ## X ## _allocate, \
6127 aapcs_ ## X ## _is_return_candidate, \
6128 aapcs_ ## X ## _allocate_return_reg, \
6129 aapcs_ ## X ## _advance \
6130 }
6131
6132 /* Table of co-processors that can be used to pass arguments in
6133 registers. Ideally no argument should be a candidate for more than
6134 one co-processor table entry, but the table is processed in order
6135 and stops after the first match. If that entry then fails to put
6136 the argument into a co-processor register, the argument will go on
6137 the stack. */
6138 static struct
6139 {
6140 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6141 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6142
6143 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6144 BLKmode) is a candidate for this co-processor's registers; this
6145 function should ignore any position-dependent state in
6146 CUMULATIVE_ARGS and only use call-type dependent information. */
6147 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6148
6149 /* Return true if the argument does get a co-processor register; it
6150 should set aapcs_reg to an RTX of the register allocated as is
6151 required for a return from FUNCTION_ARG. */
6152 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6153
6154 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6155 be returned in this co-processor's registers. */
6156 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6157
6158 /* Allocate and return an RTX element to hold the return type of a call. This
6159 routine must not fail and will only be called if is_return_candidate
6160 returned true with the same parameters. */
6161 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6162
6163 /* Finish processing this argument and prepare to start processing
6164 the next one. */
6165 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6166 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6167 {
6168 AAPCS_CP(vfp)
6169 };
6170
6171 #undef AAPCS_CP
6172
6173 static int
6174 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6175 const_tree type)
6176 {
6177 int i;
6178
6179 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6180 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6181 return i;
6182
6183 return -1;
6184 }
6185
6186 static int
6187 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6188 {
6189 /* We aren't passed a decl, so we can't check that a call is local.
6190 However, it isn't clear that that would be a win anyway, since it
6191 might limit some tail-calling opportunities. */
6192 enum arm_pcs pcs_variant;
6193
6194 if (fntype)
6195 {
6196 const_tree fndecl = NULL_TREE;
6197
6198 if (TREE_CODE (fntype) == FUNCTION_DECL)
6199 {
6200 fndecl = fntype;
6201 fntype = TREE_TYPE (fntype);
6202 }
6203
6204 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6205 }
6206 else
6207 pcs_variant = arm_pcs_default;
6208
6209 if (pcs_variant != ARM_PCS_AAPCS)
6210 {
6211 int i;
6212
6213 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6214 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6215 TYPE_MODE (type),
6216 type))
6217 return i;
6218 }
6219 return -1;
6220 }
6221
6222 static rtx
6223 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6224 const_tree fntype)
6225 {
6226 /* We aren't passed a decl, so we can't check that a call is local.
6227 However, it isn't clear that that would be a win anyway, since it
6228 might limit some tail-calling opportunities. */
6229 enum arm_pcs pcs_variant;
6230 int unsignedp ATTRIBUTE_UNUSED;
6231
6232 if (fntype)
6233 {
6234 const_tree fndecl = NULL_TREE;
6235
6236 if (TREE_CODE (fntype) == FUNCTION_DECL)
6237 {
6238 fndecl = fntype;
6239 fntype = TREE_TYPE (fntype);
6240 }
6241
6242 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6243 }
6244 else
6245 pcs_variant = arm_pcs_default;
6246
6247 /* Promote integer types. */
6248 if (type && INTEGRAL_TYPE_P (type))
6249 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6250
6251 if (pcs_variant != ARM_PCS_AAPCS)
6252 {
6253 int i;
6254
6255 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6256 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6257 type))
6258 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6259 mode, type);
6260 }
6261
6262 /* Promotes small structs returned in a register to full-word size
6263 for big-endian AAPCS. */
6264 if (type && arm_return_in_msb (type))
6265 {
6266 HOST_WIDE_INT size = int_size_in_bytes (type);
6267 if (size % UNITS_PER_WORD != 0)
6268 {
6269 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6270 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6271 }
6272 }
6273
6274 return gen_rtx_REG (mode, R0_REGNUM);
6275 }
6276
6277 static rtx
6278 aapcs_libcall_value (machine_mode mode)
6279 {
6280 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6281 && GET_MODE_SIZE (mode) <= 4)
6282 mode = SImode;
6283
6284 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6285 }
6286
6287 /* Lay out a function argument using the AAPCS rules. The rule
6288 numbers referred to here are those in the AAPCS. */
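/* Worked example (a sketch, assuming no co-processor candidate): for
   f (int a, double b), 'a' goes in r0 (C.4), rule C.3 rounds the NCRN
   from 1 up to 2, and 'b' takes the even pair r2/r3 (C.4); a further
   double-word argument would fall under C.6-C.8 and go on the stack.  */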
6289 static void
6290 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6291 const_tree type, bool named)
6292 {
6293 int nregs, nregs2;
6294 int ncrn;
6295
6296 /* We only need to do this once per argument. */
6297 if (pcum->aapcs_arg_processed)
6298 return;
6299
6300 pcum->aapcs_arg_processed = true;
6301
6302 /* Special case: if named is false then we are handling an incoming
6303 anonymous argument which is on the stack. */
6304 if (!named)
6305 return;
6306
6307 /* Is this a potential co-processor register candidate? */
6308 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6309 {
6310 int slot = aapcs_select_call_coproc (pcum, mode, type);
6311 pcum->aapcs_cprc_slot = slot;
6312
6313 /* We don't have to apply any of the rules from part B of the
6314 preparation phase, these are handled elsewhere in the
6315 compiler. */
6316
6317 if (slot >= 0)
6318 {
6319 /* A Co-processor register candidate goes either in its own
6320 class of registers or on the stack. */
6321 if (!pcum->aapcs_cprc_failed[slot])
6322 {
6323 /* C1.cp - Try to allocate the argument to co-processor
6324 registers. */
6325 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6326 return;
6327
6328 /* C2.cp - Put the argument on the stack and note that we
6329 can't assign any more candidates in this slot. We also
6330 need to note that we have allocated stack space, so that
6331 we won't later try to split a non-cprc candidate between
6332 core registers and the stack. */
6333 pcum->aapcs_cprc_failed[slot] = true;
6334 pcum->can_split = false;
6335 }
6336
6337 /* We didn't get a register, so this argument goes on the
6338 stack. */
6339 gcc_assert (pcum->can_split == false);
6340 return;
6341 }
6342 }
6343
6344 /* C3 - For double-word aligned arguments, round the NCRN up to the
6345 next even number. */
6346 ncrn = pcum->aapcs_ncrn;
6347 if (ncrn & 1)
6348 {
6349 int res = arm_needs_doubleword_align (mode, type);
6350 /* Only warn during RTL expansion of call stmts, otherwise we would
6351 warn e.g. during gimplification even on functions that will be
6352 always inlined, and we'd warn multiple times. Don't warn when
6353 called in expand_function_start either, as we warn instead in
6354 arm_function_arg_boundary in that case. */
6355 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6356 inform (input_location, "parameter passing for argument of type "
6357 "%qT changed in GCC 7.1", type);
6358 else if (res > 0)
6359 ncrn++;
6360 }
6361
6362 nregs = ARM_NUM_REGS2 (mode, type);
6363
6364 /* Sigh, this test should really assert that nregs > 0, but a GCC
6365 extension allows empty structs and then gives them empty size; it
6366 then allows such a structure to be passed by value. For some of
6367 the code below we have to pretend that such an argument has
6368 non-zero size so that we 'locate' it correctly either in
6369 registers or on the stack. */
6370 gcc_assert (nregs >= 0);
6371
6372 nregs2 = nregs ? nregs : 1;
6373
6374 /* C4 - Argument fits entirely in core registers. */
6375 if (ncrn + nregs2 <= NUM_ARG_REGS)
6376 {
6377 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6378 pcum->aapcs_next_ncrn = ncrn + nregs;
6379 return;
6380 }
6381
6382 /* C5 - Some core registers left and there are no arguments already
6383 on the stack: split this argument between the remaining core
6384 registers and the stack. */
6385 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6386 {
6387 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6388 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6389 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6390 return;
6391 }
6392
6393 /* C6 - NCRN is set to 4. */
6394 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6395
6396 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6397 return;
6398 }
6399
6400 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6401 for a call to a function whose data type is FNTYPE.
6402 For a library call, FNTYPE is NULL. */
6403 void
6404 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6405 rtx libname,
6406 tree fndecl ATTRIBUTE_UNUSED)
6407 {
6408 /* Determine the PCS variant to use for this call. */
6409 if (fntype)
6410 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6411 else
6412 pcum->pcs_variant = arm_pcs_default;
6413
6414 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6415 {
6416 if (arm_libcall_uses_aapcs_base (libname))
6417 pcum->pcs_variant = ARM_PCS_AAPCS;
6418
6419 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6420 pcum->aapcs_reg = NULL_RTX;
6421 pcum->aapcs_partial = 0;
6422 pcum->aapcs_arg_processed = false;
6423 pcum->aapcs_cprc_slot = -1;
6424 pcum->can_split = true;
6425
6426 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6427 {
6428 int i;
6429
6430 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6431 {
6432 pcum->aapcs_cprc_failed[i] = false;
6433 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6434 }
6435 }
6436 return;
6437 }
6438
6439 /* Legacy ABIs */
6440
6441 /* On the ARM, the offset starts at 0. */
6442 pcum->nregs = 0;
6443 pcum->iwmmxt_nregs = 0;
6444 pcum->can_split = true;
6445
6446 /* Varargs vectors are treated the same as long long.
6447 named_count avoids having to change the way arm handles 'named' */
6448 pcum->named_count = 0;
6449 pcum->nargs = 0;
6450
6451 if (TARGET_REALLY_IWMMXT && fntype)
6452 {
6453 tree fn_arg;
6454
6455 for (fn_arg = TYPE_ARG_TYPES (fntype);
6456 fn_arg;
6457 fn_arg = TREE_CHAIN (fn_arg))
6458 pcum->named_count += 1;
6459
6460 if (! pcum->named_count)
6461 pcum->named_count = INT_MAX;
6462 }
6463 }
6464
6465 /* Return 1 if double word alignment is required for argument passing.
6466 Return -1 if double word alignment used to be required for argument
6467 passing before PR77728 ABI fix, but is not required anymore.
6468 Return 0 if double word alignment is not required and wasn't required
6469 before either. */
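/* For example: a long long argument, or a struct containing one, has
   8-byte alignment and yields 1, while a plain int yields 0; the -1
   result only flags types that the pre-GCC-7.1 rules would have
   over-aligned, so that callers can emit a -Wpsabi note.  */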
6470 static int
6471 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6472 {
6473 if (!type)
6474 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6475
6476 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6477 if (!AGGREGATE_TYPE_P (type))
6478 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6479
6480 /* Array types: Use member alignment of element type. */
6481 if (TREE_CODE (type) == ARRAY_TYPE)
6482 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6483
6484 int ret = 0;
6485 /* Record/aggregate types: Use greatest member alignment of any member. */
6486 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6487 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6488 {
6489 if (TREE_CODE (field) == FIELD_DECL)
6490 return 1;
6491 else
6492 /* Before PR77728 fix, we were incorrectly considering also
6493 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6494 Make sure we can warn about that with -Wpsabi. */
6495 ret = -1;
6496 }
6497
6498 return ret;
6499 }
6500
6501
6502 /* Determine where to put an argument to a function.
6503 Value is zero to push the argument on the stack,
6504 or a hard register in which to store the argument.
6505
6506 MODE is the argument's machine mode.
6507 TYPE is the data type of the argument (as a tree).
6508 This is null for libcalls where that information may
6509 not be available.
6510 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6511 the preceding args and about the function being called.
6512 NAMED is nonzero if this argument is a named parameter
6513 (otherwise it is an extra parameter matching an ellipsis).
6514
6515 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6516 other arguments are passed on the stack. If (NAMED == 0) (which happens
6517 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6518 defined), say it is passed in the stack (function_prologue will
6519 indeed make it pass in the stack if necessary). */
6520
6521 static rtx
6522 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6523 const_tree type, bool named)
6524 {
6525 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6526 int nregs;
6527
6528 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6529 a call insn (op3 of a call_value insn). */
6530 if (mode == VOIDmode)
6531 return const0_rtx;
6532
6533 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6534 {
6535 aapcs_layout_arg (pcum, mode, type, named);
6536 return pcum->aapcs_reg;
6537 }
6538
6539 /* Varargs vectors are treated the same as long long.
6540 named_count avoids having to change the way arm handles 'named' */
6541 if (TARGET_IWMMXT_ABI
6542 && arm_vector_mode_supported_p (mode)
6543 && pcum->named_count > pcum->nargs + 1)
6544 {
6545 if (pcum->iwmmxt_nregs <= 9)
6546 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6547 else
6548 {
6549 pcum->can_split = false;
6550 return NULL_RTX;
6551 }
6552 }
6553
6554 /* Put doubleword aligned quantities in even register pairs. */
6555 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6556 {
6557 int res = arm_needs_doubleword_align (mode, type);
6558 if (res < 0 && warn_psabi)
6559 inform (input_location, "parameter passing for argument of type "
6560 "%qT changed in GCC 7.1", type);
6561 else if (res > 0)
6562 pcum->nregs++;
6563 }
6564
6565 /* Only allow splitting an arg between regs and memory if all preceding
6566 args were allocated to regs. For args passed by reference we only count
6567 the reference pointer. */
6568 if (pcum->can_split)
6569 nregs = 1;
6570 else
6571 nregs = ARM_NUM_REGS2 (mode, type);
6572
6573 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6574 return NULL_RTX;
6575
6576 return gen_rtx_REG (mode, pcum->nregs);
6577 }
6578
6579 static unsigned int
6580 arm_function_arg_boundary (machine_mode mode, const_tree type)
6581 {
6582 if (!ARM_DOUBLEWORD_ALIGN)
6583 return PARM_BOUNDARY;
6584
6585 int res = arm_needs_doubleword_align (mode, type);
6586 if (res < 0 && warn_psabi)
6587 inform (input_location, "parameter passing for argument of type %qT "
6588 "changed in GCC 7.1", type);
6589
6590 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6591 }
6592
6593 static int
6594 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6595 tree type, bool named)
6596 {
6597 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6598 int nregs = pcum->nregs;
6599
6600 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6601 {
6602 aapcs_layout_arg (pcum, mode, type, named);
6603 return pcum->aapcs_partial;
6604 }
6605
6606 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6607 return 0;
6608
6609 if (NUM_ARG_REGS > nregs
6610 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6611 && pcum->can_split)
6612 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6613
6614 return 0;
6615 }
6616
6617 /* Update the data in PCUM to advance over an argument
6618 of mode MODE and data type TYPE.
6619 (TYPE is null for libcalls where that information may not be available.) */
6620
6621 static void
6622 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6623 const_tree type, bool named)
6624 {
6625 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6626
6627 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6628 {
6629 aapcs_layout_arg (pcum, mode, type, named);
6630
6631 if (pcum->aapcs_cprc_slot >= 0)
6632 {
6633 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6634 type);
6635 pcum->aapcs_cprc_slot = -1;
6636 }
6637
6638 /* Generic stuff. */
6639 pcum->aapcs_arg_processed = false;
6640 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6641 pcum->aapcs_reg = NULL_RTX;
6642 pcum->aapcs_partial = 0;
6643 }
6644 else
6645 {
6646 pcum->nargs += 1;
6647 if (arm_vector_mode_supported_p (mode)
6648 && pcum->named_count > pcum->nargs
6649 && TARGET_IWMMXT_ABI)
6650 pcum->iwmmxt_nregs += 1;
6651 else
6652 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6653 }
6654 }
6655
6656 /* Variable sized types are passed by reference. This is a GCC
6657 extension to the ARM ABI. */
6658
6659 static bool
6660 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6661 machine_mode mode ATTRIBUTE_UNUSED,
6662 const_tree type, bool named ATTRIBUTE_UNUSED)
6663 {
6664 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6665 }
6666 \f
6667 /* Encode the current state of the #pragma [no_]long_calls. */
6668 typedef enum
6669 {
6670 OFF, /* No #pragma [no_]long_calls is in effect. */
6671 LONG, /* #pragma long_calls is in effect. */
6672 SHORT /* #pragma no_long_calls is in effect. */
6673 } arm_pragma_enum;
6674
6675 static arm_pragma_enum arm_pragma_long_calls = OFF;
6676
6677 void
6678 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6679 {
6680 arm_pragma_long_calls = LONG;
6681 }
6682
6683 void
6684 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6685 {
6686 arm_pragma_long_calls = SHORT;
6687 }
6688
6689 void
6690 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6691 {
6692 arm_pragma_long_calls = OFF;
6693 }
6694 \f
6695 /* Handle an attribute requiring a FUNCTION_DECL;
6696 arguments as in struct attribute_spec.handler. */
6697 static tree
6698 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6699 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6700 {
6701 if (TREE_CODE (*node) != FUNCTION_DECL)
6702 {
6703 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6704 name);
6705 *no_add_attrs = true;
6706 }
6707
6708 return NULL_TREE;
6709 }
6710
6711 /* Handle an "interrupt" or "isr" attribute;
6712 arguments as in struct attribute_spec.handler. */
6713 static tree
6714 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6715 bool *no_add_attrs)
6716 {
6717 if (DECL_P (*node))
6718 {
6719 if (TREE_CODE (*node) != FUNCTION_DECL)
6720 {
6721 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6722 name);
6723 *no_add_attrs = true;
6724 }
6725 /* FIXME: the argument if any is checked for type attributes;
6726 should it be checked for decl ones? */
6727 }
6728 else
6729 {
6730 if (TREE_CODE (*node) == FUNCTION_TYPE
6731 || TREE_CODE (*node) == METHOD_TYPE)
6732 {
6733 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6734 {
6735 warning (OPT_Wattributes, "%qE attribute ignored",
6736 name);
6737 *no_add_attrs = true;
6738 }
6739 }
6740 else if (TREE_CODE (*node) == POINTER_TYPE
6741 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6742 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6743 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6744 {
6745 *node = build_variant_type_copy (*node);
6746 TREE_TYPE (*node) = build_type_attribute_variant
6747 (TREE_TYPE (*node),
6748 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6749 *no_add_attrs = true;
6750 }
6751 else
6752 {
6753 /* Possibly pass this attribute on from the type to a decl. */
6754 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6755 | (int) ATTR_FLAG_FUNCTION_NEXT
6756 | (int) ATTR_FLAG_ARRAY_NEXT))
6757 {
6758 *no_add_attrs = true;
6759 return tree_cons (name, args, NULL_TREE);
6760 }
6761 else
6762 {
6763 warning (OPT_Wattributes, "%qE attribute ignored",
6764 name);
6765 }
6766 }
6767 }
6768
6769 return NULL_TREE;
6770 }
6771
6772 /* Handle a "pcs" attribute; arguments as in struct
6773 attribute_spec.handler. */
6774 static tree
6775 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6776 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6777 {
6778 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6779 {
6780 warning (OPT_Wattributes, "%qE attribute ignored", name);
6781 *no_add_attrs = true;
6782 }
6783 return NULL_TREE;
6784 }
6785
6786 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6787 /* Handle the "notshared" attribute. This attribute is another way of
6788 requesting hidden visibility. ARM's compiler supports
6789 "__declspec(notshared)"; we support the same thing via an
6790 attribute. */
6791
6792 static tree
6793 arm_handle_notshared_attribute (tree *node,
6794 tree name ATTRIBUTE_UNUSED,
6795 tree args ATTRIBUTE_UNUSED,
6796 int flags ATTRIBUTE_UNUSED,
6797 bool *no_add_attrs)
6798 {
6799 tree decl = TYPE_NAME (*node);
6800
6801 if (decl)
6802 {
6803 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6804 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6805 *no_add_attrs = false;
6806 }
6807 return NULL_TREE;
6808 }
6809 #endif
6810
6811 /* This function returns true if a function with declaration FNDECL and type
6812 FNTYPE uses the stack to pass arguments or to return its value, and false
6813 otherwise. This is used for functions with the attributes
6814 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6815 diagnostic messages if the stack is used. NAME is the name of the attribute
6816 used. */
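/* For instance (a sketch): a cmse_nonsecure_entry function taking five
   int arguments would place the fifth one on the stack, and one
   returning a plain struct of two ints would return it in memory
   under AAPCS; both cases are rejected here.  */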
6817
6818 static bool
6819 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6820 {
6821 function_args_iterator args_iter;
6822 CUMULATIVE_ARGS args_so_far_v;
6823 cumulative_args_t args_so_far;
6824 bool first_param = true;
6825 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6826
6827 /* Error out if any argument is passed on the stack. */
6828 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6829 args_so_far = pack_cumulative_args (&args_so_far_v);
6830 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6831 {
6832 rtx arg_rtx;
6833 machine_mode arg_mode = TYPE_MODE (arg_type);
6834
6835 prev_arg_type = arg_type;
6836 if (VOID_TYPE_P (arg_type))
6837 continue;
6838
6839 if (!first_param)
6840 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6841 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6842 if (!arg_rtx
6843 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6844 {
6845 error ("%qE attribute not available to functions with arguments "
6846 "passed on the stack", name);
6847 return true;
6848 }
6849 first_param = false;
6850 }
6851
6852 /* Error out for variadic functions since we cannot control how many
6853 arguments will be passed and thus the stack could be used. stdarg_p () is
6854 not used for the check, to avoid walking the arguments twice. */
6855 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6856 {
6857 error ("%qE attribute not available to functions with variable number "
6858 "of arguments", name);
6859 return true;
6860 }
6861
6862 /* Error out if return value is passed on the stack. */
6863 ret_type = TREE_TYPE (fntype);
6864 if (arm_return_in_memory (ret_type, fntype))
6865 {
6866 error ("%qE attribute not available to functions that return value on "
6867 "the stack", name);
6868 return true;
6869 }
6870 return false;
6871 }
6872
6873 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6874 function will check whether the attribute is allowed here and will add the
6875 attribute to the function declaration tree or otherwise issue a warning. */
6876
6877 static tree
6878 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6879 tree /* args */,
6880 int /* flags */,
6881 bool *no_add_attrs)
6882 {
6883 tree fndecl;
6884
6885 if (!use_cmse)
6886 {
6887 *no_add_attrs = true;
6888 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6889 name);
6890 return NULL_TREE;
6891 }
6892
6893 /* Ignore attribute for function types. */
6894 if (TREE_CODE (*node) != FUNCTION_DECL)
6895 {
6896 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6897 name);
6898 *no_add_attrs = true;
6899 return NULL_TREE;
6900 }
6901
6902 fndecl = *node;
6903
6904 /* Warn for static linkage functions. */
6905 if (!TREE_PUBLIC (fndecl))
6906 {
6907 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6908 "with static linkage", name);
6909 *no_add_attrs = true;
6910 return NULL_TREE;
6911 }
6912
6913 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6914 TREE_TYPE (fndecl));
6915 return NULL_TREE;
6916 }
6917
6918
6919 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6920 function will check whether the attribute is allowed here and will add the
6921 attribute to the function type tree or otherwise issue a diagnostic. The
6922 reason we check this at declaration time is to only allow the use of the
6923 attribute with declarations of function pointers and not function
6924 declarations. This function checks that NODE is of the expected type and
6925 issues diagnostics otherwise, using NAME. If it is not of the expected type
6926 *NO_ADD_ATTRS will be set to true. */
6927
6928 static tree
6929 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6930 tree /* args */,
6931 int /* flags */,
6932 bool *no_add_attrs)
6933 {
6934 tree decl = NULL_TREE, fntype = NULL_TREE;
6935 tree type;
6936
6937 if (!use_cmse)
6938 {
6939 *no_add_attrs = true;
6940 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6941 name);
6942 return NULL_TREE;
6943 }
6944
6945 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6946 {
6947 decl = *node;
6948 fntype = TREE_TYPE (decl);
6949 }
6950
6951 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6952 fntype = TREE_TYPE (fntype);
6953
6954 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6955 {
6956 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6957 "function pointer", name);
6958 *no_add_attrs = true;
6959 return NULL_TREE;
6960 }
6961
6962 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
6963
6964 if (*no_add_attrs)
6965 return NULL_TREE;
6966
6967 /* Prevent trees being shared among function types with and without
6968 cmse_nonsecure_call attribute. */
6969 type = TREE_TYPE (decl);
6970
6971 type = build_distinct_type_copy (type);
6972 TREE_TYPE (decl) = type;
6973 fntype = type;
6974
6975 while (TREE_CODE (fntype) != FUNCTION_TYPE)
6976 {
6977 type = fntype;
6978 fntype = TREE_TYPE (fntype);
6979 fntype = build_distinct_type_copy (fntype);
6980 TREE_TYPE (type) = fntype;
6981 }
6982
6983 /* Construct a type attribute and add it to the function type. */
6984 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
6985 TYPE_ATTRIBUTES (fntype));
6986 TYPE_ATTRIBUTES (fntype) = attrs;
6987 return NULL_TREE;
6988 }
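
/* Illustrative usage sketch for the handler above (assumes -mcmse). The
   attribute has to reach a function type through a declaration of a
   function pointer or a function-type typedef; these names are
   hypothetical:

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (int);
     ns_fn_t *ns_callback;

   Placing the attribute directly on a function declaration is diagnosed by
   the type check above. */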
6989
6990 /* Return 0 if the attributes for two types are incompatible, 1 if they
6991 are compatible, and 2 if they are nearly compatible (which causes a
6992 warning to be generated). */
6993 static int
6994 arm_comp_type_attributes (const_tree type1, const_tree type2)
6995 {
6996 int l1, l2, s1, s2;
6997
6998 /* Check for mismatch of non-default calling convention. */
6999 if (TREE_CODE (type1) != FUNCTION_TYPE)
7000 return 1;
7001
7002 /* Check for mismatched call attributes. */
7003 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7004 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7005 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7006 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7007
7008 /* Only bother to check if an attribute is defined. */
7009 if (l1 | l2 | s1 | s2)
7010 {
7011 /* If one type has an attribute, the other must have the same attribute. */
7012 if ((l1 != l2) || (s1 != s2))
7013 return 0;
7014
7015 /* Disallow mixed attributes. */
7016 if ((l1 & s2) || (l2 & s1))
7017 return 0;
7018 }
7019
7020 /* Check for mismatched ISR attribute. */
7021 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7022 if (! l1)
7023 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7024 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7025 if (! l2)
7026 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7027 if (l1 != l2)
7028 return 0;
7029
7030 l1 = lookup_attribute ("cmse_nonsecure_call",
7031 TYPE_ATTRIBUTES (type1)) != NULL;
7032 l2 = lookup_attribute ("cmse_nonsecure_call",
7033 TYPE_ATTRIBUTES (type2)) != NULL;
7034
7035 if (l1 != l2)
7036 return 0;
7037
7038 return 1;
7039 }
7040
7041 /* Assigns default attributes to a newly defined type. This is used to
7042 set short_call/long_call attributes for function types of
7043 functions defined inside corresponding #pragma scopes. */
7044 static void
7045 arm_set_default_type_attributes (tree type)
7046 {
7047 /* Add __attribute__ ((long_call)) to all functions when inside
7048 #pragma long_calls, or __attribute__ ((short_call)) when inside
7049 #pragma no_long_calls. */
7050 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7051 {
7052 tree type_attr_list, attr_name;
7053 type_attr_list = TYPE_ATTRIBUTES (type);
7054
7055 if (arm_pragma_long_calls == LONG)
7056 attr_name = get_identifier ("long_call");
7057 else if (arm_pragma_long_calls == SHORT)
7058 attr_name = get_identifier ("short_call");
7059 else
7060 return;
7061
7062 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7063 TYPE_ATTRIBUTES (type) = type_attr_list;
7064 }
7065 }
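
/* Illustrative sketch of the #pragma scoping handled above; the function
   names are hypothetical:

     #pragma long_calls
     void far_func (void);     given __attribute__ ((long_call)) by default
     #pragma no_long_calls
     void near_func (void);    given __attribute__ ((short_call)) by default
     #pragma long_calls_off
     void plain_func (void);   no attribute added

   long_calls_off restores the command-line default behaviour. */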
7066 \f
7067 /* Return true if DECL is known to be linked into section SECTION. */
7068
7069 static bool
7070 arm_function_in_section_p (tree decl, section *section)
7071 {
7072 /* We can only be certain about the prevailing symbol definition. */
7073 if (!decl_binds_to_current_def_p (decl))
7074 return false;
7075
7076 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7077 if (!DECL_SECTION_NAME (decl))
7078 {
7079 /* Make sure that we will not create a unique section for DECL. */
7080 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7081 return false;
7082 }
7083
7084 return function_section (decl) == section;
7085 }
7086
7087 /* Return nonzero if a 32-bit "long_call" should be generated for
7088 a call from the current function to DECL. We generate a long_call
7089 if the function:
7090
7091 a. has an __attribute__ ((long_call))
7092 or b. is within the scope of a #pragma long_calls
7093 or c. the -mlong-calls command line switch has been specified
7094
7095 However we do not generate a long call if the function:
7096
7097 d. has an __attribute__ ((short_call))
7098 or e. is inside the scope of a #pragma no_long_calls
7099 or f. is defined in the same section as the current function. */
7100
7101 bool
7102 arm_is_long_call_p (tree decl)
7103 {
7104 tree attrs;
7105
7106 if (!decl)
7107 return TARGET_LONG_CALLS;
7108
7109 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7110 if (lookup_attribute ("short_call", attrs))
7111 return false;
7112
7113 /* For "f", be conservative, and only cater for cases in which the
7114 whole of the current function is placed in the same section. */
7115 if (!flag_reorder_blocks_and_partition
7116 && TREE_CODE (decl) == FUNCTION_DECL
7117 && arm_function_in_section_p (decl, current_function_section ()))
7118 return false;
7119
7120 if (lookup_attribute ("long_call", attrs))
7121 return true;
7122
7123 return TARGET_LONG_CALLS;
7124 }
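
/* Illustrative precedence sketch for the checks above, assuming compilation
   with -mlong-calls; the declarations are hypothetical:

     void f (void) __attribute__ ((short_call));  never a long call
     void g (void) __attribute__ ((long_call));   long call, unless g is
                                                  known to end up in the
                                                  caller's section
     void h (void);                               long call purely because
                                                  of -mlong-calls

   That is: short_call is tested first, then the same-section test, then
   long_call, and finally the command-line default. */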
7125
7126 /* Return nonzero if it is ok to make a tail-call to DECL. */
7127 static bool
7128 arm_function_ok_for_sibcall (tree decl, tree exp)
7129 {
7130 unsigned long func_type;
7131
7132 if (cfun->machine->sibcall_blocked)
7133 return false;
7134
7135 /* Never tailcall something if we are generating code for Thumb-1. */
7136 if (TARGET_THUMB1)
7137 return false;
7138
7139 /* The PIC register is live on entry to VxWorks PLT entries, so we
7140 must make the call before restoring the PIC register. */
7141 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7142 return false;
7143
7144 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7145 may be used both as target of the call and base register for restoring
7146 the VFP registers. */
7147 if (TARGET_APCS_FRAME && TARGET_ARM
7148 && TARGET_HARD_FLOAT
7149 && decl && arm_is_long_call_p (decl))
7150 return false;
7151
7152 /* If we are interworking and the function is not declared static
7153 then we can't tail-call it unless we know that it exists in this
7154 compilation unit (since it might be a Thumb routine). */
7155 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7156 && !TREE_ASM_WRITTEN (decl))
7157 return false;
7158
7159 func_type = arm_current_func_type ();
7160 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7161 if (IS_INTERRUPT (func_type))
7162 return false;
7163
7164 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7165 generated for entry functions themselves. */
7166 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7167 return false;
7168
7169 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7170 this would complicate matters for later code generation. */
7171 if (TREE_CODE (exp) == CALL_EXPR)
7172 {
7173 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7174 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7175 return false;
7176 }
7177
7178 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7179 {
7180 /* Check that the return value locations are the same. For
7181 example that we aren't returning a value from the sibling in
7182 a VFP register but then need to transfer it to a core
7183 register. */
7184 rtx a, b;
7185 tree decl_or_type = decl;
7186
7187 /* If it is an indirect function pointer, get the function type. */
7188 if (!decl)
7189 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7190
7191 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7192 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7193 cfun->decl, false);
7194 if (!rtx_equal_p (a, b))
7195 return false;
7196 }
7197
7198 /* Never tailcall if function may be called with a misaligned SP. */
7199 if (IS_STACKALIGN (func_type))
7200 return false;
7201
7202 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7203 references should become a NOP. Don't convert such calls into
7204 sibling calls. */
7205 if (TARGET_AAPCS_BASED
7206 && arm_abi == ARM_ABI_AAPCS
7207 && decl
7208 && DECL_WEAK (decl))
7209 return false;
7210
7211 /* We cannot do a tailcall for an indirect call by descriptor if all the
7212 argument registers are used because the only register left to load the
7213 address is IP and it will already contain the static chain. */
7214 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7215 {
7216 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7217 CUMULATIVE_ARGS cum;
7218 cumulative_args_t cum_v;
7219
7220 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7221 cum_v = pack_cumulative_args (&cum);
7222
7223 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7224 {
7225 tree type = TREE_VALUE (t);
7226 if (!VOID_TYPE_P (type))
7227 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7228 }
7229
7230 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7231 return false;
7232 }
7233
7234 /* Everything else is ok. */
7235 return true;
7236 }
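
/* Illustrative sketch of one rejected sibcall, assuming -mcmse; the names
   are hypothetical:

     typedef int __attribute__ ((cmse_nonsecure_call)) ns_fn_t (void);
     int wrap (ns_fn_t *fp) { return fp (); }

   The tail-position call through FP stays a normal call because of the
   cmse_nonsecure_call check above. The same function also rejects, among
   others, calls from interrupt handlers and calls whose return value would
   be located differently from the caller's own return value. */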
7237
7238 \f
7239 /* Addressing mode support functions. */
7240
7241 /* Return nonzero if X is a legitimate immediate operand when compiling
7242 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7243 int
7244 legitimate_pic_operand_p (rtx x)
7245 {
7246 if (GET_CODE (x) == SYMBOL_REF
7247 || (GET_CODE (x) == CONST
7248 && GET_CODE (XEXP (x, 0)) == PLUS
7249 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7250 return 0;
7251
7252 return 1;
7253 }
7254
7255 /* Record that the current function needs a PIC register. Initialize
7256 cfun->machine->pic_reg if we have not already done so. */
7257
7258 static void
7259 require_pic_register (void)
7260 {
7261 /* A lot of the logic here is made obscure by the fact that this
7262 routine gets called as part of the rtx cost estimation process.
7263 We don't want those calls to affect any assumptions about the real
7264 function; and further, we can't call entry_of_function() until we
7265 start the real expansion process. */
7266 if (!crtl->uses_pic_offset_table)
7267 {
7268 gcc_assert (can_create_pseudo_p ());
7269 if (arm_pic_register != INVALID_REGNUM
7270 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7271 {
7272 if (!cfun->machine->pic_reg)
7273 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7274
7275 /* Play games to avoid marking the function as needing pic
7276 if we are being called as part of the cost-estimation
7277 process. */
7278 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7279 crtl->uses_pic_offset_table = 1;
7280 }
7281 else
7282 {
7283 rtx_insn *seq, *insn;
7284
7285 if (!cfun->machine->pic_reg)
7286 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7287
7288 /* Play games to avoid marking the function as needing pic
7289 if we are being called as part of the cost-estimation
7290 process. */
7291 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7292 {
7293 crtl->uses_pic_offset_table = 1;
7294 start_sequence ();
7295
7296 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7297 && arm_pic_register > LAST_LO_REGNUM)
7298 emit_move_insn (cfun->machine->pic_reg,
7299 gen_rtx_REG (Pmode, arm_pic_register));
7300 else
7301 arm_load_pic_register (0UL);
7302
7303 seq = get_insns ();
7304 end_sequence ();
7305
7306 for (insn = seq; insn; insn = NEXT_INSN (insn))
7307 if (INSN_P (insn))
7308 INSN_LOCATION (insn) = prologue_location;
7309
7310 /* We can be called during expansion of PHI nodes, where
7311 we can't yet emit instructions directly in the final
7312 insn stream. Queue the insns on the entry edge; they will
7313 be committed after everything else is expanded. */
7314 insert_insn_on_edge (seq,
7315 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7316 }
7317 }
7318 }
7319 }
7320
7321 rtx
7322 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7323 {
7324 if (GET_CODE (orig) == SYMBOL_REF
7325 || GET_CODE (orig) == LABEL_REF)
7326 {
7327 if (reg == 0)
7328 {
7329 gcc_assert (can_create_pseudo_p ());
7330 reg = gen_reg_rtx (Pmode);
7331 }
7332
7333 /* VxWorks does not impose a fixed gap between segments; the run-time
7334 gap can be different from the object-file gap. We therefore can't
7335 use GOTOFF unless we are absolutely sure that the symbol is in the
7336 same segment as the GOT. Unfortunately, the flexibility of linker
7337 scripts means that we can't be sure of that in general, so assume
7338 that GOTOFF is never valid on VxWorks. */
7339 /* References to weak symbols cannot be resolved locally: they
7340 may be overridden by a non-weak definition at link time. */
7341 rtx_insn *insn;
7342 if ((GET_CODE (orig) == LABEL_REF
7343 || (GET_CODE (orig) == SYMBOL_REF
7344 && SYMBOL_REF_LOCAL_P (orig)
7345 && (SYMBOL_REF_DECL (orig)
7346 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7347 && NEED_GOT_RELOC
7348 && arm_pic_data_is_text_relative)
7349 insn = arm_pic_static_addr (orig, reg);
7350 else
7351 {
7352 rtx pat;
7353 rtx mem;
7354
7355 /* If this function doesn't have a pic register, create one now. */
7356 require_pic_register ();
7357
7358 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7359
7360 /* Make the MEM as close to a constant as possible. */
7361 mem = SET_SRC (pat);
7362 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7363 MEM_READONLY_P (mem) = 1;
7364 MEM_NOTRAP_P (mem) = 1;
7365
7366 insn = emit_insn (pat);
7367 }
7368
7369 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7370 by loop. */
7371 set_unique_reg_note (insn, REG_EQUAL, orig);
7372
7373 return reg;
7374 }
7375 else if (GET_CODE (orig) == CONST)
7376 {
7377 rtx base, offset;
7378
7379 if (GET_CODE (XEXP (orig, 0)) == PLUS
7380 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7381 return orig;
7382
7383 /* Handle the case where we have: const (UNSPEC_TLS). */
7384 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7385 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7386 return orig;
7387
7388 /* Handle the case where we have:
7389 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7390 CONST_INT. */
7391 if (GET_CODE (XEXP (orig, 0)) == PLUS
7392 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7393 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7394 {
7395 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7396 return orig;
7397 }
7398
7399 if (reg == 0)
7400 {
7401 gcc_assert (can_create_pseudo_p ());
7402 reg = gen_reg_rtx (Pmode);
7403 }
7404
7405 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7406
7407 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7408 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7409 base == reg ? 0 : reg);
7410
7411 if (CONST_INT_P (offset))
7412 {
7413 /* The base register doesn't really matter, we only want to
7414 test the index for the appropriate mode. */
7415 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7416 {
7417 gcc_assert (can_create_pseudo_p ());
7418 offset = force_reg (Pmode, offset);
7419 }
7420
7421 if (CONST_INT_P (offset))
7422 return plus_constant (Pmode, base, INTVAL (offset));
7423 }
7424
7425 if (GET_MODE_SIZE (mode) > 4
7426 && (GET_MODE_CLASS (mode) == MODE_INT
7427 || TARGET_SOFT_FLOAT))
7428 {
7429 emit_insn (gen_addsi3 (reg, base, offset));
7430 return reg;
7431 }
7432
7433 return gen_rtx_PLUS (Pmode, base, offset);
7434 }
7435
7436 return orig;
7437 }
7438
7439
7440 /* Find a spare register to use during the prolog of a function. */
7441
7442 static int
7443 thumb_find_work_register (unsigned long pushed_regs_mask)
7444 {
7445 int reg;
7446
7447 /* Check the argument registers first as these are call-used. The
7448 register allocation order means that sometimes r3 might be used
7449 but earlier argument registers might not, so check them all. */
7450 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7451 if (!df_regs_ever_live_p (reg))
7452 return reg;
7453
7454 /* Before going on to check the call-saved registers we can try a couple
7455 more ways of deducing that r3 is available. The first is when we are
7456 pushing anonymous arguments onto the stack and we have fewer than 4
7457 registers' worth of fixed arguments (*). In this case r3 will be part of
7458 the variable argument list and so we can be sure that it will be
7459 pushed right at the start of the function. Hence it will be available
7460 for the rest of the prologue.
7461 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7462 if (cfun->machine->uses_anonymous_args
7463 && crtl->args.pretend_args_size > 0)
7464 return LAST_ARG_REGNUM;
7465
7466 /* The other case is when we have fixed arguments but fewer than 4 registers'
7467 worth. In this case r3 might be used in the body of the function, but
7468 it is not being used to convey an argument into the function. In theory
7469 we could just check crtl->args.size to see how many bytes are
7470 being passed in argument registers, but it seems that it is unreliable.
7471 Sometimes it will have the value 0 when in fact arguments are being
7472 passed. (See testcase execute/20021111-1.c for an example). So we also
7473 check the args_info.nregs field as well. The problem with this field is
7474 that it makes no allowances for arguments that are passed to the
7475 function but which are not used. Hence we could miss an opportunity
7476 when a function has an unused argument in r3. But it is better to be
7477 safe than sorry. */
7478 if (! cfun->machine->uses_anonymous_args
7479 && crtl->args.size >= 0
7480 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7481 && (TARGET_AAPCS_BASED
7482 ? crtl->args.info.aapcs_ncrn < 4
7483 : crtl->args.info.nregs < 4))
7484 return LAST_ARG_REGNUM;
7485
7486 /* Otherwise look for a call-saved register that is going to be pushed. */
7487 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7488 if (pushed_regs_mask & (1 << reg))
7489 return reg;
7490
7491 if (TARGET_THUMB2)
7492 {
7493 /* Thumb-2 can use high regs. */
7494 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7495 if (pushed_regs_mask & (1 << reg))
7496 return reg;
7497 }
7498 /* Something went wrong - thumb_compute_save_reg_mask()
7499 should have arranged for a suitable register to be pushed. */
7500 gcc_unreachable ();
7501 }
7502
7503 static GTY(()) int pic_labelno;
7504
7505 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7506 low register. */
7507
7508 void
7509 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7510 {
7511 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7512
7513 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7514 return;
7515
7516 gcc_assert (flag_pic);
7517
7518 pic_reg = cfun->machine->pic_reg;
7519 if (TARGET_VXWORKS_RTP)
7520 {
7521 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7522 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7523 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7524
7525 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7526
7527 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7528 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7529 }
7530 else
7531 {
7532 /* We use an UNSPEC rather than a LABEL_REF because this label
7533 never appears in the code stream. */
7534
7535 labelno = GEN_INT (pic_labelno++);
7536 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7537 l1 = gen_rtx_CONST (VOIDmode, l1);
7538
7539 /* On the ARM the PC register contains 'dot + 8' at the time of the
7540 addition, on the Thumb it is 'dot + 4'. */
7541 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7542 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7543 UNSPEC_GOTSYM_OFF);
7544 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7545
7546 if (TARGET_32BIT)
7547 {
7548 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7549 }
7550 else /* TARGET_THUMB1 */
7551 {
7552 if (arm_pic_register != INVALID_REGNUM
7553 && REGNO (pic_reg) > LAST_LO_REGNUM)
7554 {
7555 /* We will have pushed the pic register, so we should always be
7556 able to find a work register. */
7557 pic_tmp = gen_rtx_REG (SImode,
7558 thumb_find_work_register (saved_regs));
7559 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7560 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7561 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7562 }
7563 else if (arm_pic_register != INVALID_REGNUM
7564 && arm_pic_register > LAST_LO_REGNUM
7565 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7566 {
7567 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7568 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7569 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7570 }
7571 else
7572 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7573 }
7574 }
7575
7576 /* Need to emit this whether or not we obey regdecls,
7577 since setjmp/longjmp can cause life info to screw up. */
7578 emit_use (pic_reg);
7579 }
7580
7581 /* Generate code to load the address of a static var when flag_pic is set. */
7582 static rtx_insn *
7583 arm_pic_static_addr (rtx orig, rtx reg)
7584 {
7585 rtx l1, labelno, offset_rtx;
7586
7587 gcc_assert (flag_pic);
7588
7589 /* We use an UNSPEC rather than a LABEL_REF because this label
7590 never appears in the code stream. */
7591 labelno = GEN_INT (pic_labelno++);
7592 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7593 l1 = gen_rtx_CONST (VOIDmode, l1);
7594
7595 /* On the ARM the PC register contains 'dot + 8' at the time of the
7596 addition, on the Thumb it is 'dot + 4'. */
7597 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7598 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7599 UNSPEC_SYMBOL_OFFSET);
7600 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7601
7602 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7603 }
7604
7605 /* Return nonzero if X is valid as an ARM state addressing register. */
7606 static int
7607 arm_address_register_rtx_p (rtx x, int strict_p)
7608 {
7609 int regno;
7610
7611 if (!REG_P (x))
7612 return 0;
7613
7614 regno = REGNO (x);
7615
7616 if (strict_p)
7617 return ARM_REGNO_OK_FOR_BASE_P (regno);
7618
7619 return (regno <= LAST_ARM_REGNUM
7620 || regno >= FIRST_PSEUDO_REGISTER
7621 || regno == FRAME_POINTER_REGNUM
7622 || regno == ARG_POINTER_REGNUM);
7623 }
7624
7625 /* Return TRUE if this rtx is the difference of a symbol and a label,
7626 and will reduce to a PC-relative relocation in the object file.
7627 Expressions like this can be left alone when generating PIC, rather
7628 than forced through the GOT. */
7629 static int
7630 pcrel_constant_p (rtx x)
7631 {
7632 if (GET_CODE (x) == MINUS)
7633 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7634
7635 return FALSE;
7636 }
7637
7638 /* Return true if X will surely end up in an index register after next
7639 splitting pass. */
7640 static bool
7641 will_be_in_index_register (const_rtx x)
7642 {
7643 /* arm.md: calculate_pic_address will split this into a register. */
7644 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7645 }
7646
7647 /* Return nonzero if X is a valid ARM state address operand. */
7648 int
7649 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7650 int strict_p)
7651 {
7652 bool use_ldrd;
7653 enum rtx_code code = GET_CODE (x);
7654
7655 if (arm_address_register_rtx_p (x, strict_p))
7656 return 1;
7657
7658 use_ldrd = (TARGET_LDRD
7659 && (mode == DImode || mode == DFmode));
7660
7661 if (code == POST_INC || code == PRE_DEC
7662 || ((code == PRE_INC || code == POST_DEC)
7663 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7664 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7665
7666 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7667 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7668 && GET_CODE (XEXP (x, 1)) == PLUS
7669 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7670 {
7671 rtx addend = XEXP (XEXP (x, 1), 1);
7672
7673 /* Don't allow ldrd post increment by register because it's hard
7674 to fixup invalid register choices. */
7675 if (use_ldrd
7676 && GET_CODE (x) == POST_MODIFY
7677 && REG_P (addend))
7678 return 0;
7679
7680 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7681 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7682 }
7683
7684 /* After reload constants split into minipools will have addresses
7685 from a LABEL_REF. */
7686 else if (reload_completed
7687 && (code == LABEL_REF
7688 || (code == CONST
7689 && GET_CODE (XEXP (x, 0)) == PLUS
7690 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7691 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7692 return 1;
7693
7694 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7695 return 0;
7696
7697 else if (code == PLUS)
7698 {
7699 rtx xop0 = XEXP (x, 0);
7700 rtx xop1 = XEXP (x, 1);
7701
7702 return ((arm_address_register_rtx_p (xop0, strict_p)
7703 && ((CONST_INT_P (xop1)
7704 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7705 || (!strict_p && will_be_in_index_register (xop1))))
7706 || (arm_address_register_rtx_p (xop1, strict_p)
7707 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7708 }
7709
7710 #if 0
7711 /* Reload currently can't handle MINUS, so disable this for now */
7712 else if (GET_CODE (x) == MINUS)
7713 {
7714 rtx xop0 = XEXP (x, 0);
7715 rtx xop1 = XEXP (x, 1);
7716
7717 return (arm_address_register_rtx_p (xop0, strict_p)
7718 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7719 }
7720 #endif
7721
7722 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7723 && code == SYMBOL_REF
7724 && CONSTANT_POOL_ADDRESS_P (x)
7725 && ! (flag_pic
7726 && symbol_mentioned_p (get_pool_constant (x))
7727 && ! pcrel_constant_p (get_pool_constant (x))))
7728 return 1;
7729
7730 return 0;
7731 }
7732
7733 /* Return true if we can avoid creating a constant pool entry for x. */
7734 static bool
7735 can_avoid_literal_pool_for_label_p (rtx x)
7736 {
7737 /* Normally we can assign constant values to target registers without
7738 the help of the constant pool. But there are cases where we have to
7739 use the constant pool, such as:
7740 1) assigning a label to a register;
7741 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7742 
7743 A constant pool access of the form:
7744 (set (reg r0) (mem (symbol_ref (".LC0"))))
7745 will cause the use of the literal pool (later, in arm_reorg).
7746 So here we mark such a form as invalid; the compiler
7747 will then adjust it into:
7748 (set (reg r0) (symbol_ref (".LC0")))
7749 (set (reg r0) (mem (reg r0))).
7750 No extra register is required, and (mem (reg r0)) won't cause the use
7751 of the literal pool. */
7752 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7753 && CONSTANT_POOL_ADDRESS_P (x))
7754 return 1;
7755 return 0;
7756 }
7757
7758
7759 /* Return nonzero if X is a valid Thumb-2 address operand. */
7760 static int
7761 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7762 {
7763 bool use_ldrd;
7764 enum rtx_code code = GET_CODE (x);
7765
7766 if (arm_address_register_rtx_p (x, strict_p))
7767 return 1;
7768
7769 use_ldrd = (TARGET_LDRD
7770 && (mode == DImode || mode == DFmode));
7771
7772 if (code == POST_INC || code == PRE_DEC
7773 || ((code == PRE_INC || code == POST_DEC)
7774 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7775 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7776
7777 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7778 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7779 && GET_CODE (XEXP (x, 1)) == PLUS
7780 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7781 {
7782 /* Thumb-2 only has autoincrement by constant. */
7783 rtx addend = XEXP (XEXP (x, 1), 1);
7784 HOST_WIDE_INT offset;
7785
7786 if (!CONST_INT_P (addend))
7787 return 0;
7788
7789 offset = INTVAL(addend);
7790 if (GET_MODE_SIZE (mode) <= 4)
7791 return (offset > -256 && offset < 256);
7792
7793 return (use_ldrd && offset > -1024 && offset < 1024
7794 && (offset & 3) == 0);
7795 }
7796
7797 /* After reload constants split into minipools will have addresses
7798 from a LABEL_REF. */
7799 else if (reload_completed
7800 && (code == LABEL_REF
7801 || (code == CONST
7802 && GET_CODE (XEXP (x, 0)) == PLUS
7803 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7804 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7805 return 1;
7806
7807 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7808 return 0;
7809
7810 else if (code == PLUS)
7811 {
7812 rtx xop0 = XEXP (x, 0);
7813 rtx xop1 = XEXP (x, 1);
7814
7815 return ((arm_address_register_rtx_p (xop0, strict_p)
7816 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7817 || (!strict_p && will_be_in_index_register (xop1))))
7818 || (arm_address_register_rtx_p (xop1, strict_p)
7819 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7820 }
7821
7822 else if (can_avoid_literal_pool_for_label_p (x))
7823 return 0;
7824
7825 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7826 && code == SYMBOL_REF
7827 && CONSTANT_POOL_ADDRESS_P (x)
7828 && ! (flag_pic
7829 && symbol_mentioned_p (get_pool_constant (x))
7830 && ! pcrel_constant_p (get_pool_constant (x))))
7831 return 1;
7832
7833 return 0;
7834 }
7835
7836 /* Return nonzero if INDEX is valid for an address index operand in
7837 ARM state. */
7838 static int
7839 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7840 int strict_p)
7841 {
7842 HOST_WIDE_INT range;
7843 enum rtx_code code = GET_CODE (index);
7844
7845 /* Standard coprocessor addressing modes. */
7846 if (TARGET_HARD_FLOAT
7847 && (mode == SFmode || mode == DFmode))
7848 return (code == CONST_INT && INTVAL (index) < 1024
7849 && INTVAL (index) > -1024
7850 && (INTVAL (index) & 3) == 0);
7851
7852 /* For quad modes, we restrict the constant offset to be slightly less
7853 than what the instruction format permits. We do this because for
7854 quad mode moves, we will actually decompose them into two separate
7855 double-mode reads or writes. INDEX must therefore be a valid
7856 (double-mode) offset and so should INDEX+8. */
7857 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7858 return (code == CONST_INT
7859 && INTVAL (index) < 1016
7860 && INTVAL (index) > -1024
7861 && (INTVAL (index) & 3) == 0);
7862
7863 /* We have no such constraint on double mode offsets, so we permit the
7864 full range of the instruction format. */
7865 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7866 return (code == CONST_INT
7867 && INTVAL (index) < 1024
7868 && INTVAL (index) > -1024
7869 && (INTVAL (index) & 3) == 0);
7870
7871 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7872 return (code == CONST_INT
7873 && INTVAL (index) < 1024
7874 && INTVAL (index) > -1024
7875 && (INTVAL (index) & 3) == 0);
7876
7877 if (arm_address_register_rtx_p (index, strict_p)
7878 && (GET_MODE_SIZE (mode) <= 4))
7879 return 1;
7880
7881 if (mode == DImode || mode == DFmode)
7882 {
7883 if (code == CONST_INT)
7884 {
7885 HOST_WIDE_INT val = INTVAL (index);
7886
7887 if (TARGET_LDRD)
7888 return val > -256 && val < 256;
7889 else
7890 return val > -4096 && val < 4092;
7891 }
7892
7893 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7894 }
7895
7896 if (GET_MODE_SIZE (mode) <= 4
7897 && ! (arm_arch4
7898 && (mode == HImode
7899 || mode == HFmode
7900 || (mode == QImode && outer == SIGN_EXTEND))))
7901 {
7902 if (code == MULT)
7903 {
7904 rtx xiop0 = XEXP (index, 0);
7905 rtx xiop1 = XEXP (index, 1);
7906
7907 return ((arm_address_register_rtx_p (xiop0, strict_p)
7908 && power_of_two_operand (xiop1, SImode))
7909 || (arm_address_register_rtx_p (xiop1, strict_p)
7910 && power_of_two_operand (xiop0, SImode)));
7911 }
7912 else if (code == LSHIFTRT || code == ASHIFTRT
7913 || code == ASHIFT || code == ROTATERT)
7914 {
7915 rtx op = XEXP (index, 1);
7916
7917 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7918 && CONST_INT_P (op)
7919 && INTVAL (op) > 0
7920 && INTVAL (op) <= 31);
7921 }
7922 }
7923
7924 /* For ARM v4 we may be doing a sign-extend operation during the
7925 load. */
7926 if (arm_arch4)
7927 {
7928 if (mode == HImode
7929 || mode == HFmode
7930 || (outer == SIGN_EXTEND && mode == QImode))
7931 range = 256;
7932 else
7933 range = 4096;
7934 }
7935 else
7936 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7937
7938 return (code == CONST_INT
7939 && INTVAL (index) < range
7940 && INTVAL (index) > -range);
7941 }
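
/* A few concrete examples that satisfy the checks above (a sketch for ARM
   state; the mnemonics are only illustrative, the function itself merely
   validates the RTL index expression):

     ldr  r0, [r1, #4095]         SImode, 12-bit immediate index
     ldr  r0, [r1, r2, lsl #2]    register index scaled by a power of two
     ldrh r0, [r1, #255]          HImode, 8-bit range when arm_arch4
     ldrd r0, r1, [r2, #252]      DImode with TARGET_LDRD, -255..255 */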
7942
7943 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7944 index operand. i.e. 1, 2, 4 or 8. */
7945 static bool
7946 thumb2_index_mul_operand (rtx op)
7947 {
7948 HOST_WIDE_INT val;
7949
7950 if (!CONST_INT_P (op))
7951 return false;
7952
7953 val = INTVAL(op);
7954 return (val == 1 || val == 2 || val == 4 || val == 8);
7955 }
7956
7957 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7958 static int
7959 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7960 {
7961 enum rtx_code code = GET_CODE (index);
7962
7963 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7964 /* Standard coprocessor addressing modes. */
7965 if (TARGET_HARD_FLOAT
7966 && (mode == SFmode || mode == DFmode))
7967 return (code == CONST_INT && INTVAL (index) < 1024
7968 /* Thumb-2 allows only > -256 index range for its core register
7969 load/stores. Since we allow SF/DF in core registers, we have
7970 to use the intersection between -256~4096 (core) and -1024~1024
7971 (coprocessor). */
7972 && INTVAL (index) > -256
7973 && (INTVAL (index) & 3) == 0);
7974
7975 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7976 {
7977 /* For DImode assume values will usually live in core regs
7978 and only allow LDRD addressing modes. */
7979 if (!TARGET_LDRD || mode != DImode)
7980 return (code == CONST_INT
7981 && INTVAL (index) < 1024
7982 && INTVAL (index) > -1024
7983 && (INTVAL (index) & 3) == 0);
7984 }
7985
7986 /* For quad modes, we restrict the constant offset to be slightly less
7987 than what the instruction format permits. We do this because for
7988 quad mode moves, we will actually decompose them into two separate
7989 double-mode reads or writes. INDEX must therefore be a valid
7990 (double-mode) offset and so should INDEX+8. */
7991 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7992 return (code == CONST_INT
7993 && INTVAL (index) < 1016
7994 && INTVAL (index) > -1024
7995 && (INTVAL (index) & 3) == 0);
7996
7997 /* We have no such constraint on double mode offsets, so we permit the
7998 full range of the instruction format. */
7999 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8000 return (code == CONST_INT
8001 && INTVAL (index) < 1024
8002 && INTVAL (index) > -1024
8003 && (INTVAL (index) & 3) == 0);
8004
8005 if (arm_address_register_rtx_p (index, strict_p)
8006 && (GET_MODE_SIZE (mode) <= 4))
8007 return 1;
8008
8009 if (mode == DImode || mode == DFmode)
8010 {
8011 if (code == CONST_INT)
8012 {
8013 HOST_WIDE_INT val = INTVAL (index);
8014 /* ??? Can we assume ldrd for thumb2? */
8015 /* Thumb-2 ldrd only has reg+const addressing modes. */
8016 /* ldrd supports offsets of +-1020.
8017 However the ldr fallback does not. */
8018 return val > -256 && val < 256 && (val & 3) == 0;
8019 }
8020 else
8021 return 0;
8022 }
8023
8024 if (code == MULT)
8025 {
8026 rtx xiop0 = XEXP (index, 0);
8027 rtx xiop1 = XEXP (index, 1);
8028
8029 return ((arm_address_register_rtx_p (xiop0, strict_p)
8030 && thumb2_index_mul_operand (xiop1))
8031 || (arm_address_register_rtx_p (xiop1, strict_p)
8032 && thumb2_index_mul_operand (xiop0)));
8033 }
8034 else if (code == ASHIFT)
8035 {
8036 rtx op = XEXP (index, 1);
8037
8038 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8039 && CONST_INT_P (op)
8040 && INTVAL (op) > 0
8041 && INTVAL (op) <= 3);
8042 }
8043
8044 return (code == CONST_INT
8045 && INTVAL (index) < 4096
8046 && INTVAL (index) > -256);
8047 }
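
/* Corresponding Thumb-2 sketch: the shift amount is limited to at most 3
   and negative immediates to -255, e.g.

     ldr r0, [r1, r2, lsl #3]     scaled register index, shift <= 3
     ldr r0, [r1, #4095]          positive 12-bit immediate
     ldr r0, [r1, #-255]          negative 8-bit immediate

   again purely as an illustration of the ranges checked above. */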
8048
8049 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8050 static int
8051 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8052 {
8053 int regno;
8054
8055 if (!REG_P (x))
8056 return 0;
8057
8058 regno = REGNO (x);
8059
8060 if (strict_p)
8061 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8062
8063 return (regno <= LAST_LO_REGNUM
8064 || regno > LAST_VIRTUAL_REGISTER
8065 || regno == FRAME_POINTER_REGNUM
8066 || (GET_MODE_SIZE (mode) >= 4
8067 && (regno == STACK_POINTER_REGNUM
8068 || regno >= FIRST_PSEUDO_REGISTER
8069 || x == hard_frame_pointer_rtx
8070 || x == arg_pointer_rtx)));
8071 }
8072
8073 /* Return nonzero if x is a legitimate index register. This is the case
8074 for any base register that can access a QImode object. */
8075 inline static int
8076 thumb1_index_register_rtx_p (rtx x, int strict_p)
8077 {
8078 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8079 }
8080
8081 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8082
8083 The AP may be eliminated to either the SP or the FP, so we use the
8084 least common denominator, e.g. SImode, and offsets from 0 to 64.
8085
8086 ??? Verify whether the above is the right approach.
8087
8088 ??? Also, the FP may be eliminated to the SP, so perhaps that
8089 needs special handling also.
8090
8091 ??? Look at how the mips16 port solves this problem. It probably uses
8092 better ways to solve some of these problems.
8093
8094 Although it is not incorrect, we don't accept QImode and HImode
8095 addresses based on the frame pointer or arg pointer until the
8096 reload pass starts. This is so that eliminating such addresses
8097 into stack based ones won't produce impossible code. */
8098 int
8099 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8100 {
8101 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8102 return 0;
8103
8104 /* ??? Not clear if this is right. Experiment. */
8105 if (GET_MODE_SIZE (mode) < 4
8106 && !(reload_in_progress || reload_completed)
8107 && (reg_mentioned_p (frame_pointer_rtx, x)
8108 || reg_mentioned_p (arg_pointer_rtx, x)
8109 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8110 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8111 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8112 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8113 return 0;
8114
8115 /* Accept any base register. SP only in SImode or larger. */
8116 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8117 return 1;
8118
8119 /* This is PC relative data before arm_reorg runs. */
8120 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8121 && GET_CODE (x) == SYMBOL_REF
8122 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8123 return 1;
8124
8125 /* This is PC relative data after arm_reorg runs. */
8126 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8127 && reload_completed
8128 && (GET_CODE (x) == LABEL_REF
8129 || (GET_CODE (x) == CONST
8130 && GET_CODE (XEXP (x, 0)) == PLUS
8131 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8132 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8133 return 1;
8134
8135 /* Post-inc indexing only supported for SImode and larger. */
8136 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8137 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8138 return 1;
8139
8140 else if (GET_CODE (x) == PLUS)
8141 {
8142 /* REG+REG address can be any two index registers. */
8143 /* We disallow FRAME+REG addressing since we know that FRAME
8144 will be replaced with STACK, and SP relative addressing only
8145 permits SP+OFFSET. */
8146 if (GET_MODE_SIZE (mode) <= 4
8147 && XEXP (x, 0) != frame_pointer_rtx
8148 && XEXP (x, 1) != frame_pointer_rtx
8149 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8150 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8151 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8152 return 1;
8153
8154 /* REG+const has 5-7 bit offset for non-SP registers. */
8155 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8156 || XEXP (x, 0) == arg_pointer_rtx)
8157 && CONST_INT_P (XEXP (x, 1))
8158 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8159 return 1;
8160
8161 /* REG+const has 10-bit offset for SP, but only SImode and
8162 larger is supported. */
8163 /* ??? Should probably check for DI/DFmode overflow here
8164 just like GO_IF_LEGITIMATE_OFFSET does. */
8165 else if (REG_P (XEXP (x, 0))
8166 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8167 && GET_MODE_SIZE (mode) >= 4
8168 && CONST_INT_P (XEXP (x, 1))
8169 && INTVAL (XEXP (x, 1)) >= 0
8170 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8171 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8172 return 1;
8173
8174 else if (REG_P (XEXP (x, 0))
8175 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8176 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8177 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8178 && REGNO (XEXP (x, 0))
8179 <= LAST_VIRTUAL_POINTER_REGISTER))
8180 && GET_MODE_SIZE (mode) >= 4
8181 && CONST_INT_P (XEXP (x, 1))
8182 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8183 return 1;
8184 }
8185
8186 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8187 && GET_MODE_SIZE (mode) == 4
8188 && GET_CODE (x) == SYMBOL_REF
8189 && CONSTANT_POOL_ADDRESS_P (x)
8190 && ! (flag_pic
8191 && symbol_mentioned_p (get_pool_constant (x))
8192 && ! pcrel_constant_p (get_pool_constant (x))))
8193 return 1;
8194
8195 return 0;
8196 }
8197
8198 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8199 instruction of mode MODE. */
8200 int
8201 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8202 {
8203 switch (GET_MODE_SIZE (mode))
8204 {
8205 case 1:
8206 return val >= 0 && val < 32;
8207
8208 case 2:
8209 return val >= 0 && val < 64 && (val & 1) == 0;
8210
8211 default:
8212 return (val >= 0
8213 && (val + GET_MODE_SIZE (mode)) <= 128
8214 && (val & 3) == 0);
8215 }
8216 }
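
/* Worked examples of the ranges accepted above (Thumb-1 immediate
   load/store offsets; the mnemonics are illustrative):

     size 1:   0 <= val <= 31                       ldrb r0, [r1, #31]
     size 2:   0 <= val <= 62, even                 ldrh r0, [r1, #62]
     size 4+:  0 <= val, val + size <= 128,
               multiple of 4                        ldr  r0, [r1, #124] */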
8217
8218 bool
8219 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8220 {
8221 if (TARGET_ARM)
8222 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8223 else if (TARGET_THUMB2)
8224 return thumb2_legitimate_address_p (mode, x, strict_p);
8225 else /* if (TARGET_THUMB1) */
8226 return thumb1_legitimate_address_p (mode, x, strict_p);
8227 }
8228
8229 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8230
8231 Given an rtx X being reloaded into a reg required to be
8232 in class CLASS, return the class of reg to actually use.
8233 In general this is just CLASS, but for the Thumb core registers and
8234 immediate constants we prefer a LO_REGS class or a subset. */
8235
8236 static reg_class_t
8237 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8238 {
8239 if (TARGET_32BIT)
8240 return rclass;
8241 else
8242 {
8243 if (rclass == GENERAL_REGS)
8244 return LO_REGS;
8245 else
8246 return rclass;
8247 }
8248 }
8249
8250 /* Build the SYMBOL_REF for __tls_get_addr. */
8251
8252 static GTY(()) rtx tls_get_addr_libfunc;
8253
8254 static rtx
8255 get_tls_get_addr (void)
8256 {
8257 if (!tls_get_addr_libfunc)
8258 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8259 return tls_get_addr_libfunc;
8260 }
8261
8262 rtx
8263 arm_load_tp (rtx target)
8264 {
8265 if (!target)
8266 target = gen_reg_rtx (SImode);
8267
8268 if (TARGET_HARD_TP)
8269 {
8270 /* Can return in any reg. */
8271 emit_insn (gen_load_tp_hard (target));
8272 }
8273 else
8274 {
8275 /* Always returned in r0. Immediately copy the result into a pseudo,
8276 otherwise other uses of r0 (e.g. setting up function arguments) may
8277 clobber the value. */
8278
8279 rtx tmp;
8280
8281 emit_insn (gen_load_tp_soft ());
8282
8283 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8284 emit_move_insn (target, tmp);
8285 }
8286 return target;
8287 }
8288
8289 static rtx
8290 load_tls_operand (rtx x, rtx reg)
8291 {
8292 rtx tmp;
8293
8294 if (reg == NULL_RTX)
8295 reg = gen_reg_rtx (SImode);
8296
8297 tmp = gen_rtx_CONST (SImode, x);
8298
8299 emit_move_insn (reg, tmp);
8300
8301 return reg;
8302 }
8303
8304 static rtx_insn *
8305 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8306 {
8307 rtx label, labelno, sum;
8308
8309 gcc_assert (reloc != TLS_DESCSEQ);
8310 start_sequence ();
8311
8312 labelno = GEN_INT (pic_labelno++);
8313 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8314 label = gen_rtx_CONST (VOIDmode, label);
8315
8316 sum = gen_rtx_UNSPEC (Pmode,
8317 gen_rtvec (4, x, GEN_INT (reloc), label,
8318 GEN_INT (TARGET_ARM ? 8 : 4)),
8319 UNSPEC_TLS);
8320 reg = load_tls_operand (sum, reg);
8321
8322 if (TARGET_ARM)
8323 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8324 else
8325 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8326
8327 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8328 LCT_PURE, /* LCT_CONST? */
8329 Pmode, 1, reg, Pmode);
8330
8331 rtx_insn *insns = get_insns ();
8332 end_sequence ();
8333
8334 return insns;
8335 }
8336
8337 static rtx
8338 arm_tls_descseq_addr (rtx x, rtx reg)
8339 {
8340 rtx labelno = GEN_INT (pic_labelno++);
8341 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8342 rtx sum = gen_rtx_UNSPEC (Pmode,
8343 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8344 gen_rtx_CONST (VOIDmode, label),
8345 GEN_INT (!TARGET_ARM)),
8346 UNSPEC_TLS);
8347 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8348
8349 emit_insn (gen_tlscall (x, labelno));
8350 if (!reg)
8351 reg = gen_reg_rtx (SImode);
8352 else
8353 gcc_assert (REGNO (reg) != R0_REGNUM);
8354
8355 emit_move_insn (reg, reg0);
8356
8357 return reg;
8358 }
8359
8360 rtx
8361 legitimize_tls_address (rtx x, rtx reg)
8362 {
8363 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8364 rtx_insn *insns;
8365 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8366
8367 switch (model)
8368 {
8369 case TLS_MODEL_GLOBAL_DYNAMIC:
8370 if (TARGET_GNU2_TLS)
8371 {
8372 reg = arm_tls_descseq_addr (x, reg);
8373
8374 tp = arm_load_tp (NULL_RTX);
8375
8376 dest = gen_rtx_PLUS (Pmode, tp, reg);
8377 }
8378 else
8379 {
8380 /* Original scheme */
8381 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8382 dest = gen_reg_rtx (Pmode);
8383 emit_libcall_block (insns, dest, ret, x);
8384 }
8385 return dest;
8386
8387 case TLS_MODEL_LOCAL_DYNAMIC:
8388 if (TARGET_GNU2_TLS)
8389 {
8390 reg = arm_tls_descseq_addr (x, reg);
8391
8392 tp = arm_load_tp (NULL_RTX);
8393
8394 dest = gen_rtx_PLUS (Pmode, tp, reg);
8395 }
8396 else
8397 {
8398 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8399
8400 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8401 share the LDM result with other LD model accesses. */
8402 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8403 UNSPEC_TLS);
8404 dest = gen_reg_rtx (Pmode);
8405 emit_libcall_block (insns, dest, ret, eqv);
8406
8407 /* Load the addend. */
8408 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8409 GEN_INT (TLS_LDO32)),
8410 UNSPEC_TLS);
8411 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8412 dest = gen_rtx_PLUS (Pmode, dest, addend);
8413 }
8414 return dest;
8415
8416 case TLS_MODEL_INITIAL_EXEC:
8417 labelno = GEN_INT (pic_labelno++);
8418 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8419 label = gen_rtx_CONST (VOIDmode, label);
8420 sum = gen_rtx_UNSPEC (Pmode,
8421 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8422 GEN_INT (TARGET_ARM ? 8 : 4)),
8423 UNSPEC_TLS);
8424 reg = load_tls_operand (sum, reg);
8425
8426 if (TARGET_ARM)
8427 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8428 else if (TARGET_THUMB2)
8429 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8430 else
8431 {
8432 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8433 emit_move_insn (reg, gen_const_mem (SImode, reg));
8434 }
8435
8436 tp = arm_load_tp (NULL_RTX);
8437
8438 return gen_rtx_PLUS (Pmode, tp, reg);
8439
8440 case TLS_MODEL_LOCAL_EXEC:
8441 tp = arm_load_tp (NULL_RTX);
8442
8443 reg = gen_rtx_UNSPEC (Pmode,
8444 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8445 UNSPEC_TLS);
8446 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8447
8448 return gen_rtx_PLUS (Pmode, tp, reg);
8449
8450 default:
8451 abort ();
8452 }
8453 }
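
/* Very rough mapping sketch for the models handled above (the model that is
   finally chosen also depends on -ftls-model, whether code is PIC and how
   the symbol binds): a `__thread int t;' referenced from -fPIC shared
   library code typically uses the global-dynamic sequence, while code
   destined for an executable is usually relaxed to initial-exec or
   local-exec. With TARGET_GNU2_TLS the dynamic models use the TLS
   descriptor sequence from arm_tls_descseq_addr instead of calling
   __tls_get_addr directly. */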
8454
8455 /* Try machine-dependent ways of modifying an illegitimate address
8456 to be legitimate. If we find one, return the new, valid address. */
8457 rtx
8458 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8459 {
8460 if (arm_tls_referenced_p (x))
8461 {
8462 rtx addend = NULL;
8463
8464 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8465 {
8466 addend = XEXP (XEXP (x, 0), 1);
8467 x = XEXP (XEXP (x, 0), 0);
8468 }
8469
8470 if (GET_CODE (x) != SYMBOL_REF)
8471 return x;
8472
8473 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8474
8475 x = legitimize_tls_address (x, NULL_RTX);
8476
8477 if (addend)
8478 {
8479 x = gen_rtx_PLUS (SImode, x, addend);
8480 orig_x = x;
8481 }
8482 else
8483 return x;
8484 }
8485
8486 if (!TARGET_ARM)
8487 {
8488 /* TODO: legitimize_address for Thumb2. */
8489 if (TARGET_THUMB2)
8490 return x;
8491 return thumb_legitimize_address (x, orig_x, mode);
8492 }
8493
8494 if (GET_CODE (x) == PLUS)
8495 {
8496 rtx xop0 = XEXP (x, 0);
8497 rtx xop1 = XEXP (x, 1);
8498
8499 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8500 xop0 = force_reg (SImode, xop0);
8501
8502 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8503 && !symbol_mentioned_p (xop1))
8504 xop1 = force_reg (SImode, xop1);
8505
8506 if (ARM_BASE_REGISTER_RTX_P (xop0)
8507 && CONST_INT_P (xop1))
8508 {
8509 HOST_WIDE_INT n, low_n;
8510 rtx base_reg, val;
8511 n = INTVAL (xop1);
8512
8513 /* VFP addressing modes actually allow greater offsets, but for
8514 now we just stick with the lowest common denominator. */
8515 if (mode == DImode || mode == DFmode)
8516 {
8517 low_n = n & 0x0f;
8518 n &= ~0x0f;
8519 if (low_n > 4)
8520 {
8521 n += 16;
8522 low_n -= 16;
8523 }
8524 }
8525 else
8526 {
8527 low_n = ((mode) == TImode ? 0
8528 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8529 n -= low_n;
8530 }
8531
8532 base_reg = gen_reg_rtx (SImode);
8533 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8534 emit_move_insn (base_reg, val);
8535 x = plus_constant (Pmode, base_reg, low_n);
8536 }
8537 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8538 x = gen_rtx_PLUS (SImode, xop0, xop1);
8539 }
8540
8541 /* XXX We don't allow MINUS any more -- see comment in
8542 arm_legitimate_address_outer_p (). */
8543 else if (GET_CODE (x) == MINUS)
8544 {
8545 rtx xop0 = XEXP (x, 0);
8546 rtx xop1 = XEXP (x, 1);
8547
8548 if (CONSTANT_P (xop0))
8549 xop0 = force_reg (SImode, xop0);
8550
8551 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8552 xop1 = force_reg (SImode, xop1);
8553
8554 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8555 x = gen_rtx_MINUS (SImode, xop0, xop1);
8556 }
8557
8558 /* Make sure to take full advantage of the pre-indexed addressing mode
8559 with absolute addresses which often allows for the base register to
8560 be factorized for multiple adjacent memory references, and it might
8561 even allow for the mini pool to be avoided entirely. */
8562 else if (CONST_INT_P (x) && optimize > 0)
8563 {
8564 unsigned int bits;
8565 HOST_WIDE_INT mask, base, index;
8566 rtx base_reg;
8567
8568 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8569 use an 8-bit index. So let's use a 12-bit index for SImode only and
8570 hope that arm_gen_constant will enable ldrb to use more bits. */
8571 bits = (mode == SImode) ? 12 : 8;
8572 mask = (1 << bits) - 1;
8573 base = INTVAL (x) & ~mask;
8574 index = INTVAL (x) & mask;
8575 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8576 {
8577 /* It'll most probably be more efficient to generate the base
8578 with more bits set and use a negative index instead. */
8579 base |= mask;
8580 index -= mask;
8581 }
8582 base_reg = force_reg (SImode, GEN_INT (base));
8583 x = plus_constant (Pmode, base_reg, index);
8584 }
8585
8586 if (flag_pic)
8587 {
8588 /* We need to find and carefully transform any SYMBOL and LABEL
8589 references; so go back to the original address expression. */
8590 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8591
8592 if (new_x != orig_x)
8593 x = new_x;
8594 }
8595
8596 return x;
8597 }
8598
8599
8600 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8601 to be legitimate. If we find one, return the new, valid address. */
8602 rtx
8603 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8604 {
8605 if (GET_CODE (x) == PLUS
8606 && CONST_INT_P (XEXP (x, 1))
8607 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8608 || INTVAL (XEXP (x, 1)) < 0))
8609 {
8610 rtx xop0 = XEXP (x, 0);
8611 rtx xop1 = XEXP (x, 1);
8612 HOST_WIDE_INT offset = INTVAL (xop1);
8613
8614 /* Try and fold the offset into a biasing of the base register and
8615 then offsetting that. Don't do this when optimizing for space
8616 since it can cause too many CSEs. */
8617 if (optimize_size && offset >= 0
8618 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8619 {
8620 HOST_WIDE_INT delta;
8621
8622 if (offset >= 256)
8623 delta = offset - (256 - GET_MODE_SIZE (mode));
8624 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8625 delta = 31 * GET_MODE_SIZE (mode);
8626 else
8627 delta = offset & (~31 * GET_MODE_SIZE (mode));
8628
8629 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8630 NULL_RTX);
8631 x = plus_constant (Pmode, xop0, delta);
8632 }
8633 else if (offset < 0 && offset > -256)
8634 /* Small negative offsets are best done with a subtract before the
8635 dereference; forcing these into a register normally takes two
8636 instructions. */
8637 x = force_operand (x, NULL_RTX);
8638 else
8639 {
8640 /* For the remaining cases, force the constant into a register. */
8641 xop1 = force_reg (SImode, xop1);
8642 x = gen_rtx_PLUS (SImode, xop0, xop1);
8643 }
8644 }
8645 else if (GET_CODE (x) == PLUS
8646 && s_register_operand (XEXP (x, 1), SImode)
8647 && !s_register_operand (XEXP (x, 0), SImode))
8648 {
8649 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8650
8651 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8652 }
8653
8654 if (flag_pic)
8655 {
8656 /* We need to find and carefully transform any SYMBOL and LABEL
8657 references; so go back to the original address expression. */
8658 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8659
8660 if (new_x != orig_x)
8661 x = new_x;
8662 }
8663
8664 return x;
8665 }
8666
8667 /* Return TRUE if X contains any TLS symbol references. */
8668
8669 bool
8670 arm_tls_referenced_p (rtx x)
8671 {
8672 if (! TARGET_HAVE_TLS)
8673 return false;
8674
8675 subrtx_iterator::array_type array;
8676 FOR_EACH_SUBRTX (iter, array, x, ALL)
8677 {
8678 const_rtx x = *iter;
8679 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8680 {
8681 /* ARM currently does not provide relocations to encode TLS variables
8682 into AArch32 instructions, only data, so there is currently no way
8683 to implement these if a literal pool is disabled. */
8684 if (arm_disable_literal_pool)
8685 sorry ("accessing thread-local storage is not currently supported "
8686 "with -mpure-code or -mslow-flash-data");
8687
8688 return true;
8689 }
8690
8691 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8692 TLS offsets, not real symbol references. */
8693 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8694 iter.skip_subrtxes ();
8695 }
8696 return false;
8697 }
8698
8699 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8700
8701 On the ARM, allow any integer (invalid ones are removed later by insn
8702 patterns), nice doubles and symbol_refs which refer to the function's
8703 constant pool XXX.
8704
8705 When generating pic allow anything. */
8706
8707 static bool
8708 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8709 {
8710 return flag_pic || !label_mentioned_p (x);
8711 }
8712
8713 static bool
8714 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8715 {
 8716   /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
 8717      RTXs.  These must therefore be allowed for Thumb-1 so that the result
 8718      is valid when compiling for ARMv8-M Baseline or later.  */
8719 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8720 x = XEXP (x, 0);
8721
8722 return (CONST_INT_P (x)
8723 || CONST_DOUBLE_P (x)
8724 || CONSTANT_ADDRESS_P (x)
8725 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8726 || flag_pic);
8727 }
8728
8729 static bool
8730 arm_legitimate_constant_p (machine_mode mode, rtx x)
8731 {
8732 return (!arm_cannot_force_const_mem (mode, x)
8733 && (TARGET_32BIT
8734 ? arm_legitimate_constant_p_1 (mode, x)
8735 : thumb_legitimate_constant_p (mode, x)));
8736 }
8737
8738 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8739
8740 static bool
8741 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8742 {
8743 rtx base, offset;
8744
8745 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8746 {
8747 split_const (x, &base, &offset);
8748 if (GET_CODE (base) == SYMBOL_REF
8749 && !offset_within_block_p (base, INTVAL (offset)))
8750 return true;
8751 }
8752 return arm_tls_referenced_p (x);
8753 }
8754 \f
8755 #define REG_OR_SUBREG_REG(X) \
8756 (REG_P (X) \
8757 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8758
8759 #define REG_OR_SUBREG_RTX(X) \
8760 (REG_P (X) ? (X) : SUBREG_REG (X))
8761
8762 static inline int
8763 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8764 {
8765 machine_mode mode = GET_MODE (x);
8766 int total, words;
8767
8768 switch (code)
8769 {
8770 case ASHIFT:
8771 case ASHIFTRT:
8772 case LSHIFTRT:
8773 case ROTATERT:
8774 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8775
8776 case PLUS:
8777 case MINUS:
8778 case COMPARE:
8779 case NEG:
8780 case NOT:
8781 return COSTS_N_INSNS (1);
8782
8783 case MULT:
8784 if (arm_arch6m && arm_m_profile_small_mul)
8785 return COSTS_N_INSNS (32);
8786
8787 if (CONST_INT_P (XEXP (x, 1)))
8788 {
8789 int cycles = 0;
8790 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
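	  /* The loop below charges roughly one cycle for every two bits
	     of the constant multiplier (an early-termination model).  */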
8791
8792 while (i)
8793 {
8794 i >>= 2;
8795 cycles++;
8796 }
8797 return COSTS_N_INSNS (2) + cycles;
8798 }
8799 return COSTS_N_INSNS (1) + 16;
8800
8801 case SET:
8802 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8803 the mode. */
8804 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8805 return (COSTS_N_INSNS (words)
8806 + 4 * ((MEM_P (SET_SRC (x)))
8807 + MEM_P (SET_DEST (x))));
8808
8809 case CONST_INT:
8810 if (outer == SET)
8811 {
8812 if (UINTVAL (x) < 256
8813 /* 16-bit constant. */
8814 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8815 return 0;
8816 if (thumb_shiftable_const (INTVAL (x)))
8817 return COSTS_N_INSNS (2);
8818 return COSTS_N_INSNS (3);
8819 }
8820 else if ((outer == PLUS || outer == COMPARE)
8821 && INTVAL (x) < 256 && INTVAL (x) > -256)
8822 return 0;
8823 else if ((outer == IOR || outer == XOR || outer == AND)
8824 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8825 return COSTS_N_INSNS (1);
8826 else if (outer == AND)
8827 {
8828 int i;
8829 /* This duplicates the tests in the andsi3 expander. */
8830 for (i = 9; i <= 31; i++)
8831 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8832 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8833 return COSTS_N_INSNS (2);
8834 }
8835 else if (outer == ASHIFT || outer == ASHIFTRT
8836 || outer == LSHIFTRT)
8837 return 0;
8838 return COSTS_N_INSNS (2);
8839
8840 case CONST:
8841 case CONST_DOUBLE:
8842 case LABEL_REF:
8843 case SYMBOL_REF:
8844 return COSTS_N_INSNS (3);
8845
8846 case UDIV:
8847 case UMOD:
8848 case DIV:
8849 case MOD:
8850 return 100;
8851
8852 case TRUNCATE:
8853 return 99;
8854
8855 case AND:
8856 case XOR:
8857 case IOR:
8858 /* XXX guess. */
8859 return 8;
8860
8861 case MEM:
8862 /* XXX another guess. */
8863 /* Memory costs quite a lot for the first word, but subsequent words
8864 load at the equivalent of a single insn each. */
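      /* For example, a DImode load from the constant pool is costed
	 here as 10 + 4 + 4 == 18.  */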
8865 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8866 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8867 ? 4 : 0));
8868
8869 case IF_THEN_ELSE:
8870 /* XXX a guess. */
8871 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8872 return 14;
8873 return 2;
8874
8875 case SIGN_EXTEND:
8876 case ZERO_EXTEND:
8877 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8878 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8879
8880 if (mode == SImode)
8881 return total;
8882
8883 if (arm_arch6)
8884 return total + COSTS_N_INSNS (1);
8885
8886 /* Assume a two-shift sequence. Increase the cost slightly so
8887 we prefer actual shifts over an extend operation. */
8888 return total + 1 + COSTS_N_INSNS (2);
8889
8890 default:
8891 return 99;
8892 }
8893 }
8894
8895 /* Estimates the size cost of thumb1 instructions.
 8896    For now most of the code is copied from thumb1_rtx_costs; finer-grained
 8897    tuning will be needed when we have more related test cases.  */
8898 static inline int
8899 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8900 {
8901 machine_mode mode = GET_MODE (x);
8902 int words, cost;
8903
8904 switch (code)
8905 {
8906 case ASHIFT:
8907 case ASHIFTRT:
8908 case LSHIFTRT:
8909 case ROTATERT:
8910 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8911
8912 case PLUS:
8913 case MINUS:
 8914       /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
 8915 	 patterns defined by RTL expansion, especially for the expansion of
 8916 	 multiplication.  */
 8917       if ((GET_CODE (XEXP (x, 0)) == MULT
 8918 	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8919 || (GET_CODE (XEXP (x, 1)) == MULT
8920 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8921 return COSTS_N_INSNS (2);
8922 /* Fall through. */
8923 case COMPARE:
8924 case NEG:
8925 case NOT:
8926 return COSTS_N_INSNS (1);
8927
8928 case MULT:
8929 if (CONST_INT_P (XEXP (x, 1)))
8930 {
 8931 	  /* The Thumb-1 mul instruction can't operate on a constant; we must
 8932 	     load it into a register first.  */
8933 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
 8934 	  /* For targets that have a very small and high-latency multiply
 8935 	     unit, we prefer to synthesize the multiply with up to 5 instructions,
 8936 	     giving a good balance between size and performance.  */
8937 if (arm_arch6m && arm_m_profile_small_mul)
8938 return COSTS_N_INSNS (5);
8939 else
8940 return COSTS_N_INSNS (1) + const_size;
8941 }
8942 return COSTS_N_INSNS (1);
8943
8944 case SET:
8945 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8946 the mode. */
8947 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8948 cost = COSTS_N_INSNS (words);
8949 if (satisfies_constraint_J (SET_SRC (x))
8950 || satisfies_constraint_K (SET_SRC (x))
 8951 	  /* Too big an immediate for a 2-byte mov, so MOVT will be used.  */
8952 || (CONST_INT_P (SET_SRC (x))
8953 && UINTVAL (SET_SRC (x)) >= 256
8954 && TARGET_HAVE_MOVT
8955 && satisfies_constraint_j (SET_SRC (x)))
8956 /* thumb1_movdi_insn. */
8957 || ((words > 1) && MEM_P (SET_SRC (x))))
8958 cost += COSTS_N_INSNS (1);
8959 return cost;
8960
8961 case CONST_INT:
8962 if (outer == SET)
8963 {
8964 if (UINTVAL (x) < 256)
8965 return COSTS_N_INSNS (1);
 8966 	  /* movw is 4 bytes long.  */
8967 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8968 return COSTS_N_INSNS (2);
8969 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8970 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8971 return COSTS_N_INSNS (2);
8972 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8973 if (thumb_shiftable_const (INTVAL (x)))
8974 return COSTS_N_INSNS (2);
8975 return COSTS_N_INSNS (3);
8976 }
8977 else if ((outer == PLUS || outer == COMPARE)
8978 && INTVAL (x) < 256 && INTVAL (x) > -256)
8979 return 0;
8980 else if ((outer == IOR || outer == XOR || outer == AND)
8981 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8982 return COSTS_N_INSNS (1);
8983 else if (outer == AND)
8984 {
8985 int i;
8986 /* This duplicates the tests in the andsi3 expander. */
8987 for (i = 9; i <= 31; i++)
8988 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8989 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8990 return COSTS_N_INSNS (2);
8991 }
8992 else if (outer == ASHIFT || outer == ASHIFTRT
8993 || outer == LSHIFTRT)
8994 return 0;
8995 return COSTS_N_INSNS (2);
8996
8997 case CONST:
8998 case CONST_DOUBLE:
8999 case LABEL_REF:
9000 case SYMBOL_REF:
9001 return COSTS_N_INSNS (3);
9002
9003 case UDIV:
9004 case UMOD:
9005 case DIV:
9006 case MOD:
9007 return 100;
9008
9009 case TRUNCATE:
9010 return 99;
9011
9012 case AND:
9013 case XOR:
9014 case IOR:
9015 return COSTS_N_INSNS (1);
9016
9017 case MEM:
9018 return (COSTS_N_INSNS (1)
9019 + COSTS_N_INSNS (1)
9020 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9021 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9022 ? COSTS_N_INSNS (1) : 0));
9023
9024 case IF_THEN_ELSE:
9025 /* XXX a guess. */
9026 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9027 return 14;
9028 return 2;
9029
9030 case ZERO_EXTEND:
9031 /* XXX still guessing. */
9032 switch (GET_MODE (XEXP (x, 0)))
9033 {
9034 case QImode:
9035 return (1 + (mode == DImode ? 4 : 0)
9036 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9037
9038 case HImode:
9039 return (4 + (mode == DImode ? 4 : 0)
9040 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9041
9042 case SImode:
9043 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9044
9045 default:
9046 return 99;
9047 }
9048
9049 default:
9050 return 99;
9051 }
9052 }
9053
9054 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9055 operand, then return the operand that is being shifted. If the shift
9056 is not by a constant, then set SHIFT_REG to point to the operand.
9057 Return NULL if OP is not a shifter operand. */
9058 static rtx
9059 shifter_op_p (rtx op, rtx *shift_reg)
9060 {
9061 enum rtx_code code = GET_CODE (op);
9062
9063 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9064 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9065 return XEXP (op, 0);
9066 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9067 return XEXP (op, 0);
9068 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9069 || code == ASHIFTRT)
9070 {
9071 if (!CONST_INT_P (XEXP (op, 1)))
9072 *shift_reg = XEXP (op, 1);
9073 return XEXP (op, 0);
9074 }
9075
9076 return NULL;
9077 }
9078
9079 static bool
9080 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9081 {
9082 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9083 rtx_code code = GET_CODE (x);
9084 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9085
9086 switch (XINT (x, 1))
9087 {
9088 case UNSPEC_UNALIGNED_LOAD:
9089 /* We can only do unaligned loads into the integer unit, and we can't
9090 use LDM or LDRD. */
9091 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9092 if (speed_p)
9093 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9094 + extra_cost->ldst.load_unaligned);
9095
9096 #ifdef NOT_YET
9097 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9098 ADDR_SPACE_GENERIC, speed_p);
9099 #endif
9100 return true;
9101
9102 case UNSPEC_UNALIGNED_STORE:
9103 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9104 if (speed_p)
9105 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9106 + extra_cost->ldst.store_unaligned);
9107
9108 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9109 #ifdef NOT_YET
9110 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9111 ADDR_SPACE_GENERIC, speed_p);
9112 #endif
9113 return true;
9114
9115 case UNSPEC_VRINTZ:
9116 case UNSPEC_VRINTP:
9117 case UNSPEC_VRINTM:
9118 case UNSPEC_VRINTR:
9119 case UNSPEC_VRINTX:
9120 case UNSPEC_VRINTA:
9121 if (speed_p)
9122 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9123
9124 return true;
9125 default:
9126 *cost = COSTS_N_INSNS (2);
9127 break;
9128 }
9129 return true;
9130 }
9131
9132 /* Cost of a libcall. We assume one insn per argument, an amount for the
9133 call (one insn for -Os) and then one for processing the result. */
9134 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
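/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing for
   speed and COSTS_N_INSNS (4) at -Os.  */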
9135
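/* Helper for the narrow-mode PLUS/MINUS cases below: if operand IDX of X is
   a left-shift shifter operand, cost it as an arithmetic-with-shift
   operation (adding the cost of the shift amount when it is held in a
   register) and return true from the enclosing function.  */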
9136 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9137 do \
9138 { \
9139 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9140 if (shift_op != NULL \
9141 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9142 { \
9143 if (shift_reg) \
9144 { \
9145 if (speed_p) \
9146 *cost += extra_cost->alu.arith_shift_reg; \
9147 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9148 ASHIFT, 1, speed_p); \
9149 } \
9150 else if (speed_p) \
9151 *cost += extra_cost->alu.arith_shift; \
9152 \
9153 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9154 ASHIFT, 0, speed_p) \
9155 + rtx_cost (XEXP (x, 1 - IDX), \
9156 GET_MODE (shift_op), \
9157 OP, 1, speed_p)); \
9158 return true; \
9159 } \
9160 } \
9161 while (0);
9162
9163 /* RTX costs. Make an estimate of the cost of executing the operation
 9164    X, which is contained within an operation with code OUTER_CODE.
9165 SPEED_P indicates whether the cost desired is the performance cost,
9166 or the size cost. The estimate is stored in COST and the return
9167 value is TRUE if the cost calculation is final, or FALSE if the
9168 caller should recurse through the operands of X to add additional
9169 costs.
9170
9171 We currently make no attempt to model the size savings of Thumb-2
9172 16-bit instructions. At the normal points in compilation where
9173 this code is called we have no measure of whether the condition
9174 flags are live or not, and thus no realistic way to determine what
9175 the size will eventually be. */
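/* For example, when this function returns FALSE for (plus (reg) (reg)) it
   has only accounted for the addition itself; the generic rtx_cost
   machinery then recurses and adds the (zero) cost of the two register
   operands.  */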
9176 static bool
9177 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9178 const struct cpu_cost_table *extra_cost,
9179 int *cost, bool speed_p)
9180 {
9181 machine_mode mode = GET_MODE (x);
9182
9183 *cost = COSTS_N_INSNS (1);
9184
9185 if (TARGET_THUMB1)
9186 {
9187 if (speed_p)
9188 *cost = thumb1_rtx_costs (x, code, outer_code);
9189 else
9190 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9191 return true;
9192 }
9193
9194 switch (code)
9195 {
9196 case SET:
9197 *cost = 0;
9198 /* SET RTXs don't have a mode so we get it from the destination. */
9199 mode = GET_MODE (SET_DEST (x));
9200
9201 if (REG_P (SET_SRC (x))
9202 && REG_P (SET_DEST (x)))
9203 {
9204 /* Assume that most copies can be done with a single insn,
9205 unless we don't have HW FP, in which case everything
9206 larger than word mode will require two insns. */
9207 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9208 && GET_MODE_SIZE (mode) > 4)
9209 || mode == DImode)
9210 ? 2 : 1);
9211 /* Conditional register moves can be encoded
9212 in 16 bits in Thumb mode. */
9213 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9214 *cost >>= 1;
9215
9216 return true;
9217 }
9218
9219 if (CONST_INT_P (SET_SRC (x)))
9220 {
9221 /* Handle CONST_INT here, since the value doesn't have a mode
9222 and we would otherwise be unable to work out the true cost. */
9223 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9224 0, speed_p);
9225 outer_code = SET;
9226 /* Slightly lower the cost of setting a core reg to a constant.
9227 This helps break up chains and allows for better scheduling. */
9228 if (REG_P (SET_DEST (x))
9229 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9230 *cost -= 1;
9231 x = SET_SRC (x);
9232 /* Immediate moves with an immediate in the range [0, 255] can be
9233 encoded in 16 bits in Thumb mode. */
9234 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
 9235 	    && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9236 *cost >>= 1;
9237 goto const_int_cost;
9238 }
9239
9240 return false;
9241
9242 case MEM:
 9243       /* A memory access costs 1 insn if the mode is small or the address is
 9244 	 a single register; otherwise it costs one insn per word.  */
9245 if (REG_P (XEXP (x, 0)))
9246 *cost = COSTS_N_INSNS (1);
9247 else if (flag_pic
9248 && GET_CODE (XEXP (x, 0)) == PLUS
9249 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9250 /* This will be split into two instructions.
9251 See arm.md:calculate_pic_address. */
9252 *cost = COSTS_N_INSNS (2);
9253 else
9254 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9255
9256 /* For speed optimizations, add the costs of the address and
9257 accessing memory. */
9258 if (speed_p)
9259 #ifdef NOT_YET
9260 *cost += (extra_cost->ldst.load
9261 + arm_address_cost (XEXP (x, 0), mode,
9262 ADDR_SPACE_GENERIC, speed_p));
9263 #else
9264 *cost += extra_cost->ldst.load;
9265 #endif
9266 return true;
9267
9268 case PARALLEL:
9269 {
 9270 	/* Calculations of LDM costs are complex.  We assume an initial cost
 9271 	   (ldm_1st) which covers loading the first ldm_regs_per_insn_1st
 9272 	   registers; then each additional group of up to
 9273 	   ldm_regs_per_insn_subsequent registers costs one more insn.  The
 9274 	   formula for N regs is thus:
9275
9276 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9277 + ldm_regs_per_insn_subsequent - 1)
9278 / ldm_regs_per_insn_subsequent).
9279
9280 Additional costs may also be added for addressing. A similar
9281 formula is used for STM. */
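	/* For example, with ldm_regs_per_insn_1st == 1 and
	   ldm_regs_per_insn_subsequent == 2 (illustrative figures only; the
	   real values come from the per-CPU tuning tables), a 5-register LDM
	   is costed as ldm_1st + COSTS_N_INSNS ((4 + 1) / 2), i.e.
	   ldm_1st + COSTS_N_INSNS (2).  */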
9282
9283 bool is_ldm = load_multiple_operation (x, SImode);
9284 bool is_stm = store_multiple_operation (x, SImode);
9285
9286 if (is_ldm || is_stm)
9287 {
9288 if (speed_p)
9289 {
9290 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9291 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9292 ? extra_cost->ldst.ldm_regs_per_insn_1st
9293 : extra_cost->ldst.stm_regs_per_insn_1st;
9294 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9295 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9296 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9297
9298 *cost += regs_per_insn_1st
9299 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9300 + regs_per_insn_sub - 1)
9301 / regs_per_insn_sub);
9302 return true;
9303 }
9304
9305 }
9306 return false;
9307 }
9308 case DIV:
9309 case UDIV:
9310 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9311 && (mode == SFmode || !TARGET_VFP_SINGLE))
9312 *cost += COSTS_N_INSNS (speed_p
9313 ? extra_cost->fp[mode != SFmode].div : 0);
9314 else if (mode == SImode && TARGET_IDIV)
9315 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9316 else
9317 *cost = LIBCALL_COST (2);
9318 return false; /* All arguments must be in registers. */
9319
9320 case MOD:
9321 /* MOD by a power of 2 can be expanded as:
9322 rsbs r1, r0, #0
9323 and r0, r0, #(n - 1)
9324 and r1, r1, #(n - 1)
9325 rsbpl r0, r1, #0. */
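      /* That expansion is four instructions, which matches the base cost of
	 one insn plus the COSTS_N_INSNS (3) added below.  */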
9326 if (CONST_INT_P (XEXP (x, 1))
9327 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9328 && mode == SImode)
9329 {
9330 *cost += COSTS_N_INSNS (3);
9331
9332 if (speed_p)
9333 *cost += 2 * extra_cost->alu.logical
9334 + extra_cost->alu.arith;
9335 return true;
9336 }
9337
9338 /* Fall-through. */
9339 case UMOD:
9340 *cost = LIBCALL_COST (2);
9341 return false; /* All arguments must be in registers. */
9342
9343 case ROTATE:
9344 if (mode == SImode && REG_P (XEXP (x, 1)))
9345 {
9346 *cost += (COSTS_N_INSNS (1)
9347 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9348 if (speed_p)
9349 *cost += extra_cost->alu.shift_reg;
9350 return true;
9351 }
9352 /* Fall through */
9353 case ROTATERT:
9354 case ASHIFT:
9355 case LSHIFTRT:
9356 case ASHIFTRT:
9357 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9358 {
9359 *cost += (COSTS_N_INSNS (2)
9360 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9361 if (speed_p)
9362 *cost += 2 * extra_cost->alu.shift;
9363 return true;
9364 }
9365 else if (mode == SImode)
9366 {
9367 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9368 /* Slightly disparage register shifts at -Os, but not by much. */
9369 if (!CONST_INT_P (XEXP (x, 1)))
 9370 	    *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
 9371 		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9372 return true;
9373 }
9374 else if (GET_MODE_CLASS (mode) == MODE_INT
9375 && GET_MODE_SIZE (mode) < 4)
9376 {
9377 if (code == ASHIFT)
9378 {
9379 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9380 /* Slightly disparage register shifts at -Os, but not by
9381 much. */
9382 if (!CONST_INT_P (XEXP (x, 1)))
 9383 	      *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
 9384 			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9385 }
9386 else if (code == LSHIFTRT || code == ASHIFTRT)
9387 {
9388 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9389 {
9390 /* Can use SBFX/UBFX. */
9391 if (speed_p)
9392 *cost += extra_cost->alu.bfx;
9393 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9394 }
9395 else
9396 {
9397 *cost += COSTS_N_INSNS (1);
9398 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9399 if (speed_p)
9400 {
9401 if (CONST_INT_P (XEXP (x, 1)))
9402 *cost += 2 * extra_cost->alu.shift;
9403 else
9404 *cost += (extra_cost->alu.shift
9405 + extra_cost->alu.shift_reg);
9406 }
9407 else
9408 /* Slightly disparage register shifts. */
9409 *cost += !CONST_INT_P (XEXP (x, 1));
9410 }
9411 }
9412 else /* Rotates. */
9413 {
9414 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9415 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9416 if (speed_p)
9417 {
9418 if (CONST_INT_P (XEXP (x, 1)))
9419 *cost += (2 * extra_cost->alu.shift
9420 + extra_cost->alu.log_shift);
9421 else
9422 *cost += (extra_cost->alu.shift
9423 + extra_cost->alu.shift_reg
9424 + extra_cost->alu.log_shift_reg);
9425 }
9426 }
9427 return true;
9428 }
9429
9430 *cost = LIBCALL_COST (2);
9431 return false;
9432
9433 case BSWAP:
9434 if (arm_arch6)
9435 {
9436 if (mode == SImode)
9437 {
9438 if (speed_p)
9439 *cost += extra_cost->alu.rev;
9440
9441 return false;
9442 }
9443 }
9444 else
9445 {
9446 /* No rev instruction available. Look at arm_legacy_rev
9447 and thumb_legacy_rev for the form of RTL used then. */
9448 if (TARGET_THUMB)
9449 {
9450 *cost += COSTS_N_INSNS (9);
9451
9452 if (speed_p)
9453 {
9454 *cost += 6 * extra_cost->alu.shift;
9455 *cost += 3 * extra_cost->alu.logical;
9456 }
9457 }
9458 else
9459 {
9460 *cost += COSTS_N_INSNS (4);
9461
9462 if (speed_p)
9463 {
9464 *cost += 2 * extra_cost->alu.shift;
9465 *cost += extra_cost->alu.arith_shift;
9466 *cost += 2 * extra_cost->alu.logical;
9467 }
9468 }
9469 return true;
9470 }
9471 return false;
9472
9473 case MINUS:
9474 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9475 && (mode == SFmode || !TARGET_VFP_SINGLE))
9476 {
9477 if (GET_CODE (XEXP (x, 0)) == MULT
9478 || GET_CODE (XEXP (x, 1)) == MULT)
9479 {
9480 rtx mul_op0, mul_op1, sub_op;
9481
9482 if (speed_p)
9483 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9484
9485 if (GET_CODE (XEXP (x, 0)) == MULT)
9486 {
9487 mul_op0 = XEXP (XEXP (x, 0), 0);
9488 mul_op1 = XEXP (XEXP (x, 0), 1);
9489 sub_op = XEXP (x, 1);
9490 }
9491 else
9492 {
9493 mul_op0 = XEXP (XEXP (x, 1), 0);
9494 mul_op1 = XEXP (XEXP (x, 1), 1);
9495 sub_op = XEXP (x, 0);
9496 }
9497
9498 /* The first operand of the multiply may be optionally
9499 negated. */
9500 if (GET_CODE (mul_op0) == NEG)
9501 mul_op0 = XEXP (mul_op0, 0);
9502
9503 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9504 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9505 + rtx_cost (sub_op, mode, code, 0, speed_p));
9506
9507 return true;
9508 }
9509
9510 if (speed_p)
9511 *cost += extra_cost->fp[mode != SFmode].addsub;
9512 return false;
9513 }
9514
9515 if (mode == SImode)
9516 {
9517 rtx shift_by_reg = NULL;
9518 rtx shift_op;
9519 rtx non_shift_op;
9520
9521 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9522 if (shift_op == NULL)
9523 {
9524 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9525 non_shift_op = XEXP (x, 0);
9526 }
9527 else
9528 non_shift_op = XEXP (x, 1);
9529
9530 if (shift_op != NULL)
9531 {
9532 if (shift_by_reg != NULL)
9533 {
9534 if (speed_p)
9535 *cost += extra_cost->alu.arith_shift_reg;
9536 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9537 }
9538 else if (speed_p)
9539 *cost += extra_cost->alu.arith_shift;
9540
9541 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9542 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9543 return true;
9544 }
9545
9546 if (arm_arch_thumb2
9547 && GET_CODE (XEXP (x, 1)) == MULT)
9548 {
9549 /* MLS. */
9550 if (speed_p)
9551 *cost += extra_cost->mult[0].add;
9552 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9553 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9554 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9555 return true;
9556 }
9557
9558 if (CONST_INT_P (XEXP (x, 0)))
9559 {
9560 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9561 INTVAL (XEXP (x, 0)), NULL_RTX,
9562 NULL_RTX, 1, 0);
9563 *cost = COSTS_N_INSNS (insns);
9564 if (speed_p)
9565 *cost += insns * extra_cost->alu.arith;
9566 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9567 return true;
9568 }
9569 else if (speed_p)
9570 *cost += extra_cost->alu.arith;
9571
9572 return false;
9573 }
9574
9575 if (GET_MODE_CLASS (mode) == MODE_INT
9576 && GET_MODE_SIZE (mode) < 4)
9577 {
9578 rtx shift_op, shift_reg;
9579 shift_reg = NULL;
9580
9581 /* We check both sides of the MINUS for shifter operands since,
9582 unlike PLUS, it's not commutative. */
9583
9584 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9585 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9586
9587 /* Slightly disparage, as we might need to widen the result. */
9588 *cost += 1;
9589 if (speed_p)
9590 *cost += extra_cost->alu.arith;
9591
9592 if (CONST_INT_P (XEXP (x, 0)))
9593 {
9594 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9595 return true;
9596 }
9597
9598 return false;
9599 }
9600
9601 if (mode == DImode)
9602 {
9603 *cost += COSTS_N_INSNS (1);
9604
9605 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9606 {
9607 rtx op1 = XEXP (x, 1);
9608
9609 if (speed_p)
9610 *cost += 2 * extra_cost->alu.arith;
9611
9612 if (GET_CODE (op1) == ZERO_EXTEND)
9613 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9614 0, speed_p);
9615 else
9616 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9617 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9618 0, speed_p);
9619 return true;
9620 }
9621 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9622 {
9623 if (speed_p)
9624 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9625 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9626 0, speed_p)
9627 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9628 return true;
9629 }
9630 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9631 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9632 {
9633 if (speed_p)
9634 *cost += (extra_cost->alu.arith
9635 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9636 ? extra_cost->alu.arith
9637 : extra_cost->alu.arith_shift));
9638 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9639 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9640 GET_CODE (XEXP (x, 1)), 0, speed_p));
9641 return true;
9642 }
9643
9644 if (speed_p)
9645 *cost += 2 * extra_cost->alu.arith;
9646 return false;
9647 }
9648
9649 /* Vector mode? */
9650
9651 *cost = LIBCALL_COST (2);
9652 return false;
9653
9654 case PLUS:
9655 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9656 && (mode == SFmode || !TARGET_VFP_SINGLE))
9657 {
9658 if (GET_CODE (XEXP (x, 0)) == MULT)
9659 {
9660 rtx mul_op0, mul_op1, add_op;
9661
9662 if (speed_p)
9663 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9664
9665 mul_op0 = XEXP (XEXP (x, 0), 0);
9666 mul_op1 = XEXP (XEXP (x, 0), 1);
9667 add_op = XEXP (x, 1);
9668
9669 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9670 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9671 + rtx_cost (add_op, mode, code, 0, speed_p));
9672
9673 return true;
9674 }
9675
9676 if (speed_p)
9677 *cost += extra_cost->fp[mode != SFmode].addsub;
9678 return false;
9679 }
9680 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9681 {
9682 *cost = LIBCALL_COST (2);
9683 return false;
9684 }
9685
9686 /* Narrow modes can be synthesized in SImode, but the range
9687 of useful sub-operations is limited. Check for shift operations
9688 on one of the operands. Only left shifts can be used in the
9689 narrow modes. */
9690 if (GET_MODE_CLASS (mode) == MODE_INT
9691 && GET_MODE_SIZE (mode) < 4)
9692 {
9693 rtx shift_op, shift_reg;
9694 shift_reg = NULL;
9695
9696 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9697
9698 if (CONST_INT_P (XEXP (x, 1)))
9699 {
9700 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9701 INTVAL (XEXP (x, 1)), NULL_RTX,
9702 NULL_RTX, 1, 0);
9703 *cost = COSTS_N_INSNS (insns);
9704 if (speed_p)
9705 *cost += insns * extra_cost->alu.arith;
9706 /* Slightly penalize a narrow operation as the result may
9707 need widening. */
9708 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9709 return true;
9710 }
9711
9712 /* Slightly penalize a narrow operation as the result may
9713 need widening. */
9714 *cost += 1;
9715 if (speed_p)
9716 *cost += extra_cost->alu.arith;
9717
9718 return false;
9719 }
9720
9721 if (mode == SImode)
9722 {
9723 rtx shift_op, shift_reg;
9724
9725 if (TARGET_INT_SIMD
9726 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9727 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9728 {
9729 /* UXTA[BH] or SXTA[BH]. */
9730 if (speed_p)
9731 *cost += extra_cost->alu.extend_arith;
9732 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9733 0, speed_p)
9734 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9735 return true;
9736 }
9737
9738 shift_reg = NULL;
9739 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9740 if (shift_op != NULL)
9741 {
9742 if (shift_reg)
9743 {
9744 if (speed_p)
9745 *cost += extra_cost->alu.arith_shift_reg;
9746 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9747 }
9748 else if (speed_p)
9749 *cost += extra_cost->alu.arith_shift;
9750
9751 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9752 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9753 return true;
9754 }
9755 if (GET_CODE (XEXP (x, 0)) == MULT)
9756 {
9757 rtx mul_op = XEXP (x, 0);
9758
9759 if (TARGET_DSP_MULTIPLY
9760 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9761 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9762 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9763 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9764 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9765 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9766 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9767 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9768 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9769 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9770 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9771 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9772 == 16))))))
9773 {
9774 /* SMLA[BT][BT]. */
9775 if (speed_p)
9776 *cost += extra_cost->mult[0].extend_add;
9777 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9778 SIGN_EXTEND, 0, speed_p)
9779 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9780 SIGN_EXTEND, 0, speed_p)
9781 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9782 return true;
9783 }
9784
9785 if (speed_p)
9786 *cost += extra_cost->mult[0].add;
9787 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9788 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9789 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9790 return true;
9791 }
9792 if (CONST_INT_P (XEXP (x, 1)))
9793 {
9794 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9795 INTVAL (XEXP (x, 1)), NULL_RTX,
9796 NULL_RTX, 1, 0);
9797 *cost = COSTS_N_INSNS (insns);
9798 if (speed_p)
9799 *cost += insns * extra_cost->alu.arith;
9800 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9801 return true;
9802 }
9803 else if (speed_p)
9804 *cost += extra_cost->alu.arith;
9805
9806 return false;
9807 }
9808
9809 if (mode == DImode)
9810 {
9811 if (arm_arch3m
9812 && GET_CODE (XEXP (x, 0)) == MULT
9813 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9814 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9815 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9816 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9817 {
9818 if (speed_p)
9819 *cost += extra_cost->mult[1].extend_add;
9820 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9821 ZERO_EXTEND, 0, speed_p)
9822 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9823 ZERO_EXTEND, 0, speed_p)
9824 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9825 return true;
9826 }
9827
9828 *cost += COSTS_N_INSNS (1);
9829
9830 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9831 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9832 {
9833 if (speed_p)
9834 *cost += (extra_cost->alu.arith
9835 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9836 ? extra_cost->alu.arith
9837 : extra_cost->alu.arith_shift));
9838
9839 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9840 0, speed_p)
9841 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9842 return true;
9843 }
9844
9845 if (speed_p)
9846 *cost += 2 * extra_cost->alu.arith;
9847 return false;
9848 }
9849
9850 /* Vector mode? */
9851 *cost = LIBCALL_COST (2);
9852 return false;
9853 case IOR:
9854 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9855 {
9856 if (speed_p)
9857 *cost += extra_cost->alu.rev;
9858
9859 return true;
9860 }
9861 /* Fall through. */
9862 case AND: case XOR:
9863 if (mode == SImode)
9864 {
9865 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9866 rtx op0 = XEXP (x, 0);
9867 rtx shift_op, shift_reg;
9868
9869 if (subcode == NOT
9870 && (code == AND
9871 || (code == IOR && TARGET_THUMB2)))
9872 op0 = XEXP (op0, 0);
9873
9874 shift_reg = NULL;
9875 shift_op = shifter_op_p (op0, &shift_reg);
9876 if (shift_op != NULL)
9877 {
9878 if (shift_reg)
9879 {
9880 if (speed_p)
9881 *cost += extra_cost->alu.log_shift_reg;
9882 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9883 }
9884 else if (speed_p)
9885 *cost += extra_cost->alu.log_shift;
9886
9887 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9888 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9889 return true;
9890 }
9891
9892 if (CONST_INT_P (XEXP (x, 1)))
9893 {
9894 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9895 INTVAL (XEXP (x, 1)), NULL_RTX,
9896 NULL_RTX, 1, 0);
9897
9898 *cost = COSTS_N_INSNS (insns);
9899 if (speed_p)
9900 *cost += insns * extra_cost->alu.logical;
9901 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9902 return true;
9903 }
9904
9905 if (speed_p)
9906 *cost += extra_cost->alu.logical;
9907 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9908 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9909 return true;
9910 }
9911
9912 if (mode == DImode)
9913 {
9914 rtx op0 = XEXP (x, 0);
9915 enum rtx_code subcode = GET_CODE (op0);
9916
9917 *cost += COSTS_N_INSNS (1);
9918
9919 if (subcode == NOT
9920 && (code == AND
9921 || (code == IOR && TARGET_THUMB2)))
9922 op0 = XEXP (op0, 0);
9923
9924 if (GET_CODE (op0) == ZERO_EXTEND)
9925 {
9926 if (speed_p)
9927 *cost += 2 * extra_cost->alu.logical;
9928
9929 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9930 0, speed_p)
9931 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9932 return true;
9933 }
9934 else if (GET_CODE (op0) == SIGN_EXTEND)
9935 {
9936 if (speed_p)
9937 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9938
9939 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9940 0, speed_p)
9941 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9942 return true;
9943 }
9944
9945 if (speed_p)
9946 *cost += 2 * extra_cost->alu.logical;
9947
9948 return true;
9949 }
9950 /* Vector mode? */
9951
9952 *cost = LIBCALL_COST (2);
9953 return false;
9954
9955 case MULT:
9956 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9957 && (mode == SFmode || !TARGET_VFP_SINGLE))
9958 {
9959 rtx op0 = XEXP (x, 0);
9960
9961 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9962 op0 = XEXP (op0, 0);
9963
9964 if (speed_p)
9965 *cost += extra_cost->fp[mode != SFmode].mult;
9966
9967 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9968 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9969 return true;
9970 }
9971 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9972 {
9973 *cost = LIBCALL_COST (2);
9974 return false;
9975 }
9976
9977 if (mode == SImode)
9978 {
9979 if (TARGET_DSP_MULTIPLY
9980 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9981 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9982 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9983 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9984 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9985 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9986 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9987 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9988 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9989 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9990 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9991 && (INTVAL (XEXP (XEXP (x, 1), 1))
9992 == 16))))))
9993 {
9994 /* SMUL[TB][TB]. */
9995 if (speed_p)
9996 *cost += extra_cost->mult[0].extend;
9997 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
9998 SIGN_EXTEND, 0, speed_p);
9999 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10000 SIGN_EXTEND, 1, speed_p);
10001 return true;
10002 }
10003 if (speed_p)
10004 *cost += extra_cost->mult[0].simple;
10005 return false;
10006 }
10007
10008 if (mode == DImode)
10009 {
10010 if (arm_arch3m
10011 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10012 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10013 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10014 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10015 {
10016 if (speed_p)
10017 *cost += extra_cost->mult[1].extend;
10018 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10019 ZERO_EXTEND, 0, speed_p)
10020 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10021 ZERO_EXTEND, 0, speed_p));
10022 return true;
10023 }
10024
10025 *cost = LIBCALL_COST (2);
10026 return false;
10027 }
10028
10029 /* Vector mode? */
10030 *cost = LIBCALL_COST (2);
10031 return false;
10032
10033 case NEG:
10034 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10035 && (mode == SFmode || !TARGET_VFP_SINGLE))
10036 {
10037 if (GET_CODE (XEXP (x, 0)) == MULT)
10038 {
10039 /* VNMUL. */
10040 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10041 return true;
10042 }
10043
10044 if (speed_p)
10045 *cost += extra_cost->fp[mode != SFmode].neg;
10046
10047 return false;
10048 }
10049 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10050 {
10051 *cost = LIBCALL_COST (1);
10052 return false;
10053 }
10054
10055 if (mode == SImode)
10056 {
10057 if (GET_CODE (XEXP (x, 0)) == ABS)
10058 {
10059 *cost += COSTS_N_INSNS (1);
10060 /* Assume the non-flag-changing variant. */
10061 if (speed_p)
10062 *cost += (extra_cost->alu.log_shift
10063 + extra_cost->alu.arith_shift);
10064 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10065 return true;
10066 }
10067
10068 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10069 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10070 {
10071 *cost += COSTS_N_INSNS (1);
10072 /* No extra cost for MOV imm and MVN imm. */
10073 /* If the comparison op is using the flags, there's no further
 10074 	     cost; otherwise we need to add the cost of the comparison.  */
10075 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10076 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10077 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10078 {
10079 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10080 *cost += (COSTS_N_INSNS (1)
10081 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10082 0, speed_p)
10083 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10084 1, speed_p));
10085 if (speed_p)
10086 *cost += extra_cost->alu.arith;
10087 }
10088 return true;
10089 }
10090
10091 if (speed_p)
10092 *cost += extra_cost->alu.arith;
10093 return false;
10094 }
10095
10096 if (GET_MODE_CLASS (mode) == MODE_INT
10097 && GET_MODE_SIZE (mode) < 4)
10098 {
10099 /* Slightly disparage, as we might need an extend operation. */
10100 *cost += 1;
10101 if (speed_p)
10102 *cost += extra_cost->alu.arith;
10103 return false;
10104 }
10105
10106 if (mode == DImode)
10107 {
10108 *cost += COSTS_N_INSNS (1);
10109 if (speed_p)
10110 *cost += 2 * extra_cost->alu.arith;
10111 return false;
10112 }
10113
10114 /* Vector mode? */
10115 *cost = LIBCALL_COST (1);
10116 return false;
10117
10118 case NOT:
10119 if (mode == SImode)
10120 {
10121 rtx shift_op;
10122 rtx shift_reg = NULL;
10123
10124 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10125
10126 if (shift_op)
10127 {
10128 if (shift_reg != NULL)
10129 {
10130 if (speed_p)
10131 *cost += extra_cost->alu.log_shift_reg;
10132 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10133 }
10134 else if (speed_p)
10135 *cost += extra_cost->alu.log_shift;
10136 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10137 return true;
10138 }
10139
10140 if (speed_p)
10141 *cost += extra_cost->alu.logical;
10142 return false;
10143 }
10144 if (mode == DImode)
10145 {
10146 *cost += COSTS_N_INSNS (1);
10147 return false;
10148 }
10149
10150 /* Vector mode? */
10151
10152 *cost += LIBCALL_COST (1);
10153 return false;
10154
10155 case IF_THEN_ELSE:
10156 {
10157 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10158 {
10159 *cost += COSTS_N_INSNS (3);
10160 return true;
10161 }
10162 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10163 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10164
10165 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
 10166       /* Assume that if one arm of the if_then_else is a register,
 10167 	 it will be tied to the result and the corresponding
 10168 	 conditional insn eliminated.  */
10169 if (REG_P (XEXP (x, 1)))
10170 *cost += op2cost;
10171 else if (REG_P (XEXP (x, 2)))
10172 *cost += op1cost;
10173 else
10174 {
10175 if (speed_p)
10176 {
10177 if (extra_cost->alu.non_exec_costs_exec)
10178 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10179 else
10180 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10181 }
10182 else
10183 *cost += op1cost + op2cost;
10184 }
10185 }
10186 return true;
10187
10188 case COMPARE:
10189 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10190 *cost = 0;
10191 else
10192 {
10193 machine_mode op0mode;
10194 /* We'll mostly assume that the cost of a compare is the cost of the
10195 LHS. However, there are some notable exceptions. */
10196
10197 /* Floating point compares are never done as side-effects. */
10198 op0mode = GET_MODE (XEXP (x, 0));
10199 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10200 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10201 {
10202 if (speed_p)
10203 *cost += extra_cost->fp[op0mode != SFmode].compare;
10204
10205 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10206 {
10207 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10208 return true;
10209 }
10210
10211 return false;
10212 }
10213 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10214 {
10215 *cost = LIBCALL_COST (2);
10216 return false;
10217 }
10218
10219 /* DImode compares normally take two insns. */
10220 if (op0mode == DImode)
10221 {
10222 *cost += COSTS_N_INSNS (1);
10223 if (speed_p)
10224 *cost += 2 * extra_cost->alu.arith;
10225 return false;
10226 }
10227
10228 if (op0mode == SImode)
10229 {
10230 rtx shift_op;
10231 rtx shift_reg;
10232
10233 if (XEXP (x, 1) == const0_rtx
10234 && !(REG_P (XEXP (x, 0))
10235 || (GET_CODE (XEXP (x, 0)) == SUBREG
10236 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10237 {
10238 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10239
10240 /* Multiply operations that set the flags are often
10241 significantly more expensive. */
10242 if (speed_p
10243 && GET_CODE (XEXP (x, 0)) == MULT
10244 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10245 *cost += extra_cost->mult[0].flag_setting;
10246
10247 if (speed_p
10248 && GET_CODE (XEXP (x, 0)) == PLUS
10249 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10250 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10251 0), 1), mode))
10252 *cost += extra_cost->mult[0].flag_setting;
10253 return true;
10254 }
10255
10256 shift_reg = NULL;
10257 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10258 if (shift_op != NULL)
10259 {
10260 if (shift_reg != NULL)
10261 {
10262 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10263 1, speed_p);
10264 if (speed_p)
10265 *cost += extra_cost->alu.arith_shift_reg;
10266 }
10267 else if (speed_p)
10268 *cost += extra_cost->alu.arith_shift;
10269 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10270 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10271 return true;
10272 }
10273
10274 if (speed_p)
10275 *cost += extra_cost->alu.arith;
10276 if (CONST_INT_P (XEXP (x, 1))
10277 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10278 {
10279 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10280 return true;
10281 }
10282 return false;
10283 }
10284
10285 /* Vector mode? */
10286
10287 *cost = LIBCALL_COST (2);
10288 return false;
10289 }
10290 return true;
10291
10292 case EQ:
10293 case NE:
10294 case LT:
10295 case LE:
10296 case GT:
10297 case GE:
10298 case LTU:
10299 case LEU:
10300 case GEU:
10301 case GTU:
10302 case ORDERED:
10303 case UNORDERED:
10304 case UNEQ:
10305 case UNLE:
10306 case UNLT:
10307 case UNGE:
10308 case UNGT:
10309 case LTGT:
10310 if (outer_code == SET)
10311 {
10312 /* Is it a store-flag operation? */
10313 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10314 && XEXP (x, 1) == const0_rtx)
10315 {
10316 /* Thumb also needs an IT insn. */
10317 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10318 return true;
10319 }
10320 if (XEXP (x, 1) == const0_rtx)
10321 {
10322 switch (code)
10323 {
10324 case LT:
10325 /* LSR Rd, Rn, #31. */
10326 if (speed_p)
10327 *cost += extra_cost->alu.shift;
10328 break;
10329
10330 case EQ:
10331 /* RSBS T1, Rn, #0
10332 ADC Rd, Rn, T1. */
10333
10334 case NE:
10335 /* SUBS T1, Rn, #1
10336 SBC Rd, Rn, T1. */
10337 *cost += COSTS_N_INSNS (1);
10338 break;
10339
10340 case LE:
10341 /* RSBS T1, Rn, Rn, LSR #31
10342 ADC Rd, Rn, T1. */
10343 *cost += COSTS_N_INSNS (1);
10344 if (speed_p)
10345 *cost += extra_cost->alu.arith_shift;
10346 break;
10347
10348 case GT:
10349 /* RSB Rd, Rn, Rn, ASR #1
10350 LSR Rd, Rd, #31. */
10351 *cost += COSTS_N_INSNS (1);
10352 if (speed_p)
10353 *cost += (extra_cost->alu.arith_shift
10354 + extra_cost->alu.shift);
10355 break;
10356
10357 case GE:
10358 /* ASR Rd, Rn, #31
10359 ADD Rd, Rn, #1. */
10360 *cost += COSTS_N_INSNS (1);
10361 if (speed_p)
10362 *cost += extra_cost->alu.shift;
10363 break;
10364
10365 default:
10366 /* Remaining cases are either meaningless or would take
10367 three insns anyway. */
10368 *cost = COSTS_N_INSNS (3);
10369 break;
10370 }
10371 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10372 return true;
10373 }
10374 else
10375 {
10376 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10377 if (CONST_INT_P (XEXP (x, 1))
10378 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10379 {
10380 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10381 return true;
10382 }
10383
10384 return false;
10385 }
10386 }
10387 /* Not directly inside a set. If it involves the condition code
10388 register it must be the condition for a branch, cond_exec or
10389 I_T_E operation. Since the comparison is performed elsewhere
10390 this is just the control part which has no additional
10391 cost. */
10392 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10393 && XEXP (x, 1) == const0_rtx)
10394 {
10395 *cost = 0;
10396 return true;
10397 }
10398 return false;
10399
10400 case ABS:
10401 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10402 && (mode == SFmode || !TARGET_VFP_SINGLE))
10403 {
10404 if (speed_p)
10405 *cost += extra_cost->fp[mode != SFmode].neg;
10406
10407 return false;
10408 }
10409 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10410 {
10411 *cost = LIBCALL_COST (1);
10412 return false;
10413 }
10414
10415 if (mode == SImode)
10416 {
10417 if (speed_p)
10418 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10419 return false;
10420 }
10421 /* Vector mode? */
10422 *cost = LIBCALL_COST (1);
10423 return false;
10424
10425 case SIGN_EXTEND:
10426 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10427 && MEM_P (XEXP (x, 0)))
10428 {
10429 if (mode == DImode)
10430 *cost += COSTS_N_INSNS (1);
10431
10432 if (!speed_p)
10433 return true;
10434
10435 if (GET_MODE (XEXP (x, 0)) == SImode)
10436 *cost += extra_cost->ldst.load;
10437 else
10438 *cost += extra_cost->ldst.load_sign_extend;
10439
10440 if (mode == DImode)
10441 *cost += extra_cost->alu.shift;
10442
10443 return true;
10444 }
10445
10446 /* Widening from less than 32-bits requires an extend operation. */
10447 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10448 {
10449 /* We have SXTB/SXTH. */
10450 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10451 if (speed_p)
10452 *cost += extra_cost->alu.extend;
10453 }
10454 else if (GET_MODE (XEXP (x, 0)) != SImode)
10455 {
10456 /* Needs two shifts. */
10457 *cost += COSTS_N_INSNS (1);
10458 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10459 if (speed_p)
10460 *cost += 2 * extra_cost->alu.shift;
10461 }
10462
10463 /* Widening beyond 32-bits requires one more insn. */
10464 if (mode == DImode)
10465 {
10466 *cost += COSTS_N_INSNS (1);
10467 if (speed_p)
10468 *cost += extra_cost->alu.shift;
10469 }
10470
10471 return true;
10472
10473 case ZERO_EXTEND:
10474 if ((arm_arch4
10475 || GET_MODE (XEXP (x, 0)) == SImode
10476 || GET_MODE (XEXP (x, 0)) == QImode)
10477 && MEM_P (XEXP (x, 0)))
10478 {
10479 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10480
10481 if (mode == DImode)
10482 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10483
10484 return true;
10485 }
10486
10487 /* Widening from less than 32-bits requires an extend operation. */
10488 if (GET_MODE (XEXP (x, 0)) == QImode)
10489 {
10490 /* UXTB can be a shorter instruction in Thumb2, but it might
10491 be slower than the AND Rd, Rn, #255 alternative. When
10492 optimizing for speed it should never be slower to use
10493 AND, and we don't really model 16-bit vs 32-bit insns
10494 here. */
10495 if (speed_p)
10496 *cost += extra_cost->alu.logical;
10497 }
10498 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10499 {
10500 /* We have UXTB/UXTH. */
10501 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10502 if (speed_p)
10503 *cost += extra_cost->alu.extend;
10504 }
10505 else if (GET_MODE (XEXP (x, 0)) != SImode)
10506 {
10507 /* Needs two shifts. It's marginally preferable to use
10508 shifts rather than two BIC instructions as the second
10509 shift may merge with a subsequent insn as a shifter
10510 op. */
10511 *cost = COSTS_N_INSNS (2);
10512 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10513 if (speed_p)
10514 *cost += 2 * extra_cost->alu.shift;
10515 }
10516
10517 /* Widening beyond 32-bits requires one more insn. */
10518 if (mode == DImode)
10519 {
10520 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10521 }
10522
10523 return true;
10524
10525 case CONST_INT:
10526 *cost = 0;
10527 /* CONST_INT has no mode, so we cannot tell for sure how many
10528 insns are really going to be needed. The best we can do is
10529 look at the value passed. If it fits in SImode, then assume
10530 that's the mode it will be used for. Otherwise assume it
10531 will be used in DImode. */
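      /* For example, a value such as 0x123456789 does not fit in SImode, so
	 it is costed below as two SImode constants (the low and high
	 words).  */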
10532 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10533 mode = SImode;
10534 else
10535 mode = DImode;
10536
10537 /* Avoid blowing up in arm_gen_constant (). */
10538 if (!(outer_code == PLUS
10539 || outer_code == AND
10540 || outer_code == IOR
10541 || outer_code == XOR
10542 || outer_code == MINUS))
10543 outer_code = SET;
10544
10545 const_int_cost:
10546 if (mode == SImode)
10547 {
10548 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10549 INTVAL (x), NULL, NULL,
10550 0, 0));
10551 /* Extra costs? */
10552 }
10553 else
10554 {
10555 *cost += COSTS_N_INSNS (arm_gen_constant
10556 (outer_code, SImode, NULL,
10557 trunc_int_for_mode (INTVAL (x), SImode),
10558 NULL, NULL, 0, 0)
10559 + arm_gen_constant (outer_code, SImode, NULL,
10560 INTVAL (x) >> 32, NULL,
10561 NULL, 0, 0));
10562 /* Extra costs? */
10563 }
10564
10565 return true;
10566
10567 case CONST:
10568 case LABEL_REF:
10569 case SYMBOL_REF:
10570 if (speed_p)
10571 {
10572 if (arm_arch_thumb2 && !flag_pic)
10573 *cost += COSTS_N_INSNS (1);
10574 else
10575 *cost += extra_cost->ldst.load;
10576 }
10577 else
10578 *cost += COSTS_N_INSNS (1);
10579
10580 if (flag_pic)
10581 {
10582 *cost += COSTS_N_INSNS (1);
10583 if (speed_p)
10584 *cost += extra_cost->alu.arith;
10585 }
10586
10587 return true;
10588
10589 case CONST_FIXED:
10590 *cost = COSTS_N_INSNS (4);
10591 /* Fixme. */
10592 return true;
10593
10594 case CONST_DOUBLE:
10595 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10596 && (mode == SFmode || !TARGET_VFP_SINGLE))
10597 {
10598 if (vfp3_const_double_rtx (x))
10599 {
10600 if (speed_p)
10601 *cost += extra_cost->fp[mode == DFmode].fpconst;
10602 return true;
10603 }
10604
10605 if (speed_p)
10606 {
10607 if (mode == DFmode)
10608 *cost += extra_cost->ldst.loadd;
10609 else
10610 *cost += extra_cost->ldst.loadf;
10611 }
10612 else
10613 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10614
10615 return true;
10616 }
10617 *cost = COSTS_N_INSNS (4);
10618 return true;
10619
10620 case CONST_VECTOR:
10621 /* Fixme. */
10622 if (TARGET_NEON
10623 && TARGET_HARD_FLOAT
10624 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10625 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10626 *cost = COSTS_N_INSNS (1);
10627 else
10628 *cost = COSTS_N_INSNS (4);
10629 return true;
10630
10631 case HIGH:
10632 case LO_SUM:
10633 /* When optimizing for size, we prefer constant pool entries to
10634 MOVW/MOVT pairs, so bump the cost of these slightly. */
10635 if (!speed_p)
10636 *cost += 1;
10637 return true;
10638
10639 case CLZ:
10640 if (speed_p)
10641 *cost += extra_cost->alu.clz;
10642 return false;
10643
10644 case SMIN:
10645 if (XEXP (x, 1) == const0_rtx)
10646 {
10647 if (speed_p)
10648 *cost += extra_cost->alu.log_shift;
10649 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10650 return true;
10651 }
10652 /* Fall through. */
10653 case SMAX:
10654 case UMIN:
10655 case UMAX:
10656 *cost += COSTS_N_INSNS (1);
10657 return false;
10658
10659 case TRUNCATE:
10660 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10661 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10662 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10663 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10664 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10665 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10666 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10667 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10668 == ZERO_EXTEND))))
10669 {
10670 if (speed_p)
10671 *cost += extra_cost->mult[1].extend;
10672 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10673 ZERO_EXTEND, 0, speed_p)
10674 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10675 ZERO_EXTEND, 0, speed_p));
10676 return true;
10677 }
10678 *cost = LIBCALL_COST (1);
10679 return false;
10680
10681 case UNSPEC_VOLATILE:
10682 case UNSPEC:
10683 return arm_unspec_cost (x, outer_code, speed_p, cost);
10684
10685 case PC:
10686 /* Reading the PC is like reading any other register. Writing it
10687 is more expensive, but we take that into account elsewhere. */
10688 *cost = 0;
10689 return true;
10690
10691 case ZERO_EXTRACT:
10692 /* TODO: Simple zero_extract of bottom bits using AND. */
10693 /* Fall through. */
10694 case SIGN_EXTRACT:
10695 if (arm_arch6
10696 && mode == SImode
10697 && CONST_INT_P (XEXP (x, 1))
10698 && CONST_INT_P (XEXP (x, 2)))
10699 {
10700 if (speed_p)
10701 *cost += extra_cost->alu.bfx;
10702 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10703 return true;
10704 }
 10705       /* Without UBFX/SBFX, we need to resort to shift operations.  */
10706 *cost += COSTS_N_INSNS (1);
10707 if (speed_p)
10708 *cost += 2 * extra_cost->alu.shift;
10709 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10710 return true;
10711
10712 case FLOAT_EXTEND:
10713 if (TARGET_HARD_FLOAT)
10714 {
10715 if (speed_p)
10716 *cost += extra_cost->fp[mode == DFmode].widen;
10717 if (!TARGET_FPU_ARMV8
10718 && GET_MODE (XEXP (x, 0)) == HFmode)
10719 {
10720 /* Pre v8, widening HF->DF is a two-step process, first
10721 widening to SFmode. */
10722 *cost += COSTS_N_INSNS (1);
10723 if (speed_p)
10724 *cost += extra_cost->fp[0].widen;
10725 }
10726 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10727 return true;
10728 }
10729
10730 *cost = LIBCALL_COST (1);
10731 return false;
10732
10733 case FLOAT_TRUNCATE:
10734 if (TARGET_HARD_FLOAT)
10735 {
10736 if (speed_p)
10737 *cost += extra_cost->fp[mode == DFmode].narrow;
10738 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10739 return true;
10740 /* Vector modes? */
10741 }
10742 *cost = LIBCALL_COST (1);
10743 return false;
10744
10745 case FMA:
10746 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10747 {
10748 rtx op0 = XEXP (x, 0);
10749 rtx op1 = XEXP (x, 1);
10750 rtx op2 = XEXP (x, 2);
10751
10752
10753 /* vfms or vfnma. */
10754 if (GET_CODE (op0) == NEG)
10755 op0 = XEXP (op0, 0);
10756
10757 /* vfnms or vfnma. */
10758 if (GET_CODE (op2) == NEG)
10759 op2 = XEXP (op2, 0);
10760
10761 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10762 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10763 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10764
10765 if (speed_p)
10766 *cost += extra_cost->fp[mode == DFmode].fma;
10767
10768 return true;
10769 }
10770
10771 *cost = LIBCALL_COST (3);
10772 return false;
10773
10774 case FIX:
10775 case UNSIGNED_FIX:
10776 if (TARGET_HARD_FLOAT)
10777 {
10778 /* The *combine_vcvtf2i pattern reduces a vmul+vcvt into
10779 a vcvt fixed-point conversion. */
10780 if (code == FIX && mode == SImode
10781 && GET_CODE (XEXP (x, 0)) == FIX
10782 && GET_MODE (XEXP (x, 0)) == SFmode
10783 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10784 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10785 > 0)
10786 {
10787 if (speed_p)
10788 *cost += extra_cost->fp[0].toint;
10789
10790 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10791 code, 0, speed_p);
10792 return true;
10793 }
10794
10795 if (GET_MODE_CLASS (mode) == MODE_INT)
10796 {
10797 mode = GET_MODE (XEXP (x, 0));
10798 if (speed_p)
10799 *cost += extra_cost->fp[mode == DFmode].toint;
10800 /* Strip off the 'cost' of rounding towards zero. */
10801 if (GET_CODE (XEXP (x, 0)) == FIX)
10802 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10803 0, speed_p);
10804 else
10805 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10806 /* ??? Increase the cost to deal with transferring from
10807 FP -> CORE registers? */
10808 return true;
10809 }
10810 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10811 && TARGET_FPU_ARMV8)
10812 {
10813 if (speed_p)
10814 *cost += extra_cost->fp[mode == DFmode].roundint;
10815 return false;
10816 }
10817 /* Vector costs? */
10818 }
10819 *cost = LIBCALL_COST (1);
10820 return false;
10821
10822 case FLOAT:
10823 case UNSIGNED_FLOAT:
10824 if (TARGET_HARD_FLOAT)
10825 {
10826 /* ??? Increase the cost to deal with transferring from CORE
10827 -> FP registers? */
10828 if (speed_p)
10829 *cost += extra_cost->fp[mode == DFmode].fromint;
10830 return false;
10831 }
10832 *cost = LIBCALL_COST (1);
10833 return false;
10834
10835 case CALL:
10836 return true;
10837
10838 case ASM_OPERANDS:
10839 {
10840 /* Just a guess: count the instructions in the asm template
10841 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10842 though (see PR60663). */
10843 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10844 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10845
10846 *cost = COSTS_N_INSNS (asm_length + num_operands);
10847 return true;
10848 }
10849 default:
10850 if (mode != VOIDmode)
10851 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10852 else
10853 *cost = COSTS_N_INSNS (4); /* Who knows? */
10854 return false;
10855 }
10856 }
10857
10858 #undef HANDLE_NARROW_SHIFT_ARITH
10859
10860 /* RTX costs entry point. */
10861
10862 static bool
10863 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10864 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10865 {
10866 bool result;
10867 int code = GET_CODE (x);
10868 gcc_assert (current_tune->insn_extra_cost);
10869
10870 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10871 (enum rtx_code) outer_code,
10872 current_tune->insn_extra_cost,
10873 total, speed);
10874
10875 if (dump_file && (dump_flags & TDF_DETAILS))
10876 {
10877 print_rtl_single (dump_file, x);
10878 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10879 *total, result ? "final" : "partial");
10880 }
10881 return result;
10882 }
10883
10884 /* All address computations that can be done are free, but rtx cost returns
10885 much the same value for practically all of them. So we weight the different
10886 types of address here, in order of preference (most preferred first):
10887 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
10888 static inline int
10889 arm_arm_address_cost (rtx x)
10890 {
10891 enum rtx_code c = GET_CODE (x);
10892
10893 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10894 return 0;
10895 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10896 return 10;
10897
10898 if (c == PLUS)
10899 {
10900 if (CONST_INT_P (XEXP (x, 1)))
10901 return 2;
10902
10903 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10904 return 3;
10905
10906 return 4;
10907 }
10908
10909 return 6;
10910 }
10911
10912 static inline int
10913 arm_thumb_address_cost (rtx x)
10914 {
10915 enum rtx_code c = GET_CODE (x);
10916
10917 if (c == REG)
10918 return 1;
10919 if (c == PLUS
10920 && REG_P (XEXP (x, 0))
10921 && CONST_INT_P (XEXP (x, 1)))
10922 return 1;
10923
10924 return 2;
10925 }
10926
10927 static int
10928 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10929 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10930 {
10931 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10932 }
10933
10934 /* Adjust cost hook for XScale. */
10935 static bool
10936 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10937 int * cost)
10938 {
10939 /* Some true dependencies can have a higher cost depending
10940 on precisely how certain input operands are used. */
10941 if (dep_type == 0
10942 && recog_memoized (insn) >= 0
10943 && recog_memoized (dep) >= 0)
10944 {
10945 int shift_opnum = get_attr_shift (insn);
10946 enum attr_type attr_type = get_attr_type (dep);
10947
10948 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10949 operand for INSN. If we have a shifted input operand and the
10950 instruction we depend on is another ALU instruction, then we may
10951 have to account for an additional stall. */
10952 if (shift_opnum != 0
10953 && (attr_type == TYPE_ALU_SHIFT_IMM
10954 || attr_type == TYPE_ALUS_SHIFT_IMM
10955 || attr_type == TYPE_LOGIC_SHIFT_IMM
10956 || attr_type == TYPE_LOGICS_SHIFT_IMM
10957 || attr_type == TYPE_ALU_SHIFT_REG
10958 || attr_type == TYPE_ALUS_SHIFT_REG
10959 || attr_type == TYPE_LOGIC_SHIFT_REG
10960 || attr_type == TYPE_LOGICS_SHIFT_REG
10961 || attr_type == TYPE_MOV_SHIFT
10962 || attr_type == TYPE_MVN_SHIFT
10963 || attr_type == TYPE_MOV_SHIFT_REG
10964 || attr_type == TYPE_MVN_SHIFT_REG))
10965 {
10966 rtx shifted_operand;
10967 int opno;
10968
10969 /* Get the shifted operand. */
10970 extract_insn (insn);
10971 shifted_operand = recog_data.operand[shift_opnum];
10972
10973 /* Iterate over all the operands in DEP. If we write an operand
10974 that overlaps with SHIFTED_OPERAND, then we have to increase the
10975 cost of this dependency. */
10976 extract_insn (dep);
10977 preprocess_constraints (dep);
10978 for (opno = 0; opno < recog_data.n_operands; opno++)
10979 {
10980 /* We can ignore strict inputs. */
10981 if (recog_data.operand_type[opno] == OP_IN)
10982 continue;
10983
10984 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10985 shifted_operand))
10986 {
10987 *cost = 2;
10988 return false;
10989 }
10990 }
10991 }
10992 }
10993 return true;
10994 }
10995
10996 /* Adjust cost hook for Cortex A9. */
10997 static bool
10998 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10999 int * cost)
11000 {
11001 switch (dep_type)
11002 {
11003 case REG_DEP_ANTI:
11004 *cost = 0;
11005 return false;
11006
11007 case REG_DEP_TRUE:
11008 case REG_DEP_OUTPUT:
11009 if (recog_memoized (insn) >= 0
11010 && recog_memoized (dep) >= 0)
11011 {
11012 if (GET_CODE (PATTERN (insn)) == SET)
11013 {
11014 if (GET_MODE_CLASS
11015 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11016 || GET_MODE_CLASS
11017 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11018 {
11019 enum attr_type attr_type_insn = get_attr_type (insn);
11020 enum attr_type attr_type_dep = get_attr_type (dep);
11021
11022 /* By default all dependencies of the form
11023 s0 = s0 <op> s1
11024 s0 = s0 <op> s2
11025 have an extra latency of 1 cycle because
11026 of the input and output dependency in this
11027 case. However this gets modeled as a true
11028 dependency and hence all these checks. */
11029 if (REG_P (SET_DEST (PATTERN (insn)))
11030 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11031 {
11032 /* FMACS is a special case where the dependent
11033 instruction can be issued 3 cycles before
11034 the normal latency in case of an output
11035 dependency. */
11036 if ((attr_type_insn == TYPE_FMACS
11037 || attr_type_insn == TYPE_FMACD)
11038 && (attr_type_dep == TYPE_FMACS
11039 || attr_type_dep == TYPE_FMACD))
11040 {
11041 if (dep_type == REG_DEP_OUTPUT)
11042 *cost = insn_default_latency (dep) - 3;
11043 else
11044 *cost = insn_default_latency (dep);
11045 return false;
11046 }
11047 else
11048 {
11049 if (dep_type == REG_DEP_OUTPUT)
11050 *cost = insn_default_latency (dep) + 1;
11051 else
11052 *cost = insn_default_latency (dep);
11053 }
11054 return false;
11055 }
11056 }
11057 }
11058 }
11059 break;
11060
11061 default:
11062 gcc_unreachable ();
11063 }
11064
11065 return true;
11066 }
11067
11068 /* Adjust cost hook for FA726TE. */
11069 static bool
11070 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11071 int * cost)
11072 {
11073 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11074 has a penalty of 3. */
11075 if (dep_type == REG_DEP_TRUE
11076 && recog_memoized (insn) >= 0
11077 && recog_memoized (dep) >= 0
11078 && get_attr_conds (dep) == CONDS_SET)
11079 {
11080 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11081 if (get_attr_conds (insn) == CONDS_USE
11082 && get_attr_type (insn) != TYPE_BRANCH)
11083 {
11084 *cost = 3;
11085 return false;
11086 }
11087
11088 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11089 || get_attr_conds (insn) == CONDS_USE)
11090 {
11091 *cost = 0;
11092 return false;
11093 }
11094 }
11095
11096 return true;
11097 }
11098
11099 /* Implement TARGET_REGISTER_MOVE_COST.
11100
11101 A move between VFP_REGS and GENERAL_REGS is a single insn, but
11102 it is typically more expensive than a single memory access. We set
11103 the cost to less than that of two memory accesses so that floating
11104 point to integer conversion does not go through memory. */
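/* For example, on TARGET_32BIT the VFP_REGS <-> GENERAL_REGS case below
   returns 15, whereas arm_memory_move_cost returns 10 per access, so a
   store/load round trip through memory compares as 20 and the direct
   register move is preferred.  */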
11105
11106 int
11107 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11108 reg_class_t from, reg_class_t to)
11109 {
11110 if (TARGET_32BIT)
11111 {
11112 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11113 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11114 return 15;
11115 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11116 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11117 return 4;
11118 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11119 return 20;
11120 else
11121 return 2;
11122 }
11123 else
11124 {
11125 if (from == HI_REGS || to == HI_REGS)
11126 return 4;
11127 else
11128 return 2;
11129 }
11130 }
11131
11132 /* Implement TARGET_MEMORY_MOVE_COST. */
11133
11134 int
11135 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11136 bool in ATTRIBUTE_UNUSED)
11137 {
11138 if (TARGET_32BIT)
11139 return 10;
11140 else
11141 {
11142 if (GET_MODE_SIZE (mode) < 4)
11143 return 8;
11144 else
11145 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11146 }
11147 }
11148
11149 /* Vectorizer cost model implementation. */
11150
11151 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11152 static int
11153 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11154 tree vectype,
11155 int misalign ATTRIBUTE_UNUSED)
11156 {
11157 unsigned elements;
11158
11159 switch (type_of_cost)
11160 {
11161 case scalar_stmt:
11162 return current_tune->vec_costs->scalar_stmt_cost;
11163
11164 case scalar_load:
11165 return current_tune->vec_costs->scalar_load_cost;
11166
11167 case scalar_store:
11168 return current_tune->vec_costs->scalar_store_cost;
11169
11170 case vector_stmt:
11171 return current_tune->vec_costs->vec_stmt_cost;
11172
11173 case vector_load:
11174 return current_tune->vec_costs->vec_align_load_cost;
11175
11176 case vector_store:
11177 return current_tune->vec_costs->vec_store_cost;
11178
11179 case vec_to_scalar:
11180 return current_tune->vec_costs->vec_to_scalar_cost;
11181
11182 case scalar_to_vec:
11183 return current_tune->vec_costs->scalar_to_vec_cost;
11184
11185 case unaligned_load:
11186 return current_tune->vec_costs->vec_unalign_load_cost;
11187
11188 case unaligned_store:
11189 return current_tune->vec_costs->vec_unalign_store_cost;
11190
11191 case cond_branch_taken:
11192 return current_tune->vec_costs->cond_taken_branch_cost;
11193
11194 case cond_branch_not_taken:
11195 return current_tune->vec_costs->cond_not_taken_branch_cost;
11196
11197 case vec_perm:
11198 case vec_promote_demote:
11199 return current_tune->vec_costs->vec_stmt_cost;
11200
11201 case vec_construct:
11202 elements = TYPE_VECTOR_SUBPARTS (vectype);
11203 return elements / 2 + 1;
11204
11205 default:
11206 gcc_unreachable ();
11207 }
11208 }
11209
11210 /* Implement targetm.vectorize.add_stmt_cost. */
11211
11212 static unsigned
11213 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11214 struct _stmt_vec_info *stmt_info, int misalign,
11215 enum vect_cost_model_location where)
11216 {
11217 unsigned *cost = (unsigned *) data;
11218 unsigned retval = 0;
11219
11220 if (flag_vect_cost_model)
11221 {
11222 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11223 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11224
11225 /* Statements in an inner loop relative to the loop being
11226 vectorized are weighted more heavily. The value here is
11227 arbitrary and could potentially be improved with analysis. */
11228 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11229 count *= 50; /* FIXME. */
11230
11231 retval = (unsigned) (count * stmt_cost);
11232 cost[where] += retval;
11233 }
11234
11235 return retval;
11236 }
11237
11238 /* Return true if and only if this insn can dual-issue only as older. */
11239 static bool
11240 cortexa7_older_only (rtx_insn *insn)
11241 {
11242 if (recog_memoized (insn) < 0)
11243 return false;
11244
11245 switch (get_attr_type (insn))
11246 {
11247 case TYPE_ALU_DSP_REG:
11248 case TYPE_ALU_SREG:
11249 case TYPE_ALUS_SREG:
11250 case TYPE_LOGIC_REG:
11251 case TYPE_LOGICS_REG:
11252 case TYPE_ADC_REG:
11253 case TYPE_ADCS_REG:
11254 case TYPE_ADR:
11255 case TYPE_BFM:
11256 case TYPE_REV:
11257 case TYPE_MVN_REG:
11258 case TYPE_SHIFT_IMM:
11259 case TYPE_SHIFT_REG:
11260 case TYPE_LOAD_BYTE:
11261 case TYPE_LOAD1:
11262 case TYPE_STORE1:
11263 case TYPE_FFARITHS:
11264 case TYPE_FADDS:
11265 case TYPE_FFARITHD:
11266 case TYPE_FADDD:
11267 case TYPE_FMOV:
11268 case TYPE_F_CVT:
11269 case TYPE_FCMPS:
11270 case TYPE_FCMPD:
11271 case TYPE_FCONSTS:
11272 case TYPE_FCONSTD:
11273 case TYPE_FMULS:
11274 case TYPE_FMACS:
11275 case TYPE_FMULD:
11276 case TYPE_FMACD:
11277 case TYPE_FDIVS:
11278 case TYPE_FDIVD:
11279 case TYPE_F_MRC:
11280 case TYPE_F_MRRC:
11281 case TYPE_F_FLAG:
11282 case TYPE_F_LOADS:
11283 case TYPE_F_STORES:
11284 return true;
11285 default:
11286 return false;
11287 }
11288 }
11289
11290 /* Return true if and only if this insn can dual-issue as younger. */
11291 static bool
11292 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11293 {
11294 if (recog_memoized (insn) < 0)
11295 {
11296 if (verbose > 5)
11297 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11298 return false;
11299 }
11300
11301 switch (get_attr_type (insn))
11302 {
11303 case TYPE_ALU_IMM:
11304 case TYPE_ALUS_IMM:
11305 case TYPE_LOGIC_IMM:
11306 case TYPE_LOGICS_IMM:
11307 case TYPE_EXTEND:
11308 case TYPE_MVN_IMM:
11309 case TYPE_MOV_IMM:
11310 case TYPE_MOV_REG:
11311 case TYPE_MOV_SHIFT:
11312 case TYPE_MOV_SHIFT_REG:
11313 case TYPE_BRANCH:
11314 case TYPE_CALL:
11315 return true;
11316 default:
11317 return false;
11318 }
11319 }
11320
11321
11322 /* Look for an instruction that can dual issue only as an older
11323 instruction, and move it in front of any instructions that can
11324 dual-issue as younger, while preserving the relative order of all
11325 other instructions in the ready list. This is a heuristic to help
11326 dual-issue in later cycles, by postponing issue of more flexible
11327 instructions. This heuristic may affect dual issue opportunities
11328 in the current cycle. */
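/* For example, suppose the ready list holds a MOV_IMM insn nearer the head
   and an ALU_SREG insn (both classified by the type switches above) further
   back.  The loop below records the MOV_IMM as FIRST_YOUNGER, then finds the
   ALU_SREG and records it as FIRST_OLDER_ONLY; the final shuffle slides the
   intervening insns one slot towards the tail and drops the older-only insn
   into the MOV_IMM's former position, so it is issued first.  */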
11329 static void
11330 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11331 int *n_readyp, int clock)
11332 {
11333 int i;
11334 int first_older_only = -1, first_younger = -1;
11335
11336 if (verbose > 5)
11337 fprintf (file,
11338 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11339 clock,
11340 *n_readyp);
11341
11342 /* Traverse the ready list from the head (the instruction to issue
11343 first), looking for the first instruction that can issue as
11344 younger and the first instruction that can dual-issue only as
11345 older. */
11346 for (i = *n_readyp - 1; i >= 0; i--)
11347 {
11348 rtx_insn *insn = ready[i];
11349 if (cortexa7_older_only (insn))
11350 {
11351 first_older_only = i;
11352 if (verbose > 5)
11353 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11354 break;
11355 }
11356 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11357 first_younger = i;
11358 }
11359
11360 /* Nothing to reorder because either no younger insn was found, or an insn
11361 that can dual-issue only as older appears before any insn that
11362 can dual-issue as younger. */
11363 if (first_younger == -1)
11364 {
11365 if (verbose > 5)
11366 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11367 return;
11368 }
11369
11370 /* Nothing to reorder because no older-only insn in the ready list. */
11371 if (first_older_only == -1)
11372 {
11373 if (verbose > 5)
11374 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11375 return;
11376 }
11377
11378 /* Move first_older_only insn before first_younger. */
11379 if (verbose > 5)
11380 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11381 INSN_UID(ready [first_older_only]),
11382 INSN_UID(ready [first_younger]));
11383 rtx_insn *first_older_only_insn = ready [first_older_only];
11384 for (i = first_older_only; i < first_younger; i++)
11385 {
11386 ready[i] = ready[i+1];
11387 }
11388
11389 ready[i] = first_older_only_insn;
11390 return;
11391 }
11392
11393 /* Implement TARGET_SCHED_REORDER. */
11394 static int
11395 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11396 int clock)
11397 {
11398 switch (arm_tune)
11399 {
11400 case TARGET_CPU_cortexa7:
11401 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11402 break;
11403 default:
11404 /* Do nothing for other cores. */
11405 break;
11406 }
11407
11408 return arm_issue_rate ();
11409 }
11410
11411 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11412 It corrects the value of COST based on the relationship between
11413 INSN and DEP through the dependence of kind DEP_TYPE. It returns the new
11414 value. There is a per-core adjust_cost hook to adjust scheduler costs
11415 and the per-core hook can choose to completely override the generic
11416 adjust_cost function. Only put bits of code into arm_adjust_cost that
11417 are common across all cores. */
11418 static int
11419 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11420 unsigned int)
11421 {
11422 rtx i_pat, d_pat;
11423
11424 /* When generating Thumb-1 code, we want to place flag-setting operations
11425 close to a conditional branch which depends on them, so that we can
11426 omit the comparison. */
11427 if (TARGET_THUMB1
11428 && dep_type == 0
11429 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11430 && recog_memoized (dep) >= 0
11431 && get_attr_conds (dep) == CONDS_SET)
11432 return 0;
11433
11434 if (current_tune->sched_adjust_cost != NULL)
11435 {
11436 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11437 return cost;
11438 }
11439
11440 /* XXX Is this strictly true? */
11441 if (dep_type == REG_DEP_ANTI
11442 || dep_type == REG_DEP_OUTPUT)
11443 return 0;
11444
11445 /* Call insns don't incur a stall, even if they follow a load. */
11446 if (dep_type == 0
11447 && CALL_P (insn))
11448 return 1;
11449
11450 if ((i_pat = single_set (insn)) != NULL
11451 && MEM_P (SET_SRC (i_pat))
11452 && (d_pat = single_set (dep)) != NULL
11453 && MEM_P (SET_DEST (d_pat)))
11454 {
11455 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11456 /* This is a load after a store; there is no conflict if the load reads
11457 from a cached area. Assume that loads from the stack and from the
11458 constant pool are cached, and that others will miss. This is a
11459 hack. */
11460
11461 if ((GET_CODE (src_mem) == SYMBOL_REF
11462 && CONSTANT_POOL_ADDRESS_P (src_mem))
11463 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11464 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11465 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11466 return 1;
11467 }
11468
11469 return cost;
11470 }
11471
11472 int
11473 arm_max_conditional_execute (void)
11474 {
11475 return max_insns_skipped;
11476 }
11477
11478 static int
11479 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11480 {
11481 if (TARGET_32BIT)
11482 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11483 else
11484 return (optimize > 0) ? 2 : 0;
11485 }
11486
11487 static int
11488 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11489 {
11490 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11491 }
11492
11493 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11494 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11495 sequences of non-executed instructions in IT blocks probably take the same
11496 amount of time as executed instructions (and the IT instruction itself takes
11497 space in icache). This function was experimentally determined to give good
11498 results on a popular embedded benchmark. */
11499
11500 static int
11501 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11502 {
11503 return (TARGET_32BIT && speed_p) ? 1
11504 : arm_default_branch_cost (speed_p, predictable_p);
11505 }
11506
11507 static int
11508 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11509 {
11510 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11511 }
11512
11513 static bool fp_consts_inited = false;
11514
11515 static REAL_VALUE_TYPE value_fp0;
11516
11517 static void
11518 init_fp_table (void)
11519 {
11520 REAL_VALUE_TYPE r;
11521
11522 r = REAL_VALUE_ATOF ("0", DFmode);
11523 value_fp0 = r;
11524 fp_consts_inited = true;
11525 }
11526
11527 /* Return TRUE if rtx X is a valid immediate FP constant. */
11528 int
11529 arm_const_double_rtx (rtx x)
11530 {
11531 const REAL_VALUE_TYPE *r;
11532
11533 if (!fp_consts_inited)
11534 init_fp_table ();
11535
11536 r = CONST_DOUBLE_REAL_VALUE (x);
11537 if (REAL_VALUE_MINUS_ZERO (*r))
11538 return 0;
11539
11540 if (real_equal (r, &value_fp0))
11541 return 1;
11542
11543 return 0;
11544 }
11545
11546 /* VFPv3 has a fairly wide range of representable immediates, formed from
11547 "quarter-precision" floating-point values. These can be evaluated using this
11548 formula (with ^ for exponentiation):
11549
11550 (-1)^s * n * 2^(-r)
11551
11552 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11553 16 <= n <= 31 and 0 <= r <= 7.
11554
11555 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11556
11557 - A (most-significant) is the sign bit.
11558 - BCD are the exponent (encoded as r XOR 3).
11559 - EFGH are the mantissa (encoded as n - 16).
11560 */
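/* As a worked example of the encoding above: 1.0 = 16 * 2^(-4), so s = 0,
   n = 16 and r = 4, giving ABCDEFGH = 0 (sign), 4 XOR 3 = 7 (exponent) and
   16 - 16 = 0 (mantissa), i.e. binary 01110000 = 0x70.  Similarly the
   smallest positive value, 0.125 = 16 * 2^(-7), encodes as 0x40, and the
   largest, 31.0 = 31 * 2^0, encodes as 0x3f.  */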
11561
11562 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11563 fconst[sd] instruction, or -1 if X isn't suitable. */
11564 static int
11565 vfp3_const_double_index (rtx x)
11566 {
11567 REAL_VALUE_TYPE r, m;
11568 int sign, exponent;
11569 unsigned HOST_WIDE_INT mantissa, mant_hi;
11570 unsigned HOST_WIDE_INT mask;
11571 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11572 bool fail;
11573
11574 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11575 return -1;
11576
11577 r = *CONST_DOUBLE_REAL_VALUE (x);
11578
11579 /* We can't represent these things, so detect them first. */
11580 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11581 return -1;
11582
11583 /* Extract sign, exponent and mantissa. */
11584 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11585 r = real_value_abs (&r);
11586 exponent = REAL_EXP (&r);
11587 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11588 highest (sign) bit, with a fixed binary point at bit point_pos.
11589 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11590 bits for the mantissa, this may fail (low bits would be lost). */
11591 real_ldexp (&m, &r, point_pos - exponent);
11592 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11593 mantissa = w.elt (0);
11594 mant_hi = w.elt (1);
11595
11596 /* If there are bits set in the low part of the mantissa, we can't
11597 represent this value. */
11598 if (mantissa != 0)
11599 return -1;
11600
11601 /* Now make it so that mantissa contains the most-significant bits, and move
11602 the point_pos to indicate that the least-significant bits have been
11603 discarded. */
11604 point_pos -= HOST_BITS_PER_WIDE_INT;
11605 mantissa = mant_hi;
11606
11607 /* We can permit four significant bits of mantissa only, plus a high bit
11608 which is always 1. */
11609 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11610 if ((mantissa & mask) != 0)
11611 return -1;
11612
11613 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11614 mantissa >>= point_pos - 5;
11615
11616 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11617 floating-point immediate zero with Neon using an integer-zero load, but
11618 that case is handled elsewhere.) */
11619 if (mantissa == 0)
11620 return -1;
11621
11622 gcc_assert (mantissa >= 16 && mantissa <= 31);
11623
11624 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11625 normalized significands are in the range [1, 2)). (Our mantissa is shifted
11626 left 4 places at this point relative to normalized IEEE754 values). GCC
11627 internally uses [0.5, 1) (see real.c), so the exponent returned from
11628 REAL_EXP must be altered. */
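/* For instance, 1.0 is held internally as 0.5 * 2^1, so REAL_EXP returns 1
   and the adjustment below yields 5 - 1 = 4, matching r = 4 for 1.0 in the
   quarter-precision formula further up.  */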
11629 exponent = 5 - exponent;
11630
11631 if (exponent < 0 || exponent > 7)
11632 return -1;
11633
11634 /* Sign, mantissa and exponent are now in the correct form to plug into the
11635 formula described in the comment above. */
11636 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11637 }
11638
11639 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11640 int
11641 vfp3_const_double_rtx (rtx x)
11642 {
11643 if (!TARGET_VFP3)
11644 return 0;
11645
11646 return vfp3_const_double_index (x) != -1;
11647 }
11648
11649 /* Recognize immediates which can be used in various Neon instructions. Legal
11650 immediates are described by the following table (for VMVN variants, the
11651 bitwise inverse of the constant shown is recognized. In either case, VMOV
11652 is output and the correct instruction to use for a given constant is chosen
11653 by the assembler). The constant shown is replicated across all elements of
11654 the destination vector.
11655
11656 insn elems variant constant (binary)
11657 ---- ----- ------- -----------------
11658 vmov i32 0 00000000 00000000 00000000 abcdefgh
11659 vmov i32 1 00000000 00000000 abcdefgh 00000000
11660 vmov i32 2 00000000 abcdefgh 00000000 00000000
11661 vmov i32 3 abcdefgh 00000000 00000000 00000000
11662 vmov i16 4 00000000 abcdefgh
11663 vmov i16 5 abcdefgh 00000000
11664 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11665 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11666 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11667 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11668 vmvn i16 10 00000000 abcdefgh
11669 vmvn i16 11 abcdefgh 00000000
11670 vmov i32 12 00000000 00000000 abcdefgh 11111111
11671 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11672 vmov i32 14 00000000 abcdefgh 11111111 11111111
11673 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11674 vmov i8 16 abcdefgh
11675 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11676 eeeeeeee ffffffff gggggggg hhhhhhhh
11677 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11678 vmov f32 19 00000000 00000000 00000000 00000000
11679
11680 For case 18, B = !b. Representable values are exactly those accepted by
11681 vfp3_const_double_index, but are output as floating-point numbers rather
11682 than indices.
11683
11684 For case 19, we will change it to vmov.i32 when assembling.
11685
11686 Variants 0-5 (inclusive) may also be used as immediates for the second
11687 operand of VORR/VBIC instructions.
11688
11689 The INVERSE argument causes the bitwise inverse of the given operand to be
11690 recognized instead (used for recognizing legal immediates for the VAND/VORN
11691 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11692 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11693 output, rather than the real insns vbic/vorr).
11694
11695 INVERSE makes no difference to the recognition of float vectors.
11696
11697 The return value is the variant of immediate as shown in the above table, or
11698 -1 if the given value doesn't match any of the listed patterns.
11699 */
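/* As a concrete example of the table above: a V4SImode vector with every
   element equal to 0x0000004d splats into the byte vector
   { 4d, 00, 00, 00, 4d, 00, 00, 00, ... }, which satisfies the variant 0
   check below, so the function returns 0 with *ELEMENTWIDTH set to 32 and
   *MODCONST set to GEN_INT (0x4d).  If every element is instead 0xffffff4d,
   variants 0-5 fail and the variant 6 (vmvn i32) check matches.  */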
11700 static int
11701 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11702 rtx *modconst, int *elementwidth)
11703 {
11704 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11705 matches = 1; \
11706 for (i = 0; i < idx; i += (STRIDE)) \
11707 if (!(TEST)) \
11708 matches = 0; \
11709 if (matches) \
11710 { \
11711 immtype = (CLASS); \
11712 elsize = (ELSIZE); \
11713 break; \
11714 }
11715
11716 unsigned int i, elsize = 0, idx = 0, n_elts;
11717 unsigned int innersize;
11718 unsigned char bytes[16];
11719 int immtype = -1, matches;
11720 unsigned int invmask = inverse ? 0xff : 0;
11721 bool vector = GET_CODE (op) == CONST_VECTOR;
11722
11723 if (vector)
11724 n_elts = CONST_VECTOR_NUNITS (op);
11725 else
11726 {
11727 n_elts = 1;
11728 if (mode == VOIDmode)
11729 mode = DImode;
11730 }
11731
11732 innersize = GET_MODE_UNIT_SIZE (mode);
11733
11734 /* Vectors of float constants. */
11735 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11736 {
11737 rtx el0 = CONST_VECTOR_ELT (op, 0);
11738
11739 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11740 return -1;
11741
11742 /* FP16 vectors cannot be represented. */
11743 if (GET_MODE_INNER (mode) == HFmode)
11744 return -1;
11745
11746 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11747 are distinct in this context. */
11748 if (!const_vec_duplicate_p (op))
11749 return -1;
11750
11751 if (modconst)
11752 *modconst = CONST_VECTOR_ELT (op, 0);
11753
11754 if (elementwidth)
11755 *elementwidth = 0;
11756
11757 if (el0 == CONST0_RTX (GET_MODE (el0)))
11758 return 19;
11759 else
11760 return 18;
11761 }
11762
11763 /* The tricks done in the code below apply for little-endian vector layout.
11764 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11765 FIXME: Implement logic for big-endian vectors. */
11766 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11767 return -1;
11768
11769 /* Splat vector constant out into a byte vector. */
11770 for (i = 0; i < n_elts; i++)
11771 {
11772 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11773 unsigned HOST_WIDE_INT elpart;
11774
11775 gcc_assert (CONST_INT_P (el));
11776 elpart = INTVAL (el);
11777
11778 for (unsigned int byte = 0; byte < innersize; byte++)
11779 {
11780 bytes[idx++] = (elpart & 0xff) ^ invmask;
11781 elpart >>= BITS_PER_UNIT;
11782 }
11783 }
11784
11785 /* Sanity check. */
11786 gcc_assert (idx == GET_MODE_SIZE (mode));
11787
11788 do
11789 {
11790 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11791 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11792
11793 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11794 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11795
11796 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11797 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11798
11799 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11800 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11801
11802 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11803
11804 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11805
11806 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11807 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11808
11809 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11810 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11811
11812 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11813 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11814
11815 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11816 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11817
11818 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11819
11820 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11821
11822 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11823 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11824
11825 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11826 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11827
11828 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11829 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11830
11831 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11832 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11833
11834 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11835
11836 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11837 && bytes[i] == bytes[(i + 8) % idx]);
11838 }
11839 while (0);
11840
11841 if (immtype == -1)
11842 return -1;
11843
11844 if (elementwidth)
11845 *elementwidth = elsize;
11846
11847 if (modconst)
11848 {
11849 unsigned HOST_WIDE_INT imm = 0;
11850
11851 /* Un-invert bytes of recognized vector, if necessary. */
11852 if (invmask != 0)
11853 for (i = 0; i < idx; i++)
11854 bytes[i] ^= invmask;
11855
11856 if (immtype == 17)
11857 {
11858 /* FIXME: Broken on 32-bit H_W_I hosts. */
11859 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11860
11861 for (i = 0; i < 8; i++)
11862 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11863 << (i * BITS_PER_UNIT);
11864
11865 *modconst = GEN_INT (imm);
11866 }
11867 else
11868 {
11869 unsigned HOST_WIDE_INT imm = 0;
11870
11871 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11872 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11873
11874 *modconst = GEN_INT (imm);
11875 }
11876 }
11877
11878 return immtype;
11879 #undef CHECK
11880 }
11881
11882 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11883 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11884 float elements), and a modified constant (whatever should be output for a
11885 VMOV) in *MODCONST. */
11886
11887 int
11888 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11889 rtx *modconst, int *elementwidth)
11890 {
11891 rtx tmpconst;
11892 int tmpwidth;
11893 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11894
11895 if (retval == -1)
11896 return 0;
11897
11898 if (modconst)
11899 *modconst = tmpconst;
11900
11901 if (elementwidth)
11902 *elementwidth = tmpwidth;
11903
11904 return 1;
11905 }
11906
11907 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11908 the immediate is valid, write a constant suitable for using as an operand
11909 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11910 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11911
11912 int
11913 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11914 rtx *modconst, int *elementwidth)
11915 {
11916 rtx tmpconst;
11917 int tmpwidth;
11918 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11919
11920 if (retval < 0 || retval > 5)
11921 return 0;
11922
11923 if (modconst)
11924 *modconst = tmpconst;
11925
11926 if (elementwidth)
11927 *elementwidth = tmpwidth;
11928
11929 return 1;
11930 }
11931
11932 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11933 the immediate is valid, write a constant suitable for using as an operand
11934 to VSHR/VSHL to *MODCONST and the corresponding element width to
11935 *ELEMENTWIDTH. ISLEFTSHIFT distinguishes a left shift from a right shift,
11936 because they have different limitations. */
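/* For example, for a V4HImode operand (16-bit elements) MAXSHIFT below is
   16, so a valid VSHL immediate lies in 0..15 while a valid VSHR immediate
   lies in 1..16.  */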
11937
11938 int
11939 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11940 rtx *modconst, int *elementwidth,
11941 bool isleftshift)
11942 {
11943 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11944 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11945 unsigned HOST_WIDE_INT last_elt = 0;
11946 unsigned HOST_WIDE_INT maxshift;
11947
11948 /* Split vector constant out into a byte vector. */
11949 for (i = 0; i < n_elts; i++)
11950 {
11951 rtx el = CONST_VECTOR_ELT (op, i);
11952 unsigned HOST_WIDE_INT elpart;
11953
11954 if (CONST_INT_P (el))
11955 elpart = INTVAL (el);
11956 else if (CONST_DOUBLE_P (el))
11957 return 0;
11958 else
11959 gcc_unreachable ();
11960
11961 if (i != 0 && elpart != last_elt)
11962 return 0;
11963
11964 last_elt = elpart;
11965 }
11966
11967 /* Shift less than element size. */
11968 maxshift = innersize * 8;
11969
11970 if (isleftshift)
11971 {
11972 /* Left shift immediate value can be from 0 to <size>-1. */
11973 if (last_elt >= maxshift)
11974 return 0;
11975 }
11976 else
11977 {
11978 /* Right shift immediate value can be from 1 to <size>. */
11979 if (last_elt == 0 || last_elt > maxshift)
11980 return 0;
11981 }
11982
11983 if (elementwidth)
11984 *elementwidth = innersize * 8;
11985
11986 if (modconst)
11987 *modconst = CONST_VECTOR_ELT (op, 0);
11988
11989 return 1;
11990 }
11991
11992 /* Return a string suitable for output of Neon immediate logic operation
11993 MNEM. */
11994
11995 char *
11996 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
11997 int inverse, int quad)
11998 {
11999 int width, is_valid;
12000 static char templ[40];
12001
12002 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12003
12004 gcc_assert (is_valid != 0);
12005
12006 if (quad)
12007 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12008 else
12009 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12010
12011 return templ;
12012 }
12013
12014 /* Return a string suitable for output of Neon immediate shift operation
12015 (VSHR or VSHL) MNEM. */
12016
12017 char *
12018 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12019 machine_mode mode, int quad,
12020 bool isleftshift)
12021 {
12022 int width, is_valid;
12023 static char templ[40];
12024
12025 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12026 gcc_assert (is_valid != 0);
12027
12028 if (quad)
12029 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12030 else
12031 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12032
12033 return templ;
12034 }
12035
12036 /* Output a sequence of pairwise operations to implement a reduction.
12037 NOTE: We do "too much work" here, because pairwise operations work on two
12038 registers-worth of operands in one go. Unfortunately it does not seem possible
12039 to exploit those extra calculations to do the full operation in fewer steps.
12040 Although all vector elements of the result but the first are ignored, we
12041 actually calculate the same result in each of the elements. An alternative
12042 such as initially loading a vector with zero to use as each of the second
12043 operands would use up an additional register and take an extra instruction,
12044 for no particular gain. */
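/* For example, reducing a 4-element vector { a, b, c, d } with a pairwise
   add as REDUC takes two steps (writing V for OP1 and T for the intermediate
   register): the first emits reduc (t, v, v), giving { a+b, c+d, a+b, c+d },
   and the second emits reduc (op0, t, t), leaving a+b+c+d in every element,
   of which only element 0 is subsequently used.  */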
12045
12046 void
12047 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12048 rtx (*reduc) (rtx, rtx, rtx))
12049 {
12050 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12051 rtx tmpsum = op1;
12052
12053 for (i = parts / 2; i >= 1; i /= 2)
12054 {
12055 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12056 emit_insn (reduc (dest, tmpsum, tmpsum));
12057 tmpsum = dest;
12058 }
12059 }
12060
12061 /* If VALS is a vector constant that can be loaded into a register
12062 using VDUP, generate instructions to do so and return an RTX to
12063 assign to the register. Otherwise return NULL_RTX. */
12064
12065 static rtx
12066 neon_vdup_constant (rtx vals)
12067 {
12068 machine_mode mode = GET_MODE (vals);
12069 machine_mode inner_mode = GET_MODE_INNER (mode);
12070 rtx x;
12071
12072 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12073 return NULL_RTX;
12074
12075 if (!const_vec_duplicate_p (vals, &x))
12076 /* The elements are not all the same. We could handle repeating
12077 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12078 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12079 vdup.i16). */
12080 return NULL_RTX;
12081
12082 /* We can load this constant by using VDUP and a constant in a
12083 single ARM register. This will be cheaper than a vector
12084 load. */
12085
12086 x = copy_to_mode_reg (inner_mode, x);
12087 return gen_rtx_VEC_DUPLICATE (mode, x);
12088 }
12089
12090 /* Generate code to load VALS, which is a PARALLEL containing only
12091 constants (for vec_init) or CONST_VECTOR, efficiently into a
12092 register. Returns an RTX to copy into the register, or NULL_RTX
12093 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12094
12095 rtx
12096 neon_make_constant (rtx vals)
12097 {
12098 machine_mode mode = GET_MODE (vals);
12099 rtx target;
12100 rtx const_vec = NULL_RTX;
12101 int n_elts = GET_MODE_NUNITS (mode);
12102 int n_const = 0;
12103 int i;
12104
12105 if (GET_CODE (vals) == CONST_VECTOR)
12106 const_vec = vals;
12107 else if (GET_CODE (vals) == PARALLEL)
12108 {
12109 /* A CONST_VECTOR must contain only CONST_INTs and
12110 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12111 Only store valid constants in a CONST_VECTOR. */
12112 for (i = 0; i < n_elts; ++i)
12113 {
12114 rtx x = XVECEXP (vals, 0, i);
12115 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12116 n_const++;
12117 }
12118 if (n_const == n_elts)
12119 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12120 }
12121 else
12122 gcc_unreachable ();
12123
12124 if (const_vec != NULL
12125 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12126 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12127 return const_vec;
12128 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12129 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12130 pipeline cycle; creating the constant takes one or two ARM
12131 pipeline cycles. */
12132 return target;
12133 else if (const_vec != NULL_RTX)
12134 /* Load from constant pool. On Cortex-A8 this takes two cycles
12135 (for either double or quad vectors). We can not take advantage
12136 of single-cycle VLD1 because we need a PC-relative addressing
12137 mode. */
12138 return const_vec;
12139 else
12140 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12141 We can not construct an initializer. */
12142 return NULL_RTX;
12143 }
12144
12145 /* Initialize vector TARGET to VALS. */
12146
12147 void
12148 neon_expand_vector_init (rtx target, rtx vals)
12149 {
12150 machine_mode mode = GET_MODE (target);
12151 machine_mode inner_mode = GET_MODE_INNER (mode);
12152 int n_elts = GET_MODE_NUNITS (mode);
12153 int n_var = 0, one_var = -1;
12154 bool all_same = true;
12155 rtx x, mem;
12156 int i;
12157
12158 for (i = 0; i < n_elts; ++i)
12159 {
12160 x = XVECEXP (vals, 0, i);
12161 if (!CONSTANT_P (x))
12162 ++n_var, one_var = i;
12163
12164 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12165 all_same = false;
12166 }
12167
12168 if (n_var == 0)
12169 {
12170 rtx constant = neon_make_constant (vals);
12171 if (constant != NULL_RTX)
12172 {
12173 emit_move_insn (target, constant);
12174 return;
12175 }
12176 }
12177
12178 /* Splat a single non-constant element if we can. */
12179 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12180 {
12181 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12182 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12183 return;
12184 }
12185
12186 /* One field is non-constant. Load constant then overwrite varying
12187 field. This is more efficient than using the stack. */
12188 if (n_var == 1)
12189 {
12190 rtx copy = copy_rtx (vals);
12191 rtx index = GEN_INT (one_var);
12192
12193 /* Load constant part of vector, substitute neighboring value for
12194 varying element. */
12195 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12196 neon_expand_vector_init (target, copy);
12197
12198 /* Insert variable. */
12199 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12200 switch (mode)
12201 {
12202 case V8QImode:
12203 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12204 break;
12205 case V16QImode:
12206 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12207 break;
12208 case V4HImode:
12209 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12210 break;
12211 case V8HImode:
12212 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12213 break;
12214 case V2SImode:
12215 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12216 break;
12217 case V4SImode:
12218 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12219 break;
12220 case V2SFmode:
12221 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12222 break;
12223 case V4SFmode:
12224 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12225 break;
12226 case V2DImode:
12227 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12228 break;
12229 default:
12230 gcc_unreachable ();
12231 }
12232 return;
12233 }
12234
12235 /* Construct the vector in memory one field at a time
12236 and load the whole vector. */
12237 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12238 for (i = 0; i < n_elts; i++)
12239 emit_move_insn (adjust_address_nv (mem, inner_mode,
12240 i * GET_MODE_SIZE (inner_mode)),
12241 XVECEXP (vals, 0, i));
12242 emit_move_insn (target, mem);
12243 }
12244
12245 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise an
12246 error mentioning DESC if it doesn't. EXP indicates the source location,
12247 which includes the inlining history for intrinsics. */
12248
12249 static void
12250 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12251 const_tree exp, const char *desc)
12252 {
12253 HOST_WIDE_INT lane;
12254
12255 gcc_assert (CONST_INT_P (operand));
12256
12257 lane = INTVAL (operand);
12258
12259 if (lane < low || lane >= high)
12260 {
12261 if (exp)
12262 error ("%K%s %wd out of range %wd - %wd",
12263 exp, desc, lane, low, high - 1);
12264 else
12265 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12266 }
12267 }
12268
12269 /* Bounds-check lanes. */
12270
12271 void
12272 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12273 const_tree exp)
12274 {
12275 bounds_check (operand, low, high, exp, "lane");
12276 }
12277
12278 /* Bounds-check constants. */
12279
12280 void
12281 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12282 {
12283 bounds_check (operand, low, high, NULL_TREE, "constant");
12284 }
12285
12286 HOST_WIDE_INT
12287 neon_element_bits (machine_mode mode)
12288 {
12289 return GET_MODE_UNIT_BITSIZE (mode);
12290 }
12291
12292 \f
12293 /* Predicates for `match_operand' and `match_operator'. */
12294
12295 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12296 WB is true if full writeback address modes are allowed and is false
12297 if limited writeback address modes (POST_INC and PRE_DEC) are
12298 allowed. */
12299
12300 int
12301 arm_coproc_mem_operand (rtx op, bool wb)
12302 {
12303 rtx ind;
12304
12305 /* Reject eliminable registers. */
12306 if (! (reload_in_progress || reload_completed || lra_in_progress)
12307 && ( reg_mentioned_p (frame_pointer_rtx, op)
12308 || reg_mentioned_p (arg_pointer_rtx, op)
12309 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12310 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12311 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12312 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12313 return FALSE;
12314
12315 /* Constants are converted into offsets from labels. */
12316 if (!MEM_P (op))
12317 return FALSE;
12318
12319 ind = XEXP (op, 0);
12320
12321 if (reload_completed
12322 && (GET_CODE (ind) == LABEL_REF
12323 || (GET_CODE (ind) == CONST
12324 && GET_CODE (XEXP (ind, 0)) == PLUS
12325 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12326 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12327 return TRUE;
12328
12329 /* Match: (mem (reg)). */
12330 if (REG_P (ind))
12331 return arm_address_register_rtx_p (ind, 0);
12332
12333 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12334 acceptable in any case (subject to verification by
12335 arm_address_register_rtx_p). We need WB to be true to accept
12336 PRE_INC and POST_DEC. */
12337 if (GET_CODE (ind) == POST_INC
12338 || GET_CODE (ind) == PRE_DEC
12339 || (wb
12340 && (GET_CODE (ind) == PRE_INC
12341 || GET_CODE (ind) == POST_DEC)))
12342 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12343
12344 if (wb
12345 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12346 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12347 && GET_CODE (XEXP (ind, 1)) == PLUS
12348 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12349 ind = XEXP (ind, 1);
12350
12351 /* Match:
12352 (plus (reg)
12353 (const)). */
12354 if (GET_CODE (ind) == PLUS
12355 && REG_P (XEXP (ind, 0))
12356 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12357 && CONST_INT_P (XEXP (ind, 1))
12358 && INTVAL (XEXP (ind, 1)) > -1024
12359 && INTVAL (XEXP (ind, 1)) < 1024
12360 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12361 return TRUE;
12362
12363 return FALSE;
12364 }
12365
12366 /* Return TRUE if OP is a memory operand which we can load or store a vector
12367 to/from. TYPE is one of the following values:
12368 0 - Vector load/store (vldr)
12369 1 - Core registers (ldm)
12370 2 - Element/structure loads (vld1)
12371 */
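/* For example, with TYPE == 0 both (mem (post_inc (reg))) and
   (mem (plus (reg) (const_int 8))) are accepted; with TYPE == 2 the
   reg-plus-constant form is rejected, but a register post-increment such as
   (mem (post_modify (reg) (plus (reg) (reg)))) is allowed, matching the
   register-stride addressing of VLDn.  */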
12372 int
12373 neon_vector_mem_operand (rtx op, int type, bool strict)
12374 {
12375 rtx ind;
12376
12377 /* Reject eliminable registers. */
12378 if (strict && ! (reload_in_progress || reload_completed)
12379 && (reg_mentioned_p (frame_pointer_rtx, op)
12380 || reg_mentioned_p (arg_pointer_rtx, op)
12381 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12382 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12383 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12384 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12385 return FALSE;
12386
12387 /* Constants are converted into offsets from labels. */
12388 if (!MEM_P (op))
12389 return FALSE;
12390
12391 ind = XEXP (op, 0);
12392
12393 if (reload_completed
12394 && (GET_CODE (ind) == LABEL_REF
12395 || (GET_CODE (ind) == CONST
12396 && GET_CODE (XEXP (ind, 0)) == PLUS
12397 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12398 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12399 return TRUE;
12400
12401 /* Match: (mem (reg)). */
12402 if (REG_P (ind))
12403 return arm_address_register_rtx_p (ind, 0);
12404
12405 /* Allow post-increment with Neon registers. */
12406 if ((type != 1 && GET_CODE (ind) == POST_INC)
12407 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12408 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12409
12410 /* Allow post-increment by register for VLDn */
12411 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12412 && GET_CODE (XEXP (ind, 1)) == PLUS
12413 && REG_P (XEXP (XEXP (ind, 1), 1)))
12414 return true;
12415
12416 /* Match:
12417 (plus (reg)
12418 (const)). */
12419 if (type == 0
12420 && GET_CODE (ind) == PLUS
12421 && REG_P (XEXP (ind, 0))
12422 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12423 && CONST_INT_P (XEXP (ind, 1))
12424 && INTVAL (XEXP (ind, 1)) > -1024
12425 /* For quad modes, we restrict the constant offset to be slightly less
12426 than what the instruction format permits. We have no such constraint
12427 on double mode offsets. (This must match arm_legitimate_index_p.) */
12428 && (INTVAL (XEXP (ind, 1))
12429 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12430 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12431 return TRUE;
12432
12433 return FALSE;
12434 }
12435
12436 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12437 type. */
12438 int
12439 neon_struct_mem_operand (rtx op)
12440 {
12441 rtx ind;
12442
12443 /* Reject eliminable registers. */
12444 if (! (reload_in_progress || reload_completed)
12445 && ( reg_mentioned_p (frame_pointer_rtx, op)
12446 || reg_mentioned_p (arg_pointer_rtx, op)
12447 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12448 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12449 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12450 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12451 return FALSE;
12452
12453 /* Constants are converted into offsets from labels. */
12454 if (!MEM_P (op))
12455 return FALSE;
12456
12457 ind = XEXP (op, 0);
12458
12459 if (reload_completed
12460 && (GET_CODE (ind) == LABEL_REF
12461 || (GET_CODE (ind) == CONST
12462 && GET_CODE (XEXP (ind, 0)) == PLUS
12463 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12464 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12465 return TRUE;
12466
12467 /* Match: (mem (reg)). */
12468 if (REG_P (ind))
12469 return arm_address_register_rtx_p (ind, 0);
12470
12471 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12472 if (GET_CODE (ind) == POST_INC
12473 || GET_CODE (ind) == PRE_DEC)
12474 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12475
12476 return FALSE;
12477 }
12478
12479 /* Return true if X is a register that will be eliminated later on. */
12480 int
12481 arm_eliminable_register (rtx x)
12482 {
12483 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12484 || REGNO (x) == ARG_POINTER_REGNUM
12485 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12486 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12487 }
12488
12489 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12490 coprocessor registers. Otherwise return NO_REGS. */
12491
12492 enum reg_class
12493 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12494 {
12495 if (mode == HFmode)
12496 {
12497 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12498 return GENERAL_REGS;
12499 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12500 return NO_REGS;
12501 return GENERAL_REGS;
12502 }
12503
12504 /* The neon move patterns handle all legitimate vector and struct
12505 addresses. */
12506 if (TARGET_NEON
12507 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12508 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12509 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12510 || VALID_NEON_STRUCT_MODE (mode)))
12511 return NO_REGS;
12512
12513 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12514 return NO_REGS;
12515
12516 return GENERAL_REGS;
12517 }
12518
12519 /* Values which must be returned in the most-significant end of the return
12520 register. */
12521
12522 static bool
12523 arm_return_in_msb (const_tree valtype)
12524 {
12525 return (TARGET_AAPCS_BASED
12526 && BYTES_BIG_ENDIAN
12527 && (AGGREGATE_TYPE_P (valtype)
12528 || TREE_CODE (valtype) == COMPLEX_TYPE
12529 || FIXED_POINT_TYPE_P (valtype)));
12530 }
12531
12532 /* Return TRUE if X references a SYMBOL_REF. */
12533 int
12534 symbol_mentioned_p (rtx x)
12535 {
12536 const char * fmt;
12537 int i;
12538
12539 if (GET_CODE (x) == SYMBOL_REF)
12540 return 1;
12541
12542 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12543 are constant offsets, not symbols. */
12544 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12545 return 0;
12546
12547 fmt = GET_RTX_FORMAT (GET_CODE (x));
12548
12549 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12550 {
12551 if (fmt[i] == 'E')
12552 {
12553 int j;
12554
12555 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12556 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12557 return 1;
12558 }
12559 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12560 return 1;
12561 }
12562
12563 return 0;
12564 }
12565
12566 /* Return TRUE if X references a LABEL_REF. */
12567 int
12568 label_mentioned_p (rtx x)
12569 {
12570 const char * fmt;
12571 int i;
12572
12573 if (GET_CODE (x) == LABEL_REF)
12574 return 1;
12575
12576 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12577 instruction, but they are constant offsets, not symbols. */
12578 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12579 return 0;
12580
12581 fmt = GET_RTX_FORMAT (GET_CODE (x));
12582 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12583 {
12584 if (fmt[i] == 'E')
12585 {
12586 int j;
12587
12588 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12589 if (label_mentioned_p (XVECEXP (x, i, j)))
12590 return 1;
12591 }
12592 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12593 return 1;
12594 }
12595
12596 return 0;
12597 }
12598
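/* Return TRUE if X is an UNSPEC_TLS reference, possibly wrapped in a
   CONST.  */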
12599 int
12600 tls_mentioned_p (rtx x)
12601 {
12602 switch (GET_CODE (x))
12603 {
12604 case CONST:
12605 return tls_mentioned_p (XEXP (x, 0));
12606
12607 case UNSPEC:
12608 if (XINT (x, 1) == UNSPEC_TLS)
12609 return 1;
12610
12611 /* Fall through. */
12612 default:
12613 return 0;
12614 }
12615 }
12616
12617 /* Must not copy any rtx that uses a pc-relative address.
12618 Also, disallow copying of load-exclusive instructions that
12619 may appear after splitting of compare-and-swap-style operations
12620 so as to prevent those loops from being transformed away from their
12621 canonical forms (see PR 69904). */
12622
12623 static bool
12624 arm_cannot_copy_insn_p (rtx_insn *insn)
12625 {
12626 /* The tls call insn cannot be copied, as it is paired with a data
12627 word. */
12628 if (recog_memoized (insn) == CODE_FOR_tlscall)
12629 return true;
12630
12631 subrtx_iterator::array_type array;
12632 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12633 {
12634 const_rtx x = *iter;
12635 if (GET_CODE (x) == UNSPEC
12636 && (XINT (x, 1) == UNSPEC_PIC_BASE
12637 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12638 return true;
12639 }
12640
12641 rtx set = single_set (insn);
12642 if (set)
12643 {
12644 rtx src = SET_SRC (set);
12645 if (GET_CODE (src) == ZERO_EXTEND)
12646 src = XEXP (src, 0);
12647
12648 /* Catch the load-exclusive and load-acquire operations. */
12649 if (GET_CODE (src) == UNSPEC_VOLATILE
12650 && (XINT (src, 1) == VUNSPEC_LL
12651 || XINT (src, 1) == VUNSPEC_LAX))
12652 return true;
12653 }
12654 return false;
12655 }
12656
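/* Map the code of the min/max rtx X to the comparison under which the
   first operand is selected: SMAX -> GE, SMIN -> LE, UMIN -> LEU,
   UMAX -> GEU.  */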
12657 enum rtx_code
12658 minmax_code (rtx x)
12659 {
12660 enum rtx_code code = GET_CODE (x);
12661
12662 switch (code)
12663 {
12664 case SMAX:
12665 return GE;
12666 case SMIN:
12667 return LE;
12668 case UMIN:
12669 return LEU;
12670 case UMAX:
12671 return GEU;
12672 default:
12673 gcc_unreachable ();
12674 }
12675 }
12676
12677 /* Match a pair of min/max operators that can be implemented via usat/ssat. */
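/* For instance, a low/high bound pair of [0, 255] sets *MASK to 8 and
   *SIGNED_SAT to false (usat #8), while [-128, 127] sets *MASK to 8 and
   *SIGNED_SAT to true (ssat #8).  */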
12678
12679 bool
12680 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12681 int *mask, bool *signed_sat)
12682 {
12683 /* The high bound must be a power of two minus one. */
12684 int log = exact_log2 (INTVAL (hi_bound) + 1);
12685 if (log == -1)
12686 return false;
12687
12688 /* The low bound is either zero (for usat) or one less than the
12689 negation of the high bound (for ssat). */
12690 if (INTVAL (lo_bound) == 0)
12691 {
12692 if (mask)
12693 *mask = log;
12694 if (signed_sat)
12695 *signed_sat = false;
12696
12697 return true;
12698 }
12699
12700 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12701 {
12702 if (mask)
12703 *mask = log + 1;
12704 if (signed_sat)
12705 *signed_sat = true;
12706
12707 return true;
12708 }
12709
12710 return false;
12711 }
12712
12713 /* Return 1 if the memory locations A and B are adjacent. */
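/* Two references such as [Rn, #4] and [Rn, #8] off the same base register
   are considered adjacent, subject to the checks below on volatility,
   eliminable base registers and the load-delay-slot heuristic.  */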
12714 int
12715 adjacent_mem_locations (rtx a, rtx b)
12716 {
12717 /* We don't guarantee to preserve the order of these memory refs. */
12718 if (volatile_refs_p (a) || volatile_refs_p (b))
12719 return 0;
12720
12721 if ((REG_P (XEXP (a, 0))
12722 || (GET_CODE (XEXP (a, 0)) == PLUS
12723 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12724 && (REG_P (XEXP (b, 0))
12725 || (GET_CODE (XEXP (b, 0)) == PLUS
12726 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12727 {
12728 HOST_WIDE_INT val0 = 0, val1 = 0;
12729 rtx reg0, reg1;
12730 int val_diff;
12731
12732 if (GET_CODE (XEXP (a, 0)) == PLUS)
12733 {
12734 reg0 = XEXP (XEXP (a, 0), 0);
12735 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12736 }
12737 else
12738 reg0 = XEXP (a, 0);
12739
12740 if (GET_CODE (XEXP (b, 0)) == PLUS)
12741 {
12742 reg1 = XEXP (XEXP (b, 0), 0);
12743 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12744 }
12745 else
12746 reg1 = XEXP (b, 0);
12747
12748 /* Don't accept any offset that will require multiple
12749 instructions to handle, since this would cause the
12750 arith_adjacentmem pattern to output an overlong sequence. */
12751 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12752 return 0;
12753
12754 /* Don't allow an eliminable register: register elimination can make
12755 the offset too large. */
12756 if (arm_eliminable_register (reg0))
12757 return 0;
12758
12759 val_diff = val1 - val0;
12760
12761 if (arm_ld_sched)
12762 {
12763 /* If the target has load delay slots, then there's no benefit
12764 to using an ldm instruction unless the offset is zero and
12765 we are optimizing for size. */
12766 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12767 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12768 && (val_diff == 4 || val_diff == -4));
12769 }
12770
12771 return ((REGNO (reg0) == REGNO (reg1))
12772 && (val_diff == 4 || val_diff == -4));
12773 }
12774
12775 return 0;
12776 }
12777
12778 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12779 for load operations, false for store operations. CONSECUTIVE is true
12780 if the register numbers in the operation must be consecutive in the register
12781 bank. RETURN_PC is true if the value is to be loaded into the PC.
12782 The pattern we are trying to match for load is:
12783 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12784 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12785 :
12786 :
12787 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12788 ]
12789 where
12790 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12791 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12792 3. If consecutive is TRUE, then for kth register being loaded,
12793 REGNO (R_dk) = REGNO (R_d0) + k.
12794 The pattern for store is similar. */
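/* As a concrete (schematic) illustration, an SImode two-register load such
   as "ldm r0, {r4, r5}" is matched as

     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0)
                                                 (const_int 4))))]).  */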
12795 bool
12796 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12797 bool consecutive, bool return_pc)
12798 {
12799 HOST_WIDE_INT count = XVECLEN (op, 0);
12800 rtx reg, mem, addr;
12801 unsigned regno;
12802 unsigned first_regno;
12803 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12804 rtx elt;
12805 bool addr_reg_in_reglist = false;
12806 bool update = false;
12807 int reg_increment;
12808 int offset_adj;
12809 int regs_per_val;
12810
12811 /* If not in SImode, then registers must be consecutive
12812 (e.g., VLDM instructions for DFmode). */
12813 gcc_assert ((mode == SImode) || consecutive);
12814 /* Setting return_pc for stores is illegal. */
12815 gcc_assert (!return_pc || load);
12816
12817 /* Set up the increments and the regs per val based on the mode. */
12818 reg_increment = GET_MODE_SIZE (mode);
12819 regs_per_val = reg_increment / 4;
12820 offset_adj = return_pc ? 1 : 0;
12821
12822 if (count <= 1
12823 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12824 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12825 return false;
12826
12827 /* Check if this is a write-back. */
12828 elt = XVECEXP (op, 0, offset_adj);
12829 if (GET_CODE (SET_SRC (elt)) == PLUS)
12830 {
12831 i++;
12832 base = 1;
12833 update = true;
12834
12835 /* The offset adjustment must be the number of registers being
12836 popped times the size of a single register. */
12837 if (!REG_P (SET_DEST (elt))
12838 || !REG_P (XEXP (SET_SRC (elt), 0))
12839 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12840 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12841 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12842 ((count - 1 - offset_adj) * reg_increment))
12843 return false;
12844 }
12845
12846 i = i + offset_adj;
12847 base = base + offset_adj;
12848 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12849 success depends on the type: VLDM can do just one reg,
12850 LDM must do at least two. */
12851 if ((count <= i) && (mode == SImode))
12852 return false;
12853
12854 elt = XVECEXP (op, 0, i - 1);
12855 if (GET_CODE (elt) != SET)
12856 return false;
12857
12858 if (load)
12859 {
12860 reg = SET_DEST (elt);
12861 mem = SET_SRC (elt);
12862 }
12863 else
12864 {
12865 reg = SET_SRC (elt);
12866 mem = SET_DEST (elt);
12867 }
12868
12869 if (!REG_P (reg) || !MEM_P (mem))
12870 return false;
12871
12872 regno = REGNO (reg);
12873 first_regno = regno;
12874 addr = XEXP (mem, 0);
12875 if (GET_CODE (addr) == PLUS)
12876 {
12877 if (!CONST_INT_P (XEXP (addr, 1)))
12878 return false;
12879
12880 offset = INTVAL (XEXP (addr, 1));
12881 addr = XEXP (addr, 0);
12882 }
12883
12884 if (!REG_P (addr))
12885 return false;
12886
12887 /* Don't allow SP to be loaded unless it is also the base register. It
12888 guarantees that SP is reset correctly when an LDM instruction
12889 is interrupted. Otherwise, we might end up with a corrupt stack. */
12890 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12891 return false;
12892
12893 for (; i < count; i++)
12894 {
12895 elt = XVECEXP (op, 0, i);
12896 if (GET_CODE (elt) != SET)
12897 return false;
12898
12899 if (load)
12900 {
12901 reg = SET_DEST (elt);
12902 mem = SET_SRC (elt);
12903 }
12904 else
12905 {
12906 reg = SET_SRC (elt);
12907 mem = SET_DEST (elt);
12908 }
12909
12910 if (!REG_P (reg)
12911 || GET_MODE (reg) != mode
12912 || REGNO (reg) <= regno
12913 || (consecutive
12914 && (REGNO (reg) !=
12915 (unsigned int) (first_regno + regs_per_val * (i - base))))
12916 /* Don't allow SP to be loaded unless it is also the base register. It
12917 guarantees that SP is reset correctly when an LDM instruction
12918 is interrupted. Otherwise, we might end up with a corrupt stack. */
12919 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12920 || !MEM_P (mem)
12921 || GET_MODE (mem) != mode
12922 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12923 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12924 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12925 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12926 offset + (i - base) * reg_increment))
12927 && (!REG_P (XEXP (mem, 0))
12928 || offset + (i - base) * reg_increment != 0)))
12929 return false;
12930
12931 regno = REGNO (reg);
12932 if (regno == REGNO (addr))
12933 addr_reg_in_reglist = true;
12934 }
12935
12936 if (load)
12937 {
12938 if (update && addr_reg_in_reglist)
12939 return false;
12940
12941 /* For Thumb-1, the address register is always modified, either by write-back
12942 or by an explicit load. If the pattern does not describe an update,
12943 then the address register must be in the list of loaded registers. */
12944 if (TARGET_THUMB1)
12945 return update || addr_reg_in_reglist;
12946 }
12947
12948 return true;
12949 }
12950
12951 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12952 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12953 instruction. ADD_OFFSET is nonzero if the base address register needs
12954 to be modified with an add instruction before we can use it. */
12955
12956 static bool
12957 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12958 int nops, HOST_WIDE_INT add_offset)
12959 {
12960 /* For the ARM8, ARM9 and StrongARM, two ldr instructions are faster than an ldm
12961 if the offset isn't small enough. The reason 2 ldrs are faster
12962 is because these ARMs are able to do more than one cache access
12963 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12964 whilst the ARM8 has a double bandwidth cache. This means that
12965 these cores can do both an instruction fetch and a data fetch in
12966 a single cycle, so the trick of calculating the address into a
12967 scratch register (one of the result regs) and then doing a load
12968 multiple actually becomes slower (and no smaller in code size).
12969 That is the transformation
12970
12971 ldr rd1, [rbase + offset]
12972 ldr rd2, [rbase + offset + 4]
12973
12974 to
12975
12976 add rd1, rbase, offset
12977 ldmia rd1, {rd1, rd2}
12978
12979 produces worse code -- '3 cycles + any stalls on rd2' instead of
12980 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12981 access per cycle, the first sequence could never complete in less
12982 than 6 cycles, whereas the ldm sequence would only take 5 and
12983 would make better use of sequential accesses if not hitting the
12984 cache.
12985
12986 We cheat here and test 'arm_ld_sched' which we currently know to
12987 only be true for the ARM8, ARM9 and StrongARM. If this ever
12988 changes, then the test below needs to be reworked. */
12989 if (nops == 2 && arm_ld_sched && add_offset != 0)
12990 return false;
12991
12992 /* XScale has load-store double instructions, but they have stricter
12993 alignment requirements than load-store multiple, so we cannot
12994 use them.
12995
12996 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12997 the pipeline until completion.
12998
12999 NREGS CYCLES
13000 1 3
13001 2 4
13002 3 5
13003 4 6
13004
13005 An ldr instruction takes 1-3 cycles, but does not block the
13006 pipeline.
13007
13008 NREGS CYCLES
13009 1 1-3
13010 2 2-6
13011 3 3-9
13012 4 4-12
13013
13014 Best case ldr will always win. However, the more ldr instructions
13015 we issue, the less likely we are to be able to schedule them well.
13016 Using ldr instructions also increases code size.
13017
13018 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13019 for counts of 3 or 4 regs. */
13020 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13021 return false;
13022 return true;
13023 }
13024
13025 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13026 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13027 an array ORDER which describes the sequence to use when accessing the
13028 offsets that produces an ascending order. In this sequence, each
13029 offset must be larger by exactly 4 than the previous one. ORDER[0]
13030 must have been filled in with the lowest offset by the caller.
13031 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13032 we use to verify that ORDER produces an ascending order of registers.
13033 Return true if it was possible to construct such an order, false if
13034 not. */
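/* For example, given NOPS == 3, UNSORTED_OFFSETS == {8, 0, 4} and
   ORDER[0] == 1 (the index of the lowest offset), ORDER is filled in
   as {1, 2, 0}.  With offsets {8, 0, 12} no valid order exists and
   false is returned.  */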
13035
13036 static bool
13037 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13038 int *unsorted_regs)
13039 {
13040 int i;
13041 for (i = 1; i < nops; i++)
13042 {
13043 int j;
13044
13045 order[i] = order[i - 1];
13046 for (j = 0; j < nops; j++)
13047 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13048 {
13049 /* We must find exactly one offset that is higher than the
13050 previous one by 4. */
13051 if (order[i] != order[i - 1])
13052 return false;
13053 order[i] = j;
13054 }
13055 if (order[i] == order[i - 1])
13056 return false;
13057 /* The register numbers must be ascending. */
13058 if (unsorted_regs != NULL
13059 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13060 return false;
13061 }
13062 return true;
13063 }
13064
13065 /* Used to determine in a peephole whether a sequence of load
13066 instructions can be changed into a load-multiple instruction.
13067 NOPS is the number of separate load instructions we are examining. The
13068 first NOPS entries in OPERANDS are the destination registers, the
13069 next NOPS entries are memory operands. If this function is
13070 successful, *BASE is set to the common base register of the memory
13071 accesses; *LOAD_OFFSET is set to the first memory location's offset
13072 from that base register.
13073 REGS is an array filled in with the destination register numbers.
13074 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13075 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13076 the sequence of registers in REGS matches the loads from ascending memory
13077 locations, and the function verifies that the register numbers are
13078 themselves ascending. If CHECK_REGS is false, the register numbers
13079 are stored in the order they are found in the operands. */
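/* The return value is 0 on failure, otherwise a code identifying the
   addressing mode that can be used: 1 for ldmia (lowest offset 0), 2 for
   ldmib (lowest offset 4), 3 for ldmda (highest offset 0), 4 for ldmdb
   (highest offset -4), or 5 if the base address must first be adjusted
   with an add instruction.  */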
13080 static int
13081 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13082 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13083 {
13084 int unsorted_regs[MAX_LDM_STM_OPS];
13085 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13086 int order[MAX_LDM_STM_OPS];
13087 rtx base_reg_rtx = NULL;
13088 int base_reg = -1;
13089 int i, ldm_case;
13090
13091 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13092 easily extended if required. */
13093 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13094
13095 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13096
13097 /* Loop over the operands and check that the memory references are
13098 suitable (i.e. immediate offsets from the same base register). At
13099 the same time, extract the target register, and the memory
13100 offsets. */
13101 for (i = 0; i < nops; i++)
13102 {
13103 rtx reg;
13104 rtx offset;
13105
13106 /* Convert a subreg of a mem into the mem itself. */
13107 if (GET_CODE (operands[nops + i]) == SUBREG)
13108 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13109
13110 gcc_assert (MEM_P (operands[nops + i]));
13111
13112 /* Don't reorder volatile memory references; it doesn't seem worth
13113 looking for the case where the order is ok anyway. */
13114 if (MEM_VOLATILE_P (operands[nops + i]))
13115 return 0;
13116
13117 offset = const0_rtx;
13118
13119 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13120 || (GET_CODE (reg) == SUBREG
13121 && REG_P (reg = SUBREG_REG (reg))))
13122 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13123 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13124 || (GET_CODE (reg) == SUBREG
13125 && REG_P (reg = SUBREG_REG (reg))))
13126 && (CONST_INT_P (offset
13127 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13128 {
13129 if (i == 0)
13130 {
13131 base_reg = REGNO (reg);
13132 base_reg_rtx = reg;
13133 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13134 return 0;
13135 }
13136 else if (base_reg != (int) REGNO (reg))
13137 /* Not addressed from the same base register. */
13138 return 0;
13139
13140 unsorted_regs[i] = (REG_P (operands[i])
13141 ? REGNO (operands[i])
13142 : REGNO (SUBREG_REG (operands[i])));
13143
13144 /* If it isn't an integer register, or if it overwrites the
13145 base register but isn't the last insn in the list, then
13146 we can't do this. */
13147 if (unsorted_regs[i] < 0
13148 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13149 || unsorted_regs[i] > 14
13150 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13151 return 0;
13152
13153 /* Don't allow SP to be loaded unless it is also the base
13154 register. It guarantees that SP is reset correctly when
13155 an LDM instruction is interrupted. Otherwise, we might
13156 end up with a corrupt stack. */
13157 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13158 return 0;
13159
13160 unsorted_offsets[i] = INTVAL (offset);
13161 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13162 order[0] = i;
13163 }
13164 else
13165 /* Not a suitable memory address. */
13166 return 0;
13167 }
13168
13169 /* All the useful information has now been extracted from the
13170 operands into unsorted_regs and unsorted_offsets; additionally,
13171 order[0] has been set to the lowest offset in the list. Sort
13172 the offsets into order, verifying that they are adjacent, and
13173 check that the register numbers are ascending. */
13174 if (!compute_offset_order (nops, unsorted_offsets, order,
13175 check_regs ? unsorted_regs : NULL))
13176 return 0;
13177
13178 if (saved_order)
13179 memcpy (saved_order, order, sizeof order);
13180
13181 if (base)
13182 {
13183 *base = base_reg;
13184
13185 for (i = 0; i < nops; i++)
13186 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13187
13188 *load_offset = unsorted_offsets[order[0]];
13189 }
13190
13191 if (TARGET_THUMB1
13192 && !peep2_reg_dead_p (nops, base_reg_rtx))
13193 return 0;
13194
13195 if (unsorted_offsets[order[0]] == 0)
13196 ldm_case = 1; /* ldmia */
13197 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13198 ldm_case = 2; /* ldmib */
13199 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13200 ldm_case = 3; /* ldmda */
13201 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13202 ldm_case = 4; /* ldmdb */
13203 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13204 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13205 ldm_case = 5;
13206 else
13207 return 0;
13208
13209 if (!multiple_operation_profitable_p (false, nops,
13210 ldm_case == 5
13211 ? unsorted_offsets[order[0]] : 0))
13212 return 0;
13213
13214 return ldm_case;
13215 }
13216
13217 /* Used to determine in a peephole whether a sequence of store instructions can
13218 be changed into a store-multiple instruction.
13219 NOPS is the number of separate store instructions we are examining.
13220 NOPS_TOTAL is the total number of instructions recognized by the peephole
13221 pattern.
13222 The first NOPS entries in OPERANDS are the source registers, the next
13223 NOPS entries are memory operands. If this function is successful, *BASE is
13224 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13225 to the first memory location's offset from that base register. REGS is an
13226 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13227 likewise filled with the corresponding rtx's.
13228 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13229 numbers to an ascending order of stores.
13230 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13231 from ascending memory locations, and the function verifies that the register
13232 numbers are themselves ascending. If CHECK_REGS is false, the register
13233 numbers are stored in the order they are found in the operands. */
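/* The return value is 0 on failure, otherwise a code identifying the
   addressing mode: 1 for stmia (lowest offset 0), 2 for stmib (lowest
   offset 4), 3 for stmda (highest offset 0) or 4 for stmdb (highest
   offset -4).  */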
13234 static int
13235 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13236 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13237 HOST_WIDE_INT *load_offset, bool check_regs)
13238 {
13239 int unsorted_regs[MAX_LDM_STM_OPS];
13240 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13241 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13242 int order[MAX_LDM_STM_OPS];
13243 int base_reg = -1;
13244 rtx base_reg_rtx = NULL;
13245 int i, stm_case;
13246
13247 /* Write-back of the base register is currently only supported for Thumb-1. */
13248 int base_writeback = TARGET_THUMB1;
13249
13250 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13251 easily extended if required. */
13252 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13253
13254 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13255
13256 /* Loop over the operands and check that the memory references are
13257 suitable (i.e. immediate offsets from the same base register). At
13258 the same time, extract the target register, and the memory
13259 offsets. */
13260 for (i = 0; i < nops; i++)
13261 {
13262 rtx reg;
13263 rtx offset;
13264
13265 /* Convert a subreg of a mem into the mem itself. */
13266 if (GET_CODE (operands[nops + i]) == SUBREG)
13267 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13268
13269 gcc_assert (MEM_P (operands[nops + i]));
13270
13271 /* Don't reorder volatile memory references; it doesn't seem worth
13272 looking for the case where the order is ok anyway. */
13273 if (MEM_VOLATILE_P (operands[nops + i]))
13274 return 0;
13275
13276 offset = const0_rtx;
13277
13278 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13279 || (GET_CODE (reg) == SUBREG
13280 && REG_P (reg = SUBREG_REG (reg))))
13281 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13282 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13283 || (GET_CODE (reg) == SUBREG
13284 && REG_P (reg = SUBREG_REG (reg))))
13285 && (CONST_INT_P (offset
13286 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13287 {
13288 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13289 ? operands[i] : SUBREG_REG (operands[i]));
13290 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13291
13292 if (i == 0)
13293 {
13294 base_reg = REGNO (reg);
13295 base_reg_rtx = reg;
13296 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13297 return 0;
13298 }
13299 else if (base_reg != (int) REGNO (reg))
13300 /* Not addressed from the same base register. */
13301 return 0;
13302
13303 /* If it isn't an integer register, then we can't do this. */
13304 if (unsorted_regs[i] < 0
13305 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13306 /* The effects are unpredictable if the base register is
13307 both updated and stored. */
13308 || (base_writeback && unsorted_regs[i] == base_reg)
13309 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13310 || unsorted_regs[i] > 14)
13311 return 0;
13312
13313 unsorted_offsets[i] = INTVAL (offset);
13314 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13315 order[0] = i;
13316 }
13317 else
13318 /* Not a suitable memory address. */
13319 return 0;
13320 }
13321
13322 /* All the useful information has now been extracted from the
13323 operands into unsorted_regs and unsorted_offsets; additionally,
13324 order[0] has been set to the lowest offset in the list. Sort
13325 the offsets into order, verifying that they are adjacent, and
13326 check that the register numbers are ascending. */
13327 if (!compute_offset_order (nops, unsorted_offsets, order,
13328 check_regs ? unsorted_regs : NULL))
13329 return 0;
13330
13331 if (saved_order)
13332 memcpy (saved_order, order, sizeof order);
13333
13334 if (base)
13335 {
13336 *base = base_reg;
13337
13338 for (i = 0; i < nops; i++)
13339 {
13340 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13341 if (reg_rtxs)
13342 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13343 }
13344
13345 *load_offset = unsorted_offsets[order[0]];
13346 }
13347
13348 if (TARGET_THUMB1
13349 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13350 return 0;
13351
13352 if (unsorted_offsets[order[0]] == 0)
13353 stm_case = 1; /* stmia */
13354 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13355 stm_case = 2; /* stmib */
13356 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13357 stm_case = 3; /* stmda */
13358 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13359 stm_case = 4; /* stmdb */
13360 else
13361 return 0;
13362
13363 if (!multiple_operation_profitable_p (false, nops, 0))
13364 return 0;
13365
13366 return stm_case;
13367 }
13368 \f
13369 /* Routines for use in generating RTL. */
13370
13371 /* Generate a load-multiple instruction. COUNT is the number of loads in
13372 the instruction; REGS and MEMS are arrays containing the operands.
13373 BASEREG is the base register to be used in addressing the memory operands.
13374 WBACK_OFFSET is nonzero if the instruction should update the base
13375 register. */
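/* Schematically, with COUNT == 2, REGS == {4, 5}, BASEREG == (reg:SI 0)
   and WBACK_OFFSET == 8, the result is of the form

     (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])

   whereas if the multiple operation is judged unprofitable, a plain
   sequence of single loads (plus the base update) is returned instead.  */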
13376
13377 static rtx
13378 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13379 HOST_WIDE_INT wback_offset)
13380 {
13381 int i = 0, j;
13382 rtx result;
13383
13384 if (!multiple_operation_profitable_p (false, count, 0))
13385 {
13386 rtx seq;
13387
13388 start_sequence ();
13389
13390 for (i = 0; i < count; i++)
13391 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13392
13393 if (wback_offset != 0)
13394 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13395
13396 seq = get_insns ();
13397 end_sequence ();
13398
13399 return seq;
13400 }
13401
13402 result = gen_rtx_PARALLEL (VOIDmode,
13403 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13404 if (wback_offset != 0)
13405 {
13406 XVECEXP (result, 0, 0)
13407 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13408 i = 1;
13409 count++;
13410 }
13411
13412 for (j = 0; i < count; i++, j++)
13413 XVECEXP (result, 0, i)
13414 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13415
13416 return result;
13417 }
13418
13419 /* Generate a store-multiple instruction. COUNT is the number of stores in
13420 the instruction; REGS and MEMS are arrays containing the operands.
13421 BASEREG is the base register to be used in addressing the memory operands.
13422 WBACK_OFFSET is nonzero if the instruction should update the base
13423 register. */
13424
13425 static rtx
13426 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13427 HOST_WIDE_INT wback_offset)
13428 {
13429 int i = 0, j;
13430 rtx result;
13431
13432 if (GET_CODE (basereg) == PLUS)
13433 basereg = XEXP (basereg, 0);
13434
13435 if (!multiple_operation_profitable_p (false, count, 0))
13436 {
13437 rtx seq;
13438
13439 start_sequence ();
13440
13441 for (i = 0; i < count; i++)
13442 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13443
13444 if (wback_offset != 0)
13445 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13446
13447 seq = get_insns ();
13448 end_sequence ();
13449
13450 return seq;
13451 }
13452
13453 result = gen_rtx_PARALLEL (VOIDmode,
13454 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13455 if (wback_offset != 0)
13456 {
13457 XVECEXP (result, 0, 0)
13458 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13459 i = 1;
13460 count++;
13461 }
13462
13463 for (j = 0; i < count; i++, j++)
13464 XVECEXP (result, 0, i)
13465 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13466
13467 return result;
13468 }
13469
13470 /* Generate either a load-multiple or a store-multiple instruction. This
13471 function can be used in situations where we can start with a single MEM
13472 rtx and adjust its address upwards.
13473 COUNT is the number of operations in the instruction, not counting a
13474 possible update of the base register. REGS is an array containing the
13475 register operands.
13476 BASEREG is the base register to be used in addressing the memory operands,
13477 which are constructed from BASEMEM.
13478 WRITE_BACK specifies whether the generated instruction should include an
13479 update of the base register.
13480 OFFSETP is used to pass an offset to and from this function; this offset
13481 is not used when constructing the address (instead BASEMEM should have an
13482 appropriate offset in its address); it is used only for setting
13483 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13484
13485 static rtx
13486 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13487 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13488 {
13489 rtx mems[MAX_LDM_STM_OPS];
13490 HOST_WIDE_INT offset = *offsetp;
13491 int i;
13492
13493 gcc_assert (count <= MAX_LDM_STM_OPS);
13494
13495 if (GET_CODE (basereg) == PLUS)
13496 basereg = XEXP (basereg, 0);
13497
13498 for (i = 0; i < count; i++)
13499 {
13500 rtx addr = plus_constant (Pmode, basereg, i * 4);
13501 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13502 offset += 4;
13503 }
13504
13505 if (write_back)
13506 *offsetp = offset;
13507
13508 if (is_load)
13509 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13510 write_back ? 4 * count : 0);
13511 else
13512 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13513 write_back ? 4 * count : 0);
13514 }
13515
13516 rtx
13517 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13518 rtx basemem, HOST_WIDE_INT *offsetp)
13519 {
13520 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13521 offsetp);
13522 }
13523
13524 rtx
13525 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13526 rtx basemem, HOST_WIDE_INT *offsetp)
13527 {
13528 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13529 offsetp);
13530 }
13531
13532 /* Called from a peephole2 expander to turn a sequence of loads into an
13533 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13534 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13535 is true if we can reorder the registers because they are subsequently used
13536 commutatively.
13537 Returns true iff we could generate a new instruction. */
13538
13539 bool
13540 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13541 {
13542 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13543 rtx mems[MAX_LDM_STM_OPS];
13544 int i, j, base_reg;
13545 rtx base_reg_rtx;
13546 HOST_WIDE_INT offset;
13547 int write_back = FALSE;
13548 int ldm_case;
13549 rtx addr;
13550
13551 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13552 &base_reg, &offset, !sort_regs);
13553
13554 if (ldm_case == 0)
13555 return false;
13556
13557 if (sort_regs)
13558 for (i = 0; i < nops - 1; i++)
13559 for (j = i + 1; j < nops; j++)
13560 if (regs[i] > regs[j])
13561 {
13562 int t = regs[i];
13563 regs[i] = regs[j];
13564 regs[j] = t;
13565 }
13566 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13567
13568 if (TARGET_THUMB1)
13569 {
13570 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13571 gcc_assert (ldm_case == 1 || ldm_case == 5);
13572 write_back = TRUE;
13573 }
13574
13575 if (ldm_case == 5)
13576 {
13577 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13578 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13579 offset = 0;
13580 if (!TARGET_THUMB1)
13581 {
13582 base_reg = regs[0];
13583 base_reg_rtx = newbase;
13584 }
13585 }
13586
13587 for (i = 0; i < nops; i++)
13588 {
13589 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13590 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13591 SImode, addr, 0);
13592 }
13593 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13594 write_back ? offset + i * 4 : 0));
13595 return true;
13596 }
13597
13598 /* Called from a peephole2 expander to turn a sequence of stores into an
13599 STM instruction. OPERANDS are the operands found by the peephole matcher;
13600 NOPS indicates how many separate stores we are trying to combine.
13601 Returns true iff we could generate a new instruction. */
13602
13603 bool
13604 gen_stm_seq (rtx *operands, int nops)
13605 {
13606 int i;
13607 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13608 rtx mems[MAX_LDM_STM_OPS];
13609 int base_reg;
13610 rtx base_reg_rtx;
13611 HOST_WIDE_INT offset;
13612 int write_back = FALSE;
13613 int stm_case;
13614 rtx addr;
13615 bool base_reg_dies;
13616
13617 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13618 mem_order, &base_reg, &offset, true);
13619
13620 if (stm_case == 0)
13621 return false;
13622
13623 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13624
13625 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13626 if (TARGET_THUMB1)
13627 {
13628 gcc_assert (base_reg_dies);
13629 write_back = TRUE;
13630 }
13631
13632 if (stm_case == 5)
13633 {
13634 gcc_assert (base_reg_dies);
13635 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13636 offset = 0;
13637 }
13638
13639 addr = plus_constant (Pmode, base_reg_rtx, offset);
13640
13641 for (i = 0; i < nops; i++)
13642 {
13643 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13644 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13645 SImode, addr, 0);
13646 }
13647 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13648 write_back ? offset + i * 4 : 0));
13649 return true;
13650 }
13651
13652 /* Called from a peephole2 expander to turn a sequence of stores that are
13653 preceded by constant loads into an STM instruction. OPERANDS are the
13654 operands found by the peephole matcher; NOPS indicates how many
13655 separate stores we are trying to combine; there are 2 * NOPS
13656 instructions in the peephole.
13657 Returns true iff we could generate a new instruction. */
13658
13659 bool
13660 gen_const_stm_seq (rtx *operands, int nops)
13661 {
13662 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13663 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13664 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13665 rtx mems[MAX_LDM_STM_OPS];
13666 int base_reg;
13667 rtx base_reg_rtx;
13668 HOST_WIDE_INT offset;
13669 int write_back = FALSE;
13670 int stm_case;
13671 rtx addr;
13672 bool base_reg_dies;
13673 int i, j;
13674 HARD_REG_SET allocated;
13675
13676 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13677 mem_order, &base_reg, &offset, false);
13678
13679 if (stm_case == 0)
13680 return false;
13681
13682 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13683
13684 /* If the same register is used more than once, try to find a free
13685 register. */
13686 CLEAR_HARD_REG_SET (allocated);
13687 for (i = 0; i < nops; i++)
13688 {
13689 for (j = i + 1; j < nops; j++)
13690 if (regs[i] == regs[j])
13691 {
13692 rtx t = peep2_find_free_register (0, nops * 2,
13693 TARGET_THUMB1 ? "l" : "r",
13694 SImode, &allocated);
13695 if (t == NULL_RTX)
13696 return false;
13697 reg_rtxs[i] = t;
13698 regs[i] = REGNO (t);
13699 }
13700 }
13701
13702 /* Compute an ordering that maps the register numbers to an ascending
13703 sequence. */
13704 reg_order[0] = 0;
13705 for (i = 0; i < nops; i++)
13706 if (regs[i] < regs[reg_order[0]])
13707 reg_order[0] = i;
13708
13709 for (i = 1; i < nops; i++)
13710 {
13711 int this_order = reg_order[i - 1];
13712 for (j = 0; j < nops; j++)
13713 if (regs[j] > regs[reg_order[i - 1]]
13714 && (this_order == reg_order[i - 1]
13715 || regs[j] < regs[this_order]))
13716 this_order = j;
13717 reg_order[i] = this_order;
13718 }
13719
13720 /* Ensure that registers that must be live after the instruction end
13721 up with the correct value. */
13722 for (i = 0; i < nops; i++)
13723 {
13724 int this_order = reg_order[i];
13725 if ((this_order != mem_order[i]
13726 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13727 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13728 return false;
13729 }
13730
13731 /* Load the constants. */
13732 for (i = 0; i < nops; i++)
13733 {
13734 rtx op = operands[2 * nops + mem_order[i]];
13735 sorted_regs[i] = regs[reg_order[i]];
13736 emit_move_insn (reg_rtxs[reg_order[i]], op);
13737 }
13738
13739 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13740
13741 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13742 if (TARGET_THUMB1)
13743 {
13744 gcc_assert (base_reg_dies);
13745 write_back = TRUE;
13746 }
13747
13748 if (stm_case == 5)
13749 {
13750 gcc_assert (base_reg_dies);
13751 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13752 offset = 0;
13753 }
13754
13755 addr = plus_constant (Pmode, base_reg_rtx, offset);
13756
13757 for (i = 0; i < nops; i++)
13758 {
13759 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13760 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13761 SImode, addr, 0);
13762 }
13763 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13764 write_back ? offset + i * 4 : 0));
13765 return true;
13766 }
13767
13768 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13769 unaligned copies on processors which support unaligned semantics for those
13770 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13771 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13772 An interleave factor of 1 (the minimum) will perform no interleaving.
13773 Load/store multiple are used for aligned addresses where possible. */
13774
13775 static void
13776 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13777 HOST_WIDE_INT length,
13778 unsigned int interleave_factor)
13779 {
13780 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13781 int *regnos = XALLOCAVEC (int, interleave_factor);
13782 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13783 HOST_WIDE_INT i, j;
13784 HOST_WIDE_INT remaining = length, words;
13785 rtx halfword_tmp = NULL, byte_tmp = NULL;
13786 rtx dst, src;
13787 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13788 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13789 HOST_WIDE_INT srcoffset, dstoffset;
13790 HOST_WIDE_INT src_autoinc, dst_autoinc;
13791 rtx mem, addr;
13792
13793 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13794
13795 /* Use hard registers if we have aligned source or destination so we can use
13796 load/store multiple with contiguous registers. */
13797 if (dst_aligned || src_aligned)
13798 for (i = 0; i < interleave_factor; i++)
13799 regs[i] = gen_rtx_REG (SImode, i);
13800 else
13801 for (i = 0; i < interleave_factor; i++)
13802 regs[i] = gen_reg_rtx (SImode);
13803
13804 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13805 src = copy_addr_to_reg (XEXP (srcbase, 0));
13806
13807 srcoffset = dstoffset = 0;
13808
13809 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13810 For copying the last bytes we want to subtract this offset again. */
13811 src_autoinc = dst_autoinc = 0;
13812
13813 for (i = 0; i < interleave_factor; i++)
13814 regnos[i] = i;
13815
13816 /* Copy BLOCK_SIZE_BYTES chunks. */
13817
13818 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13819 {
13820 /* Load words. */
13821 if (src_aligned && interleave_factor > 1)
13822 {
13823 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13824 TRUE, srcbase, &srcoffset));
13825 src_autoinc += UNITS_PER_WORD * interleave_factor;
13826 }
13827 else
13828 {
13829 for (j = 0; j < interleave_factor; j++)
13830 {
13831 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13832 - src_autoinc));
13833 mem = adjust_automodify_address (srcbase, SImode, addr,
13834 srcoffset + j * UNITS_PER_WORD);
13835 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13836 }
13837 srcoffset += block_size_bytes;
13838 }
13839
13840 /* Store words. */
13841 if (dst_aligned && interleave_factor > 1)
13842 {
13843 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13844 TRUE, dstbase, &dstoffset));
13845 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13846 }
13847 else
13848 {
13849 for (j = 0; j < interleave_factor; j++)
13850 {
13851 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13852 - dst_autoinc));
13853 mem = adjust_automodify_address (dstbase, SImode, addr,
13854 dstoffset + j * UNITS_PER_WORD);
13855 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13856 }
13857 dstoffset += block_size_bytes;
13858 }
13859
13860 remaining -= block_size_bytes;
13861 }
13862
13863 /* Copy any whole words left (note these aren't interleaved with any
13864 subsequent halfword/byte load/stores in the interests of simplicity). */
13865
13866 words = remaining / UNITS_PER_WORD;
13867
13868 gcc_assert (words < interleave_factor);
13869
13870 if (src_aligned && words > 1)
13871 {
13872 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13873 &srcoffset));
13874 src_autoinc += UNITS_PER_WORD * words;
13875 }
13876 else
13877 {
13878 for (j = 0; j < words; j++)
13879 {
13880 addr = plus_constant (Pmode, src,
13881 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13882 mem = adjust_automodify_address (srcbase, SImode, addr,
13883 srcoffset + j * UNITS_PER_WORD);
13884 if (src_aligned)
13885 emit_move_insn (regs[j], mem);
13886 else
13887 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13888 }
13889 srcoffset += words * UNITS_PER_WORD;
13890 }
13891
13892 if (dst_aligned && words > 1)
13893 {
13894 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13895 &dstoffset));
13896 dst_autoinc += words * UNITS_PER_WORD;
13897 }
13898 else
13899 {
13900 for (j = 0; j < words; j++)
13901 {
13902 addr = plus_constant (Pmode, dst,
13903 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13904 mem = adjust_automodify_address (dstbase, SImode, addr,
13905 dstoffset + j * UNITS_PER_WORD);
13906 if (dst_aligned)
13907 emit_move_insn (mem, regs[j]);
13908 else
13909 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13910 }
13911 dstoffset += words * UNITS_PER_WORD;
13912 }
13913
13914 remaining -= words * UNITS_PER_WORD;
13915
13916 gcc_assert (remaining < 4);
13917
13918 /* Copy a halfword if necessary. */
13919
13920 if (remaining >= 2)
13921 {
13922 halfword_tmp = gen_reg_rtx (SImode);
13923
13924 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13925 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13926 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13927
13928 /* Either write out immediately, or delay until we've loaded the last
13929 byte, depending on interleave factor. */
13930 if (interleave_factor == 1)
13931 {
13932 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13933 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13934 emit_insn (gen_unaligned_storehi (mem,
13935 gen_lowpart (HImode, halfword_tmp)));
13936 halfword_tmp = NULL;
13937 dstoffset += 2;
13938 }
13939
13940 remaining -= 2;
13941 srcoffset += 2;
13942 }
13943
13944 gcc_assert (remaining < 2);
13945
13946 /* Copy last byte. */
13947
13948 if ((remaining & 1) != 0)
13949 {
13950 byte_tmp = gen_reg_rtx (SImode);
13951
13952 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13953 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13954 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13955
13956 if (interleave_factor == 1)
13957 {
13958 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13959 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13960 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13961 byte_tmp = NULL;
13962 dstoffset++;
13963 }
13964
13965 remaining--;
13966 srcoffset++;
13967 }
13968
13969 /* Store last halfword if we haven't done so already. */
13970
13971 if (halfword_tmp)
13972 {
13973 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13974 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13975 emit_insn (gen_unaligned_storehi (mem,
13976 gen_lowpart (HImode, halfword_tmp)));
13977 dstoffset += 2;
13978 }
13979
13980 /* Likewise for last byte. */
13981
13982 if (byte_tmp)
13983 {
13984 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13985 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13986 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13987 dstoffset++;
13988 }
13989
13990 gcc_assert (remaining == 0 && srcoffset == dstoffset);
13991 }
13992
13993 /* From mips_adjust_block_mem:
13994
13995 Helper function for doing a loop-based block operation on memory
13996 reference MEM. Each iteration of the loop will operate on LENGTH
13997 bytes of MEM.
13998
13999 Create a new base register for use within the loop and point it to
14000 the start of MEM. Create a new memory reference that uses this
14001 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14002
14003 static void
14004 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14005 rtx *loop_mem)
14006 {
14007 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14008
14009 /* Although the new mem does not refer to a known location,
14010 it does keep up to LENGTH bytes of alignment. */
14011 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14012 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14013 }
14014
14015 /* From mips_block_move_loop:
14016
14017 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14018 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14019 the memory regions do not overlap. */
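/* Schematically, the emitted RTL corresponds to

     final_src = src + (LENGTH - LENGTH % BYTES_PER_ITER);
   loop:
     copy BYTES_PER_ITER bytes from src to dest;
     src += BYTES_PER_ITER;
     dest += BYTES_PER_ITER;
     if (src != final_src) goto loop;
     copy the remaining LENGTH % BYTES_PER_ITER bytes;  */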
14020
14021 static void
14022 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14023 unsigned int interleave_factor,
14024 HOST_WIDE_INT bytes_per_iter)
14025 {
14026 rtx src_reg, dest_reg, final_src, test;
14027 HOST_WIDE_INT leftover;
14028
14029 leftover = length % bytes_per_iter;
14030 length -= leftover;
14031
14032 /* Create registers and memory references for use within the loop. */
14033 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14034 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14035
14036 /* Calculate the value that SRC_REG should have after the last iteration of
14037 the loop. */
14038 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14039 0, 0, OPTAB_WIDEN);
14040
14041 /* Emit the start of the loop. */
14042 rtx_code_label *label = gen_label_rtx ();
14043 emit_label (label);
14044
14045 /* Emit the loop body. */
14046 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14047 interleave_factor);
14048
14049 /* Move on to the next block. */
14050 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14051 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14052
14053 /* Emit the loop condition. */
14054 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14055 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14056
14057 /* Mop up any left-over bytes. */
14058 if (leftover)
14059 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14060 }
14061
14062 /* Emit a block move when either the source or destination is unaligned (not
14063 aligned to a four-byte boundary). This may need further tuning depending on
14064 core type, optimize_size setting, etc. */
14065
14066 static int
14067 arm_movmemqi_unaligned (rtx *operands)
14068 {
14069 HOST_WIDE_INT length = INTVAL (operands[2]);
14070
14071 if (optimize_size)
14072 {
14073 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14074 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14075 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14076 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14077 or dst_aligned though: allow more interleaving in those cases since the
14078 resulting code can be smaller. */
14079 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14080 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14081
14082 if (length > 12)
14083 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14084 interleave_factor, bytes_per_iter);
14085 else
14086 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14087 interleave_factor);
14088 }
14089 else
14090 {
14091 /* Note that the loop created by arm_block_move_unaligned_loop may be
14092 subject to loop unrolling, which makes tuning this condition a little
14093 redundant. */
14094 if (length > 32)
14095 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14096 else
14097 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14098 }
14099
14100 return 1;
14101 }
14102
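/* Expand a block copy (as used by the movmemqi pattern).  OPERANDS[0] and
   OPERANDS[1] are the destination and source memory references,
   OPERANDS[2] is the number of bytes to copy and OPERANDS[3] is the
   guaranteed alignment.  Return 1 if the copy was expanded, 0 if it
   cannot be handled here.  */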
14103 int
14104 arm_gen_movmemqi (rtx *operands)
14105 {
14106 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14107 HOST_WIDE_INT srcoffset, dstoffset;
14108 int i;
14109 rtx src, dst, srcbase, dstbase;
14110 rtx part_bytes_reg = NULL;
14111 rtx mem;
14112
14113 if (!CONST_INT_P (operands[2])
14114 || !CONST_INT_P (operands[3])
14115 || INTVAL (operands[2]) > 64)
14116 return 0;
14117
14118 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14119 return arm_movmemqi_unaligned (operands);
14120
14121 if (INTVAL (operands[3]) & 3)
14122 return 0;
14123
14124 dstbase = operands[0];
14125 srcbase = operands[1];
14126
14127 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14128 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14129
14130 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14131 out_words_to_go = INTVAL (operands[2]) / 4;
14132 last_bytes = INTVAL (operands[2]) & 3;
14133 dstoffset = srcoffset = 0;
14134
14135 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14136 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14137
14138 for (i = 0; in_words_to_go >= 2; i+=4)
14139 {
14140 if (in_words_to_go > 4)
14141 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14142 TRUE, srcbase, &srcoffset));
14143 else
14144 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14145 src, FALSE, srcbase,
14146 &srcoffset));
14147
14148 if (out_words_to_go)
14149 {
14150 if (out_words_to_go > 4)
14151 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14152 TRUE, dstbase, &dstoffset));
14153 else if (out_words_to_go != 1)
14154 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14155 out_words_to_go, dst,
14156 (last_bytes == 0
14157 ? FALSE : TRUE),
14158 dstbase, &dstoffset));
14159 else
14160 {
14161 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14162 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14163 if (last_bytes != 0)
14164 {
14165 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14166 dstoffset += 4;
14167 }
14168 }
14169 }
14170
14171 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14172 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14173 }
14174
14175 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14176 if (out_words_to_go)
14177 {
14178 rtx sreg;
14179
14180 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14181 sreg = copy_to_reg (mem);
14182
14183 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14184 emit_move_insn (mem, sreg);
14185 in_words_to_go--;
14186
14187 gcc_assert (!in_words_to_go); /* Sanity check */
14188 }
14189
14190 if (in_words_to_go)
14191 {
14192 gcc_assert (in_words_to_go > 0);
14193
14194 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14195 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14196 }
14197
14198 gcc_assert (!last_bytes || part_bytes_reg);
14199
14200 if (BYTES_BIG_ENDIAN && last_bytes)
14201 {
14202 rtx tmp = gen_reg_rtx (SImode);
14203
14204 /* The bytes we want are in the top end of the word. */
14205 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14206 GEN_INT (8 * (4 - last_bytes))));
14207 part_bytes_reg = tmp;
14208
14209 while (last_bytes)
14210 {
14211 mem = adjust_automodify_address (dstbase, QImode,
14212 plus_constant (Pmode, dst,
14213 last_bytes - 1),
14214 dstoffset + last_bytes - 1);
14215 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14216
14217 if (--last_bytes)
14218 {
14219 tmp = gen_reg_rtx (SImode);
14220 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14221 part_bytes_reg = tmp;
14222 }
14223 }
14224
14225 }
14226 else
14227 {
14228 if (last_bytes > 1)
14229 {
14230 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14231 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14232 last_bytes -= 2;
14233 if (last_bytes)
14234 {
14235 rtx tmp = gen_reg_rtx (SImode);
14236 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14237 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14238 part_bytes_reg = tmp;
14239 dstoffset += 2;
14240 }
14241 }
14242
14243 if (last_bytes)
14244 {
14245 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14246 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14247 }
14248 }
14249
14250 return 1;
14251 }
14252
14253 /* Helper for gen_movmem_ldrd_strd. Increase the address of the memory rtx
14254 MEM by its mode size. */
14255 inline static rtx
14256 next_consecutive_mem (rtx mem)
14257 {
14258 machine_mode mode = GET_MODE (mem);
14259 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14260 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14261
14262 return adjust_automodify_address (mem, mode, addr, offset);
14263 }
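/* Illustrative note (register and offsets here are only an example): given
   an SImode MEM whose address is (plus (reg r3) (const_int 8)), the helper
   above returns an SImode MEM addressing (plus (reg r3) (const_int 12)),
   i.e. the next word in memory, with the MEM attributes advanced by 4.  */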
14264
14265 /* Copy using LDRD/STRD instructions whenever possible.
14266 Returns true upon success. */
14267 bool
14268 gen_movmem_ldrd_strd (rtx *operands)
14269 {
14270 unsigned HOST_WIDE_INT len;
14271 HOST_WIDE_INT align;
14272 rtx src, dst, base;
14273 rtx reg0;
14274 bool src_aligned, dst_aligned;
14275 bool src_volatile, dst_volatile;
14276
14277 gcc_assert (CONST_INT_P (operands[2]));
14278 gcc_assert (CONST_INT_P (operands[3]));
14279
14280 len = UINTVAL (operands[2]);
14281 if (len > 64)
14282 return false;
14283
14284 /* Maximum alignment we can assume for both src and dst buffers. */
14285 align = INTVAL (operands[3]);
14286
14287 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14288 return false;
14289
14290 /* Place src and dst addresses in registers
14291 and update the corresponding mem rtx. */
14292 dst = operands[0];
14293 dst_volatile = MEM_VOLATILE_P (dst);
14294 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14295 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14296 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14297
14298 src = operands[1];
14299 src_volatile = MEM_VOLATILE_P (src);
14300 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14301 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14302 src = adjust_automodify_address (src, VOIDmode, base, 0);
14303
14304 if (!unaligned_access && !(src_aligned && dst_aligned))
14305 return false;
14306
14307 if (src_volatile || dst_volatile)
14308 return false;
14309
14310 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14311 if (!(dst_aligned || src_aligned))
14312 return arm_gen_movmemqi (operands);
14313
14314   /* If either the src or dst is unaligned, we'll be accessing it as pairs
14315      of unaligned SImode accesses.  Otherwise we can generate DImode
14316      ldrd/strd instructions.  */
14317 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14318 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14319
14320 while (len >= 8)
14321 {
14322 len -= 8;
14323 reg0 = gen_reg_rtx (DImode);
14324 rtx low_reg = NULL_RTX;
14325 rtx hi_reg = NULL_RTX;
14326
14327 if (!src_aligned || !dst_aligned)
14328 {
14329 low_reg = gen_lowpart (SImode, reg0);
14330 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14331 }
14332 if (src_aligned)
14333 emit_move_insn (reg0, src);
14334 else
14335 {
14336 emit_insn (gen_unaligned_loadsi (low_reg, src));
14337 src = next_consecutive_mem (src);
14338 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14339 }
14340
14341 if (dst_aligned)
14342 emit_move_insn (dst, reg0);
14343 else
14344 {
14345 emit_insn (gen_unaligned_storesi (dst, low_reg));
14346 dst = next_consecutive_mem (dst);
14347 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14348 }
14349
14350 src = next_consecutive_mem (src);
14351 dst = next_consecutive_mem (dst);
14352 }
14353
14354 gcc_assert (len < 8);
14355 if (len >= 4)
14356 {
14357       /* At least a word, but less than a double-word, to copy.  Copy a word.  */
14358 reg0 = gen_reg_rtx (SImode);
14359 src = adjust_address (src, SImode, 0);
14360 dst = adjust_address (dst, SImode, 0);
14361 if (src_aligned)
14362 emit_move_insn (reg0, src);
14363 else
14364 emit_insn (gen_unaligned_loadsi (reg0, src));
14365
14366 if (dst_aligned)
14367 emit_move_insn (dst, reg0);
14368 else
14369 emit_insn (gen_unaligned_storesi (dst, reg0));
14370
14371 src = next_consecutive_mem (src);
14372 dst = next_consecutive_mem (dst);
14373 len -= 4;
14374 }
14375
14376 if (len == 0)
14377 return true;
14378
14379 /* Copy the remaining bytes. */
14380 if (len >= 2)
14381 {
14382 dst = adjust_address (dst, HImode, 0);
14383 src = adjust_address (src, HImode, 0);
14384 reg0 = gen_reg_rtx (SImode);
14385 if (src_aligned)
14386 emit_insn (gen_zero_extendhisi2 (reg0, src));
14387 else
14388 emit_insn (gen_unaligned_loadhiu (reg0, src));
14389
14390 if (dst_aligned)
14391 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14392 else
14393 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14394
14395 src = next_consecutive_mem (src);
14396 dst = next_consecutive_mem (dst);
14397 if (len == 2)
14398 return true;
14399 }
14400
14401 dst = adjust_address (dst, QImode, 0);
14402 src = adjust_address (src, QImode, 0);
14403 reg0 = gen_reg_rtx (QImode);
14404 emit_move_insn (reg0, src);
14405 emit_move_insn (dst, reg0);
14406 return true;
14407 }
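/* A rough sketch of what the expander above might emit, purely for
   illustration: copying 14 bytes between two word-aligned buffers could end
   up as something like

	ldrd	r4, r5, [r1]
	strd	r4, r5, [r0]
	ldr	r4, [r1, #8]
	str	r4, [r0, #8]
	ldrh	r4, [r1, #12]
	strh	r4, [r0, #12]

   where the register numbers are chosen by the register allocator and are
   not fixed by this code.  */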
14408
14409 /* Select a dominance comparison mode if possible for a test of the general
14410 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14411 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14412 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14413 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14414 In all cases OP will be either EQ or NE, but we don't need to know which
14415 here. If we are unable to support a dominance comparison we return
14416    here.  If we are unable to support a dominance comparison we return
14417 generate this call. */
14418 machine_mode
14419 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14420 {
14421 enum rtx_code cond1, cond2;
14422 int swapped = 0;
14423
14424 /* Currently we will probably get the wrong result if the individual
14425 comparisons are not simple. This also ensures that it is safe to
14426 reverse a comparison if necessary. */
14427 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14428 != CCmode)
14429 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14430 != CCmode))
14431 return CCmode;
14432
14433 /* The if_then_else variant of this tests the second condition if the
14434 first passes, but is true if the first fails. Reverse the first
14435 condition to get a true "inclusive-or" expression. */
14436 if (cond_or == DOM_CC_NX_OR_Y)
14437 cond1 = reverse_condition (cond1);
14438
14439 /* If the comparisons are not equal, and one doesn't dominate the other,
14440 then we can't do this. */
14441 if (cond1 != cond2
14442 && !comparison_dominates_p (cond1, cond2)
14443 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14444 return CCmode;
14445
14446 if (swapped)
14447 std::swap (cond1, cond2);
14448
14449 switch (cond1)
14450 {
14451 case EQ:
14452 if (cond_or == DOM_CC_X_AND_Y)
14453 return CC_DEQmode;
14454
14455 switch (cond2)
14456 {
14457 case EQ: return CC_DEQmode;
14458 case LE: return CC_DLEmode;
14459 case LEU: return CC_DLEUmode;
14460 case GE: return CC_DGEmode;
14461 case GEU: return CC_DGEUmode;
14462 default: gcc_unreachable ();
14463 }
14464
14465 case LT:
14466 if (cond_or == DOM_CC_X_AND_Y)
14467 return CC_DLTmode;
14468
14469 switch (cond2)
14470 {
14471 case LT:
14472 return CC_DLTmode;
14473 case LE:
14474 return CC_DLEmode;
14475 case NE:
14476 return CC_DNEmode;
14477 default:
14478 gcc_unreachable ();
14479 }
14480
14481 case GT:
14482 if (cond_or == DOM_CC_X_AND_Y)
14483 return CC_DGTmode;
14484
14485 switch (cond2)
14486 {
14487 case GT:
14488 return CC_DGTmode;
14489 case GE:
14490 return CC_DGEmode;
14491 case NE:
14492 return CC_DNEmode;
14493 default:
14494 gcc_unreachable ();
14495 }
14496
14497 case LTU:
14498 if (cond_or == DOM_CC_X_AND_Y)
14499 return CC_DLTUmode;
14500
14501 switch (cond2)
14502 {
14503 case LTU:
14504 return CC_DLTUmode;
14505 case LEU:
14506 return CC_DLEUmode;
14507 case NE:
14508 return CC_DNEmode;
14509 default:
14510 gcc_unreachable ();
14511 }
14512
14513 case GTU:
14514 if (cond_or == DOM_CC_X_AND_Y)
14515 return CC_DGTUmode;
14516
14517 switch (cond2)
14518 {
14519 case GTU:
14520 return CC_DGTUmode;
14521 case GEU:
14522 return CC_DGEUmode;
14523 case NE:
14524 return CC_DNEmode;
14525 default:
14526 gcc_unreachable ();
14527 }
14528
14529 /* The remaining cases only occur when both comparisons are the
14530 same. */
14531 case NE:
14532 gcc_assert (cond1 == cond2);
14533 return CC_DNEmode;
14534
14535 case LE:
14536 gcc_assert (cond1 == cond2);
14537 return CC_DLEmode;
14538
14539 case GE:
14540 gcc_assert (cond1 == cond2);
14541 return CC_DGEmode;
14542
14543 case LEU:
14544 gcc_assert (cond1 == cond2);
14545 return CC_DLEUmode;
14546
14547 case GEU:
14548 gcc_assert (cond1 == cond2);
14549 return CC_DGEUmode;
14550
14551 default:
14552 gcc_unreachable ();
14553 }
14554 }
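/* Worked example for the dominance machinery above (register numbers are
   illustrative only): for a test such as (x == 0 || y == 0), both
   sub-comparisons are EQ and COND_OR is DOM_CC_X_OR_Y, so CC_DEQmode is
   returned and the insn patterns can emit an ARM-state conditional compare
   sequence roughly like

	cmp	r0, #0
	cmpne	r1, #0
	beq	.Ltrue

   where the second compare is only executed if the first one did not
   already succeed.  */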
14555
14556 machine_mode
14557 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14558 {
14559 /* All floating point compares return CCFP if it is an equality
14560 comparison, and CCFPE otherwise. */
14561 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14562 {
14563 switch (op)
14564 {
14565 case EQ:
14566 case NE:
14567 case UNORDERED:
14568 case ORDERED:
14569 case UNLT:
14570 case UNLE:
14571 case UNGT:
14572 case UNGE:
14573 case UNEQ:
14574 case LTGT:
14575 return CCFPmode;
14576
14577 case LT:
14578 case LE:
14579 case GT:
14580 case GE:
14581 return CCFPEmode;
14582
14583 default:
14584 gcc_unreachable ();
14585 }
14586 }
14587
14588 /* A compare with a shifted operand. Because of canonicalization, the
14589 comparison will have to be swapped when we emit the assembler. */
14590 if (GET_MODE (y) == SImode
14591 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14592 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14593 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14594 || GET_CODE (x) == ROTATERT))
14595 return CC_SWPmode;
14596
14597 /* This operation is performed swapped, but since we only rely on the Z
14598 flag we don't need an additional mode. */
14599 if (GET_MODE (y) == SImode
14600 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14601 && GET_CODE (x) == NEG
14602 && (op == EQ || op == NE))
14603 return CC_Zmode;
14604
14605 /* This is a special case that is used by combine to allow a
14606 comparison of a shifted byte load to be split into a zero-extend
14607 followed by a comparison of the shifted integer (only valid for
14608 equalities and unsigned inequalities). */
14609 if (GET_MODE (x) == SImode
14610 && GET_CODE (x) == ASHIFT
14611 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14612 && GET_CODE (XEXP (x, 0)) == SUBREG
14613 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14614 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14615 && (op == EQ || op == NE
14616 || op == GEU || op == GTU || op == LTU || op == LEU)
14617 && CONST_INT_P (y))
14618 return CC_Zmode;
14619
14620 /* A construct for a conditional compare, if the false arm contains
14621 0, then both conditions must be true, otherwise either condition
14622 must be true. Not all conditions are possible, so CCmode is
14623 returned if it can't be done. */
14624 if (GET_CODE (x) == IF_THEN_ELSE
14625 && (XEXP (x, 2) == const0_rtx
14626 || XEXP (x, 2) == const1_rtx)
14627 && COMPARISON_P (XEXP (x, 0))
14628 && COMPARISON_P (XEXP (x, 1)))
14629 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14630 INTVAL (XEXP (x, 2)));
14631
14632 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14633 if (GET_CODE (x) == AND
14634 && (op == EQ || op == NE)
14635 && COMPARISON_P (XEXP (x, 0))
14636 && COMPARISON_P (XEXP (x, 1)))
14637 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14638 DOM_CC_X_AND_Y);
14639
14640 if (GET_CODE (x) == IOR
14641 && (op == EQ || op == NE)
14642 && COMPARISON_P (XEXP (x, 0))
14643 && COMPARISON_P (XEXP (x, 1)))
14644 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14645 DOM_CC_X_OR_Y);
14646
14647 /* An operation (on Thumb) where we want to test for a single bit.
14648 This is done by shifting that bit up into the top bit of a
14649 scratch register; we can then branch on the sign bit. */
14650 if (TARGET_THUMB1
14651 && GET_MODE (x) == SImode
14652 && (op == EQ || op == NE)
14653 && GET_CODE (x) == ZERO_EXTRACT
14654 && XEXP (x, 1) == const1_rtx)
14655 return CC_Nmode;
14656
14657 /* An operation that sets the condition codes as a side-effect, the
14658 V flag is not set correctly, so we can only use comparisons where
14659 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14660 instead.) */
14661 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14662 if (GET_MODE (x) == SImode
14663 && y == const0_rtx
14664 && (op == EQ || op == NE || op == LT || op == GE)
14665 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14666 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14667 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14668 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14669 || GET_CODE (x) == LSHIFTRT
14670 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14671 || GET_CODE (x) == ROTATERT
14672 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14673 return CC_NOOVmode;
14674
14675 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14676 return CC_Zmode;
14677
14678 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14679 && GET_CODE (x) == PLUS
14680 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14681 return CC_Cmode;
14682
14683 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14684 {
14685 switch (op)
14686 {
14687 case EQ:
14688 case NE:
14689 /* A DImode comparison against zero can be implemented by
14690 or'ing the two halves together. */
14691 if (y == const0_rtx)
14692 return CC_Zmode;
14693
14694 /* We can do an equality test in three Thumb instructions. */
14695 if (!TARGET_32BIT)
14696 return CC_Zmode;
14697
14698 /* FALLTHROUGH */
14699
14700 case LTU:
14701 case LEU:
14702 case GTU:
14703 case GEU:
14704 /* DImode unsigned comparisons can be implemented by cmp +
14705 cmpeq without a scratch register. Not worth doing in
14706 Thumb-2. */
14707 if (TARGET_32BIT)
14708 return CC_CZmode;
14709
14710 /* FALLTHROUGH */
14711
14712 case LT:
14713 case LE:
14714 case GT:
14715 case GE:
14716 /* DImode signed and unsigned comparisons can be implemented
14717 by cmp + sbcs with a scratch register, but that does not
14718 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14719 gcc_assert (op != EQ && op != NE);
14720 return CC_NCVmode;
14721
14722 default:
14723 gcc_unreachable ();
14724 }
14725 }
14726
14727 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14728 return GET_MODE (x);
14729
14730 return CCmode;
14731 }
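/* Illustrative note on the DImode cases above (register numbers are an
   assumption): an equality test of a DImode value against zero selects
   CC_Zmode and can be implemented by ORRing the two halves together, e.g.

	orrs	r3, r0, r1

   which sets Z exactly when both halves are zero, while unsigned DImode
   comparisons on 32-bit targets select CC_CZmode and use the cmp + cmpeq
   idiom mentioned in the comment above.  */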
14732
14733 /* X and Y are two things to compare using CODE.  Emit the compare insn and
14734    return the rtx for the CC register in the proper mode.  SCRATCH supplies
14735    the scratch register needed by some DImode comparisons after reload.  */
14736 rtx
14737 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14738 {
14739 machine_mode mode;
14740 rtx cc_reg;
14741 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14742
14743 /* We might have X as a constant, Y as a register because of the predicates
14744 used for cmpdi. If so, force X to a register here. */
14745 if (dimode_comparison && !REG_P (x))
14746 x = force_reg (DImode, x);
14747
14748 mode = SELECT_CC_MODE (code, x, y);
14749 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14750
14751 if (dimode_comparison
14752 && mode != CC_CZmode)
14753 {
14754 rtx clobber, set;
14755
14756 /* To compare two non-zero values for equality, XOR them and
14757 then compare against zero. Not used for ARM mode; there
14758 CC_CZmode is cheaper. */
14759 if (mode == CC_Zmode && y != const0_rtx)
14760 {
14761 gcc_assert (!reload_completed);
14762 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14763 y = const0_rtx;
14764 }
14765
14766 /* A scratch register is required. */
14767 if (reload_completed)
14768 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14769 else
14770 scratch = gen_rtx_SCRATCH (SImode);
14771
14772 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14773 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14774 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14775 }
14776 else
14777 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14778
14779 return cc_reg;
14780 }
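/* A minimal sketch of the CC_Zmode path above for Thumb-1 (assumed register
   assignment, for illustration only): comparing two DImode values held in
   r0/r1 and r2/r3 for equality XORs them and tests the result against zero,
   roughly

	eors	r0, r2
	eors	r1, r3
	orrs	r0, r1

   leaving the Z flag set iff the original values were equal.  */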
14781
14782 /* Generate a sequence of insns that will generate the correct return
14783 address mask depending on the physical architecture that the program
14784 is running on. */
14785 rtx
14786 arm_gen_return_addr_mask (void)
14787 {
14788 rtx reg = gen_reg_rtx (Pmode);
14789
14790 emit_insn (gen_return_addr_mask (reg));
14791 return reg;
14792 }
14793
14794 void
14795 arm_reload_in_hi (rtx *operands)
14796 {
14797 rtx ref = operands[1];
14798 rtx base, scratch;
14799 HOST_WIDE_INT offset = 0;
14800
14801 if (GET_CODE (ref) == SUBREG)
14802 {
14803 offset = SUBREG_BYTE (ref);
14804 ref = SUBREG_REG (ref);
14805 }
14806
14807 if (REG_P (ref))
14808 {
14809 /* We have a pseudo which has been spilt onto the stack; there
14810 are two cases here: the first where there is a simple
14811 stack-slot replacement and a second where the stack-slot is
14812 out of range, or is used as a subreg. */
14813 if (reg_equiv_mem (REGNO (ref)))
14814 {
14815 ref = reg_equiv_mem (REGNO (ref));
14816 base = find_replacement (&XEXP (ref, 0));
14817 }
14818 else
14819 /* The slot is out of range, or was dressed up in a SUBREG. */
14820 base = reg_equiv_address (REGNO (ref));
14821
14822 /* PR 62554: If there is no equivalent memory location then just move
14823 the value as an SImode register move. This happens when the target
14824 architecture variant does not have an HImode register move. */
14825 if (base == NULL)
14826 {
14827 gcc_assert (REG_P (operands[0]));
14828 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14829 gen_rtx_SUBREG (SImode, ref, 0)));
14830 return;
14831 }
14832 }
14833 else
14834 base = find_replacement (&XEXP (ref, 0));
14835
14836 /* Handle the case where the address is too complex to be offset by 1. */
14837 if (GET_CODE (base) == MINUS
14838 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14839 {
14840 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14841
14842 emit_set_insn (base_plus, base);
14843 base = base_plus;
14844 }
14845 else if (GET_CODE (base) == PLUS)
14846 {
14847 /* The addend must be CONST_INT, or we would have dealt with it above. */
14848 HOST_WIDE_INT hi, lo;
14849
14850 offset += INTVAL (XEXP (base, 1));
14851 base = XEXP (base, 0);
14852
14853 /* Rework the address into a legal sequence of insns. */
14854 /* Valid range for lo is -4095 -> 4095 */
14855 lo = (offset >= 0
14856 ? (offset & 0xfff)
14857 : -((-offset) & 0xfff));
14858
14859 /* Corner case, if lo is the max offset then we would be out of range
14860 once we have added the additional 1 below, so bump the msb into the
14861 pre-loading insn(s). */
14862 if (lo == 4095)
14863 lo &= 0x7ff;
14864
14865 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14866 ^ (HOST_WIDE_INT) 0x80000000)
14867 - (HOST_WIDE_INT) 0x80000000);
14868
14869 gcc_assert (hi + lo == offset);
14870
14871 if (hi != 0)
14872 {
14873 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14874
14875 /* Get the base address; addsi3 knows how to handle constants
14876 that require more than one insn. */
14877 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14878 base = base_plus;
14879 offset = lo;
14880 }
14881 }
14882
14883   /* Operands[2] may overlap operands[0] (though it won't overlap
14884      operands[1]); that's why we asked for a DImode reg -- so we can
14885      use the half that does not overlap.  */
14886 if (REGNO (operands[2]) == REGNO (operands[0]))
14887 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14888 else
14889 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14890
14891 emit_insn (gen_zero_extendqisi2 (scratch,
14892 gen_rtx_MEM (QImode,
14893 plus_constant (Pmode, base,
14894 offset))));
14895 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14896 gen_rtx_MEM (QImode,
14897 plus_constant (Pmode, base,
14898 offset + 1))));
14899 if (!BYTES_BIG_ENDIAN)
14900 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14901 gen_rtx_IOR (SImode,
14902 gen_rtx_ASHIFT
14903 (SImode,
14904 gen_rtx_SUBREG (SImode, operands[0], 0),
14905 GEN_INT (8)),
14906 scratch));
14907 else
14908 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14909 gen_rtx_IOR (SImode,
14910 gen_rtx_ASHIFT (SImode, scratch,
14911 GEN_INT (8)),
14912 gen_rtx_SUBREG (SImode, operands[0], 0)));
14913 }
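/* For illustration (little-endian case, register numbers assumed): the
   function above reloads a misaligned halfword by loading the two bytes
   separately and merging them, roughly

	ldrb	r3, [r2]	@ byte at the lower address
	ldrb	r0, [r2, #1]	@ byte at the higher address
	orr	r0, r3, r0, lsl #8

   with the big-endian branch combining the bytes the other way round.  */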
14914
14915 /* Handle storing a half-word to memory during reload by synthesizing as two
14916 byte stores. Take care not to clobber the input values until after we
14917 have moved them somewhere safe. This code assumes that if the DImode
14918 scratch in operands[2] overlaps either the input value or output address
14919 in some way, then that value must die in this insn (we absolutely need
14920 two scratch registers for some corner cases). */
14921 void
14922 arm_reload_out_hi (rtx *operands)
14923 {
14924 rtx ref = operands[0];
14925 rtx outval = operands[1];
14926 rtx base, scratch;
14927 HOST_WIDE_INT offset = 0;
14928
14929 if (GET_CODE (ref) == SUBREG)
14930 {
14931 offset = SUBREG_BYTE (ref);
14932 ref = SUBREG_REG (ref);
14933 }
14934
14935 if (REG_P (ref))
14936 {
14937 /* We have a pseudo which has been spilt onto the stack; there
14938 are two cases here: the first where there is a simple
14939 stack-slot replacement and a second where the stack-slot is
14940 out of range, or is used as a subreg. */
14941 if (reg_equiv_mem (REGNO (ref)))
14942 {
14943 ref = reg_equiv_mem (REGNO (ref));
14944 base = find_replacement (&XEXP (ref, 0));
14945 }
14946 else
14947 /* The slot is out of range, or was dressed up in a SUBREG. */
14948 base = reg_equiv_address (REGNO (ref));
14949
14950 /* PR 62254: If there is no equivalent memory location then just move
14951 the value as an SImode register move. This happens when the target
14952 architecture variant does not have an HImode register move. */
14953 if (base == NULL)
14954 {
14955 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14956
14957 if (REG_P (outval))
14958 {
14959 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14960 gen_rtx_SUBREG (SImode, outval, 0)));
14961 }
14962 else /* SUBREG_P (outval) */
14963 {
14964 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14965 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14966 SUBREG_REG (outval)));
14967 else
14968 /* FIXME: Handle other cases ? */
14969 gcc_unreachable ();
14970 }
14971 return;
14972 }
14973 }
14974 else
14975 base = find_replacement (&XEXP (ref, 0));
14976
14977 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14978
14979 /* Handle the case where the address is too complex to be offset by 1. */
14980 if (GET_CODE (base) == MINUS
14981 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14982 {
14983 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14984
14985 /* Be careful not to destroy OUTVAL. */
14986 if (reg_overlap_mentioned_p (base_plus, outval))
14987 {
14988 /* Updating base_plus might destroy outval, see if we can
14989 swap the scratch and base_plus. */
14990 if (!reg_overlap_mentioned_p (scratch, outval))
14991 std::swap (scratch, base_plus);
14992 else
14993 {
14994 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14995
14996 /* Be conservative and copy OUTVAL into the scratch now,
14997 this should only be necessary if outval is a subreg
14998 of something larger than a word. */
14999 /* XXX Might this clobber base? I can't see how it can,
15000 since scratch is known to overlap with OUTVAL, and
15001 must be wider than a word. */
15002 emit_insn (gen_movhi (scratch_hi, outval));
15003 outval = scratch_hi;
15004 }
15005 }
15006
15007 emit_set_insn (base_plus, base);
15008 base = base_plus;
15009 }
15010 else if (GET_CODE (base) == PLUS)
15011 {
15012 /* The addend must be CONST_INT, or we would have dealt with it above. */
15013 HOST_WIDE_INT hi, lo;
15014
15015 offset += INTVAL (XEXP (base, 1));
15016 base = XEXP (base, 0);
15017
15018 /* Rework the address into a legal sequence of insns. */
15019 /* Valid range for lo is -4095 -> 4095 */
15020 lo = (offset >= 0
15021 ? (offset & 0xfff)
15022 : -((-offset) & 0xfff));
15023
15024 /* Corner case, if lo is the max offset then we would be out of range
15025 once we have added the additional 1 below, so bump the msb into the
15026 pre-loading insn(s). */
15027 if (lo == 4095)
15028 lo &= 0x7ff;
15029
15030 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15031 ^ (HOST_WIDE_INT) 0x80000000)
15032 - (HOST_WIDE_INT) 0x80000000);
15033
15034 gcc_assert (hi + lo == offset);
15035
15036 if (hi != 0)
15037 {
15038 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15039
15040 /* Be careful not to destroy OUTVAL. */
15041 if (reg_overlap_mentioned_p (base_plus, outval))
15042 {
15043 /* Updating base_plus might destroy outval, see if we
15044 can swap the scratch and base_plus. */
15045 if (!reg_overlap_mentioned_p (scratch, outval))
15046 std::swap (scratch, base_plus);
15047 else
15048 {
15049 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15050
15051 /* Be conservative and copy outval into scratch now,
15052 this should only be necessary if outval is a
15053 subreg of something larger than a word. */
15054 /* XXX Might this clobber base? I can't see how it
15055 can, since scratch is known to overlap with
15056 outval. */
15057 emit_insn (gen_movhi (scratch_hi, outval));
15058 outval = scratch_hi;
15059 }
15060 }
15061
15062 /* Get the base address; addsi3 knows how to handle constants
15063 that require more than one insn. */
15064 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15065 base = base_plus;
15066 offset = lo;
15067 }
15068 }
15069
15070 if (BYTES_BIG_ENDIAN)
15071 {
15072 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15073 plus_constant (Pmode, base,
15074 offset + 1)),
15075 gen_lowpart (QImode, outval)));
15076 emit_insn (gen_lshrsi3 (scratch,
15077 gen_rtx_SUBREG (SImode, outval, 0),
15078 GEN_INT (8)));
15079 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15080 offset)),
15081 gen_lowpart (QImode, scratch)));
15082 }
15083 else
15084 {
15085 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15086 offset)),
15087 gen_lowpart (QImode, outval)));
15088 emit_insn (gen_lshrsi3 (scratch,
15089 gen_rtx_SUBREG (SImode, outval, 0),
15090 GEN_INT (8)));
15091 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15092 plus_constant (Pmode, base,
15093 offset + 1)),
15094 gen_lowpart (QImode, scratch)));
15095 }
15096 }
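/* For illustration (little-endian case, register numbers assumed): the
   store counterpart above splits the halfword into two byte stores,
   roughly

	strb	r1, [r2]	@ low byte of OUTVAL
	lsr	r3, r1, #8
	strb	r3, [r2, #1]	@ high byte of OUTVAL

   taking care, as described above, not to clobber OUTVAL before both
   bytes have been written.  */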
15097
15098 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15099 (padded to the size of a word) should be passed in a register. */
15100
15101 static bool
15102 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15103 {
15104 if (TARGET_AAPCS_BASED)
15105 return must_pass_in_stack_var_size (mode, type);
15106 else
15107 return must_pass_in_stack_var_size_or_pad (mode, type);
15108 }
15109
15110
15111 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15112 Return true if an argument passed on the stack should be padded upwards,
15113 i.e. if the least-significant byte has useful data.
15114 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15115 aggregate types are placed in the lowest memory address. */
15116
15117 bool
15118 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15119 {
15120 if (!TARGET_AAPCS_BASED)
15121 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15122
15123 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15124 return false;
15125
15126 return true;
15127 }
15128
15129
15130 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15131 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15132 register has useful data, and return the opposite if the most
15133 significant byte does. */
15134
15135 bool
15136 arm_pad_reg_upward (machine_mode mode,
15137 tree type, int first ATTRIBUTE_UNUSED)
15138 {
15139 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15140 {
15141 /* For AAPCS, small aggregates, small fixed-point types,
15142 and small complex types are always padded upwards. */
15143 if (type)
15144 {
15145 if ((AGGREGATE_TYPE_P (type)
15146 || TREE_CODE (type) == COMPLEX_TYPE
15147 || FIXED_POINT_TYPE_P (type))
15148 && int_size_in_bytes (type) <= 4)
15149 return true;
15150 }
15151 else
15152 {
15153 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15154 && GET_MODE_SIZE (mode) <= 4)
15155 return true;
15156 }
15157 }
15158
15159 /* Otherwise, use default padding. */
15160 return !BYTES_BIG_ENDIAN;
15161 }
15162
15163 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15164 assuming that the address in the base register is word aligned. */
15165 bool
15166 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15167 {
15168 HOST_WIDE_INT max_offset;
15169
15170   /* The offset must be a multiple of 4 in Thumb-2 mode.  */
15171 if (TARGET_THUMB2 && ((offset & 3) != 0))
15172 return false;
15173
15174 if (TARGET_THUMB2)
15175 max_offset = 1020;
15176 else if (TARGET_ARM)
15177 max_offset = 255;
15178 else
15179 return false;
15180
15181 return ((offset <= max_offset) && (offset >= -max_offset));
15182 }
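/* Illustrative values for the check above: with a word-aligned base, an
   offset of 1020 is accepted in Thumb-2 (a multiple of 4 within +/-1020),
   an offset of 2 is rejected in Thumb-2 (not a multiple of 4), and an
   offset of 256 is rejected in ARM state (outside +/-255).  */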
15183
15184 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15185 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15186 Assumes that the address in the base register RN is word aligned. Pattern
15187    guarantees that both memory accesses use the same base register,
15188    the offsets are constants within range, and the gap between the offsets is 4.
15189    Once reload is complete, check that the registers are legal.  WBACK indicates
15190    whether the address is updated.  LOAD indicates whether the memory access is a load or a store.  */
15191 bool
15192 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15193 bool wback, bool load)
15194 {
15195 unsigned int t, t2, n;
15196
15197 if (!reload_completed)
15198 return true;
15199
15200 if (!offset_ok_for_ldrd_strd (offset))
15201 return false;
15202
15203 t = REGNO (rt);
15204 t2 = REGNO (rt2);
15205 n = REGNO (rn);
15206
15207 if ((TARGET_THUMB2)
15208 && ((wback && (n == t || n == t2))
15209 || (t == SP_REGNUM)
15210 || (t == PC_REGNUM)
15211 || (t2 == SP_REGNUM)
15212 || (t2 == PC_REGNUM)
15213 || (!load && (n == PC_REGNUM))
15214 || (load && (t == t2))
15215 /* Triggers Cortex-M3 LDRD errata. */
15216 || (!wback && load && fix_cm3_ldrd && (n == t))))
15217 return false;
15218
15219 if ((TARGET_ARM)
15220 && ((wback && (n == t || n == t2))
15221 || (t2 == PC_REGNUM)
15222 || (t % 2 != 0) /* First destination register is not even. */
15223 || (t2 != t + 1)
15224	   /* PC can be used as a base register (for offset addressing only),
15225	      but it is deprecated.  */
15226 || (n == PC_REGNUM)))
15227 return false;
15228
15229 return true;
15230 }
15231
15232 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15233 operand MEM's address contains an immediate offset from the base
15234 register and has no side effects, in which case it sets BASE and
15235 OFFSET accordingly. */
15236 static bool
15237 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15238 {
15239 rtx addr;
15240
15241 gcc_assert (base != NULL && offset != NULL);
15242
15243 /* TODO: Handle more general memory operand patterns, such as
15244 PRE_DEC and PRE_INC. */
15245
15246 if (side_effects_p (mem))
15247 return false;
15248
15249 /* Can't deal with subregs. */
15250 if (GET_CODE (mem) == SUBREG)
15251 return false;
15252
15253 gcc_assert (MEM_P (mem));
15254
15255 *offset = const0_rtx;
15256
15257 addr = XEXP (mem, 0);
15258
15259 /* If addr isn't valid for DImode, then we can't handle it. */
15260 if (!arm_legitimate_address_p (DImode, addr,
15261 reload_in_progress || reload_completed))
15262 return false;
15263
15264 if (REG_P (addr))
15265 {
15266 *base = addr;
15267 return true;
15268 }
15269 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15270 {
15271 *base = XEXP (addr, 0);
15272 *offset = XEXP (addr, 1);
15273 return (REG_P (*base) && CONST_INT_P (*offset));
15274 }
15275
15276 return false;
15277 }
15278
15279 /* Called from a peephole2 to replace two word-size accesses with a
15280 single LDRD/STRD instruction. Returns true iff we can generate a
15281 new instruction sequence. That is, both accesses use the same base
15282 register and the gap between constant offsets is 4. This function
15283 may reorder its operands to match ldrd/strd RTL templates.
15284 OPERANDS are the operands found by the peephole matcher;
15285 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15286    corresponding memory operands.  LOAD indicates whether the access
15287    is a load or a store.  CONST_STORE indicates a store of constant
15288    integer values held in OPERANDS[4,5]; the pattern is then assumed
15289    to be four insns long, for the purpose of checking dead registers.
15290 COMMUTE indicates that register operands may be reordered. */
15291 bool
15292 gen_operands_ldrd_strd (rtx *operands, bool load,
15293 bool const_store, bool commute)
15294 {
15295 int nops = 2;
15296 HOST_WIDE_INT offsets[2], offset;
15297 rtx base = NULL_RTX;
15298 rtx cur_base, cur_offset, tmp;
15299 int i, gap;
15300 HARD_REG_SET regset;
15301
15302 gcc_assert (!const_store || !load);
15303 /* Check that the memory references are immediate offsets from the
15304 same base register. Extract the base register, the destination
15305 registers, and the corresponding memory offsets. */
15306 for (i = 0; i < nops; i++)
15307 {
15308 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15309 return false;
15310
15311 if (i == 0)
15312 base = cur_base;
15313 else if (REGNO (base) != REGNO (cur_base))
15314 return false;
15315
15316 offsets[i] = INTVAL (cur_offset);
15317 if (GET_CODE (operands[i]) == SUBREG)
15318 {
15319 tmp = SUBREG_REG (operands[i]);
15320 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15321 operands[i] = tmp;
15322 }
15323 }
15324
15325 /* Make sure there is no dependency between the individual loads. */
15326 if (load && REGNO (operands[0]) == REGNO (base))
15327 return false; /* RAW */
15328
15329 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15330 return false; /* WAW */
15331
15332 /* If the same input register is used in both stores
15333 when storing different constants, try to find a free register.
15334 For example, the code
15335 mov r0, 0
15336 str r0, [r2]
15337 mov r0, 1
15338 str r0, [r2, #4]
15339 can be transformed into
15340 mov r1, 0
15341 mov r0, 1
15342 strd r1, r0, [r2]
15343 in Thumb mode assuming that r1 is free.
15344 For ARM mode do the same but only if the starting register
15345 can be made to be even. */
15346 if (const_store
15347 && REGNO (operands[0]) == REGNO (operands[1])
15348 && INTVAL (operands[4]) != INTVAL (operands[5]))
15349 {
15350 if (TARGET_THUMB2)
15351 {
15352 CLEAR_HARD_REG_SET (regset);
15353 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15354 if (tmp == NULL_RTX)
15355 return false;
15356
15357	  /* Use the new register in the first load to ensure that
15358	     if the original input register is not dead after the peephole,
15359	     it will still hold the correct constant value.  */
15360 operands[0] = tmp;
15361 }
15362 else if (TARGET_ARM)
15363 {
15364 int regno = REGNO (operands[0]);
15365 if (!peep2_reg_dead_p (4, operands[0]))
15366 {
15367 /* When the input register is even and is not dead after the
15368 pattern, it has to hold the second constant but we cannot
15369 form a legal STRD in ARM mode with this register as the second
15370 register. */
15371 if (regno % 2 == 0)
15372 return false;
15373
15374 /* Is regno-1 free? */
15375 SET_HARD_REG_SET (regset);
15376 CLEAR_HARD_REG_BIT(regset, regno - 1);
15377 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15378 if (tmp == NULL_RTX)
15379 return false;
15380
15381 operands[0] = tmp;
15382 }
15383 else
15384 {
15385 /* Find a DImode register. */
15386 CLEAR_HARD_REG_SET (regset);
15387 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15388 if (tmp != NULL_RTX)
15389 {
15390 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15391 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15392 }
15393 else
15394 {
15395 /* Can we use the input register to form a DI register? */
15396 SET_HARD_REG_SET (regset);
15397 CLEAR_HARD_REG_BIT(regset,
15398 regno % 2 == 0 ? regno + 1 : regno - 1);
15399 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15400 if (tmp == NULL_RTX)
15401 return false;
15402 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15403 }
15404 }
15405
15406 gcc_assert (operands[0] != NULL_RTX);
15407 gcc_assert (operands[1] != NULL_RTX);
15408 gcc_assert (REGNO (operands[0]) % 2 == 0);
15409 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15410 }
15411 }
15412
15413 /* Make sure the instructions are ordered with lower memory access first. */
15414 if (offsets[0] > offsets[1])
15415 {
15416 gap = offsets[0] - offsets[1];
15417 offset = offsets[1];
15418
15419 /* Swap the instructions such that lower memory is accessed first. */
15420 std::swap (operands[0], operands[1]);
15421 std::swap (operands[2], operands[3]);
15422 if (const_store)
15423 std::swap (operands[4], operands[5]);
15424 }
15425 else
15426 {
15427 gap = offsets[1] - offsets[0];
15428 offset = offsets[0];
15429 }
15430
15431 /* Make sure accesses are to consecutive memory locations. */
15432 if (gap != 4)
15433 return false;
15434
15435 /* Make sure we generate legal instructions. */
15436 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15437 false, load))
15438 return true;
15439
15440   /* In Thumb state, where register choice is almost unconstrained, there
15441      is little hope of fixing it up.  */
15442 if (TARGET_THUMB2)
15443 return false;
15444
15445 if (load && commute)
15446 {
15447 /* Try reordering registers. */
15448 std::swap (operands[0], operands[1]);
15449 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15450 false, load))
15451 return true;
15452 }
15453
15454 if (const_store)
15455 {
15456 /* If input registers are dead after this pattern, they can be
15457 reordered or replaced by other registers that are free in the
15458 current pattern. */
15459 if (!peep2_reg_dead_p (4, operands[0])
15460 || !peep2_reg_dead_p (4, operands[1]))
15461 return false;
15462
15463 /* Try to reorder the input registers. */
15464 /* For example, the code
15465 mov r0, 0
15466 mov r1, 1
15467 str r1, [r2]
15468 str r0, [r2, #4]
15469 can be transformed into
15470 mov r1, 0
15471 mov r0, 1
15472 strd r0, [r2]
15473 */
15474 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15475 false, false))
15476 {
15477 std::swap (operands[0], operands[1]);
15478 return true;
15479 }
15480
15481 /* Try to find a free DI register. */
15482 CLEAR_HARD_REG_SET (regset);
15483 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15484 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15485 while (true)
15486 {
15487 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15488 if (tmp == NULL_RTX)
15489 return false;
15490
15491 /* DREG must be an even-numbered register in DImode.
15492 Split it into SI registers. */
15493 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15494 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15495 gcc_assert (operands[0] != NULL_RTX);
15496 gcc_assert (operands[1] != NULL_RTX);
15497 gcc_assert (REGNO (operands[0]) % 2 == 0);
15498 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15499
15500 return (operands_ok_ldrd_strd (operands[0], operands[1],
15501 base, offset,
15502 false, load));
15503 }
15504 }
15505
15506 return false;
15507 }
15508
15509
15510
15511 \f
15512 /* Print a symbolic form of X to the debug file, F. */
15513 static void
15514 arm_print_value (FILE *f, rtx x)
15515 {
15516 switch (GET_CODE (x))
15517 {
15518 case CONST_INT:
15519 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15520 return;
15521
15522 case CONST_DOUBLE:
15523 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15524 return;
15525
15526 case CONST_VECTOR:
15527 {
15528 int i;
15529
15530 fprintf (f, "<");
15531 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15532 {
15533 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15534 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15535 fputc (',', f);
15536 }
15537 fprintf (f, ">");
15538 }
15539 return;
15540
15541 case CONST_STRING:
15542 fprintf (f, "\"%s\"", XSTR (x, 0));
15543 return;
15544
15545 case SYMBOL_REF:
15546 fprintf (f, "`%s'", XSTR (x, 0));
15547 return;
15548
15549 case LABEL_REF:
15550 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15551 return;
15552
15553 case CONST:
15554 arm_print_value (f, XEXP (x, 0));
15555 return;
15556
15557 case PLUS:
15558 arm_print_value (f, XEXP (x, 0));
15559 fprintf (f, "+");
15560 arm_print_value (f, XEXP (x, 1));
15561 return;
15562
15563 case PC:
15564 fprintf (f, "pc");
15565 return;
15566
15567 default:
15568 fprintf (f, "????");
15569 return;
15570 }
15571 }
15572 \f
15573 /* Routines for manipulation of the constant pool. */
15574
15575 /* Arm instructions cannot load a large constant directly into a
15576 register; they have to come from a pc relative load. The constant
15577 must therefore be placed in the addressable range of the pc
15578 relative load. Depending on the precise pc relative load
15579 instruction the range is somewhere between 256 bytes and 4k. This
15580 means that we often have to dump a constant inside a function, and
15581 generate code to branch around it.
15582
15583 It is important to minimize this, since the branches will slow
15584 things down and make the code larger.
15585
15586 Normally we can hide the table after an existing unconditional
15587 branch so that there is no interruption of the flow, but in the
15588 worst case the code looks like this:
15589
15590 ldr rn, L1
15591 ...
15592 b L2
15593 align
15594 L1: .long value
15595 L2:
15596 ...
15597
15598 ldr rn, L3
15599 ...
15600 b L4
15601 align
15602 L3: .long value
15603 L4:
15604 ...
15605
15606 We fix this by performing a scan after scheduling, which notices
15607 which instructions need to have their operands fetched from the
15608 constant table and builds the table.
15609
15610 The algorithm starts by building a table of all the constants that
15611 need fixing up and all the natural barriers in the function (places
15612 where a constant table can be dropped without breaking the flow).
15613 For each fixup we note how far the pc-relative replacement will be
15614 able to reach and the offset of the instruction into the function.
15615
15616 Having built the table we then group the fixes together to form
15617 tables that are as large as possible (subject to addressing
15618 constraints) and emit each table of constants after the last
15619 barrier that is within range of all the instructions in the group.
15620 If a group does not contain a barrier, then we forcibly create one
15621 by inserting a jump instruction into the flow. Once the table has
15622 been inserted, the insns are then modified to reference the
15623 relevant entry in the pool.
15624
15625 Possible enhancements to the algorithm (not implemented) are:
15626
15627 1) For some processors and object formats, there may be benefit in
15628 aligning the pools to the start of cache lines; this alignment
15629 would need to be taken into account when calculating addressability
15630 of a pool. */
15631
15632 /* These typedefs are located at the start of this file, so that
15633 they can be used in the prototypes there. This comment is to
15634 remind readers of that fact so that the following structures
15635 can be understood more easily.
15636
15637 typedef struct minipool_node Mnode;
15638 typedef struct minipool_fixup Mfix; */
15639
15640 struct minipool_node
15641 {
15642 /* Doubly linked chain of entries. */
15643 Mnode * next;
15644 Mnode * prev;
15645   /* The maximum offset into the code at which this entry can be placed.  While
15646      pushing fixes for forward references, all entries are sorted in order
15647      of increasing max_address.  */
15648 HOST_WIDE_INT max_address;
15649 /* Similarly for an entry inserted for a backwards ref. */
15650 HOST_WIDE_INT min_address;
15651 /* The number of fixes referencing this entry. This can become zero
15652 if we "unpush" an entry. In this case we ignore the entry when we
15653 come to emit the code. */
15654 int refcount;
15655 /* The offset from the start of the minipool. */
15656 HOST_WIDE_INT offset;
15657 /* The value in table. */
15658 rtx value;
15659 /* The mode of value. */
15660 machine_mode mode;
15661   /* The size of the value.  With iWMMXt enabled,
15662      sizes > 4 also imply an alignment of 8 bytes.  */
15663 int fix_size;
15664 };
15665
15666 struct minipool_fixup
15667 {
15668 Mfix * next;
15669 rtx_insn * insn;
15670 HOST_WIDE_INT address;
15671 rtx * loc;
15672 machine_mode mode;
15673 int fix_size;
15674 rtx value;
15675 Mnode * minipool;
15676 HOST_WIDE_INT forwards;
15677 HOST_WIDE_INT backwards;
15678 };
15679
15680 /* Fixes less than a word need padding out to a word boundary. */
15681 #define MINIPOOL_FIX_SIZE(mode) \
15682 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
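/* For example, an HImode fix (2 bytes) is counted as 4 bytes in the pool,
   while a DImode or DFmode fix keeps its natural size of 8 bytes (and,
   with ARM_DOUBLEWORD_ALIGN, is also placed with 8-byte alignment by the
   code below).  */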
15683
15684 static Mnode * minipool_vector_head;
15685 static Mnode * minipool_vector_tail;
15686 static rtx_code_label *minipool_vector_label;
15687 static int minipool_pad;
15688
15689 /* The linked list of all minipool fixes required for this function. */
15690 Mfix * minipool_fix_head;
15691 Mfix * minipool_fix_tail;
15692 /* The fix entry for the current minipool, once it has been placed. */
15693 Mfix * minipool_barrier;
15694
15695 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15696 #define JUMP_TABLES_IN_TEXT_SECTION 0
15697 #endif
15698
15699 static HOST_WIDE_INT
15700 get_jump_table_size (rtx_jump_table_data *insn)
15701 {
15702   /* ADDR_VECs only take room if read-only data goes into the text
15703      section.  */
15704 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15705 {
15706 rtx body = PATTERN (insn);
15707 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15708 HOST_WIDE_INT size;
15709 HOST_WIDE_INT modesize;
15710
15711 modesize = GET_MODE_SIZE (GET_MODE (body));
15712 size = modesize * XVECLEN (body, elt);
15713 switch (modesize)
15714 {
15715 case 1:
15716 /* Round up size of TBB table to a halfword boundary. */
15717 size = (size + 1) & ~HOST_WIDE_INT_1;
15718 break;
15719 case 2:
15720 /* No padding necessary for TBH. */
15721 break;
15722 case 4:
15723 /* Add two bytes for alignment on Thumb. */
15724 if (TARGET_THUMB)
15725 size += 2;
15726 break;
15727 default:
15728 gcc_unreachable ();
15729 }
15730 return size;
15731 }
15732
15733 return 0;
15734 }
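/* Worked example: a TBB-style table (modesize 1) with five entries
   occupies 5 bytes and is rounded up to 6 so that it ends on a halfword
   boundary, while a Thumb ADDR_VEC of 4-byte entries gets 2 extra bytes
   for alignment; none of this matters unless the table lives in the text
   section.  */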
15735
15736 /* Return the maximum amount of padding that will be inserted before
15737 label LABEL. */
15738
15739 static HOST_WIDE_INT
15740 get_label_padding (rtx label)
15741 {
15742 HOST_WIDE_INT align, min_insn_size;
15743
15744 align = 1 << label_to_alignment (label);
15745 min_insn_size = TARGET_THUMB ? 2 : 4;
15746 return align > min_insn_size ? align - min_insn_size : 0;
15747 }
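/* For instance, a label aligned to 8 bytes on a Thumb target (minimum insn
   size 2) may be preceded by up to 8 - 2 = 6 bytes of padding, whereas a
   4-byte aligned label on an ARM target needs none.  */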
15748
15749 /* Move a minipool fix MP from its current location to before MAX_MP.
15750 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15751 constraints may need updating. */
15752 static Mnode *
15753 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15754 HOST_WIDE_INT max_address)
15755 {
15756 /* The code below assumes these are different. */
15757 gcc_assert (mp != max_mp);
15758
15759 if (max_mp == NULL)
15760 {
15761 if (max_address < mp->max_address)
15762 mp->max_address = max_address;
15763 }
15764 else
15765 {
15766 if (max_address > max_mp->max_address - mp->fix_size)
15767 mp->max_address = max_mp->max_address - mp->fix_size;
15768 else
15769 mp->max_address = max_address;
15770
15771 /* Unlink MP from its current position. Since max_mp is non-null,
15772 mp->prev must be non-null. */
15773 mp->prev->next = mp->next;
15774 if (mp->next != NULL)
15775 mp->next->prev = mp->prev;
15776 else
15777 minipool_vector_tail = mp->prev;
15778
15779 /* Re-insert it before MAX_MP. */
15780 mp->next = max_mp;
15781 mp->prev = max_mp->prev;
15782 max_mp->prev = mp;
15783
15784 if (mp->prev != NULL)
15785 mp->prev->next = mp;
15786 else
15787 minipool_vector_head = mp;
15788 }
15789
15790 /* Save the new entry. */
15791 max_mp = mp;
15792
15793 /* Scan over the preceding entries and adjust their addresses as
15794 required. */
15795 while (mp->prev != NULL
15796 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15797 {
15798 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15799 mp = mp->prev;
15800 }
15801
15802 return max_mp;
15803 }
15804
15805 /* Add a constant to the minipool for a forward reference. Returns the
15806 node added or NULL if the constant will not fit in this pool. */
15807 static Mnode *
15808 add_minipool_forward_ref (Mfix *fix)
15809 {
15810 /* If set, max_mp is the first pool_entry that has a lower
15811 constraint than the one we are trying to add. */
15812 Mnode * max_mp = NULL;
15813 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15814 Mnode * mp;
15815
15816 /* If the minipool starts before the end of FIX->INSN then this FIX
15817      cannot be placed into the current pool.  Furthermore, adding the
15818 new constant pool entry may cause the pool to start FIX_SIZE bytes
15819 earlier. */
15820 if (minipool_vector_head &&
15821 (fix->address + get_attr_length (fix->insn)
15822 >= minipool_vector_head->max_address - fix->fix_size))
15823 return NULL;
15824
15825 /* Scan the pool to see if a constant with the same value has
15826 already been added. While we are doing this, also note the
15827 location where we must insert the constant if it doesn't already
15828 exist. */
15829 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15830 {
15831 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15832 && fix->mode == mp->mode
15833 && (!LABEL_P (fix->value)
15834 || (CODE_LABEL_NUMBER (fix->value)
15835 == CODE_LABEL_NUMBER (mp->value)))
15836 && rtx_equal_p (fix->value, mp->value))
15837 {
15838 /* More than one fix references this entry. */
15839 mp->refcount++;
15840 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15841 }
15842
15843 /* Note the insertion point if necessary. */
15844 if (max_mp == NULL
15845 && mp->max_address > max_address)
15846 max_mp = mp;
15847
15848	  /* If we are inserting an 8-byte aligned quantity and
15849 we have not already found an insertion point, then
15850 make sure that all such 8-byte aligned quantities are
15851 placed at the start of the pool. */
15852 if (ARM_DOUBLEWORD_ALIGN
15853 && max_mp == NULL
15854 && fix->fix_size >= 8
15855 && mp->fix_size < 8)
15856 {
15857 max_mp = mp;
15858 max_address = mp->max_address;
15859 }
15860 }
15861
15862 /* The value is not currently in the minipool, so we need to create
15863 a new entry for it. If MAX_MP is NULL, the entry will be put on
15864 the end of the list since the placement is less constrained than
15865 any existing entry. Otherwise, we insert the new fix before
15866 MAX_MP and, if necessary, adjust the constraints on the other
15867 entries. */
15868 mp = XNEW (Mnode);
15869 mp->fix_size = fix->fix_size;
15870 mp->mode = fix->mode;
15871 mp->value = fix->value;
15872 mp->refcount = 1;
15873 /* Not yet required for a backwards ref. */
15874 mp->min_address = -65536;
15875
15876 if (max_mp == NULL)
15877 {
15878 mp->max_address = max_address;
15879 mp->next = NULL;
15880 mp->prev = minipool_vector_tail;
15881
15882 if (mp->prev == NULL)
15883 {
15884 minipool_vector_head = mp;
15885 minipool_vector_label = gen_label_rtx ();
15886 }
15887 else
15888 mp->prev->next = mp;
15889
15890 minipool_vector_tail = mp;
15891 }
15892 else
15893 {
15894 if (max_address > max_mp->max_address - mp->fix_size)
15895 mp->max_address = max_mp->max_address - mp->fix_size;
15896 else
15897 mp->max_address = max_address;
15898
15899 mp->next = max_mp;
15900 mp->prev = max_mp->prev;
15901 max_mp->prev = mp;
15902 if (mp->prev != NULL)
15903 mp->prev->next = mp;
15904 else
15905 minipool_vector_head = mp;
15906 }
15907
15908 /* Save the new entry. */
15909 max_mp = mp;
15910
15911 /* Scan over the preceding entries and adjust their addresses as
15912 required. */
15913 while (mp->prev != NULL
15914 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15915 {
15916 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15917 mp = mp->prev;
15918 }
15919
15920 return max_mp;
15921 }
15922
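/* Counterpart of move_minipool_fix_forward_ref for backward references:
   move MP to just after MIN_MP.  If MIN_MP is NULL, then MP doesn't need
   moving, but its minimum address constraint may still need raising to
   MIN_ADDRESS.  The offsets and minimum addresses of the following
   entries are then recomputed.  */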
15923 static Mnode *
15924 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15925 HOST_WIDE_INT min_address)
15926 {
15927 HOST_WIDE_INT offset;
15928
15929 /* The code below assumes these are different. */
15930 gcc_assert (mp != min_mp);
15931
15932 if (min_mp == NULL)
15933 {
15934 if (min_address > mp->min_address)
15935 mp->min_address = min_address;
15936 }
15937 else
15938 {
15939 /* We will adjust this below if it is too loose. */
15940 mp->min_address = min_address;
15941
15942 /* Unlink MP from its current position. Since min_mp is non-null,
15943 mp->next must be non-null. */
15944 mp->next->prev = mp->prev;
15945 if (mp->prev != NULL)
15946 mp->prev->next = mp->next;
15947 else
15948 minipool_vector_head = mp->next;
15949
15950 /* Reinsert it after MIN_MP. */
15951 mp->prev = min_mp;
15952 mp->next = min_mp->next;
15953 min_mp->next = mp;
15954 if (mp->next != NULL)
15955 mp->next->prev = mp;
15956 else
15957 minipool_vector_tail = mp;
15958 }
15959
15960 min_mp = mp;
15961
15962 offset = 0;
15963 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15964 {
15965 mp->offset = offset;
15966 if (mp->refcount > 0)
15967 offset += mp->fix_size;
15968
15969 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15970 mp->next->min_address = mp->min_address + mp->fix_size;
15971 }
15972
15973 return min_mp;
15974 }
15975
15976 /* Add a constant to the minipool for a backward reference. Returns the
15977 node added or NULL if the constant will not fit in this pool.
15978
15979 Note that the code for insertion for a backwards reference can be
15980 somewhat confusing because the calculated offsets for each fix do
15981    not take into account the size of the pool (which is still under
15982    construction).  */
15983 static Mnode *
15984 add_minipool_backward_ref (Mfix *fix)
15985 {
15986 /* If set, min_mp is the last pool_entry that has a lower constraint
15987 than the one we are trying to add. */
15988 Mnode *min_mp = NULL;
15989 /* This can be negative, since it is only a constraint. */
15990 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15991 Mnode *mp;
15992
15993 /* If we can't reach the current pool from this insn, or if we can't
15994 insert this entry at the end of the pool without pushing other
15995 fixes out of range, then we don't try. This ensures that we
15996 can't fail later on. */
15997 if (min_address >= minipool_barrier->address
15998 || (minipool_vector_tail->min_address + fix->fix_size
15999 >= minipool_barrier->address))
16000 return NULL;
16001
16002 /* Scan the pool to see if a constant with the same value has
16003 already been added. While we are doing this, also note the
16004 location where we must insert the constant if it doesn't already
16005 exist. */
16006 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16007 {
16008 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16009 && fix->mode == mp->mode
16010 && (!LABEL_P (fix->value)
16011 || (CODE_LABEL_NUMBER (fix->value)
16012 == CODE_LABEL_NUMBER (mp->value)))
16013 && rtx_equal_p (fix->value, mp->value)
16014 /* Check that there is enough slack to move this entry to the
16015 end of the table (this is conservative). */
16016 && (mp->max_address
16017 > (minipool_barrier->address
16018 + minipool_vector_tail->offset
16019 + minipool_vector_tail->fix_size)))
16020 {
16021 mp->refcount++;
16022 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16023 }
16024
16025 if (min_mp != NULL)
16026 mp->min_address += fix->fix_size;
16027 else
16028 {
16029 /* Note the insertion point if necessary. */
16030 if (mp->min_address < min_address)
16031 {
16032	      /* For now, we do not allow the insertion of nodes requiring
16033		 8-byte alignment anywhere but at the start of the pool.  */
16034 if (ARM_DOUBLEWORD_ALIGN
16035 && fix->fix_size >= 8 && mp->fix_size < 8)
16036 return NULL;
16037 else
16038 min_mp = mp;
16039 }
16040 else if (mp->max_address
16041 < minipool_barrier->address + mp->offset + fix->fix_size)
16042 {
16043 /* Inserting before this entry would push the fix beyond
16044 its maximum address (which can happen if we have
16045 re-located a forwards fix); force the new fix to come
16046 after it. */
16047 if (ARM_DOUBLEWORD_ALIGN
16048 && fix->fix_size >= 8 && mp->fix_size < 8)
16049 return NULL;
16050 else
16051 {
16052 min_mp = mp;
16053 min_address = mp->min_address + fix->fix_size;
16054 }
16055 }
16056 /* Do not insert a non-8-byte aligned quantity before 8-byte
16057 aligned quantities. */
16058 else if (ARM_DOUBLEWORD_ALIGN
16059 && fix->fix_size < 8
16060 && mp->fix_size >= 8)
16061 {
16062 min_mp = mp;
16063 min_address = mp->min_address + fix->fix_size;
16064 }
16065 }
16066 }
16067
16068 /* We need to create a new entry. */
16069 mp = XNEW (Mnode);
16070 mp->fix_size = fix->fix_size;
16071 mp->mode = fix->mode;
16072 mp->value = fix->value;
16073 mp->refcount = 1;
16074 mp->max_address = minipool_barrier->address + 65536;
16075
16076 mp->min_address = min_address;
16077
16078 if (min_mp == NULL)
16079 {
16080 mp->prev = NULL;
16081 mp->next = minipool_vector_head;
16082
16083 if (mp->next == NULL)
16084 {
16085 minipool_vector_tail = mp;
16086 minipool_vector_label = gen_label_rtx ();
16087 }
16088 else
16089 mp->next->prev = mp;
16090
16091 minipool_vector_head = mp;
16092 }
16093 else
16094 {
16095 mp->next = min_mp->next;
16096 mp->prev = min_mp;
16097 min_mp->next = mp;
16098
16099 if (mp->next != NULL)
16100 mp->next->prev = mp;
16101 else
16102 minipool_vector_tail = mp;
16103 }
16104
16105 /* Save the new entry. */
16106 min_mp = mp;
16107
16108 if (mp->prev)
16109 mp = mp->prev;
16110 else
16111 mp->offset = 0;
16112
16113 /* Scan over the following entries and adjust their offsets. */
16114 while (mp->next != NULL)
16115 {
16116 if (mp->next->min_address < mp->min_address + mp->fix_size)
16117 mp->next->min_address = mp->min_address + mp->fix_size;
16118
16119 if (mp->refcount)
16120 mp->next->offset = mp->offset + mp->fix_size;
16121 else
16122 mp->next->offset = mp->offset;
16123
16124 mp = mp->next;
16125 }
16126
16127 return min_mp;
16128 }
16129
16130 static void
16131 assign_minipool_offsets (Mfix *barrier)
16132 {
16133 HOST_WIDE_INT offset = 0;
16134 Mnode *mp;
16135
16136 minipool_barrier = barrier;
16137
16138 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16139 {
16140 mp->offset = offset;
16141
16142 if (mp->refcount > 0)
16143 offset += mp->fix_size;
16144 }
16145 }
16146
16147 /* Output the literal table. */
16148 static void
16149 dump_minipool (rtx_insn *scan)
16150 {
16151 Mnode * mp;
16152 Mnode * nmp;
16153 int align64 = 0;
16154
16155 if (ARM_DOUBLEWORD_ALIGN)
16156 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16157 if (mp->refcount > 0 && mp->fix_size >= 8)
16158 {
16159 align64 = 1;
16160 break;
16161 }
16162
16163 if (dump_file)
16164 fprintf (dump_file,
16165 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16166 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16167
16168 scan = emit_label_after (gen_label_rtx (), scan);
16169 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16170 scan = emit_label_after (minipool_vector_label, scan);
16171
16172 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16173 {
16174 if (mp->refcount > 0)
16175 {
16176 if (dump_file)
16177 {
16178 fprintf (dump_file,
16179 ";; Offset %u, min %ld, max %ld ",
16180 (unsigned) mp->offset, (unsigned long) mp->min_address,
16181 (unsigned long) mp->max_address);
16182 arm_print_value (dump_file, mp->value);
16183 fputc ('\n', dump_file);
16184 }
16185
16186 rtx val = copy_rtx (mp->value);
16187
16188 switch (GET_MODE_SIZE (mp->mode))
16189 {
16190 #ifdef HAVE_consttable_1
16191 case 1:
16192 scan = emit_insn_after (gen_consttable_1 (val), scan);
16193 break;
16194
16195 #endif
16196 #ifdef HAVE_consttable_2
16197 case 2:
16198 scan = emit_insn_after (gen_consttable_2 (val), scan);
16199 break;
16200
16201 #endif
16202 #ifdef HAVE_consttable_4
16203 case 4:
16204 scan = emit_insn_after (gen_consttable_4 (val), scan);
16205 break;
16206
16207 #endif
16208 #ifdef HAVE_consttable_8
16209 case 8:
16210 scan = emit_insn_after (gen_consttable_8 (val), scan);
16211 break;
16212
16213 #endif
16214 #ifdef HAVE_consttable_16
16215 case 16:
16216 scan = emit_insn_after (gen_consttable_16 (val), scan);
16217 break;
16218
16219 #endif
16220 default:
16221 gcc_unreachable ();
16222 }
16223 }
16224
16225 nmp = mp->next;
16226 free (mp);
16227 }
16228
16229 minipool_vector_head = minipool_vector_tail = NULL;
16230 scan = emit_insn_after (gen_consttable_end (), scan);
16231 scan = emit_barrier_after (scan);
16232 }
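/* Sketch of the emitted layout (illustrative only): after SCAN we get a
   fresh label, an alignment request of 4 bytes (8 when any live entry needs
   doubleword alignment), minipool_vector_label itself, then one
   consttable_{1,2,4,8,16} entry per constant with a non-zero refcount, in
   offset order, terminated by a consttable_end and a barrier. */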
16233
16234 /* Return the cost of forcibly inserting a barrier after INSN. */
16235 static int
16236 arm_barrier_cost (rtx_insn *insn)
16237 {
16238 /* Basing the location of the pool on the loop depth is preferable,
16239 but at the moment, the basic block information seems to be
16240 corrupted by this stage of the compilation. */
16241 int base_cost = 50;
16242 rtx_insn *next = next_nonnote_insn (insn);
16243
16244 if (next != NULL && LABEL_P (next))
16245 base_cost -= 20;
16246
16247 switch (GET_CODE (insn))
16248 {
16249 case CODE_LABEL:
16250 /* It will always be better to place the table before the label, rather
16251 than after it. */
16252 return 50;
16253
16254 case INSN:
16255 case CALL_INSN:
16256 return base_cost;
16257
16258 case JUMP_INSN:
16259 return base_cost - 10;
16260
16261 default:
16262 return base_cost + 10;
16263 }
16264 }
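/* Worked example (illustrative only): an ordinary INSN or CALL_INSN costs
   the base of 50, a JUMP_INSN costs 40, and anything unusual costs 60; a
   CODE_LABEL always costs 50. If the following non-note insn is a label,
   20 is subtracted from the base first, so a JUMP_INSN followed by a label
   costs only 20 and is the preferred place to drop a barrier. */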
16265
16266 /* Find the best place in the insn stream in the range
16267 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16268 Create the barrier by inserting a jump and add a new fix entry for
16269 it. */
16270 static Mfix *
16271 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16272 {
16273 HOST_WIDE_INT count = 0;
16274 rtx_barrier *barrier;
16275 rtx_insn *from = fix->insn;
16276 /* The instruction after which we will insert the jump. */
16277 rtx_insn *selected = NULL;
16278 int selected_cost;
16279 /* The address at which the jump instruction will be placed. */
16280 HOST_WIDE_INT selected_address;
16281 Mfix * new_fix;
16282 HOST_WIDE_INT max_count = max_address - fix->address;
16283 rtx_code_label *label = gen_label_rtx ();
16284
16285 selected_cost = arm_barrier_cost (from);
16286 selected_address = fix->address;
16287
16288 while (from && count < max_count)
16289 {
16290 rtx_jump_table_data *tmp;
16291 int new_cost;
16292
16293 /* This code shouldn't have been called if there was a natural barrier
16294 within range. */
16295 gcc_assert (!BARRIER_P (from));
16296
16297 /* Count the length of this insn. This must stay in sync with the
16298 code that pushes minipool fixes. */
16299 if (LABEL_P (from))
16300 count += get_label_padding (from);
16301 else
16302 count += get_attr_length (from);
16303
16304 /* If there is a jump table, add its length. */
16305 if (tablejump_p (from, NULL, &tmp))
16306 {
16307 count += get_jump_table_size (tmp);
16308
16309 /* Jump tables aren't in a basic block, so base the cost on
16310 the dispatch insn. If we select this location, we will
16311 still put the pool after the table. */
16312 new_cost = arm_barrier_cost (from);
16313
16314 if (count < max_count
16315 && (!selected || new_cost <= selected_cost))
16316 {
16317 selected = tmp;
16318 selected_cost = new_cost;
16319 selected_address = fix->address + count;
16320 }
16321
16322 /* Continue after the dispatch table. */
16323 from = NEXT_INSN (tmp);
16324 continue;
16325 }
16326
16327 new_cost = arm_barrier_cost (from);
16328
16329 if (count < max_count
16330 && (!selected || new_cost <= selected_cost))
16331 {
16332 selected = from;
16333 selected_cost = new_cost;
16334 selected_address = fix->address + count;
16335 }
16336
16337 from = NEXT_INSN (from);
16338 }
16339
16340 /* Make sure that we found a place to insert the jump. */
16341 gcc_assert (selected);
16342
16343 /* Make sure we do not split a call and its corresponding
16344 CALL_ARG_LOCATION note. */
16345 if (CALL_P (selected))
16346 {
16347 rtx_insn *next = NEXT_INSN (selected);
16348 if (next && NOTE_P (next)
16349 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16350 selected = next;
16351 }
16352
16353 /* Create a new JUMP_INSN that branches around a barrier. */
16354 from = emit_jump_insn_after (gen_jump (label), selected);
16355 JUMP_LABEL (from) = label;
16356 barrier = emit_barrier_after (from);
16357 emit_label_after (label, barrier);
16358
16359 /* Create a minipool barrier entry for the new barrier. */
16360 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16361 new_fix->insn = barrier;
16362 new_fix->address = selected_address;
16363 new_fix->next = fix->next;
16364 fix->next = new_fix;
16365
16366 return new_fix;
16367 }
16368
16369 /* Record that there is a natural barrier in the insn stream at
16370 ADDRESS. */
16371 static void
16372 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16373 {
16374 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16375
16376 fix->insn = insn;
16377 fix->address = address;
16378
16379 fix->next = NULL;
16380 if (minipool_fix_head != NULL)
16381 minipool_fix_tail->next = fix;
16382 else
16383 minipool_fix_head = fix;
16384
16385 minipool_fix_tail = fix;
16386 }
16387
16388 /* Record INSN, which will need fixing up to load a value from the
16389 minipool. ADDRESS is the offset of the insn since the start of the
16390 function; LOC is a pointer to the part of the insn which requires
16391 fixing; VALUE is the constant that must be loaded, which is of type
16392 MODE. */
16393 static void
16394 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16395 machine_mode mode, rtx value)
16396 {
16397 gcc_assert (!arm_disable_literal_pool);
16398 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16399
16400 fix->insn = insn;
16401 fix->address = address;
16402 fix->loc = loc;
16403 fix->mode = mode;
16404 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16405 fix->value = value;
16406 fix->forwards = get_attr_pool_range (insn);
16407 fix->backwards = get_attr_neg_pool_range (insn);
16408 fix->minipool = NULL;
16409
16410 /* If an insn doesn't have a range defined for it, then it isn't
16411 expecting to be reworked by this code. Better to stop now than
16412 to generate duff assembly code. */
16413 gcc_assert (fix->forwards || fix->backwards);
16414
16415 /* If an entry requires 8-byte alignment then assume all constant pools
16416 require 4 bytes of padding. Trying to do this later on a per-pool
16417 basis is awkward because existing pool entries have to be modified. */
16418 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16419 minipool_pad = 4;
16420
16421 if (dump_file)
16422 {
16423 fprintf (dump_file,
16424 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16425 GET_MODE_NAME (mode),
16426 INSN_UID (insn), (unsigned long) address,
16427 -1 * (long)fix->backwards, (long)fix->forwards);
16428 arm_print_value (dump_file, fix->value);
16429 fprintf (dump_file, "\n");
16430 }
16431
16432 /* Add it to the chain of fixes. */
16433 fix->next = NULL;
16434
16435 if (minipool_fix_head != NULL)
16436 minipool_fix_tail->next = fix;
16437 else
16438 minipool_fix_head = fix;
16439
16440 minipool_fix_tail = fix;
16441 }
16442
16443 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16444 Returns the number of insns needed, or 99 if we always want to synthesize
16445 the value. */
16446 int
16447 arm_max_const_double_inline_cost ()
16448 {
16449 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16450 }
16451
16452 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16453 Returns the number of insns needed, or 99 if we don't know how to
16454 do it. */
16455 int
16456 arm_const_double_inline_cost (rtx val)
16457 {
16458 rtx lowpart, highpart;
16459 machine_mode mode;
16460
16461 mode = GET_MODE (val);
16462
16463 if (mode == VOIDmode)
16464 mode = DImode;
16465
16466 gcc_assert (GET_MODE_SIZE (mode) == 8);
16467
16468 lowpart = gen_lowpart (SImode, val);
16469 highpart = gen_highpart_mode (SImode, mode, val);
16470
16471 gcc_assert (CONST_INT_P (lowpart));
16472 gcc_assert (CONST_INT_P (highpart));
16473
16474 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16475 NULL_RTX, NULL_RTX, 0, 0)
16476 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16477 NULL_RTX, NULL_RTX, 0, 0));
16478 }
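/* Worked example (illustrative only; exact costs come from
   arm_gen_constant): for the DImode constant 0x00000001ff000000 the low
   word is 0xff000000 and the high word is 0x00000001. Each is a valid ARM
   immediate, so each half costs a single insn and the total returned is 2.
   A value such as 0x0001000100010001 needs more insns per half and so
   returns a larger cost. */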
16479
16480 /* Cost of loading a SImode constant. */
16481 static inline int
16482 arm_const_inline_cost (enum rtx_code code, rtx val)
16483 {
16484 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16485 NULL_RTX, NULL_RTX, 1, 0);
16486 }
16487
16488 /* Return true if it is worthwhile to split a 64-bit constant into two
16489 32-bit operations. This is the case if optimizing for size, or
16490 if we have load delay slots, or if one 32-bit part can be done with
16491 a single data operation. */
16492 bool
16493 arm_const_double_by_parts (rtx val)
16494 {
16495 machine_mode mode = GET_MODE (val);
16496 rtx part;
16497
16498 if (optimize_size || arm_ld_sched)
16499 return true;
16500
16501 if (mode == VOIDmode)
16502 mode = DImode;
16503
16504 part = gen_highpart_mode (SImode, mode, val);
16505
16506 gcc_assert (CONST_INT_P (part));
16507
16508 if (const_ok_for_arm (INTVAL (part))
16509 || const_ok_for_arm (~INTVAL (part)))
16510 return true;
16511
16512 part = gen_lowpart (SImode, val);
16513
16514 gcc_assert (CONST_INT_P (part));
16515
16516 if (const_ok_for_arm (INTVAL (part))
16517 || const_ok_for_arm (~INTVAL (part)))
16518 return true;
16519
16520 return false;
16521 }
16522
16523 /* Return true if it is possible to inline both the high and low parts
16524 of a 64-bit constant into 32-bit data processing instructions. */
16525 bool
16526 arm_const_double_by_immediates (rtx val)
16527 {
16528 machine_mode mode = GET_MODE (val);
16529 rtx part;
16530
16531 if (mode == VOIDmode)
16532 mode = DImode;
16533
16534 part = gen_highpart_mode (SImode, mode, val);
16535
16536 gcc_assert (CONST_INT_P (part));
16537
16538 if (!const_ok_for_arm (INTVAL (part)))
16539 return false;
16540
16541 part = gen_lowpart (SImode, val);
16542
16543 gcc_assert (CONST_INT_P (part));
16544
16545 if (!const_ok_for_arm (INTVAL (part)))
16546 return false;
16547
16548 return true;
16549 }
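/* Worked example (illustrative only): a valid ARM data-processing immediate
   is an 8-bit value rotated right by an even amount, so 0xff000000 and
   0x0000ff00 qualify while 0x00000101 does not. Hence a DImode value of
   0x0000ff00000000ff can be built entirely from immediates, whereas
   0x0000ff0000000101 cannot and has to be synthesized or loaded from the
   pool. */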
16550
16551 /* Scan INSN and note any of its operands that need fixing.
16552 If DO_PUSHES is false we do not actually push any of the fixups
16553 needed. */
16554 static void
16555 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16556 {
16557 int opno;
16558
16559 extract_constrain_insn (insn);
16560
16561 if (recog_data.n_alternatives == 0)
16562 return;
16563
16564 /* Fill in recog_op_alt with information about the constraints of
16565 this insn. */
16566 preprocess_constraints (insn);
16567
16568 const operand_alternative *op_alt = which_op_alt ();
16569 for (opno = 0; opno < recog_data.n_operands; opno++)
16570 {
16571 /* Things we need to fix can only occur in inputs. */
16572 if (recog_data.operand_type[opno] != OP_IN)
16573 continue;
16574
16575 /* If this alternative is a memory reference, then any mention
16576 of constants in this alternative is really to fool reload
16577 into allowing us to accept one there. We need to fix them up
16578 now so that we output the right code. */
16579 if (op_alt[opno].memory_ok)
16580 {
16581 rtx op = recog_data.operand[opno];
16582
16583 if (CONSTANT_P (op))
16584 {
16585 if (do_pushes)
16586 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16587 recog_data.operand_mode[opno], op);
16588 }
16589 else if (MEM_P (op)
16590 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16591 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16592 {
16593 if (do_pushes)
16594 {
16595 rtx cop = avoid_constant_pool_reference (op);
16596
16597 /* Casting the address of something to a mode narrower
16598 than a word can cause avoid_constant_pool_reference()
16599 to return the pool reference itself. That's no good to
16600 us here. Let's just hope that we can use the
16601 constant pool value directly. */
16602 if (op == cop)
16603 cop = get_pool_constant (XEXP (op, 0));
16604
16605 push_minipool_fix (insn, address,
16606 recog_data.operand_loc[opno],
16607 recog_data.operand_mode[opno], cop);
16608 }
16609
16610 }
16611 }
16612 }
16613
16614 return;
16615 }
16616
16617 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16618 and unions in the context of ARMv8-M Security Extensions. It is used as a
16619 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16620 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16621 or four masks, depending on whether it is being computed for a
16622 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16623 respectively. The tree for the type of the argument or a field within an
16624 argument is passed in ARG_TYPE, the current register this argument or field
16625 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16626 argument or field starts at is passed in STARTING_BIT and the last used bit
16627 is kept in LAST_USED_BIT which is also updated accordingly. */
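/* Worked example (illustrative only, assuming the usual 16-bit alignment of
   'short'): for an argument of type
       struct { char a; short b; }
   passed in r0, field 'a' occupies bits 0-7 and field 'b' bits 16-31, so
   bits 8-15 are padding. The function records 0x0000ff00 in
   padding_bits_to_clear[0], marks r0 as used in the returned mask, and
   advances *regno past r0. */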
16628
16629 static unsigned HOST_WIDE_INT
16630 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16631 uint32_t * padding_bits_to_clear,
16632 unsigned starting_bit, int * last_used_bit)
16633
16634 {
16635 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16636
16637 if (TREE_CODE (arg_type) == RECORD_TYPE)
16638 {
16639 unsigned current_bit = starting_bit;
16640 tree field;
16641 long int offset, size;
16642
16643
16644 field = TYPE_FIELDS (arg_type);
16645 while (field)
16646 {
16647 /* The offset within a structure is always an offset from
16648 the start of that structure. Make sure we take that into account
16649 in the calculation of the register-based offset that we use here. */
16650 offset = starting_bit;
16651 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16652 offset %= 32;
16653
16654 /* This is the actual size of the field, for bitfields this is the
16655 bitfield width and not the container size. */
16656 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16657
16658 if (*last_used_bit != offset)
16659 {
16660 if (offset < *last_used_bit)
16661 {
16662 /* This field's offset is before the 'last_used_bit', which
16663 means this field goes in the next register. So we need to
16664 pad the rest of the current register and increase the
16665 register number. */
16666 uint32_t mask;
16667 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16668 mask++;
16669
16670 padding_bits_to_clear[*regno] |= mask;
16671 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16672 (*regno)++;
16673 }
16674 else
16675 {
16676 /* Otherwise we pad the bits between the last field's end and
16677 the start of the new field. */
16678 uint32_t mask;
16679
16680 mask = ((uint32_t)-1) >> (32 - offset);
16681 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16682 padding_bits_to_clear[*regno] |= mask;
16683 }
16684 current_bit = offset;
16685 }
16686
16687 /* Calculate further padding bits for inner structs/unions too. */
16688 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16689 {
16690 *last_used_bit = current_bit;
16691 not_to_clear_reg_mask
16692 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16693 padding_bits_to_clear, offset,
16694 last_used_bit);
16695 }
16696 else
16697 {
16698 /* Update 'current_bit' with this field's size. If the
16699 'current_bit' lies in a subsequent register, update 'regno' and
16700 reset 'current_bit' to point to the current bit in that new
16701 register. */
16702 current_bit += size;
16703 while (current_bit >= 32)
16704 {
16705 current_bit-=32;
16706 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16707 (*regno)++;
16708 }
16709 *last_used_bit = current_bit;
16710 }
16711
16712 field = TREE_CHAIN (field);
16713 }
16714 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16715 }
16716 else if (TREE_CODE (arg_type) == UNION_TYPE)
16717 {
16718 tree field, field_t;
16719 int i, regno_t, field_size;
16720 int max_reg = -1;
16721 int max_bit = -1;
16722 uint32_t mask;
16723 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16724 = {-1, -1, -1, -1};
16725
16726 /* To compute the padding bits in a union we only consider bits as
16727 padding bits if, for every field in the union, they are either padding
16728 bits or fall outside that field's size. */
16729 field = TYPE_FIELDS (arg_type);
16730 while (field)
16731 {
16732 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16733 = {0U, 0U, 0U, 0U};
16734 int last_used_bit_t = *last_used_bit;
16735 regno_t = *regno;
16736 field_t = TREE_TYPE (field);
16737
16738 /* If the field's type is either a record or a union make sure to
16739 compute their padding bits too. */
16740 if (RECORD_OR_UNION_TYPE_P (field_t))
16741 not_to_clear_reg_mask
16742 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16743 &padding_bits_to_clear_t[0],
16744 starting_bit, &last_used_bit_t);
16745 else
16746 {
16747 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16748 regno_t = (field_size / 32) + *regno;
16749 last_used_bit_t = (starting_bit + field_size) % 32;
16750 }
16751
16752 for (i = *regno; i < regno_t; i++)
16753 {
16754 /* For all but the last register used by this field, only keep the
16755 padding bits that were padding bits in this field. */
16756 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16757 }
16758
16759 /* For the last register, keep all padding bits that were padding
16760 bits in this field and any padding bits that are still valid
16761 as padding bits but fall outside of this field's size. */
16762 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16763 padding_bits_to_clear_res[regno_t]
16764 &= padding_bits_to_clear_t[regno_t] | mask;
16765
16766 /* Update the maximum size of the fields in terms of registers used
16767 ('max_reg') and the 'last_used_bit' in said register. */
16768 if (max_reg < regno_t)
16769 {
16770 max_reg = regno_t;
16771 max_bit = last_used_bit_t;
16772 }
16773 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16774 max_bit = last_used_bit_t;
16775
16776 field = TREE_CHAIN (field);
16777 }
16778
16779 /* Update the current padding_bits_to_clear using the intersection of the
16780 padding bits of all the fields. */
16781 for (i=*regno; i < max_reg; i++)
16782 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16783
16784 /* Do not keep trailing padding bits; we do not know yet whether this
16785 is the end of the argument. */
16786 mask = ((uint32_t) 1 << max_bit) - 1;
16787 padding_bits_to_clear[max_reg]
16788 |= padding_bits_to_clear_res[max_reg] & mask;
16789
16790 *regno = max_reg;
16791 *last_used_bit = max_bit;
16792 }
16793 else
16794 /* This function should only be used for structs and unions. */
16795 gcc_unreachable ();
16796
16797 return not_to_clear_reg_mask;
16798 }
16799
16800 /* In the context of ARMv8-M Security Extensions, this function is used for both
16801 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16802 registers are used when returning or passing arguments, which is then
16803 returned as a mask. It will also compute a mask to indicate padding/unused
16804 bits for each of these registers, and passes this through the
16805 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16806 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16807 the starting register used to pass this argument or return value is passed
16808 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16809 for struct and union types. */
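/* Worked example (illustrative only): for a scalar DImode argument starting
   in r0, ARM_NUM_REGS gives 2, so bits 0 and 1 of the returned mask are set
   (r0 and r1 must survive the clearing) and no padding bits are recorded.
   For the struct { char a; short b; } example above, bit 0 is set and
   padding_bits_to_clear[0] ends up as 0x0000ff00; registers that carry no
   part of the argument are left out of the mask and so do get cleared. */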
16810
16811 static unsigned HOST_WIDE_INT
16812 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16813 uint32_t * padding_bits_to_clear)
16814
16815 {
16816 int last_used_bit = 0;
16817 unsigned HOST_WIDE_INT not_to_clear_mask;
16818
16819 if (RECORD_OR_UNION_TYPE_P (arg_type))
16820 {
16821 not_to_clear_mask
16822 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16823 padding_bits_to_clear, 0,
16824 &last_used_bit);
16825
16826
16827 /* If the 'last_used_bit' is not zero, that means we are still using a
16828 part of the last 'regno'. In such cases we must clear the trailing
16829 bits. Otherwise we are not using regno and we should mark it as to
16830 be cleared. */
16831 if (last_used_bit != 0)
16832 padding_bits_to_clear[regno]
16833 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16834 else
16835 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16836 }
16837 else
16838 {
16839 not_to_clear_mask = 0;
16840 /* We are not dealing with structs or unions, so these arguments may be
16841 passed in floating-point registers too. In some cases a BLKmode is
16842 used when returning or passing arguments in multiple VFP registers. */
16843 if (GET_MODE (arg_rtx) == BLKmode)
16844 {
16845 int i, arg_regs;
16846 rtx reg;
16847
16848 /* This should really only occur when dealing with the hard-float
16849 ABI. */
16850 gcc_assert (TARGET_HARD_FLOAT_ABI);
16851
16852 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16853 {
16854 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16855 gcc_assert (REG_P (reg));
16856
16857 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16858
16859 /* If we are dealing with DF mode, make sure we don't
16860 clear either of the registers it addresses. */
16861 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16862 if (arg_regs > 1)
16863 {
16864 unsigned HOST_WIDE_INT mask;
16865 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16866 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16867 not_to_clear_mask |= mask;
16868 }
16869 }
16870 }
16871 else
16872 {
16873 /* Otherwise we can rely on the MODE to determine how many registers
16874 are being used by this argument. */
16875 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16876 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16877 if (arg_regs > 1)
16878 {
16879 unsigned HOST_WIDE_INT
16880 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16881 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16882 not_to_clear_mask |= mask;
16883 }
16884 }
16885 }
16886
16887 return not_to_clear_mask;
16888 }
16889
16890 /* Clear caller-saved registers not used to pass arguments before a
16891 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
16892 registers is done in the __gnu_cmse_nonsecure_call libcall.
16893 See libgcc/config/arm/cmse_nonsecure_call.S. */
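/* Illustrative source-level sketch (the names here are only an example):
   with -mcmse, a call through a non-secure function pointer such as
       typedef int __attribute__ ((cmse_nonsecure_call)) ns_fn (int);
       ns_fn *callee;
       ...
       callee (x);
   reaches this pass as a call wrapped in UNSPEC_NONSECURE_MEM. Only r0
   carries an argument here, so the remaining caller-saved argument
   registers (typically r1-r3 and, for -mfloat-abi=hard, d0-d7) are zeroed
   before the call, and any padding bits within r0 are masked as computed
   above. */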
16894
16895 static void
16896 cmse_nonsecure_call_clear_caller_saved (void)
16897 {
16898 basic_block bb;
16899
16900 FOR_EACH_BB_FN (bb, cfun)
16901 {
16902 rtx_insn *insn;
16903
16904 FOR_BB_INSNS (bb, insn)
16905 {
16906 uint64_t to_clear_mask, float_mask;
16907 rtx_insn *seq;
16908 rtx pat, call, unspec, reg, cleared_reg, tmp;
16909 unsigned int regno, maxregno;
16910 rtx address;
16911 CUMULATIVE_ARGS args_so_far_v;
16912 cumulative_args_t args_so_far;
16913 tree arg_type, fntype;
16914 bool using_r4, first_param = true;
16915 function_args_iterator args_iter;
16916 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16917 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16918
16919 if (!NONDEBUG_INSN_P (insn))
16920 continue;
16921
16922 if (!CALL_P (insn))
16923 continue;
16924
16925 pat = PATTERN (insn);
16926 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16927 call = XVECEXP (pat, 0, 0);
16928
16929 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16930 if (GET_CODE (call) == SET)
16931 call = SET_SRC (call);
16932
16933 /* Check if it is a cmse_nonsecure_call. */
16934 unspec = XEXP (call, 0);
16935 if (GET_CODE (unspec) != UNSPEC
16936 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16937 continue;
16938
16939 /* Determine the caller-saved registers we need to clear. */
16940 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16941 maxregno = NUM_ARG_REGS - 1;
16942 /* Only look at the caller-saved floating point registers in case of
16943 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16944 lazy store and loads which clear both caller- and callee-saved
16945 registers. */
16946 if (TARGET_HARD_FLOAT_ABI)
16947 {
16948 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16949 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16950 to_clear_mask |= float_mask;
16951 maxregno = D7_VFP_REGNUM;
16952 }
16953
16954 /* Make sure the register used to hold the function address is not
16955 cleared. */
16956 address = RTVEC_ELT (XVEC (unspec, 0), 0);
16957 gcc_assert (MEM_P (address));
16958 gcc_assert (REG_P (XEXP (address, 0)));
16959 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
16960
16961 /* Set basic block of call insn so that df rescan is performed on
16962 insns inserted here. */
16963 set_block_for_insn (insn, bb);
16964 df_set_flags (DF_DEFER_INSN_RESCAN);
16965 start_sequence ();
16966
16967 /* Make sure the scheduler doesn't schedule other insns beyond
16968 here. */
16969 emit_insn (gen_blockage ());
16970
16971 /* Walk through all arguments and clear registers
16972 appropriately. */
16973 fntype = TREE_TYPE (MEM_EXPR (address));
16974 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
16975 NULL_TREE);
16976 args_so_far = pack_cumulative_args (&args_so_far_v);
16977 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
16978 {
16979 rtx arg_rtx;
16980 machine_mode arg_mode = TYPE_MODE (arg_type);
16981
16982 if (VOID_TYPE_P (arg_type))
16983 continue;
16984
16985 if (!first_param)
16986 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
16987 true);
16988
16989 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
16990 true);
16991 gcc_assert (REG_P (arg_rtx));
16992 to_clear_mask
16993 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
16994 REGNO (arg_rtx),
16995 padding_bits_to_clear_ptr);
16996
16997 first_param = false;
16998 }
16999
17000 /* Clear padding bits where needed. */
17001 cleared_reg = XEXP (address, 0);
17002 reg = gen_rtx_REG (SImode, IP_REGNUM);
17003 using_r4 = false;
17004 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17005 {
17006 if (padding_bits_to_clear[regno] == 0)
17007 continue;
17008
17009 /* If this is a Thumb-1 target, copy the address of the function
17010 we are calling from 'r4' into 'ip' such that we can use r4 to
17011 clear the unused bits in the arguments. */
17012 if (TARGET_THUMB1 && !using_r4)
17013 {
17014 using_r4 = true;
17015 reg = cleared_reg;
17016 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17017 reg);
17018 }
17019
17020 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17021 emit_move_insn (reg, tmp);
17022 /* Also fill the top half of the negated
17023 padding_bits_to_clear. */
17024 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17025 {
17026 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17027 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17028 GEN_INT (16),
17029 GEN_INT (16)),
17030 tmp));
17031 }
17032
17033 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17034 gen_rtx_REG (SImode, regno),
17035 reg));
17036
17037 }
17038 if (using_r4)
17039 emit_move_insn (cleared_reg,
17040 gen_rtx_REG (SImode, IP_REGNUM));
17041
17042 /* We use right shift and left shift to clear the LSB of the address
17043 we jump to instead of using bic, to avoid having to use an extra
17044 register on Thumb-1. */
17045 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17046 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17047 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17048 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17049
17050 /* Clear all registers that could leak information before doing a
17051 non-secure call. */
17052 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17053 {
17054 if (!(to_clear_mask & (1LL << regno)))
17055 continue;
17056
17057 /* If regno is an even vfp register and its successor is also to
17058 be cleared, use vmov. */
17059 if (IS_VFP_REGNUM (regno))
17060 {
17061 if (TARGET_VFP_DOUBLE
17062 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17063 && to_clear_mask & (1LL << (regno + 1)))
17064 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17065 CONST0_RTX (DFmode));
17066 else
17067 emit_move_insn (gen_rtx_REG (SFmode, regno),
17068 CONST0_RTX (SFmode));
17069 }
17070 else
17071 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17072 }
17073
17074 seq = get_insns ();
17075 end_sequence ();
17076 emit_insn_before (seq, insn);
17077
17078 }
17079 }
17080 }
17081
17082 /* Rewrite move insn into subtract of 0 if the condition codes will
17083 be useful in next conditional jump insn. */
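/* Illustrative sketch (conceptual assembly only): given a Thumb-1 sequence
   of the shape
       mov  r2, r1
       ...
       cmp  r2, #0
       beq  .L1
   where the intervening insns neither set the condition codes nor clobber
   the compared register, the move is rewritten as a subtract of zero
   (subs r2, r1, #0). That insn sets the condition codes itself, so the
   zero comparison feeding the cbranchsi4 insn can reuse them instead of
   needing a separate compare. */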
17084
17085 static void
17086 thumb1_reorg (void)
17087 {
17088 basic_block bb;
17089
17090 FOR_EACH_BB_FN (bb, cfun)
17091 {
17092 rtx dest, src;
17093 rtx cmp, op0, op1, set = NULL;
17094 rtx_insn *prev, *insn = BB_END (bb);
17095 bool insn_clobbered = false;
17096
17097 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17098 insn = PREV_INSN (insn);
17099
17100 /* Find the last cbranchsi4_insn in basic block BB. */
17101 if (insn == BB_HEAD (bb)
17102 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17103 continue;
17104
17105 /* Get the register with which we are comparing. */
17106 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17107 op0 = XEXP (cmp, 0);
17108 op1 = XEXP (cmp, 1);
17109
17110 /* Check that comparison is against ZERO. */
17111 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17112 continue;
17113
17114 /* Find the first flag setting insn before INSN in basic block BB. */
17115 gcc_assert (insn != BB_HEAD (bb));
17116 for (prev = PREV_INSN (insn);
17117 (!insn_clobbered
17118 && prev != BB_HEAD (bb)
17119 && (NOTE_P (prev)
17120 || DEBUG_INSN_P (prev)
17121 || ((set = single_set (prev)) != NULL
17122 && get_attr_conds (prev) == CONDS_NOCOND)));
17123 prev = PREV_INSN (prev))
17124 {
17125 if (reg_set_p (op0, prev))
17126 insn_clobbered = true;
17127 }
17128
17129 /* Skip if op0 is clobbered by insn other than prev. */
17130 if (insn_clobbered)
17131 continue;
17132
17133 if (!set)
17134 continue;
17135
17136 dest = SET_DEST (set);
17137 src = SET_SRC (set);
17138 if (!low_register_operand (dest, SImode)
17139 || !low_register_operand (src, SImode))
17140 continue;
17141
17142 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17143 in INSN. Both src and dest of the move insn are checked. */
17144 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17145 {
17146 dest = copy_rtx (dest);
17147 src = copy_rtx (src);
17148 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17149 PATTERN (prev) = gen_rtx_SET (dest, src);
17150 INSN_CODE (prev) = -1;
17151 /* Set test register in INSN to dest. */
17152 XEXP (cmp, 0) = copy_rtx (dest);
17153 INSN_CODE (insn) = -1;
17154 }
17155 }
17156 }
17157
17158 /* Convert instructions to their cc-clobbering variant if possible, since
17159 that allows us to use smaller encodings. */
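/* Illustrative sketch: in Thumb-2, "and r0, r0, r1" needs a 32-bit
   encoding, while the flag-setting form "ands r0, r1" has a 16-bit one.
   When the walk below finds such a SET and the condition codes are dead at
   that point, it adds a CC_REGNUM clobber to the pattern (swapping
   commutative operands when that is what makes the 16-bit form match) so
   that the shorter encoding can be used. */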
17160
17161 static void
17162 thumb2_reorg (void)
17163 {
17164 basic_block bb;
17165 regset_head live;
17166
17167 INIT_REG_SET (&live);
17168
17169 /* We are freeing block_for_insn in the toplev to keep compatibility
17170 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17171 compute_bb_for_insn ();
17172 df_analyze ();
17173
17174 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17175
17176 FOR_EACH_BB_FN (bb, cfun)
17177 {
17178 if ((current_tune->disparage_flag_setting_t16_encodings
17179 == tune_params::DISPARAGE_FLAGS_ALL)
17180 && optimize_bb_for_speed_p (bb))
17181 continue;
17182
17183 rtx_insn *insn;
17184 Convert_Action action = SKIP;
17185 Convert_Action action_for_partial_flag_setting
17186 = ((current_tune->disparage_flag_setting_t16_encodings
17187 != tune_params::DISPARAGE_FLAGS_NEITHER)
17188 && optimize_bb_for_speed_p (bb))
17189 ? SKIP : CONV;
17190
17191 COPY_REG_SET (&live, DF_LR_OUT (bb));
17192 df_simulate_initialize_backwards (bb, &live);
17193 FOR_BB_INSNS_REVERSE (bb, insn)
17194 {
17195 if (NONJUMP_INSN_P (insn)
17196 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17197 && GET_CODE (PATTERN (insn)) == SET)
17198 {
17199 action = SKIP;
17200 rtx pat = PATTERN (insn);
17201 rtx dst = XEXP (pat, 0);
17202 rtx src = XEXP (pat, 1);
17203 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17204
17205 if (UNARY_P (src) || BINARY_P (src))
17206 op0 = XEXP (src, 0);
17207
17208 if (BINARY_P (src))
17209 op1 = XEXP (src, 1);
17210
17211 if (low_register_operand (dst, SImode))
17212 {
17213 switch (GET_CODE (src))
17214 {
17215 case PLUS:
17216 /* Adding two registers and storing the result
17217 in the first source is already a 16-bit
17218 operation. */
17219 if (rtx_equal_p (dst, op0)
17220 && register_operand (op1, SImode))
17221 break;
17222
17223 if (low_register_operand (op0, SImode))
17224 {
17225 /* ADDS <Rd>,<Rn>,<Rm> */
17226 if (low_register_operand (op1, SImode))
17227 action = CONV;
17228 /* ADDS <Rdn>,#<imm8> */
17229 /* SUBS <Rdn>,#<imm8> */
17230 else if (rtx_equal_p (dst, op0)
17231 && CONST_INT_P (op1)
17232 && IN_RANGE (INTVAL (op1), -255, 255))
17233 action = CONV;
17234 /* ADDS <Rd>,<Rn>,#<imm3> */
17235 /* SUBS <Rd>,<Rn>,#<imm3> */
17236 else if (CONST_INT_P (op1)
17237 && IN_RANGE (INTVAL (op1), -7, 7))
17238 action = CONV;
17239 }
17240 /* ADCS <Rd>, <Rn> */
17241 else if (GET_CODE (XEXP (src, 0)) == PLUS
17242 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17243 && low_register_operand (XEXP (XEXP (src, 0), 1),
17244 SImode)
17245 && COMPARISON_P (op1)
17246 && cc_register (XEXP (op1, 0), VOIDmode)
17247 && maybe_get_arm_condition_code (op1) == ARM_CS
17248 && XEXP (op1, 1) == const0_rtx)
17249 action = CONV;
17250 break;
17251
17252 case MINUS:
17253 /* RSBS <Rd>,<Rn>,#0
17254 Not handled here: see NEG below. */
17255 /* SUBS <Rd>,<Rn>,#<imm3>
17256 SUBS <Rdn>,#<imm8>
17257 Not handled here: see PLUS above. */
17258 /* SUBS <Rd>,<Rn>,<Rm> */
17259 if (low_register_operand (op0, SImode)
17260 && low_register_operand (op1, SImode))
17261 action = CONV;
17262 break;
17263
17264 case MULT:
17265 /* MULS <Rdm>,<Rn>,<Rdm>
17266 As an exception to the rule, this is only used
17267 when optimizing for size since MULS is slow on all
17268 known implementations. We do not even want to use
17269 MULS in cold code, if optimizing for speed, so we
17270 test the global flag here. */
17271 if (!optimize_size)
17272 break;
17273 /* Fall through. */
17274 case AND:
17275 case IOR:
17276 case XOR:
17277 /* ANDS <Rdn>,<Rm> */
17278 if (rtx_equal_p (dst, op0)
17279 && low_register_operand (op1, SImode))
17280 action = action_for_partial_flag_setting;
17281 else if (rtx_equal_p (dst, op1)
17282 && low_register_operand (op0, SImode))
17283 action = action_for_partial_flag_setting == SKIP
17284 ? SKIP : SWAP_CONV;
17285 break;
17286
17287 case ASHIFTRT:
17288 case ASHIFT:
17289 case LSHIFTRT:
17290 /* ASRS <Rdn>,<Rm> */
17291 /* LSRS <Rdn>,<Rm> */
17292 /* LSLS <Rdn>,<Rm> */
17293 if (rtx_equal_p (dst, op0)
17294 && low_register_operand (op1, SImode))
17295 action = action_for_partial_flag_setting;
17296 /* ASRS <Rd>,<Rm>,#<imm5> */
17297 /* LSRS <Rd>,<Rm>,#<imm5> */
17298 /* LSLS <Rd>,<Rm>,#<imm5> */
17299 else if (low_register_operand (op0, SImode)
17300 && CONST_INT_P (op1)
17301 && IN_RANGE (INTVAL (op1), 0, 31))
17302 action = action_for_partial_flag_setting;
17303 break;
17304
17305 case ROTATERT:
17306 /* RORS <Rdn>,<Rm> */
17307 if (rtx_equal_p (dst, op0)
17308 && low_register_operand (op1, SImode))
17309 action = action_for_partial_flag_setting;
17310 break;
17311
17312 case NOT:
17313 /* MVNS <Rd>,<Rm> */
17314 if (low_register_operand (op0, SImode))
17315 action = action_for_partial_flag_setting;
17316 break;
17317
17318 case NEG:
17319 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17320 if (low_register_operand (op0, SImode))
17321 action = CONV;
17322 break;
17323
17324 case CONST_INT:
17325 /* MOVS <Rd>,#<imm8> */
17326 if (CONST_INT_P (src)
17327 && IN_RANGE (INTVAL (src), 0, 255))
17328 action = action_for_partial_flag_setting;
17329 break;
17330
17331 case REG:
17332 /* MOVS and MOV<c> with registers have different
17333 encodings, so are not relevant here. */
17334 break;
17335
17336 default:
17337 break;
17338 }
17339 }
17340
17341 if (action != SKIP)
17342 {
17343 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17344 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17345 rtvec vec;
17346
17347 if (action == SWAP_CONV)
17348 {
17349 src = copy_rtx (src);
17350 XEXP (src, 0) = op1;
17351 XEXP (src, 1) = op0;
17352 pat = gen_rtx_SET (dst, src);
17353 vec = gen_rtvec (2, pat, clobber);
17354 }
17355 else /* action == CONV */
17356 vec = gen_rtvec (2, pat, clobber);
17357
17358 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17359 INSN_CODE (insn) = -1;
17360 }
17361 }
17362
17363 if (NONDEBUG_INSN_P (insn))
17364 df_simulate_one_insn_backwards (bb, insn, &live);
17365 }
17366 }
17367
17368 CLEAR_REG_SET (&live);
17369 }
17370
17371 /* GCC puts the pool in the wrong place for ARM, since we can only
17372 load addresses a limited distance around the pc. We do some
17373 special munging to move the constant pool values to the correct
17374 point in the code. */
17375 static void
17376 arm_reorg (void)
17377 {
17378 rtx_insn *insn;
17379 HOST_WIDE_INT address = 0;
17380 Mfix * fix;
17381
17382 if (use_cmse)
17383 cmse_nonsecure_call_clear_caller_saved ();
17384 if (TARGET_THUMB1)
17385 thumb1_reorg ();
17386 else if (TARGET_THUMB2)
17387 thumb2_reorg ();
17388
17389 /* Ensure all insns that must be split have been split at this point.
17390 Otherwise, the pool placement code below may compute incorrect
17391 insn lengths. Note that when optimizing, all insns have already
17392 been split at this point. */
17393 if (!optimize)
17394 split_all_insns_noflow ();
17395
17396 /* If literal pools are disabled, make sure we do not attempt to create
17397 one even though it should no longer be necessary to create any. */
17398 if (arm_disable_literal_pool)
17399 return;
17400
17401 minipool_fix_head = minipool_fix_tail = NULL;
17402
17403 /* The first insn must always be a note, or the code below won't
17404 scan it properly. */
17405 insn = get_insns ();
17406 gcc_assert (NOTE_P (insn));
17407 minipool_pad = 0;
17408
17409 /* Scan all the insns and record the operands that will need fixing. */
17410 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17411 {
17412 if (BARRIER_P (insn))
17413 push_minipool_barrier (insn, address);
17414 else if (INSN_P (insn))
17415 {
17416 rtx_jump_table_data *table;
17417
17418 note_invalid_constants (insn, address, true);
17419 address += get_attr_length (insn);
17420
17421 /* If the insn is a vector jump, add the size of the table
17422 and skip the table. */
17423 if (tablejump_p (insn, NULL, &table))
17424 {
17425 address += get_jump_table_size (table);
17426 insn = table;
17427 }
17428 }
17429 else if (LABEL_P (insn))
17430 /* Add the worst-case padding due to alignment. We don't add
17431 the _current_ padding because the minipool insertions
17432 themselves might change it. */
17433 address += get_label_padding (insn);
17434 }
17435
17436 fix = minipool_fix_head;
17437
17438 /* Now scan the fixups and perform the required changes. */
17439 while (fix)
17440 {
17441 Mfix * ftmp;
17442 Mfix * fdel;
17443 Mfix * last_added_fix;
17444 Mfix * last_barrier = NULL;
17445 Mfix * this_fix;
17446
17447 /* Skip any further barriers before the next fix. */
17448 while (fix && BARRIER_P (fix->insn))
17449 fix = fix->next;
17450
17451 /* No more fixes. */
17452 if (fix == NULL)
17453 break;
17454
17455 last_added_fix = NULL;
17456
17457 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17458 {
17459 if (BARRIER_P (ftmp->insn))
17460 {
17461 if (ftmp->address >= minipool_vector_head->max_address)
17462 break;
17463
17464 last_barrier = ftmp;
17465 }
17466 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17467 break;
17468
17469 last_added_fix = ftmp; /* Keep track of the last fix added. */
17470 }
17471
17472 /* If we found a barrier, drop back to that; any fixes that we
17473 could have reached but come after the barrier will now go in
17474 the next mini-pool. */
17475 if (last_barrier != NULL)
17476 {
17477 /* Reduce the refcount for those fixes that won't go into this
17478 pool after all. */
17479 for (fdel = last_barrier->next;
17480 fdel && fdel != ftmp;
17481 fdel = fdel->next)
17482 {
17483 fdel->minipool->refcount--;
17484 fdel->minipool = NULL;
17485 }
17486
17487 ftmp = last_barrier;
17488 }
17489 else
17490 {
17491 /* ftmp is the first fix that we can't fit into this pool and
17492 there are no natural barriers that we could use. Insert a
17493 new barrier in the code somewhere between the previous
17494 fix and this one, and arrange to jump around it. */
17495 HOST_WIDE_INT max_address;
17496
17497 /* The last item on the list of fixes must be a barrier, so
17498 we can never run off the end of the list of fixes without
17499 last_barrier being set. */
17500 gcc_assert (ftmp);
17501
17502 max_address = minipool_vector_head->max_address;
17503 /* Check that there isn't another fix that is in range that
17504 we couldn't fit into this pool because the pool was
17505 already too large: we need to put the pool before such an
17506 instruction. The pool itself may come just after the
17507 fix because create_fix_barrier also allows space for a
17508 jump instruction. */
17509 if (ftmp->address < max_address)
17510 max_address = ftmp->address + 1;
17511
17512 last_barrier = create_fix_barrier (last_added_fix, max_address);
17513 }
17514
17515 assign_minipool_offsets (last_barrier);
17516
17517 while (ftmp)
17518 {
17519 if (!BARRIER_P (ftmp->insn)
17520 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17521 == NULL))
17522 break;
17523
17524 ftmp = ftmp->next;
17525 }
17526
17527 /* Scan over the fixes we have identified for this pool, fixing them
17528 up and adding the constants to the pool itself. */
17529 for (this_fix = fix; this_fix && ftmp != this_fix;
17530 this_fix = this_fix->next)
17531 if (!BARRIER_P (this_fix->insn))
17532 {
17533 rtx addr
17534 = plus_constant (Pmode,
17535 gen_rtx_LABEL_REF (VOIDmode,
17536 minipool_vector_label),
17537 this_fix->minipool->offset);
17538 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17539 }
17540
17541 dump_minipool (last_barrier->insn);
17542 fix = ftmp;
17543 }
17544
17545 /* From now on we must synthesize any constants that we can't handle
17546 directly. This can happen if the RTL gets split during final
17547 instruction generation. */
17548 cfun->machine->after_arm_reorg = 1;
17549
17550 /* Free the minipool memory. */
17551 obstack_free (&minipool_obstack, minipool_startobj);
17552 }
17553 \f
17554 /* Routines to output assembly language. */
17555
17556 /* Return string representation of passed in real value. */
17557 static const char *
17558 fp_const_from_val (REAL_VALUE_TYPE *r)
17559 {
17560 if (!fp_consts_inited)
17561 init_fp_table ();
17562
17563 gcc_assert (real_equal (r, &value_fp0));
17564 return "0";
17565 }
17566
17567 /* OPERANDS[0] is the entire list of insns that constitute pop,
17568 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17569 is in the list, UPDATE is true iff the list contains explicit
17570 update of base register. */
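/* For example (illustrative only): popping { r4, r5, pc } with the stack
   pointer as base register and an explicit update produces
   "pop {r4, r5, pc}", while the same registers with a non-SP base and an
   update produce "ldmia rN!, {r4, r5, pc}". When returning from an
   interrupt, POP is not used and a "^" is appended to the register list. */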
17571 void
17572 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17573 bool update)
17574 {
17575 int i;
17576 char pattern[100];
17577 int offset;
17578 const char *conditional;
17579 int num_saves = XVECLEN (operands[0], 0);
17580 unsigned int regno;
17581 unsigned int regno_base = REGNO (operands[1]);
17582 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17583
17584 offset = 0;
17585 offset += update ? 1 : 0;
17586 offset += return_pc ? 1 : 0;
17587
17588 /* Is the base register in the list? */
17589 for (i = offset; i < num_saves; i++)
17590 {
17591 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17592 /* If SP is in the list, then the base register must be SP. */
17593 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17594 /* If base register is in the list, there must be no explicit update. */
17595 if (regno == regno_base)
17596 gcc_assert (!update);
17597 }
17598
17599 conditional = reverse ? "%?%D0" : "%?%d0";
17600 /* Can't use POP if returning from an interrupt. */
17601 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17602 sprintf (pattern, "pop%s\t{", conditional);
17603 else
17604 {
17605 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17606 It's just a convention; their semantics are identical. */
17607 if (regno_base == SP_REGNUM)
17608 sprintf (pattern, "ldmfd%s\t", conditional);
17609 else if (update)
17610 sprintf (pattern, "ldmia%s\t", conditional);
17611 else
17612 sprintf (pattern, "ldm%s\t", conditional);
17613
17614 strcat (pattern, reg_names[regno_base]);
17615 if (update)
17616 strcat (pattern, "!, {");
17617 else
17618 strcat (pattern, ", {");
17619 }
17620
17621 /* Output the first destination register. */
17622 strcat (pattern,
17623 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17624
17625 /* Output the rest of the destination registers. */
17626 for (i = offset + 1; i < num_saves; i++)
17627 {
17628 strcat (pattern, ", ");
17629 strcat (pattern,
17630 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17631 }
17632
17633 strcat (pattern, "}");
17634
17635 if (interrupt_p && return_pc)
17636 strcat (pattern, "^");
17637
17638 output_asm_insn (pattern, &cond);
17639 }
17640
17641
17642 /* Output the assembly for a store multiple. */
17643
17644 const char *
17645 vfp_output_vstmd (rtx * operands)
17646 {
17647 char pattern[100];
17648 int p;
17649 int base;
17650 int i;
17651 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17652 ? XEXP (operands[0], 0)
17653 : XEXP (XEXP (operands[0], 0), 0);
17654 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17655
17656 if (push_p)
17657 strcpy (pattern, "vpush%?.64\t{%P1");
17658 else
17659 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17660
17661 p = strlen (pattern);
17662
17663 gcc_assert (REG_P (operands[1]));
17664
17665 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17666 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17667 {
17668 p += sprintf (&pattern[p], ", d%d", base + i);
17669 }
17670 strcpy (&pattern[p], "}");
17671
17672 output_asm_insn (pattern, operands);
17673 return "";
17674 }
17675
17676
17677 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17678 number of bytes pushed. */
17679
17680 static int
17681 vfp_emit_fstmd (int base_reg, int count)
17682 {
17683 rtx par;
17684 rtx dwarf;
17685 rtx tmp, reg;
17686 int i;
17687
17688 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17689 register pairs are stored by a store multiple insn. We avoid this
17690 by pushing an extra pair. */
17691 if (count == 2 && !arm_arch6)
17692 {
17693 if (base_reg == LAST_VFP_REGNUM - 3)
17694 base_reg -= 2;
17695 count++;
17696 }
17697
17698 /* FSTMD may not store more than 16 doubleword registers at once. Split
17699 larger stores into multiple parts (up to a maximum of two, in
17700 practice). */
17701 if (count > 16)
17702 {
17703 int saved;
17704 /* NOTE: base_reg is an internal register number, so each D register
17705 counts as 2. */
17706 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17707 saved += vfp_emit_fstmd (base_reg, 16);
17708 return saved;
17709 }
17710
17711 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17712 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17713
17714 reg = gen_rtx_REG (DFmode, base_reg);
17715 base_reg += 2;
17716
17717 XVECEXP (par, 0, 0)
17718 = gen_rtx_SET (gen_frame_mem
17719 (BLKmode,
17720 gen_rtx_PRE_MODIFY (Pmode,
17721 stack_pointer_rtx,
17722 plus_constant
17723 (Pmode, stack_pointer_rtx,
17724 - (count * 8)))
17725 ),
17726 gen_rtx_UNSPEC (BLKmode,
17727 gen_rtvec (1, reg),
17728 UNSPEC_PUSH_MULT));
17729
17730 tmp = gen_rtx_SET (stack_pointer_rtx,
17731 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17732 RTX_FRAME_RELATED_P (tmp) = 1;
17733 XVECEXP (dwarf, 0, 0) = tmp;
17734
17735 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17736 RTX_FRAME_RELATED_P (tmp) = 1;
17737 XVECEXP (dwarf, 0, 1) = tmp;
17738
17739 for (i = 1; i < count; i++)
17740 {
17741 reg = gen_rtx_REG (DFmode, base_reg);
17742 base_reg += 2;
17743 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17744
17745 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17746 plus_constant (Pmode,
17747 stack_pointer_rtx,
17748 i * 8)),
17749 reg);
17750 RTX_FRAME_RELATED_P (tmp) = 1;
17751 XVECEXP (dwarf, 0, i + 1) = tmp;
17752 }
17753
17754 par = emit_insn (par);
17755 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17756 RTX_FRAME_RELATED_P (par) = 1;
17757
17758 return count * 8;
17759 }
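/* Worked example (illustrative only): asking for 3 register pairs starting
   at d8 builds a PARALLEL that stores d8-d10 below the current stack
   pointer with a single pre-decrementing store-multiple, attaches a
   REG_FRAME_RELATED_EXPR note describing the individual stores for the
   unwinder, and returns 24 (3 * 8 bytes). With only 2 pairs and no
   arm_arch6, the ARM10 workaround above bumps the count to 3 first. */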
17760
17761 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17762 has the cmse_nonsecure_call attribute, and false otherwise. */
17763
17764 bool
17765 detect_cmse_nonsecure_call (tree addr)
17766 {
17767 if (!addr)
17768 return FALSE;
17769
17770 tree fntype = TREE_TYPE (addr);
17771 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17772 TYPE_ATTRIBUTES (fntype)))
17773 return TRUE;
17774 return FALSE;
17775 }
17776
17777
17778 /* Emit a call instruction with pattern PAT. ADDR is the address of
17779 the call target. */
17780
17781 void
17782 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17783 {
17784 rtx insn;
17785
17786 insn = emit_call_insn (pat);
17787
17788 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17789 If the call might use such an entry, add a use of the PIC register
17790 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17791 if (TARGET_VXWORKS_RTP
17792 && flag_pic
17793 && !sibcall
17794 && GET_CODE (addr) == SYMBOL_REF
17795 && (SYMBOL_REF_DECL (addr)
17796 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17797 : !SYMBOL_REF_LOCAL_P (addr)))
17798 {
17799 require_pic_register ();
17800 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17801 }
17802
17803 if (TARGET_AAPCS_BASED)
17804 {
17805 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17806 linker. We need to add an IP clobber to allow setting
17807 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17808 is not needed since it's a fixed register. */
17809 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17810 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17811 }
17812 }
17813
17814 /* Output a 'call' insn. */
17815 const char *
17816 output_call (rtx *operands)
17817 {
17818 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17819
17820 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17821 if (REGNO (operands[0]) == LR_REGNUM)
17822 {
17823 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17824 output_asm_insn ("mov%?\t%0, %|lr", operands);
17825 }
17826
17827 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17828
17829 if (TARGET_INTERWORK || arm_arch4t)
17830 output_asm_insn ("bx%?\t%0", operands);
17831 else
17832 output_asm_insn ("mov%?\t%|pc, %0", operands);
17833
17834 return "";
17835 }
17836
17837 /* Output a move from arm registers to arm registers of a long double.
17838 OPERANDS[0] is the destination.
17839 OPERANDS[1] is the source. */
17840 const char *
17841 output_mov_long_double_arm_from_arm (rtx *operands)
17842 {
17843 /* We have to be careful here because the two might overlap. */
17844 int dest_start = REGNO (operands[0]);
17845 int src_start = REGNO (operands[1]);
17846 rtx ops[2];
17847 int i;
17848
17849 if (dest_start < src_start)
17850 {
17851 for (i = 0; i < 3; i++)
17852 {
17853 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17854 ops[1] = gen_rtx_REG (SImode, src_start + i);
17855 output_asm_insn ("mov%?\t%0, %1", ops);
17856 }
17857 }
17858 else
17859 {
17860 for (i = 2; i >= 0; i--)
17861 {
17862 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17863 ops[1] = gen_rtx_REG (SImode, src_start + i);
17864 output_asm_insn ("mov%?\t%0, %1", ops);
17865 }
17866 }
17867
17868 return "";
17869 }
17870
17871 void
17872 arm_emit_movpair (rtx dest, rtx src)
17873 {
17874 /* If the src is an immediate, simplify it. */
17875 if (CONST_INT_P (src))
17876 {
17877 HOST_WIDE_INT val = INTVAL (src);
17878 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17879 if ((val >> 16) & 0x0000ffff)
17880 {
17881 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17882 GEN_INT (16)),
17883 GEN_INT ((val >> 16) & 0x0000ffff));
17884 rtx_insn *insn = get_last_insn ();
17885 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17886 }
17887 return;
17888 }
17889 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17890 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17891 rtx_insn *insn = get_last_insn ();
17892 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17893 }
17894
17895 /* Output a move between double words. It must be REG<-MEM
17896 or MEM<-REG. */
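/* For instance (an illustrative sketch), a DImode load whose address is
   a plain base register is typically emitted as "ldrd rN, [rB]" when
   ldrd is available and otherwise as "ldmia rB, {rN, rN+1}".  */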
17897 const char *
17898 output_move_double (rtx *operands, bool emit, int *count)
17899 {
17900 enum rtx_code code0 = GET_CODE (operands[0]);
17901 enum rtx_code code1 = GET_CODE (operands[1]);
17902 rtx otherops[3];
17903 if (count)
17904 *count = 1;
17905
17906 /* The only case when this might happen is when
17907 you are looking at the length of a DImode instruction
17908 that has an invalid constant in it. */
17909 if (code0 == REG && code1 != MEM)
17910 {
17911 gcc_assert (!emit);
17912 *count = 2;
17913 return "";
17914 }
17915
17916 if (code0 == REG)
17917 {
17918 unsigned int reg0 = REGNO (operands[0]);
17919
17920 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17921
17922 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17923
17924 switch (GET_CODE (XEXP (operands[1], 0)))
17925 {
17926 case REG:
17927
17928 if (emit)
17929 {
17930 if (TARGET_LDRD
17931 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17932 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17933 else
17934 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17935 }
17936 break;
17937
17938 case PRE_INC:
17939 gcc_assert (TARGET_LDRD);
17940 if (emit)
17941 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17942 break;
17943
17944 case PRE_DEC:
17945 if (emit)
17946 {
17947 if (TARGET_LDRD)
17948 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17949 else
17950 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17951 }
17952 break;
17953
17954 case POST_INC:
17955 if (emit)
17956 {
17957 if (TARGET_LDRD)
17958 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17959 else
17960 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17961 }
17962 break;
17963
17964 case POST_DEC:
17965 gcc_assert (TARGET_LDRD);
17966 if (emit)
17967 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17968 break;
17969
17970 case PRE_MODIFY:
17971 case POST_MODIFY:
17972 /* Autoincrement addressing modes should never have overlapping
17973 base and destination registers, and overlapping index registers
17974 are already prohibited, so this doesn't need to worry about
17975 fix_cm3_ldrd. */
17976 otherops[0] = operands[0];
17977 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17978 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17979
17980 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17981 {
17982 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17983 {
17984 /* Registers overlap so split out the increment. */
17985 if (emit)
17986 {
17987 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17988 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
17989 }
17990 if (count)
17991 *count = 2;
17992 }
17993 else
17994 {
17995 /* Use a single insn if we can.
17996 FIXME: IWMMXT allows offsets larger than ldrd can
17997 handle, fix these up with a pair of ldr. */
17998 if (TARGET_THUMB2
17999 || !CONST_INT_P (otherops[2])
18000 || (INTVAL (otherops[2]) > -256
18001 && INTVAL (otherops[2]) < 256))
18002 {
18003 if (emit)
18004 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18005 }
18006 else
18007 {
18008 if (emit)
18009 {
18010 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18011 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18012 }
18013 if (count)
18014 *count = 2;
18015
18016 }
18017 }
18018 }
18019 else
18020 {
18021 /* Use a single insn if we can.
18022 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18023 fix these up with a pair of ldr. */
18024 if (TARGET_THUMB2
18025 || !CONST_INT_P (otherops[2])
18026 || (INTVAL (otherops[2]) > -256
18027 && INTVAL (otherops[2]) < 256))
18028 {
18029 if (emit)
18030 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18031 }
18032 else
18033 {
18034 if (emit)
18035 {
18036 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18037 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18038 }
18039 if (count)
18040 *count = 2;
18041 }
18042 }
18043 break;
18044
18045 case LABEL_REF:
18046 case CONST:
18047 /* We might be able to use ldrd %0, %1 here. However the range is
18048 different to ldr/adr, and it is broken on some ARMv7-M
18049 implementations. */
18050 /* Use the second register of the pair to avoid problematic
18051 overlap. */
18052 otherops[1] = operands[1];
18053 if (emit)
18054 output_asm_insn ("adr%?\t%0, %1", otherops);
18055 operands[1] = otherops[0];
18056 if (emit)
18057 {
18058 if (TARGET_LDRD)
18059 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18060 else
18061 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18062 }
18063
18064 if (count)
18065 *count = 2;
18066 break;
18067
18068 /* ??? This needs checking for thumb2. */
18069 default:
18070 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18071 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18072 {
18073 otherops[0] = operands[0];
18074 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18075 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18076
18077 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18078 {
18079 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18080 {
18081 switch ((int) INTVAL (otherops[2]))
18082 {
18083 case -8:
18084 if (emit)
18085 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18086 return "";
18087 case -4:
18088 if (TARGET_THUMB2)
18089 break;
18090 if (emit)
18091 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18092 return "";
18093 case 4:
18094 if (TARGET_THUMB2)
18095 break;
18096 if (emit)
18097 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18098 return "";
18099 }
18100 }
18101 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18102 operands[1] = otherops[0];
18103 if (TARGET_LDRD
18104 && (REG_P (otherops[2])
18105 || TARGET_THUMB2
18106 || (CONST_INT_P (otherops[2])
18107 && INTVAL (otherops[2]) > -256
18108 && INTVAL (otherops[2]) < 256)))
18109 {
18110 if (reg_overlap_mentioned_p (operands[0],
18111 otherops[2]))
18112 {
18113 /* Swap base and index registers over to
18114 avoid a conflict. */
18115 std::swap (otherops[1], otherops[2]);
18116 }
18117 /* If both registers conflict, it will usually
18118 have been fixed by a splitter. */
18119 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18120 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18121 {
18122 if (emit)
18123 {
18124 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18125 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18126 }
18127 if (count)
18128 *count = 2;
18129 }
18130 else
18131 {
18132 otherops[0] = operands[0];
18133 if (emit)
18134 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18135 }
18136 return "";
18137 }
18138
18139 if (CONST_INT_P (otherops[2]))
18140 {
18141 if (emit)
18142 {
18143 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18144 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18145 else
18146 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18147 }
18148 }
18149 else
18150 {
18151 if (emit)
18152 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18153 }
18154 }
18155 else
18156 {
18157 if (emit)
18158 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18159 }
18160
18161 if (count)
18162 *count = 2;
18163
18164 if (TARGET_LDRD)
18165 return "ldrd%?\t%0, [%1]";
18166
18167 return "ldmia%?\t%1, %M0";
18168 }
18169 else
18170 {
18171 otherops[1] = adjust_address (operands[1], SImode, 4);
18172 /* Take care of overlapping base/data reg. */
18173 if (reg_mentioned_p (operands[0], operands[1]))
18174 {
18175 if (emit)
18176 {
18177 output_asm_insn ("ldr%?\t%0, %1", otherops);
18178 output_asm_insn ("ldr%?\t%0, %1", operands);
18179 }
18180 if (count)
18181 *count = 2;
18182
18183 }
18184 else
18185 {
18186 if (emit)
18187 {
18188 output_asm_insn ("ldr%?\t%0, %1", operands);
18189 output_asm_insn ("ldr%?\t%0, %1", otherops);
18190 }
18191 if (count)
18192 *count = 2;
18193 }
18194 }
18195 }
18196 }
18197 else
18198 {
18199 /* Constraints should ensure this. */
18200 gcc_assert (code0 == MEM && code1 == REG);
18201 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18202 || (TARGET_ARM && TARGET_LDRD));
18203
18204 switch (GET_CODE (XEXP (operands[0], 0)))
18205 {
18206 case REG:
18207 if (emit)
18208 {
18209 if (TARGET_LDRD)
18210 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18211 else
18212 output_asm_insn ("stm%?\t%m0, %M1", operands);
18213 }
18214 break;
18215
18216 case PRE_INC:
18217 gcc_assert (TARGET_LDRD);
18218 if (emit)
18219 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18220 break;
18221
18222 case PRE_DEC:
18223 if (emit)
18224 {
18225 if (TARGET_LDRD)
18226 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18227 else
18228 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18229 }
18230 break;
18231
18232 case POST_INC:
18233 if (emit)
18234 {
18235 if (TARGET_LDRD)
18236 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18237 else
18238 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18239 }
18240 break;
18241
18242 case POST_DEC:
18243 gcc_assert (TARGET_LDRD);
18244 if (emit)
18245 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18246 break;
18247
18248 case PRE_MODIFY:
18249 case POST_MODIFY:
18250 otherops[0] = operands[1];
18251 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18252 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18253
18254 /* IWMMXT allows offsets larger than ldrd can handle,
18255 fix these up with a pair of ldr. */
18256 if (!TARGET_THUMB2
18257 && CONST_INT_P (otherops[2])
18258 && (INTVAL(otherops[2]) <= -256
18259 || INTVAL(otherops[2]) >= 256))
18260 {
18261 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18262 {
18263 if (emit)
18264 {
18265 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18266 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18267 }
18268 if (count)
18269 *count = 2;
18270 }
18271 else
18272 {
18273 if (emit)
18274 {
18275 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18276 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18277 }
18278 if (count)
18279 *count = 2;
18280 }
18281 }
18282 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18283 {
18284 if (emit)
18285 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18286 }
18287 else
18288 {
18289 if (emit)
18290 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18291 }
18292 break;
18293
18294 case PLUS:
18295 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18296 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18297 {
18298 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18299 {
18300 case -8:
18301 if (emit)
18302 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18303 return "";
18304
18305 case -4:
18306 if (TARGET_THUMB2)
18307 break;
18308 if (emit)
18309 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18310 return "";
18311
18312 case 4:
18313 if (TARGET_THUMB2)
18314 break;
18315 if (emit)
18316 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18317 return "";
18318 }
18319 }
18320 if (TARGET_LDRD
18321 && (REG_P (otherops[2])
18322 || TARGET_THUMB2
18323 || (CONST_INT_P (otherops[2])
18324 && INTVAL (otherops[2]) > -256
18325 && INTVAL (otherops[2]) < 256)))
18326 {
18327 otherops[0] = operands[1];
18328 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18329 if (emit)
18330 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18331 return "";
18332 }
18333 /* Fall through */
18334
18335 default:
18336 otherops[0] = adjust_address (operands[0], SImode, 4);
18337 otherops[1] = operands[1];
18338 if (emit)
18339 {
18340 output_asm_insn ("str%?\t%1, %0", operands);
18341 output_asm_insn ("str%?\t%H1, %0", otherops);
18342 }
18343 if (count)
18344 *count = 2;
18345 }
18346 }
18347
18348 return "";
18349 }
18350
18351 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18352 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18353
18354 const char *
18355 output_move_quad (rtx *operands)
18356 {
18357 if (REG_P (operands[0]))
18358 {
18359 /* Load, or reg->reg move. */
18360
18361 if (MEM_P (operands[1]))
18362 {
18363 switch (GET_CODE (XEXP (operands[1], 0)))
18364 {
18365 case REG:
18366 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18367 break;
18368
18369 case LABEL_REF:
18370 case CONST:
18371 output_asm_insn ("adr%?\t%0, %1", operands);
18372 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18373 break;
18374
18375 default:
18376 gcc_unreachable ();
18377 }
18378 }
18379 else
18380 {
18381 rtx ops[2];
18382 int dest, src, i;
18383
18384 gcc_assert (REG_P (operands[1]));
18385
18386 dest = REGNO (operands[0]);
18387 src = REGNO (operands[1]);
18388
18389 /* This seems pretty dumb, but hopefully GCC won't try to do it
18390 very often. */
18391 if (dest < src)
18392 for (i = 0; i < 4; i++)
18393 {
18394 ops[0] = gen_rtx_REG (SImode, dest + i);
18395 ops[1] = gen_rtx_REG (SImode, src + i);
18396 output_asm_insn ("mov%?\t%0, %1", ops);
18397 }
18398 else
18399 for (i = 3; i >= 0; i--)
18400 {
18401 ops[0] = gen_rtx_REG (SImode, dest + i);
18402 ops[1] = gen_rtx_REG (SImode, src + i);
18403 output_asm_insn ("mov%?\t%0, %1", ops);
18404 }
18405 }
18406 }
18407 else
18408 {
18409 gcc_assert (MEM_P (operands[0]));
18410 gcc_assert (REG_P (operands[1]));
18411 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18412
18413 switch (GET_CODE (XEXP (operands[0], 0)))
18414 {
18415 case REG:
18416 output_asm_insn ("stm%?\t%m0, %M1", operands);
18417 break;
18418
18419 default:
18420 gcc_unreachable ();
18421 }
18422 }
18423
18424 return "";
18425 }
18426
18427 /* Output a VFP load or store instruction. */
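/* For example (illustrative only), a DFmode load whose address is a
   plain register falls through to the default template and is printed
   as "vldr%?.64\t%P0, %1", i.e. something like "vldr.64 d0, [r4]".  */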
18428
18429 const char *
18430 output_move_vfp (rtx *operands)
18431 {
18432 rtx reg, mem, addr, ops[2];
18433 int load = REG_P (operands[0]);
18434 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18435 int sp = (!TARGET_VFP_FP16INST
18436 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18437 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18438 const char *templ;
18439 char buff[50];
18440 machine_mode mode;
18441
18442 reg = operands[!load];
18443 mem = operands[load];
18444
18445 mode = GET_MODE (reg);
18446
18447 gcc_assert (REG_P (reg));
18448 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18449 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18450 || mode == SFmode
18451 || mode == DFmode
18452 || mode == HImode
18453 || mode == SImode
18454 || mode == DImode
18455 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18456 gcc_assert (MEM_P (mem));
18457
18458 addr = XEXP (mem, 0);
18459
18460 switch (GET_CODE (addr))
18461 {
18462 case PRE_DEC:
18463 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18464 ops[0] = XEXP (addr, 0);
18465 ops[1] = reg;
18466 break;
18467
18468 case POST_INC:
18469 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18470 ops[0] = XEXP (addr, 0);
18471 ops[1] = reg;
18472 break;
18473
18474 default:
18475 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18476 ops[0] = reg;
18477 ops[1] = mem;
18478 break;
18479 }
18480
18481 sprintf (buff, templ,
18482 load ? "ld" : "st",
18483 dp ? "64" : sp ? "32" : "16",
18484 dp ? "P" : "",
18485 integer_p ? "\t%@ int" : "");
18486 output_asm_insn (buff, ops);
18487
18488 return "";
18489 }
18490
18491 /* Output a Neon double-word or quad-word load or store, or a load
18492 or store for larger structure modes.
18493
18494 WARNING: The ordering of elements is weird in big-endian mode,
18495 because the EABI requires that vectors stored in memory appear
18496 as though they were stored by a VSTM instruction.
18497 GCC RTL defines element ordering based on in-memory order.
18498 This can be different from the architectural ordering of elements
18499 within a NEON register. The intrinsics defined in arm_neon.h use the
18500 NEON register element ordering, not the GCC RTL element ordering.
18501
18502 For example, the in-memory ordering of a big-endian quadword
18503 vector with 16-bit elements when stored from register pair {d0,d1}
18504 will be (lowest address first, d0[N] is NEON register element N):
18505
18506 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18507
18508 When necessary, quadword registers (dN, dN+1) are moved to ARM
18509 registers from rN in the order:
18510
18511 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18512
18513 So that STM/LDM can be used on vectors in ARM registers, and the
18514 same memory layout will result as if VSTM/VLDM were used.
18515
18516 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18517 possible, which allows use of appropriate alignment tags.
18518 Note that the choice of "64" is independent of the actual vector
18519 element size; this size simply ensures that the behavior is
18520 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18521
18522 Due to limitations of those instructions, use of VST1.64/VLD1.64
18523 is not possible if:
18524 - the address contains PRE_DEC, or
18525 - the mode refers to more than 4 double-word registers
18526
18527 In those cases, it would be possible to replace VSTM/VLDM by a
18528 sequence of instructions; this is not currently implemented since
18529 this is not certain to actually improve performance. */
18530
18531 const char *
18532 output_move_neon (rtx *operands)
18533 {
18534 rtx reg, mem, addr, ops[2];
18535 int regno, nregs, load = REG_P (operands[0]);
18536 const char *templ;
18537 char buff[50];
18538 machine_mode mode;
18539
18540 reg = operands[!load];
18541 mem = operands[load];
18542
18543 mode = GET_MODE (reg);
18544
18545 gcc_assert (REG_P (reg));
18546 regno = REGNO (reg);
18547 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18548 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18549 || NEON_REGNO_OK_FOR_QUAD (regno));
18550 gcc_assert (VALID_NEON_DREG_MODE (mode)
18551 || VALID_NEON_QREG_MODE (mode)
18552 || VALID_NEON_STRUCT_MODE (mode));
18553 gcc_assert (MEM_P (mem));
18554
18555 addr = XEXP (mem, 0);
18556
18557 /* Strip off const from addresses like (const (plus (...))). */
18558 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18559 addr = XEXP (addr, 0);
18560
18561 switch (GET_CODE (addr))
18562 {
18563 case POST_INC:
18564 /* We have to use vldm / vstm for too-large modes. */
18565 if (nregs > 4)
18566 {
18567 templ = "v%smia%%?\t%%0!, %%h1";
18568 ops[0] = XEXP (addr, 0);
18569 }
18570 else
18571 {
18572 templ = "v%s1.64\t%%h1, %%A0";
18573 ops[0] = mem;
18574 }
18575 ops[1] = reg;
18576 break;
18577
18578 case PRE_DEC:
18579 /* We have to use vldm / vstm in this case, since there is no
18580 pre-decrement form of the vld1 / vst1 instructions. */
18581 templ = "v%smdb%%?\t%%0!, %%h1";
18582 ops[0] = XEXP (addr, 0);
18583 ops[1] = reg;
18584 break;
18585
18586 case POST_MODIFY:
18587 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18588 gcc_unreachable ();
18589
18590 case REG:
18591 /* We have to use vldm / vstm for too-large modes. */
18592 if (nregs > 1)
18593 {
18594 if (nregs > 4)
18595 templ = "v%smia%%?\t%%m0, %%h1";
18596 else
18597 templ = "v%s1.64\t%%h1, %%A0";
18598
18599 ops[0] = mem;
18600 ops[1] = reg;
18601 break;
18602 }
18603 /* Fall through. */
18604 case LABEL_REF:
18605 case PLUS:
18606 {
18607 int i;
18608 int overlap = -1;
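	/* Move one doubleword at a time, but postpone any piece whose
	   destination overlaps the base address register so that the
	   address is not clobbered before the remaining accesses; at
	   most one piece can overlap, which the assert below checks.  */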
18609 for (i = 0; i < nregs; i++)
18610 {
18611 /* We're only using DImode here because it's a convenient size. */
18612 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18613 ops[1] = adjust_address (mem, DImode, 8 * i);
18614 if (reg_overlap_mentioned_p (ops[0], mem))
18615 {
18616 gcc_assert (overlap == -1);
18617 overlap = i;
18618 }
18619 else
18620 {
18621 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18622 output_asm_insn (buff, ops);
18623 }
18624 }
18625 if (overlap != -1)
18626 {
18627 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18628 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18629 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18630 output_asm_insn (buff, ops);
18631 }
18632
18633 return "";
18634 }
18635
18636 default:
18637 gcc_unreachable ();
18638 }
18639
18640 sprintf (buff, templ, load ? "ld" : "st");
18641 output_asm_insn (buff, ops);
18642
18643 return "";
18644 }
18645
18646 /* Compute and return the length of neon_mov<mode>, where <mode> is
18647 one of VSTRUCT modes: EI, OI, CI or XI. */
18648 int
18649 arm_attr_length_move_neon (rtx_insn *insn)
18650 {
18651 rtx reg, mem, addr;
18652 int load;
18653 machine_mode mode;
18654
18655 extract_insn_cached (insn);
18656
18657 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18658 {
18659 mode = GET_MODE (recog_data.operand[0]);
18660 switch (mode)
18661 {
18662 case EImode:
18663 case OImode:
18664 return 8;
18665 case CImode:
18666 return 12;
18667 case XImode:
18668 return 16;
18669 default:
18670 gcc_unreachable ();
18671 }
18672 }
18673
18674 load = REG_P (recog_data.operand[0]);
18675 reg = recog_data.operand[!load];
18676 mem = recog_data.operand[load];
18677
18678 gcc_assert (MEM_P (mem));
18679
18680 mode = GET_MODE (reg);
18681 addr = XEXP (mem, 0);
18682
18683 /* Strip off const from addresses like (const (plus (...))). */
18684 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18685 addr = XEXP (addr, 0);
18686
18687 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18688 {
18689 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18690 return insns * 4;
18691 }
18692 else
18693 return 4;
18694 }
18695
18696 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18697 return zero. */
18698
18699 int
18700 arm_address_offset_is_imm (rtx_insn *insn)
18701 {
18702 rtx mem, addr;
18703
18704 extract_insn_cached (insn);
18705
18706 if (REG_P (recog_data.operand[0]))
18707 return 0;
18708
18709 mem = recog_data.operand[0];
18710
18711 gcc_assert (MEM_P (mem));
18712
18713 addr = XEXP (mem, 0);
18714
18715 if (REG_P (addr)
18716 || (GET_CODE (addr) == PLUS
18717 && REG_P (XEXP (addr, 0))
18718 && CONST_INT_P (XEXP (addr, 1))))
18719 return 1;
18720 else
18721 return 0;
18722 }
18723
18724 /* Output an ADD r, s, #n where n may be too big for one instruction.
18725 If n is zero and the source and destination registers are the same, output nothing. */
18726 const char *
18727 output_add_immediate (rtx *operands)
18728 {
18729 HOST_WIDE_INT n = INTVAL (operands[2]);
18730
18731 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18732 {
18733 if (n < 0)
18734 output_multi_immediate (operands,
18735 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18736 -n);
18737 else
18738 output_multi_immediate (operands,
18739 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18740 n);
18741 }
18742
18743 return "";
18744 }
18745
18746 /* Output a multiple immediate operation.
18747 OPERANDS is the vector of operands referred to in the output patterns.
18748 INSTR1 is the output pattern to use for the first constant.
18749 INSTR2 is the output pattern to use for subsequent constants.
18750 IMMED_OP is the index of the constant slot in OPERANDS.
18751 N is the constant value. */
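/* As an illustrative example: for N = 0x12345 the loop below emits three
   instructions whose immediates are 0x45, 0x2300 and 0x10000, each an
   8-bit value at an even bit position and therefore encodable as an ARM
   rotated immediate.  */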
18752 static const char *
18753 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18754 int immed_op, HOST_WIDE_INT n)
18755 {
18756 #if HOST_BITS_PER_WIDE_INT > 32
18757 n &= 0xffffffff;
18758 #endif
18759
18760 if (n == 0)
18761 {
18762 /* Quick and easy output. */
18763 operands[immed_op] = const0_rtx;
18764 output_asm_insn (instr1, operands);
18765 }
18766 else
18767 {
18768 int i;
18769 const char * instr = instr1;
18770
18771 /* Note that n is never zero here (which would give no output). */
18772 for (i = 0; i < 32; i += 2)
18773 {
18774 if (n & (3 << i))
18775 {
18776 operands[immed_op] = GEN_INT (n & (255 << i));
18777 output_asm_insn (instr, operands);
18778 instr = instr2;
18779 i += 6;
18780 }
18781 }
18782 }
18783
18784 return "";
18785 }
18786
18787 /* Return the name of a shifter operation. */
18788 static const char *
18789 arm_shift_nmem(enum rtx_code code)
18790 {
18791 switch (code)
18792 {
18793 case ASHIFT:
18794 return ARM_LSL_NAME;
18795
18796 case ASHIFTRT:
18797 return "asr";
18798
18799 case LSHIFTRT:
18800 return "lsr";
18801
18802 case ROTATERT:
18803 return "ror";
18804
18805 default:
18806 abort();
18807 }
18808 }
18809
18810 /* Return the appropriate ARM instruction for the operation code.
18811 The returned result should not be overwritten. OP is the rtx of the
18812 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18813 was shifted. */
18814 const char *
18815 arithmetic_instr (rtx op, int shift_first_arg)
18816 {
18817 switch (GET_CODE (op))
18818 {
18819 case PLUS:
18820 return "add";
18821
18822 case MINUS:
18823 return shift_first_arg ? "rsb" : "sub";
18824
18825 case IOR:
18826 return "orr";
18827
18828 case XOR:
18829 return "eor";
18830
18831 case AND:
18832 return "and";
18833
18834 case ASHIFT:
18835 case ASHIFTRT:
18836 case LSHIFTRT:
18837 case ROTATERT:
18838 return arm_shift_nmem(GET_CODE(op));
18839
18840 default:
18841 gcc_unreachable ();
18842 }
18843 }
18844
18845 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18846 for the operation code. The returned result should not be overwritten.
18847 OP is the rtx code of the shift.
18848 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18849 constant shift amount otherwise. */
18850 static const char *
18851 shift_op (rtx op, HOST_WIDE_INT *amountp)
18852 {
18853 const char * mnem;
18854 enum rtx_code code = GET_CODE (op);
18855
18856 switch (code)
18857 {
18858 case ROTATE:
18859 if (!CONST_INT_P (XEXP (op, 1)))
18860 {
18861 output_operand_lossage ("invalid shift operand");
18862 return NULL;
18863 }
18864
18865 code = ROTATERT;
18866 *amountp = 32 - INTVAL (XEXP (op, 1));
18867 mnem = "ror";
18868 break;
18869
18870 case ASHIFT:
18871 case ASHIFTRT:
18872 case LSHIFTRT:
18873 case ROTATERT:
18874 mnem = arm_shift_nmem(code);
18875 if (CONST_INT_P (XEXP (op, 1)))
18876 {
18877 *amountp = INTVAL (XEXP (op, 1));
18878 }
18879 else if (REG_P (XEXP (op, 1)))
18880 {
18881 *amountp = -1;
18882 return mnem;
18883 }
18884 else
18885 {
18886 output_operand_lossage ("invalid shift operand");
18887 return NULL;
18888 }
18889 break;
18890
18891 case MULT:
18892 /* We never have to worry about the amount being other than a
18893 power of 2, since this case can never be reloaded from a reg. */
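      /* E.g. a (mult x 8) operand is printed as the shift "lsl #3".  */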
18894 if (!CONST_INT_P (XEXP (op, 1)))
18895 {
18896 output_operand_lossage ("invalid shift operand");
18897 return NULL;
18898 }
18899
18900 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18901
18902 /* Amount must be a power of two. */
18903 if (*amountp & (*amountp - 1))
18904 {
18905 output_operand_lossage ("invalid shift operand");
18906 return NULL;
18907 }
18908
18909 *amountp = exact_log2 (*amountp);
18910 gcc_assert (IN_RANGE (*amountp, 0, 31));
18911 return ARM_LSL_NAME;
18912
18913 default:
18914 output_operand_lossage ("invalid shift operand");
18915 return NULL;
18916 }
18917
18918 /* This is not 100% correct, but follows from the desire to merge
18919 multiplication by a power of 2 with the recognizer for a
18920 shift. >=32 is not a valid shift for "lsl", so we must try to
18921 output a shift that produces the correct arithmetical result.
18922 Using lsr #32 is identical except for the fact that the carry bit
18923 is not set correctly if we set the flags; but we never use the
18924 carry bit from such an operation, so we can ignore that. */
18925 if (code == ROTATERT)
18926 /* Rotate is just modulo 32. */
18927 *amountp &= 31;
18928 else if (*amountp != (*amountp & 31))
18929 {
18930 if (code == ASHIFT)
18931 mnem = "lsr";
18932 *amountp = 32;
18933 }
18934
18935 /* Shifts of 0 are no-ops. */
18936 if (*amountp == 0)
18937 return NULL;
18938
18939 return mnem;
18940 }
18941
18942 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18943 because /bin/as is horribly restrictive. The judgement about
18944 whether or not each character is 'printable' (and can be output as
18945 is) or not (and must be printed with an octal escape) must be made
18946 with reference to the *host* character set -- the situation is
18947 similar to that discussed in the comments above pp_c_char in
18948 c-pretty-print.c. */
18949
18950 #define MAX_ASCII_LEN 51
18951
18952 void
18953 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18954 {
18955 int i;
18956 int len_so_far = 0;
18957
18958 fputs ("\t.ascii\t\"", stream);
18959
18960 for (i = 0; i < len; i++)
18961 {
18962 int c = p[i];
18963
18964 if (len_so_far >= MAX_ASCII_LEN)
18965 {
18966 fputs ("\"\n\t.ascii\t\"", stream);
18967 len_so_far = 0;
18968 }
18969
18970 if (ISPRINT (c))
18971 {
18972 if (c == '\\' || c == '\"')
18973 {
18974 putc ('\\', stream);
18975 len_so_far++;
18976 }
18977 putc (c, stream);
18978 len_so_far++;
18979 }
18980 else
18981 {
18982 fprintf (stream, "\\%03o", c);
18983 len_so_far += 4;
18984 }
18985 }
18986
18987 fputs ("\"\n", stream);
18988 }
18989 \f
18990 /* Whether a register is callee saved or not. This is necessary because high
18991 registers are marked as caller saved when optimizing for size on Thumb-1
18992 targets despite being callee saved in order to avoid using them. */
18993 #define callee_saved_reg_p(reg) \
18994 (!call_used_regs[reg] \
18995 || (TARGET_THUMB1 && optimize_size \
18996 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18997
18998 /* Compute the register save mask for registers 0 through 12
18999 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19000
19001 static unsigned long
19002 arm_compute_save_reg0_reg12_mask (void)
19003 {
19004 unsigned long func_type = arm_current_func_type ();
19005 unsigned long save_reg_mask = 0;
19006 unsigned int reg;
19007
19008 if (IS_INTERRUPT (func_type))
19009 {
19010 unsigned int max_reg;
19011 /* Interrupt functions must not corrupt any registers,
19012 even call clobbered ones. If this is a leaf function
19013 we can just examine the registers used by the RTL, but
19014 otherwise we have to assume that whatever function is
19015 called might clobber anything, and so we have to save
19016 all the call-clobbered registers as well. */
19017 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19018 /* FIQ handlers have registers r8 - r12 banked, so
19019 we only need to check r0 - r7.  Normal ISRs only
19020 bank r14 and r15, so we must check up to r12.
19021 r13 is the stack pointer which is always preserved,
19022 so we do not need to consider it here. */
19023 max_reg = 7;
19024 else
19025 max_reg = 12;
19026
19027 for (reg = 0; reg <= max_reg; reg++)
19028 if (df_regs_ever_live_p (reg)
19029 || (! crtl->is_leaf && call_used_regs[reg]))
19030 save_reg_mask |= (1 << reg);
19031
19032 /* Also save the pic base register if necessary. */
19033 if (flag_pic
19034 && !TARGET_SINGLE_PIC_BASE
19035 && arm_pic_register != INVALID_REGNUM
19036 && crtl->uses_pic_offset_table)
19037 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19038 }
19039 else if (IS_VOLATILE(func_type))
19040 {
19041 /* For noreturn functions we historically omitted register saves
19042 altogether. However this really messes up debugging. As a
19043 compromise save just the frame pointers. Combined with the link
19044 register saved elsewhere this should be sufficient to get
19045 a backtrace. */
19046 if (frame_pointer_needed)
19047 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19048 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19049 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19050 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19051 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19052 }
19053 else
19054 {
19055 /* In the normal case we only need to save those registers
19056 which are call saved and which are used by this function. */
19057 for (reg = 0; reg <= 11; reg++)
19058 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19059 save_reg_mask |= (1 << reg);
19060
19061 /* Handle the frame pointer as a special case. */
19062 if (frame_pointer_needed)
19063 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19064
19065 /* If we aren't loading the PIC register,
19066 don't stack it even though it may be live. */
19067 if (flag_pic
19068 && !TARGET_SINGLE_PIC_BASE
19069 && arm_pic_register != INVALID_REGNUM
19070 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19071 || crtl->uses_pic_offset_table))
19072 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19073
19074 /* The prologue will copy SP into R0, so save it. */
19075 if (IS_STACKALIGN (func_type))
19076 save_reg_mask |= 1;
19077 }
19078
19079 /* Save registers so the exception handler can modify them. */
19080 if (crtl->calls_eh_return)
19081 {
19082 unsigned int i;
19083
19084 for (i = 0; ; i++)
19085 {
19086 reg = EH_RETURN_DATA_REGNO (i);
19087 if (reg == INVALID_REGNUM)
19088 break;
19089 save_reg_mask |= 1 << reg;
19090 }
19091 }
19092
19093 return save_reg_mask;
19094 }
19095
19096 /* Return true if r3 is live at the start of the function. */
19097
19098 static bool
19099 arm_r3_live_at_start_p (void)
19100 {
19101 /* Just look at cfg info, which is still close enough to correct at this
19102 point. This gives false positives for broken functions that might use
19103 uninitialized data that happens to be allocated in r3, but who cares? */
19104 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19105 }
19106
19107 /* Compute the number of bytes used to store the static chain register on the
19108 stack, above the stack frame. We need to know this accurately to get the
19109 alignment of the rest of the stack frame correct. */
19110
19111 static int
19112 arm_compute_static_chain_stack_bytes (void)
19113 {
19114 /* See the defining assertion in arm_expand_prologue. */
19115 if (IS_NESTED (arm_current_func_type ())
19116 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19117 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19118 && !df_regs_ever_live_p (LR_REGNUM)))
19119 && arm_r3_live_at_start_p ()
19120 && crtl->args.pretend_args_size == 0)
19121 return 4;
19122
19123 return 0;
19124 }
19125
19126 /* Compute a bit mask of which core registers need to be
19127 saved on the stack for the current function.
19128 This is used by arm_compute_frame_layout, which may add extra registers. */
19129
19130 static unsigned long
19131 arm_compute_save_core_reg_mask (void)
19132 {
19133 unsigned int save_reg_mask = 0;
19134 unsigned long func_type = arm_current_func_type ();
19135 unsigned int reg;
19136
19137 if (IS_NAKED (func_type))
19138 /* This should never really happen. */
19139 return 0;
19140
19141 /* If we are creating a stack frame, then we must save the frame pointer,
19142 IP (which will hold the old stack pointer), LR and the PC. */
19143 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19144 save_reg_mask |=
19145 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19146 | (1 << IP_REGNUM)
19147 | (1 << LR_REGNUM)
19148 | (1 << PC_REGNUM);
19149
19150 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19151
19152 /* Decide if we need to save the link register.
19153 Interrupt routines have their own banked link register,
19154 so they never need to save it.
19155 Otherwise if we do not use the link register we do not need to save
19156 it. If we are pushing other registers onto the stack however, we
19157 can save an instruction in the epilogue by pushing the link register
19158 now and then popping it back into the PC. This incurs extra memory
19159 accesses though, so we only do it when optimizing for size, and only
19160 if we know that we will not need a fancy return sequence. */
19161 if (df_regs_ever_live_p (LR_REGNUM)
19162 || (save_reg_mask
19163 && optimize_size
19164 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19165 && !crtl->tail_call_emit
19166 && !crtl->calls_eh_return))
19167 save_reg_mask |= 1 << LR_REGNUM;
19168
19169 if (cfun->machine->lr_save_eliminated)
19170 save_reg_mask &= ~ (1 << LR_REGNUM);
19171
19172 if (TARGET_REALLY_IWMMXT
19173 && ((bit_count (save_reg_mask)
19174 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19175 arm_compute_static_chain_stack_bytes())
19176 ) % 2) != 0)
19177 {
19178 /* The total number of registers that are going to be pushed
19179 onto the stack is odd. We need to ensure that the stack
19180 is 64-bit aligned before we start to save iWMMXt registers,
19181 and also before we start to create locals. (A local variable
19182 might be a double or long long which we will load/store using
19183 an iWMMXt instruction). Therefore we need to push another
19184 ARM register, so that the stack will be 64-bit aligned. We
19185 try to avoid using the arg registers (r0 - r3) as they might be
19186 used to pass values in a tail call. */
19187 for (reg = 4; reg <= 12; reg++)
19188 if ((save_reg_mask & (1 << reg)) == 0)
19189 break;
19190
19191 if (reg <= 12)
19192 save_reg_mask |= (1 << reg);
19193 else
19194 {
19195 cfun->machine->sibcall_blocked = 1;
19196 save_reg_mask |= (1 << 3);
19197 }
19198 }
19199
19200 /* We may need to push an additional register for use initializing the
19201 PIC base register. */
19202 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19203 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19204 {
19205 reg = thumb_find_work_register (1 << 4);
19206 if (!call_used_regs[reg])
19207 save_reg_mask |= (1 << reg);
19208 }
19209
19210 return save_reg_mask;
19211 }
19212
19213 /* Compute a bit mask of which core registers need to be
19214 saved on the stack for the current function. */
19215 static unsigned long
19216 thumb1_compute_save_core_reg_mask (void)
19217 {
19218 unsigned long mask;
19219 unsigned reg;
19220
19221 mask = 0;
19222 for (reg = 0; reg < 12; reg ++)
19223 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19224 mask |= 1 << reg;
19225
19226 /* Handle the frame pointer as a special case. */
19227 if (frame_pointer_needed)
19228 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19229
19230 if (flag_pic
19231 && !TARGET_SINGLE_PIC_BASE
19232 && arm_pic_register != INVALID_REGNUM
19233 && crtl->uses_pic_offset_table)
19234 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19235
19236 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19237 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19238 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19239
19240 /* LR will also be pushed if any lo regs are pushed. */
19241 if (mask & 0xff || thumb_force_lr_save ())
19242 mask |= (1 << LR_REGNUM);
19243
19244 /* Make sure we have a low work register if we need one.
19245 We will need one if we are going to push a high register,
19246 but we are not currently intending to push a low register. */
19247 if ((mask & 0xff) == 0
19248 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19249 {
19250 /* Use thumb_find_work_register to choose which register
19251 we will use. If the register is live then we will
19252 have to push it. Use LAST_LO_REGNUM as our fallback
19253 choice for the register to select. */
19254 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19255 /* Make sure the register returned by thumb_find_work_register is
19256 not part of the return value. */
19257 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19258 reg = LAST_LO_REGNUM;
19259
19260 if (callee_saved_reg_p (reg))
19261 mask |= 1 << reg;
19262 }
19263
19264 /* The 504 below is 8 bytes less than 512 because there are two possible
19265 alignment words. We can't tell here if they will be present or not so we
19266 have to play it safe and assume that they are. */
19267 if ((CALLER_INTERWORKING_SLOT_SIZE +
19268 ROUND_UP_WORD (get_frame_size ()) +
19269 crtl->outgoing_args_size) >= 504)
19270 {
19271 /* This is the same as the code in thumb1_expand_prologue() which
19272 determines which register to use for stack decrement. */
19273 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19274 if (mask & (1 << reg))
19275 break;
19276
19277 if (reg > LAST_LO_REGNUM)
19278 {
19279 /* Make sure we have a register available for stack decrement. */
19280 mask |= 1 << LAST_LO_REGNUM;
19281 }
19282 }
19283
19284 return mask;
19285 }
19286
19287
19288 /* Return the number of bytes required to save VFP registers. */
19289 static int
19290 arm_get_vfp_saved_size (void)
19291 {
19292 unsigned int regno;
19293 int count;
19294 int saved;
19295
19296 saved = 0;
19297 /* Space for saved VFP registers. */
19298 if (TARGET_HARD_FLOAT)
19299 {
19300 count = 0;
19301 for (regno = FIRST_VFP_REGNUM;
19302 regno < LAST_VFP_REGNUM;
19303 regno += 2)
19304 {
19305 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19306 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19307 {
19308 if (count > 0)
19309 {
19310 /* Workaround ARM10 VFPr1 bug. */
19311 if (count == 2 && !arm_arch6)
19312 count++;
19313 saved += count * 8;
19314 }
19315 count = 0;
19316 }
19317 else
19318 count++;
19319 }
19320 if (count > 0)
19321 {
19322 if (count == 2 && !arm_arch6)
19323 count++;
19324 saved += count * 8;
19325 }
19326 }
19327 return saved;
19328 }
19329
19330
19331 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19332 everything bar the final return instruction. If simple_return is true,
19333 then do not output epilogue, because it has already been emitted in RTL. */
19334 const char *
19335 output_return_instruction (rtx operand, bool really_return, bool reverse,
19336 bool simple_return)
19337 {
19338 char conditional[10];
19339 char instr[100];
19340 unsigned reg;
19341 unsigned long live_regs_mask;
19342 unsigned long func_type;
19343 arm_stack_offsets *offsets;
19344
19345 func_type = arm_current_func_type ();
19346
19347 if (IS_NAKED (func_type))
19348 return "";
19349
19350 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19351 {
19352 /* If this function was declared non-returning, and we have
19353 found a tail call, then we have to trust that the called
19354 function won't return. */
19355 if (really_return)
19356 {
19357 rtx ops[2];
19358
19359 /* Otherwise, trap an attempted return by aborting. */
19360 ops[0] = operand;
19361 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19362 : "abort");
19363 assemble_external_libcall (ops[1]);
19364 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19365 }
19366
19367 return "";
19368 }
19369
19370 gcc_assert (!cfun->calls_alloca || really_return);
19371
19372 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19373
19374 cfun->machine->return_used_this_function = 1;
19375
19376 offsets = arm_get_frame_offsets ();
19377 live_regs_mask = offsets->saved_regs_mask;
19378
19379 if (!simple_return && live_regs_mask)
19380 {
19381 const char * return_reg;
19382
19383 /* If we do not have any special requirements for function exit
19384 (e.g. interworking) then we can load the return address
19385 directly into the PC. Otherwise we must load it into LR. */
19386 if (really_return
19387 && !IS_CMSE_ENTRY (func_type)
19388 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19389 return_reg = reg_names[PC_REGNUM];
19390 else
19391 return_reg = reg_names[LR_REGNUM];
19392
19393 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19394 {
19395 /* There are three possible reasons for the IP register
19396 being saved: 1) a stack frame was created, in which case
19397 IP contains the old stack pointer, or 2) an ISR routine
19398 corrupted it, or 3) it was saved to align the stack on
19399 iWMMXt. In case 1, restore IP into SP, otherwise just
19400 restore IP. */
19401 if (frame_pointer_needed)
19402 {
19403 live_regs_mask &= ~ (1 << IP_REGNUM);
19404 live_regs_mask |= (1 << SP_REGNUM);
19405 }
19406 else
19407 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19408 }
19409
19410 /* On some ARM architectures it is faster to use LDR rather than
19411 LDM to load a single register. On other architectures, the
19412 cost is the same. In 26 bit mode, or for exception handlers,
19413 we have to use LDM to load the PC so that the CPSR is also
19414 restored. */
19415 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19416 if (live_regs_mask == (1U << reg))
19417 break;
19418
19419 if (reg <= LAST_ARM_REGNUM
19420 && (reg != LR_REGNUM
19421 || ! really_return
19422 || ! IS_INTERRUPT (func_type)))
19423 {
19424 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19425 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19426 }
19427 else
19428 {
19429 char *p;
19430 int first = 1;
19431
19432 /* Generate the load multiple instruction to restore the
19433 registers. Note we can get here, even if
19434 frame_pointer_needed is true, but only if sp already
19435 points to the base of the saved core registers. */
19436 if (live_regs_mask & (1 << SP_REGNUM))
19437 {
19438 unsigned HOST_WIDE_INT stack_adjust;
19439
19440 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19441 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19442
19443 if (stack_adjust && arm_arch5 && TARGET_ARM)
19444 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19445 else
19446 {
19447 /* If we can't use ldmib (SA110 bug),
19448 then try to pop r3 instead. */
19449 if (stack_adjust)
19450 live_regs_mask |= 1 << 3;
19451
19452 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19453 }
19454 }
19455 /* For interrupt returns we have to use an LDM rather than
19456 a POP so that we can use the exception return variant. */
19457 else if (IS_INTERRUPT (func_type))
19458 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19459 else
19460 sprintf (instr, "pop%s\t{", conditional);
19461
19462 p = instr + strlen (instr);
19463
19464 for (reg = 0; reg <= SP_REGNUM; reg++)
19465 if (live_regs_mask & (1 << reg))
19466 {
19467 int l = strlen (reg_names[reg]);
19468
19469 if (first)
19470 first = 0;
19471 else
19472 {
19473 memcpy (p, ", ", 2);
19474 p += 2;
19475 }
19476
19477 memcpy (p, "%|", 2);
19478 memcpy (p + 2, reg_names[reg], l);
19479 p += l + 2;
19480 }
19481
19482 if (live_regs_mask & (1 << LR_REGNUM))
19483 {
19484 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19485 /* If returning from an interrupt, restore the CPSR. */
19486 if (IS_INTERRUPT (func_type))
19487 strcat (p, "^");
19488 }
19489 else
19490 strcpy (p, "}");
19491 }
19492
19493 output_asm_insn (instr, & operand);
19494
19495 /* See if we need to generate an extra instruction to
19496 perform the actual function return. */
19497 if (really_return
19498 && func_type != ARM_FT_INTERWORKED
19499 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19500 {
19501 /* The return has already been handled
19502 by loading the LR into the PC. */
19503 return "";
19504 }
19505 }
19506
19507 if (really_return)
19508 {
19509 switch ((int) ARM_FUNC_TYPE (func_type))
19510 {
19511 case ARM_FT_ISR:
19512 case ARM_FT_FIQ:
19513 /* ??? This is wrong for unified assembly syntax. */
19514 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19515 break;
19516
19517 case ARM_FT_INTERWORKED:
19518 gcc_assert (arm_arch5 || arm_arch4t);
19519 sprintf (instr, "bx%s\t%%|lr", conditional);
19520 break;
19521
19522 case ARM_FT_EXCEPTION:
19523 /* ??? This is wrong for unified assembly syntax. */
19524 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19525 break;
19526
19527 default:
19528 if (IS_CMSE_ENTRY (func_type))
19529 {
19530 /* Check if we have to clear the 'GE bits' which is only used if
19531 parallel add and subtraction instructions are available. */
19532 if (TARGET_INT_SIMD)
19533 snprintf (instr, sizeof (instr),
19534 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19535 else
19536 snprintf (instr, sizeof (instr),
19537 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19538
19539 output_asm_insn (instr, & operand);
19540 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19541 {
19542 /* Clear the cumulative exception-status bits (0-4,7) and the
19543 condition code bits (28-31) of the FPSCR. We need to
19544 remember to clear the first scratch register used (IP) and
19545 save and restore the second (r4). */
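		  /* An aside on the constants used below: the movw/movt pair
		     builds the mask 0x0FFFFF60 in r4 (65376 | (4095 << 16)),
		     which preserves everything except bits 0-4, 7 and 28-31.  */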
19546 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19547 output_asm_insn (instr, & operand);
19548 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19549 output_asm_insn (instr, & operand);
19550 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19551 output_asm_insn (instr, & operand);
19552 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19553 output_asm_insn (instr, & operand);
19554 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19555 output_asm_insn (instr, & operand);
19556 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19557 output_asm_insn (instr, & operand);
19558 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19559 output_asm_insn (instr, & operand);
19560 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19561 output_asm_insn (instr, & operand);
19562 }
19563 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19564 }
19565 /* Use bx if it's available. */
19566 else if (arm_arch5 || arm_arch4t)
19567 sprintf (instr, "bx%s\t%%|lr", conditional);
19568 else
19569 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19570 break;
19571 }
19572
19573 output_asm_insn (instr, & operand);
19574 }
19575
19576 return "";
19577 }
19578
19579 /* Output in FILE asm statements needed to declare the NAME of the function
19580 defined by its DECL node. */
19581
19582 void
19583 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19584 {
19585 size_t cmse_name_len;
19586 char *cmse_name = 0;
19587 char cmse_prefix[] = "__acle_se_";
19588
19589 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19590 extra function label for each function with the 'cmse_nonsecure_entry'
19591 attribute. This extra function label should be prepended with
19592 '__acle_se_', telling the linker that it needs to create secure gateway
19593 veneers for this function. */
19594 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19595 DECL_ATTRIBUTES (decl)))
19596 {
19597 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19598 cmse_name = XALLOCAVEC (char, cmse_name_len);
19599 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19600 targetm.asm_out.globalize_label (file, cmse_name);
19601
19602 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19603 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19604 }
19605
19606 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19607 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19608 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19609 ASM_OUTPUT_LABEL (file, name);
19610
19611 if (cmse_name)
19612 ASM_OUTPUT_LABEL (file, cmse_name);
19613
19614 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19615 }
19616
19617 /* Write the function name into the code section, directly preceding
19618 the function prologue.
19619
19620 Code will be output similar to this:
19621 t0
19622 .ascii "arm_poke_function_name", 0
19623 .align
19624 t1
19625 .word 0xff000000 + (t1 - t0)
19626 arm_poke_function_name
19627 mov ip, sp
19628 stmfd sp!, {fp, ip, lr, pc}
19629 sub fp, ip, #4
19630
19631 When performing a stack backtrace, code can inspect the value
19632 of 'pc' stored at 'fp' + 0. If the trace function then looks
19633 at location pc - 12 and the top 8 bits are set, then we know
19634 that there is a function name embedded immediately preceding this
19635 location and has length ((pc[-3]) & 0xff000000).
19636
19637 We assume that pc is declared as a pointer to an unsigned long.
19638
19639 It is of no benefit to output the function name if we are assembling
19640 a leaf function. These function types will not contain a stack
19641 backtrace structure, therefore it is not possible to determine the
19642 function name. */
19643 void
19644 arm_poke_function_name (FILE *stream, const char *name)
19645 {
19646 unsigned long alignlength;
19647 unsigned long length;
19648 rtx x;
19649
19650 length = strlen (name) + 1;
19651 alignlength = ROUND_UP_WORD (length);
19652
19653 ASM_OUTPUT_ASCII (stream, name, length);
19654 ASM_OUTPUT_ALIGN (stream, 2);
19655 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19656 assemble_aligned_integer (UNITS_PER_WORD, x);
19657 }
19658
19659 /* Place some comments into the assembler stream
19660 describing the current function. */
19661 static void
19662 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19663 {
19664 unsigned long func_type;
19665
19666 /* Sanity check. */
19667 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19668
19669 func_type = arm_current_func_type ();
19670
19671 switch ((int) ARM_FUNC_TYPE (func_type))
19672 {
19673 default:
19674 case ARM_FT_NORMAL:
19675 break;
19676 case ARM_FT_INTERWORKED:
19677 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19678 break;
19679 case ARM_FT_ISR:
19680 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19681 break;
19682 case ARM_FT_FIQ:
19683 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19684 break;
19685 case ARM_FT_EXCEPTION:
19686 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19687 break;
19688 }
19689
19690 if (IS_NAKED (func_type))
19691 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19692
19693 if (IS_VOLATILE (func_type))
19694 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19695
19696 if (IS_NESTED (func_type))
19697 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19698 if (IS_STACKALIGN (func_type))
19699 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19700 if (IS_CMSE_ENTRY (func_type))
19701 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19702
19703 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19704 crtl->args.size,
19705 crtl->args.pretend_args_size, frame_size);
19706
19707 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19708 frame_pointer_needed,
19709 cfun->machine->uses_anonymous_args);
19710
19711 if (cfun->machine->lr_save_eliminated)
19712 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19713
19714 if (crtl->calls_eh_return)
19715 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19716
19717 }
19718
19719 static void
19720 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19721 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19722 {
19723 arm_stack_offsets *offsets;
19724
19725 if (TARGET_THUMB1)
19726 {
19727 int regno;
19728
19729 /* Emit any call-via-reg trampolines that are needed for v4t support
19730 of call_reg and call_value_reg type insns. */
19731 for (regno = 0; regno < LR_REGNUM; regno++)
19732 {
19733 rtx label = cfun->machine->call_via[regno];
19734
19735 if (label != NULL)
19736 {
19737 switch_to_section (function_section (current_function_decl));
19738 targetm.asm_out.internal_label (asm_out_file, "L",
19739 CODE_LABEL_NUMBER (label));
19740 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19741 }
19742 }
19743
19744 /* ??? Probably not safe to set this here, since it assumes that a
19745 function will be emitted as assembly immediately after we generate
19746 RTL for it. This does not happen for inline functions. */
19747 cfun->machine->return_used_this_function = 0;
19748 }
19749 else /* TARGET_32BIT */
19750 {
19751 /* We need to take into account any stack-frame rounding. */
19752 offsets = arm_get_frame_offsets ();
19753
19754 gcc_assert (!use_return_insn (FALSE, NULL)
19755 || (cfun->machine->return_used_this_function != 0)
19756 || offsets->saved_regs == offsets->outgoing_args
19757 || frame_pointer_needed);
19758 }
19759 }
19760
19761 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19762 STR and STRD. If an even number of registers are being pushed, one
19763 or more STRD patterns are created for each register pair. If an
19764 odd number of registers are pushed, emit an initial STR followed by
19765 as many STRD instructions as are needed. This works best when the
19766 stack is initially 64-bit aligned (the normal case), since it
19767 ensures that each STRD is also 64-bit aligned. */
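/* As an illustrative sketch: pushing {r4, r5, r6} would typically emit
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
   so the strd address stays doubleword aligned when the incoming SP is
   64-bit aligned.  */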
19768 static void
19769 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19770 {
19771 int num_regs = 0;
19772 int i;
19773 int regno;
19774 rtx par = NULL_RTX;
19775 rtx dwarf = NULL_RTX;
19776 rtx tmp;
19777 bool first = true;
19778
19779 num_regs = bit_count (saved_regs_mask);
19780
19781 /* Must be at least one register to save, and can't save SP or PC. */
19782 gcc_assert (num_regs > 0 && num_regs <= 14);
19783 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19784 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19785
19786 /* Create sequence for DWARF info. All the frame-related data for
19787 debugging is held in this wrapper. */
19788 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19789
19790 /* Describe the stack adjustment. */
19791 tmp = gen_rtx_SET (stack_pointer_rtx,
19792 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19793 RTX_FRAME_RELATED_P (tmp) = 1;
19794 XVECEXP (dwarf, 0, 0) = tmp;
19795
19796 /* Find the first register. */
19797 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19798 ;
19799
19800 i = 0;
19801
19802 /* If there's an odd number of registers to push, start off by
19803 pushing a single register. This ensures that subsequent strd
19804 operations are dword aligned (assuming that SP was originally
19805 64-bit aligned). */
19806 if ((num_regs & 1) != 0)
19807 {
19808 rtx reg, mem, insn;
19809
19810 reg = gen_rtx_REG (SImode, regno);
19811 if (num_regs == 1)
19812 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19813 stack_pointer_rtx));
19814 else
19815 mem = gen_frame_mem (Pmode,
19816 gen_rtx_PRE_MODIFY
19817 (Pmode, stack_pointer_rtx,
19818 plus_constant (Pmode, stack_pointer_rtx,
19819 -4 * num_regs)));
19820
19821 tmp = gen_rtx_SET (mem, reg);
19822 RTX_FRAME_RELATED_P (tmp) = 1;
19823 insn = emit_insn (tmp);
19824 RTX_FRAME_RELATED_P (insn) = 1;
19825 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19826 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19827 RTX_FRAME_RELATED_P (tmp) = 1;
19828 i++;
19829 regno++;
19830 XVECEXP (dwarf, 0, i) = tmp;
19831 first = false;
19832 }
19833
19834 while (i < num_regs)
19835 if (saved_regs_mask & (1 << regno))
19836 {
19837 rtx reg1, reg2, mem1, mem2;
19838 rtx tmp0, tmp1, tmp2;
19839 int regno2;
19840
19841 /* Find the register to pair with this one. */
19842 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19843 regno2++)
19844 ;
19845
19846 reg1 = gen_rtx_REG (SImode, regno);
19847 reg2 = gen_rtx_REG (SImode, regno2);
19848
19849 if (first)
19850 {
19851 rtx insn;
19852
19853 first = false;
19854 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19855 stack_pointer_rtx,
19856 -4 * num_regs));
19857 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19858 stack_pointer_rtx,
19859 -4 * (num_regs - 1)));
19860 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19861 plus_constant (Pmode, stack_pointer_rtx,
19862 -4 * (num_regs)));
19863 tmp1 = gen_rtx_SET (mem1, reg1);
19864 tmp2 = gen_rtx_SET (mem2, reg2);
19865 RTX_FRAME_RELATED_P (tmp0) = 1;
19866 RTX_FRAME_RELATED_P (tmp1) = 1;
19867 RTX_FRAME_RELATED_P (tmp2) = 1;
19868 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19869 XVECEXP (par, 0, 0) = tmp0;
19870 XVECEXP (par, 0, 1) = tmp1;
19871 XVECEXP (par, 0, 2) = tmp2;
19872 insn = emit_insn (par);
19873 RTX_FRAME_RELATED_P (insn) = 1;
19874 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19875 }
19876 else
19877 {
19878 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19879 stack_pointer_rtx,
19880 4 * i));
19881 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19882 stack_pointer_rtx,
19883 4 * (i + 1)));
19884 tmp1 = gen_rtx_SET (mem1, reg1);
19885 tmp2 = gen_rtx_SET (mem2, reg2);
19886 RTX_FRAME_RELATED_P (tmp1) = 1;
19887 RTX_FRAME_RELATED_P (tmp2) = 1;
19888 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19889 XVECEXP (par, 0, 0) = tmp1;
19890 XVECEXP (par, 0, 1) = tmp2;
19891 emit_insn (par);
19892 }
19893
19894 /* Create unwind information. This is an approximation. */
19895 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19896 plus_constant (Pmode,
19897 stack_pointer_rtx,
19898 4 * i)),
19899 reg1);
19900 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19901 plus_constant (Pmode,
19902 stack_pointer_rtx,
19903 4 * (i + 1))),
19904 reg2);
19905
19906 RTX_FRAME_RELATED_P (tmp1) = 1;
19907 RTX_FRAME_RELATED_P (tmp2) = 1;
19908 XVECEXP (dwarf, 0, i + 1) = tmp1;
19909 XVECEXP (dwarf, 0, i + 2) = tmp2;
19910 i += 2;
19911 regno = regno2 + 1;
19912 }
19913 else
19914 regno++;
19915
19916 return;
19917 }
19918
19919 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19920 whenever possible, otherwise it emits single-word stores. The first store
19921 also allocates stack space for all saved registers, using pre-indexed
19922 addressing with writeback. All other stores use offset addressing. If no
19923 STRD can be emitted, this function emits a sequence of single-word stores
19924 rather than an STM, because single-word stores give the scheduler more
19925 freedom and can be merged into an STM later by peephole optimizations. */
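/* For illustration only: with SAVED_REGS_MASK covering {r4, r5, r7} the
   stores emitted below correspond to

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   r7 is stored with a plain STR because ARM-mode STRD needs an even/odd
   consecutive register pair and r6 is not being saved here.  */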
19926 static void
19927 arm_emit_strd_push (unsigned long saved_regs_mask)
19928 {
19929 int num_regs = 0;
19930 int i, j, dwarf_index = 0;
19931 int offset = 0;
19932 rtx dwarf = NULL_RTX;
19933 rtx insn = NULL_RTX;
19934 rtx tmp, mem;
19935
19936 /* TODO: More efficient code can be emitted by changing the
19937 layout, e.g., first push all pairs that can use STRD to keep the
19938 stack aligned, and then push all other registers. */
19939 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19940 if (saved_regs_mask & (1 << i))
19941 num_regs++;
19942
19943 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19944 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19945 gcc_assert (num_regs > 0);
19946
19947 /* Create sequence for DWARF info. */
19948 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19949
19950 /* For dwarf info, we generate an explicit stack update. */
19951 tmp = gen_rtx_SET (stack_pointer_rtx,
19952 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19953 RTX_FRAME_RELATED_P (tmp) = 1;
19954 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19955
19956 /* Save registers. */
19957 offset = - 4 * num_regs;
19958 j = 0;
19959 while (j <= LAST_ARM_REGNUM)
19960 if (saved_regs_mask & (1 << j))
19961 {
19962 if ((j % 2 == 0)
19963 && (saved_regs_mask & (1 << (j + 1))))
19964 {
19965 /* The current register and the next register form a register pair
19966 for which STRD can be generated. */
19967 if (offset < 0)
19968 {
19969 /* Allocate stack space for all saved registers. */
19970 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19971 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19972 mem = gen_frame_mem (DImode, tmp);
19973 offset = 0;
19974 }
19975 else if (offset > 0)
19976 mem = gen_frame_mem (DImode,
19977 plus_constant (Pmode,
19978 stack_pointer_rtx,
19979 offset));
19980 else
19981 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19982
19983 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19984 RTX_FRAME_RELATED_P (tmp) = 1;
19985 tmp = emit_insn (tmp);
19986
19987 /* Record the first store insn. */
19988 if (dwarf_index == 1)
19989 insn = tmp;
19990
19991 /* Generate dwarf info. */
19992 mem = gen_frame_mem (SImode,
19993 plus_constant (Pmode,
19994 stack_pointer_rtx,
19995 offset));
19996 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19997 RTX_FRAME_RELATED_P (tmp) = 1;
19998 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19999
20000 mem = gen_frame_mem (SImode,
20001 plus_constant (Pmode,
20002 stack_pointer_rtx,
20003 offset + 4));
20004 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20005 RTX_FRAME_RELATED_P (tmp) = 1;
20006 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20007
20008 offset += 8;
20009 j += 2;
20010 }
20011 else
20012 {
20013 /* Emit a single word store. */
20014 if (offset < 0)
20015 {
20016 /* Allocate stack space for all saved registers. */
20017 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20018 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20019 mem = gen_frame_mem (SImode, tmp);
20020 offset = 0;
20021 }
20022 else if (offset > 0)
20023 mem = gen_frame_mem (SImode,
20024 plus_constant (Pmode,
20025 stack_pointer_rtx,
20026 offset));
20027 else
20028 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20029
20030 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20031 RTX_FRAME_RELATED_P (tmp) = 1;
20032 tmp = emit_insn (tmp);
20033
20034 /* Record the first store insn. */
20035 if (dwarf_index == 1)
20036 insn = tmp;
20037
20038 /* Generate dwarf info. */
20039 mem = gen_frame_mem (SImode,
20040 plus_constant (Pmode,
20041 stack_pointer_rtx,
20042 offset));
20043 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20044 RTX_FRAME_RELATED_P (tmp) = 1;
20045 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20046
20047 offset += 4;
20048 j += 1;
20049 }
20050 }
20051 else
20052 j++;
20053
20054 /* Attach dwarf info to the first insn we generate. */
20055 gcc_assert (insn != NULL_RTX);
20056 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20057 RTX_FRAME_RELATED_P (insn) = 1;
20058 }
20059
20060 /* Generate and emit an insn that we will recognize as a push_multi.
20061 Unfortunately, since this insn does not reflect very well the actual
20062 semantics of the operation, we need to annotate the insn for the benefit
20063 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20064 MASK for registers that should be annotated for DWARF2 frame unwind
20065 information. */
20066 static rtx
20067 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20068 {
20069 int num_regs = 0;
20070 int num_dwarf_regs = 0;
20071 int i, j;
20072 rtx par;
20073 rtx dwarf;
20074 int dwarf_par_index;
20075 rtx tmp, reg;
20076
20077 /* We don't record the PC in the dwarf frame information. */
20078 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20079
20080 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20081 {
20082 if (mask & (1 << i))
20083 num_regs++;
20084 if (dwarf_regs_mask & (1 << i))
20085 num_dwarf_regs++;
20086 }
20087
20088 gcc_assert (num_regs && num_regs <= 16);
20089 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20090
20091 /* For the body of the insn we are going to generate an UNSPEC in
20092 parallel with several USEs. This allows the insn to be recognized
20093 by the push_multi pattern in the arm.md file.
20094
20095 The body of the insn looks something like this:
20096
20097 (parallel [
20098 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20099 (const_int:SI <num>)))
20100 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20101 (use (reg:SI XX))
20102 (use (reg:SI YY))
20103 ...
20104 ])
20105
20106 For the frame note however, we try to be more explicit and actually
20107 show each register being stored into the stack frame, plus a (single)
20108 decrement of the stack pointer. We do it this way in order to be
20109 friendly to the stack unwinding code, which only wants to see a single
20110 stack decrement per instruction. The RTL we generate for the note looks
20111 something like this:
20112
20113 (sequence [
20114 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20115 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20116 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20117 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20118 ...
20119 ])
20120
20121 FIXME: In an ideal world the PRE_MODIFY would not exist and
20122 instead we'd have a parallel expression detailing all
20123 the stores to the various memory addresses so that debug
20124 information is more up-to-date. Remember however while writing
20125 this to take care of the constraints with the push instruction.
20126
20127 Note also that this has to be taken care of for the VFP registers.
20128
20129 For more see PR43399. */
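/* As a concrete (purely illustrative) instance: for MASK = {r4, r5, lr}
   the body built below is recognized as a push_multi storing r4, r5 and
   lr, while the attached REG_FRAME_RELATED_EXPR note describes
   sp = sp - 12 followed by stores of r4, r5 and lr at sp, sp + 4 and
   sp + 8 respectively.  */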
20130
20131 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20132 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20133 dwarf_par_index = 1;
20134
20135 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20136 {
20137 if (mask & (1 << i))
20138 {
20139 reg = gen_rtx_REG (SImode, i);
20140
20141 XVECEXP (par, 0, 0)
20142 = gen_rtx_SET (gen_frame_mem
20143 (BLKmode,
20144 gen_rtx_PRE_MODIFY (Pmode,
20145 stack_pointer_rtx,
20146 plus_constant
20147 (Pmode, stack_pointer_rtx,
20148 -4 * num_regs))
20149 ),
20150 gen_rtx_UNSPEC (BLKmode,
20151 gen_rtvec (1, reg),
20152 UNSPEC_PUSH_MULT));
20153
20154 if (dwarf_regs_mask & (1 << i))
20155 {
20156 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20157 reg);
20158 RTX_FRAME_RELATED_P (tmp) = 1;
20159 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20160 }
20161
20162 break;
20163 }
20164 }
20165
20166 for (j = 1, i++; j < num_regs; i++)
20167 {
20168 if (mask & (1 << i))
20169 {
20170 reg = gen_rtx_REG (SImode, i);
20171
20172 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20173
20174 if (dwarf_regs_mask & (1 << i))
20175 {
20176 tmp
20177 = gen_rtx_SET (gen_frame_mem
20178 (SImode,
20179 plus_constant (Pmode, stack_pointer_rtx,
20180 4 * j)),
20181 reg);
20182 RTX_FRAME_RELATED_P (tmp) = 1;
20183 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20184 }
20185
20186 j++;
20187 }
20188 }
20189
20190 par = emit_insn (par);
20191
20192 tmp = gen_rtx_SET (stack_pointer_rtx,
20193 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20194 RTX_FRAME_RELATED_P (tmp) = 1;
20195 XVECEXP (dwarf, 0, 0) = tmp;
20196
20197 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20198
20199 return par;
20200 }
20201
20202 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20203 SIZE is the amount added to SRC for the CFA adjustment.
20204 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20205 static void
20206 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20207 {
20208 rtx dwarf;
20209
20210 RTX_FRAME_RELATED_P (insn) = 1;
20211 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20212 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20213 }
20214
20215 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20216 SAVED_REGS_MASK shows which registers need to be restored.
20217
20218 Unfortunately, since this insn does not reflect very well the actual
20219 semantics of the operation, we need to annotate the insn for the benefit
20220 of DWARF2 frame unwind information. */
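/* For illustration: with SAVED_REGS_MASK = {r4, r5, pc} and SP not in the
   mask, the PARALLEL built below holds a return, a set of sp = sp + 12 and
   one load per register, while REG_CFA_RESTORE notes are attached for r4
   and r5 only, since PC is not described in the unwind info.  */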
20221 static void
20222 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20223 {
20224 int num_regs = 0;
20225 int i, j;
20226 rtx par;
20227 rtx dwarf = NULL_RTX;
20228 rtx tmp, reg;
20229 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20230 int offset_adj;
20231 int emit_update;
20232
20233 offset_adj = return_in_pc ? 1 : 0;
20234 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20235 if (saved_regs_mask & (1 << i))
20236 num_regs++;
20237
20238 gcc_assert (num_regs && num_regs <= 16);
20239
20240 /* If SP is in the reglist, then we don't emit the SP update insn. */
20241 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20242
20243 /* The parallel needs to hold num_regs SETs
20244 and one SET for the stack update. */
20245 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20246
20247 if (return_in_pc)
20248 XVECEXP (par, 0, 0) = ret_rtx;
20249
20250 if (emit_update)
20251 {
20252 /* Increment the stack pointer, based on there being
20253 num_regs 4-byte registers to restore. */
20254 tmp = gen_rtx_SET (stack_pointer_rtx,
20255 plus_constant (Pmode,
20256 stack_pointer_rtx,
20257 4 * num_regs));
20258 RTX_FRAME_RELATED_P (tmp) = 1;
20259 XVECEXP (par, 0, offset_adj) = tmp;
20260 }
20261
20262 /* Now restore every reg, which may include PC. */
20263 for (j = 0, i = 0; j < num_regs; i++)
20264 if (saved_regs_mask & (1 << i))
20265 {
20266 reg = gen_rtx_REG (SImode, i);
20267 if ((num_regs == 1) && emit_update && !return_in_pc)
20268 {
20269 /* Emit single load with writeback. */
20270 tmp = gen_frame_mem (SImode,
20271 gen_rtx_POST_INC (Pmode,
20272 stack_pointer_rtx));
20273 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20274 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20275 return;
20276 }
20277
20278 tmp = gen_rtx_SET (reg,
20279 gen_frame_mem
20280 (SImode,
20281 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20282 RTX_FRAME_RELATED_P (tmp) = 1;
20283 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20284
20285 /* We need to maintain a sequence for DWARF info too. As dwarf info
20286 should not have PC, skip PC. */
20287 if (i != PC_REGNUM)
20288 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20289
20290 j++;
20291 }
20292
20293 if (return_in_pc)
20294 par = emit_jump_insn (par);
20295 else
20296 par = emit_insn (par);
20297
20298 REG_NOTES (par) = dwarf;
20299 if (!return_in_pc)
20300 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20301 stack_pointer_rtx, stack_pointer_rtx);
20302 }
20303
20304 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20305 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20306
20307 Unfortunately, since this insn does not reflect very well the actual
20308 semantics of the operation, we need to annotate the insn for the benefit
20309 of DWARF2 frame unwind information. */
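/* For illustration: popping four D-registers (say d8-d11) with BASE_REG
   equal to SP builds a PARALLEL containing sp = sp + 32 and four DFmode
   loads at offsets 0, 8, 16 and 24, plus REG_CFA_RESTORE notes for the
   four registers; the pattern is ultimately emitted as a single VLDM-style
   load-multiple.  */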
20310 static void
20311 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20312 {
20313 int i, j;
20314 rtx par;
20315 rtx dwarf = NULL_RTX;
20316 rtx tmp, reg;
20317
20318 gcc_assert (num_regs && num_regs <= 32);
20319
20320 /* Workaround ARM10 VFPr1 bug. */
20321 if (num_regs == 2 && !arm_arch6)
20322 {
20323 if (first_reg == 15)
20324 first_reg--;
20325
20326 num_regs++;
20327 }
20328
20329 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20330 there could be up to 32 D-registers to restore.
20331 If there are more than 16 D-registers, make two recursive calls,
20332 each of which emits one pop_multi instruction. */
20333 if (num_regs > 16)
20334 {
20335 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20336 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20337 return;
20338 }
20339
20340 /* The parallel needs to hold num_regs SETs
20341 and one SET for the stack update. */
20342 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20343
20344 /* Increment the stack pointer, based on there being
20345 num_regs 8-byte registers to restore. */
20346 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20347 RTX_FRAME_RELATED_P (tmp) = 1;
20348 XVECEXP (par, 0, 0) = tmp;
20349
20350 /* Now show every reg that will be restored, using a SET for each. */
20351 for (j = 0, i=first_reg; j < num_regs; i += 2)
20352 {
20353 reg = gen_rtx_REG (DFmode, i);
20354
20355 tmp = gen_rtx_SET (reg,
20356 gen_frame_mem
20357 (DFmode,
20358 plus_constant (Pmode, base_reg, 8 * j)));
20359 RTX_FRAME_RELATED_P (tmp) = 1;
20360 XVECEXP (par, 0, j + 1) = tmp;
20361
20362 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20363
20364 j++;
20365 }
20366
20367 par = emit_insn (par);
20368 REG_NOTES (par) = dwarf;
20369
20370 /* Make sure the CFA doesn't stay in IP_REGNUM, to allow unwinding from FP. */
20371 if (REGNO (base_reg) == IP_REGNUM)
20372 {
20373 RTX_FRAME_RELATED_P (par) = 1;
20374 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20375 }
20376 else
20377 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20378 base_reg, base_reg);
20379 }
20380
20381 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20382 an even number of registers is being popped, an LDRD pattern is created for
20383 each register pair. If an odd number of registers is popped, the last
20384 register is loaded using an LDR pattern. */
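/* For illustration: popping {r4, r5, r6} without PC is described by RTL
   corresponding to

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   Unlike ARM-mode LDRD, the Thumb-2 form does not require the two
   destination registers to be consecutive, so any pair of registers from
   the mask can share an LDRD.  */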
20385 static void
20386 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20387 {
20388 int num_regs = 0;
20389 int i, j;
20390 rtx par = NULL_RTX;
20391 rtx dwarf = NULL_RTX;
20392 rtx tmp, reg, tmp1;
20393 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20394
20395 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20396 if (saved_regs_mask & (1 << i))
20397 num_regs++;
20398
20399 gcc_assert (num_regs && num_regs <= 16);
20400
20401 /* We cannot generate an ldrd for PC, so reduce the count if PC is
20402 to be popped. If num_regs was even it now becomes odd, and a
20403 pop with PC can be generated; if num_regs was odd it now becomes
20404 even, and an ldr with return can be generated for PC. */
20405 if (return_in_pc)
20406 num_regs--;
20407
20408 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20409
20410 /* Var j iterates over all the registers in saved_regs_mask, while
20411 var i gives the index of each saved register in the stack frame.
20412 A PARALLEL RTX for a register pair is created here, so that the
20413 LDRD pattern can be matched. As PC is always the last register to
20414 be popped, and we have already decremented num_regs if PC is set,
20415 we don't have to worry about PC in this loop. */
20416 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20417 if (saved_regs_mask & (1 << j))
20418 {
20419 /* Create RTX for memory load. */
20420 reg = gen_rtx_REG (SImode, j);
20421 tmp = gen_rtx_SET (reg,
20422 gen_frame_mem (SImode,
20423 plus_constant (Pmode,
20424 stack_pointer_rtx, 4 * i)));
20425 RTX_FRAME_RELATED_P (tmp) = 1;
20426
20427 if (i % 2 == 0)
20428 {
20429 /* When saved-register index (i) is even, the RTX to be emitted is
20430 yet to be created. Hence create it first. The LDRD pattern we
20431 are generating is:
20432 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20433 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20434 where target registers need not be consecutive. */
20435 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20436 dwarf = NULL_RTX;
20437 }
20438
20439 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20440 added as the 0th element; if i is odd, reg_i is added as the 1st element
20441 of the LDRD pattern shown above. */
20442 XVECEXP (par, 0, (i % 2)) = tmp;
20443 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20444
20445 if ((i % 2) == 1)
20446 {
20447 /* When saved-register index (i) is odd, RTXs for both the registers
20448 to be loaded have been generated in the LDRD pattern above, and the
20449 pattern can be emitted now. */
20450 par = emit_insn (par);
20451 REG_NOTES (par) = dwarf;
20452 RTX_FRAME_RELATED_P (par) = 1;
20453 }
20454
20455 i++;
20456 }
20457
20458 /* If the number of registers popped is odd and return_in_pc is false, or
20459 the number of registers is even and return_in_pc is true, the last
20460 register (which may be PC) is popped using LDR. Hence, adjust the stack
20461 first and then use LDR with post-increment. */
20462
20463 /* Increment the stack pointer, based on there being
20464 num_regs 4-byte registers to restore. */
20465 tmp = gen_rtx_SET (stack_pointer_rtx,
20466 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20467 RTX_FRAME_RELATED_P (tmp) = 1;
20468 tmp = emit_insn (tmp);
20469 if (!return_in_pc)
20470 {
20471 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20472 stack_pointer_rtx, stack_pointer_rtx);
20473 }
20474
20475 dwarf = NULL_RTX;
20476
20477 if (((num_regs % 2) == 1 && !return_in_pc)
20478 || ((num_regs % 2) == 0 && return_in_pc))
20479 {
20480 /* Scan for the single register to be popped. Skip until the saved
20481 register is found. */
20482 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20483
20484 /* Gen LDR with post increment here. */
20485 tmp1 = gen_rtx_MEM (SImode,
20486 gen_rtx_POST_INC (SImode,
20487 stack_pointer_rtx));
20488 set_mem_alias_set (tmp1, get_frame_alias_set ());
20489
20490 reg = gen_rtx_REG (SImode, j);
20491 tmp = gen_rtx_SET (reg, tmp1);
20492 RTX_FRAME_RELATED_P (tmp) = 1;
20493 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20494
20495 if (return_in_pc)
20496 {
20497 /* If return_in_pc, j must be PC_REGNUM. */
20498 gcc_assert (j == PC_REGNUM);
20499 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20500 XVECEXP (par, 0, 0) = ret_rtx;
20501 XVECEXP (par, 0, 1) = tmp;
20502 par = emit_jump_insn (par);
20503 }
20504 else
20505 {
20506 par = emit_insn (tmp);
20507 REG_NOTES (par) = dwarf;
20508 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20509 stack_pointer_rtx, stack_pointer_rtx);
20510 }
20511
20512 }
20513 else if ((num_regs % 2) == 1 && return_in_pc)
20514 {
20515 /* There are 2 registers left to be popped. So, generate the pattern
20516 pop_multiple_with_stack_update_and_return to pop into PC as well. */
20517 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20518 }
20519
20520 return;
20521 }
20522
20523 /* LDRD in ARM mode needs consecutive registers as operands. This function
20524 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20525 offset addressing and then generates one separate stack update. This provides
20526 more scheduling freedom, compared to writeback on every load. However,
20527 if the function returns using load into PC directly
20528 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20529 before the last load. TODO: Add a peephole optimization to recognize
20530 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20531 peephole optimization to merge the load at stack-offset zero
20532 with the stack update instruction using load with writeback
20533 in post-index addressing mode. */
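/* For illustration: with SAVED_REGS_MASK = {r4, r5, r6, pc} the sequence
   built below corresponds to

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   r6 cannot be paired with PC, and PC is always popped last by the return
   pattern using a post-increment load.  */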
20534 static void
20535 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20536 {
20537 int j = 0;
20538 int offset = 0;
20539 rtx par = NULL_RTX;
20540 rtx dwarf = NULL_RTX;
20541 rtx tmp, mem;
20542
20543 /* Restore saved registers. */
20544 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20545 j = 0;
20546 while (j <= LAST_ARM_REGNUM)
20547 if (saved_regs_mask & (1 << j))
20548 {
20549 if ((j % 2) == 0
20550 && (saved_regs_mask & (1 << (j + 1)))
20551 && (j + 1) != PC_REGNUM)
20552 {
20553 /* Current register and next register form register pair for which
20554 LDRD can be generated. PC is always the last register popped, and
20555 we handle it separately. */
20556 if (offset > 0)
20557 mem = gen_frame_mem (DImode,
20558 plus_constant (Pmode,
20559 stack_pointer_rtx,
20560 offset));
20561 else
20562 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20563
20564 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20565 tmp = emit_insn (tmp);
20566 RTX_FRAME_RELATED_P (tmp) = 1;
20567
20568 /* Generate dwarf info. */
20569
20570 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20571 gen_rtx_REG (SImode, j),
20572 NULL_RTX);
20573 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20574 gen_rtx_REG (SImode, j + 1),
20575 dwarf);
20576
20577 REG_NOTES (tmp) = dwarf;
20578
20579 offset += 8;
20580 j += 2;
20581 }
20582 else if (j != PC_REGNUM)
20583 {
20584 /* Emit a single word load. */
20585 if (offset > 0)
20586 mem = gen_frame_mem (SImode,
20587 plus_constant (Pmode,
20588 stack_pointer_rtx,
20589 offset));
20590 else
20591 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20592
20593 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20594 tmp = emit_insn (tmp);
20595 RTX_FRAME_RELATED_P (tmp) = 1;
20596
20597 /* Generate dwarf info. */
20598 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20599 gen_rtx_REG (SImode, j),
20600 NULL_RTX);
20601
20602 offset += 4;
20603 j += 1;
20604 }
20605 else /* j == PC_REGNUM */
20606 j++;
20607 }
20608 else
20609 j++;
20610
20611 /* Update the stack. */
20612 if (offset > 0)
20613 {
20614 tmp = gen_rtx_SET (stack_pointer_rtx,
20615 plus_constant (Pmode,
20616 stack_pointer_rtx,
20617 offset));
20618 tmp = emit_insn (tmp);
20619 arm_add_cfa_adjust_cfa_note (tmp, offset,
20620 stack_pointer_rtx, stack_pointer_rtx);
20621 offset = 0;
20622 }
20623
20624 if (saved_regs_mask & (1 << PC_REGNUM))
20625 {
20626 /* Only PC is to be popped. */
20627 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20628 XVECEXP (par, 0, 0) = ret_rtx;
20629 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20630 gen_frame_mem (SImode,
20631 gen_rtx_POST_INC (SImode,
20632 stack_pointer_rtx)));
20633 RTX_FRAME_RELATED_P (tmp) = 1;
20634 XVECEXP (par, 0, 1) = tmp;
20635 par = emit_jump_insn (par);
20636
20637 /* Generate dwarf info. */
20638 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20639 gen_rtx_REG (SImode, PC_REGNUM),
20640 NULL_RTX);
20641 REG_NOTES (par) = dwarf;
20642 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20643 stack_pointer_rtx, stack_pointer_rtx);
20644 }
20645 }
20646
20647 /* Calculate the size of the return value that is passed in registers. */
20648 static unsigned
20649 arm_size_return_regs (void)
20650 {
20651 machine_mode mode;
20652
20653 if (crtl->return_rtx != 0)
20654 mode = GET_MODE (crtl->return_rtx);
20655 else
20656 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20657
20658 return GET_MODE_SIZE (mode);
20659 }
20660
20661 /* Return true if the current function needs to save/restore LR. */
20662 static bool
20663 thumb_force_lr_save (void)
20664 {
20665 return !cfun->machine->lr_save_eliminated
20666 && (!crtl->is_leaf
20667 || thumb_far_jump_used_p ()
20668 || df_regs_ever_live_p (LR_REGNUM));
20669 }
20670
20671 /* We do not know whether r3 will be available, because
20672 an indirect tail call is happening in this
20673 particular case. */
20674 static bool
20675 is_indirect_tailcall_p (rtx call)
20676 {
20677 rtx pat = PATTERN (call);
20678
20679 /* Indirect tail call. */
20680 pat = XVECEXP (pat, 0, 0);
20681 if (GET_CODE (pat) == SET)
20682 pat = SET_SRC (pat);
20683
20684 pat = XEXP (XEXP (pat, 0), 0);
20685 return REG_P (pat);
20686 }
20687
20688 /* Return true if r3 could be used by any of the tail call insns in the
20689 current function. */
20690 static bool
20691 any_sibcall_could_use_r3 (void)
20692 {
20693 edge_iterator ei;
20694 edge e;
20695
20696 if (!crtl->tail_call_emit)
20697 return false;
20698 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20699 if (e->flags & EDGE_SIBCALL)
20700 {
20701 rtx_insn *call = BB_END (e->src);
20702 if (!CALL_P (call))
20703 call = prev_nonnote_nondebug_insn (call);
20704 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20705 if (find_regno_fusage (call, USE, 3)
20706 || is_indirect_tailcall_p (call))
20707 return true;
20708 }
20709 return false;
20710 }
20711
20712
20713 /* Compute the distance from register FROM to register TO.
20714 These can be the arg pointer (26), the soft frame pointer (25),
20715 the stack pointer (13) or the hard frame pointer (11).
20716 In thumb mode r7 is used as the soft frame pointer, if needed.
20717 Typical stack layout looks like this:
20718
20719 old stack pointer -> | |
20720 ----
20721 | | \
20722 | | saved arguments for
20723 | | vararg functions
20724 | | /
20725 --
20726 hard FP & arg pointer -> | | \
20727 | | stack
20728 | | frame
20729 | | /
20730 --
20731 | | \
20732 | | call saved
20733 | | registers
20734 soft frame pointer -> | | /
20735 --
20736 | | \
20737 | | local
20738 | | variables
20739 locals base pointer -> | | /
20740 --
20741 | | \
20742 | | outgoing
20743 | | arguments
20744 current stack pointer -> | | /
20745 --
20746
20747 For a given function some or all of these stack components
20748 may not be needed, giving rise to the possibility of
20749 eliminating some of the registers.
20750
20751 The values returned by this function must reflect the behavior
20752 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20753
20754 The sign of the number returned reflects the direction of stack
20755 growth, so the values are positive for all eliminations except
20756 from the soft frame pointer to the hard frame pointer.
20757
20758 SFP may point just inside the local variables block to ensure correct
20759 alignment. */
20760
20761
20762 /* Return cached stack offsets. */
20763
20764 static arm_stack_offsets *
20765 arm_get_frame_offsets (void)
20766 {
20767 struct arm_stack_offsets *offsets;
20768
20769 offsets = &cfun->machine->stack_offsets;
20770
20771 return offsets;
20772 }
20773
20774
20775 /* Calculate stack offsets. These are used to calculate register elimination
20776 offsets and in prologue/epilogue code. Also calculates which registers
20777 should be saved. */
20778
20779 static void
20780 arm_compute_frame_layout (void)
20781 {
20782 struct arm_stack_offsets *offsets;
20783 unsigned long func_type;
20784 int saved;
20785 int core_saved;
20786 HOST_WIDE_INT frame_size;
20787 int i;
20788
20789 offsets = &cfun->machine->stack_offsets;
20790
20791 /* Initially this is the size of the local variables. It will be translated
20792 into an offset once we have determined the size of preceding data. */
20793 frame_size = ROUND_UP_WORD (get_frame_size ());
20794
20795 /* Space for variadic functions. */
20796 offsets->saved_args = crtl->args.pretend_args_size;
20797
20798 /* In Thumb mode this is incorrect, but never used. */
20799 offsets->frame
20800 = (offsets->saved_args
20801 + arm_compute_static_chain_stack_bytes ()
20802 + (frame_pointer_needed ? 4 : 0));
20803
20804 if (TARGET_32BIT)
20805 {
20806 unsigned int regno;
20807
20808 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20809 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20810 saved = core_saved;
20811
20812 /* We know that SP will be doubleword aligned on entry, and we must
20813 preserve that condition at any subroutine call. We also require the
20814 soft frame pointer to be doubleword aligned. */
20815
20816 if (TARGET_REALLY_IWMMXT)
20817 {
20818 /* Check for the call-saved iWMMXt registers. */
20819 for (regno = FIRST_IWMMXT_REGNUM;
20820 regno <= LAST_IWMMXT_REGNUM;
20821 regno++)
20822 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20823 saved += 8;
20824 }
20825
20826 func_type = arm_current_func_type ();
20827 /* Space for saved VFP registers. */
20828 if (! IS_VOLATILE (func_type)
20829 && TARGET_HARD_FLOAT)
20830 saved += arm_get_vfp_saved_size ();
20831 }
20832 else /* TARGET_THUMB1 */
20833 {
20834 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20835 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20836 saved = core_saved;
20837 if (TARGET_BACKTRACE)
20838 saved += 16;
20839 }
20840
20841 /* Saved registers include the stack frame. */
20842 offsets->saved_regs
20843 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20844 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20845
20846 /* A leaf function does not need any stack alignment if it has nothing
20847 on the stack. */
20848 if (crtl->is_leaf && frame_size == 0
20849 /* However, if it calls alloca(), we have a dynamically allocated
20850 block of BIGGEST_ALIGNMENT on the stack, so still do stack alignment. */
20851 && ! cfun->calls_alloca)
20852 {
20853 offsets->outgoing_args = offsets->soft_frame;
20854 offsets->locals_base = offsets->soft_frame;
20855 return;
20856 }
20857
20858 /* Ensure SFP has the correct alignment. */
20859 if (ARM_DOUBLEWORD_ALIGN
20860 && (offsets->soft_frame & 7))
20861 {
20862 offsets->soft_frame += 4;
20863 /* Try to align stack by pushing an extra reg. Don't bother doing this
20864 when there is a stack frame as the alignment will be rolled into
20865 the normal stack adjustment. */
20866 if (frame_size + crtl->outgoing_args_size == 0)
20867 {
20868 int reg = -1;
20869
20870 /* Register r3 is caller-saved. Normally it does not need to be
20871 saved on entry by the prologue. However if we choose to save
20872 it for padding then we may confuse the compiler into thinking
20873 a prologue sequence is required when in fact it is not. This
20874 will occur when shrink-wrapping if r3 is used as a scratch
20875 register and there are no other callee-saved writes.
20876
20877 This situation can be avoided when other callee-saved registers
20878 are available: r3 is not mandatory, so prefer a callee-saved
20879 register for the padding. */
20880 bool prefer_callee_reg_p = false;
20881
20882 /* If it is safe to use r3, then do so. This sometimes
20883 generates better code on Thumb-2 by avoiding the need to
20884 use 32-bit push/pop instructions. */
20885 if (! any_sibcall_could_use_r3 ()
20886 && arm_size_return_regs () <= 12
20887 && (offsets->saved_regs_mask & (1 << 3)) == 0
20888 && (TARGET_THUMB2
20889 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20890 {
20891 reg = 3;
20892 if (!TARGET_THUMB2)
20893 prefer_callee_reg_p = true;
20894 }
20895 if (reg == -1
20896 || prefer_callee_reg_p)
20897 {
20898 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20899 {
20900 /* Avoid fixed registers; they may be changed at
20901 arbitrary times so it's unsafe to restore them
20902 during the epilogue. */
20903 if (!fixed_regs[i]
20904 && (offsets->saved_regs_mask & (1 << i)) == 0)
20905 {
20906 reg = i;
20907 break;
20908 }
20909 }
20910 }
20911
20912 if (reg != -1)
20913 {
20914 offsets->saved_regs += 4;
20915 offsets->saved_regs_mask |= (1 << reg);
20916 }
20917 }
20918 }
20919
20920 offsets->locals_base = offsets->soft_frame + frame_size;
20921 offsets->outgoing_args = (offsets->locals_base
20922 + crtl->outgoing_args_size);
20923
20924 if (ARM_DOUBLEWORD_ALIGN)
20925 {
20926 /* Ensure SP remains doubleword aligned. */
20927 if (offsets->outgoing_args & 7)
20928 offsets->outgoing_args += 4;
20929 gcc_assert (!(offsets->outgoing_args & 7));
20930 }
20931 }
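/* A worked example (purely illustrative; it assumes ARM state, no frame
   pointer, no static chain, no interworking slot and no outgoing
   arguments): a function that saves {r4, r5, lr} and has 16 bytes of
   locals ends up with saved_args = 0, saved_regs = 12, soft_frame = 16
   (padded up from 12 for doubleword alignment), locals_base = 32 and
   outgoing_args = 32. The 4 bytes of padding are folded into the normal
   stack adjustment because a local frame exists; with no locals or
   outgoing arguments at all, an extra register (r3 or a callee-saved one)
   would be pushed instead.  */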
20932
20933
20934 /* Calculate the relative offsets for the different stack pointers. Positive
20935 offsets are in the direction of stack growth. */
20936
20937 HOST_WIDE_INT
20938 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20939 {
20940 arm_stack_offsets *offsets;
20941
20942 offsets = arm_get_frame_offsets ();
20943
20944 /* OK, now we have enough information to compute the distances.
20945 There must be an entry in these switch tables for each pair
20946 of registers in ELIMINABLE_REGS, even if some of the entries
20947 seem to be redundant or useless. */
20948 switch (from)
20949 {
20950 case ARG_POINTER_REGNUM:
20951 switch (to)
20952 {
20953 case THUMB_HARD_FRAME_POINTER_REGNUM:
20954 return 0;
20955
20956 case FRAME_POINTER_REGNUM:
20957 /* This is the reverse of the soft frame pointer
20958 to hard frame pointer elimination below. */
20959 return offsets->soft_frame - offsets->saved_args;
20960
20961 case ARM_HARD_FRAME_POINTER_REGNUM:
20962 /* This is only non-zero in the case where the static chain register
20963 is stored above the frame. */
20964 return offsets->frame - offsets->saved_args - 4;
20965
20966 case STACK_POINTER_REGNUM:
20967 /* If nothing has been pushed on the stack at all
20968 then this will return -4. This *is* correct! */
20969 return offsets->outgoing_args - (offsets->saved_args + 4);
20970
20971 default:
20972 gcc_unreachable ();
20973 }
20974 gcc_unreachable ();
20975
20976 case FRAME_POINTER_REGNUM:
20977 switch (to)
20978 {
20979 case THUMB_HARD_FRAME_POINTER_REGNUM:
20980 return 0;
20981
20982 case ARM_HARD_FRAME_POINTER_REGNUM:
20983 /* The hard frame pointer points to the top entry in the
20984 stack frame. The soft frame pointer points to the bottom entry
20985 in the stack frame. If there is no stack frame at all,
20986 then they are identical. */
20987
20988 return offsets->frame - offsets->soft_frame;
20989
20990 case STACK_POINTER_REGNUM:
20991 return offsets->outgoing_args - offsets->soft_frame;
20992
20993 default:
20994 gcc_unreachable ();
20995 }
20996 gcc_unreachable ();
20997
20998 default:
20999 /* You cannot eliminate from the stack pointer.
21000 In theory you could eliminate from the hard frame
21001 pointer to the stack pointer, but this will never
21002 happen, since if a stack frame is not needed the
21003 hard frame pointer will never be used. */
21004 gcc_unreachable ();
21005 }
21006 }
21007
21008 /* Given FROM and TO register numbers, say whether this elimination is
21009 allowed. Frame pointer elimination is automatically handled.
21010
21011 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21012 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21013 pointer, we must eliminate FRAME_POINTER_REGNUM into
21014 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21015 ARG_POINTER_REGNUM. */
21016
21017 bool
21018 arm_can_eliminate (const int from, const int to)
21019 {
21020 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21021 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21022 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21023 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21024 true);
21025 }
21026
21027 /* Emit RTL to save coprocessor registers on function entry. Returns the
21028 number of bytes pushed. */
21029
21030 static int
21031 arm_save_coproc_regs(void)
21032 {
21033 int saved_size = 0;
21034 unsigned reg;
21035 unsigned start_reg;
21036 rtx insn;
21037
21038 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21039 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21040 {
21041 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21042 insn = gen_rtx_MEM (V2SImode, insn);
21043 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21044 RTX_FRAME_RELATED_P (insn) = 1;
21045 saved_size += 8;
21046 }
21047
21048 if (TARGET_HARD_FLOAT)
21049 {
21050 start_reg = FIRST_VFP_REGNUM;
21051
21052 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21053 {
21054 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21055 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21056 {
21057 if (start_reg != reg)
21058 saved_size += vfp_emit_fstmd (start_reg,
21059 (reg - start_reg) / 2);
21060 start_reg = reg + 2;
21061 }
21062 }
21063 if (start_reg != reg)
21064 saved_size += vfp_emit_fstmd (start_reg,
21065 (reg - start_reg) / 2);
21066 }
21067 return saved_size;
21068 }
21069
21070
21071 /* Set the Thumb frame pointer from the stack pointer. */
21072
21073 static void
21074 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21075 {
21076 HOST_WIDE_INT amount;
21077 rtx insn, dwarf;
21078
21079 amount = offsets->outgoing_args - offsets->locals_base;
21080 if (amount < 1024)
21081 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21082 stack_pointer_rtx, GEN_INT (amount)));
21083 else
21084 {
21085 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21086 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21087 expects the first two operands to be the same. */
21088 if (TARGET_THUMB2)
21089 {
21090 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21091 stack_pointer_rtx,
21092 hard_frame_pointer_rtx));
21093 }
21094 else
21095 {
21096 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21097 hard_frame_pointer_rtx,
21098 stack_pointer_rtx));
21099 }
21100 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21101 plus_constant (Pmode, stack_pointer_rtx, amount));
21102 RTX_FRAME_RELATED_P (dwarf) = 1;
21103 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21104 }
21105
21106 RTX_FRAME_RELATED_P (insn) = 1;
21107 }
21108
21109 struct scratch_reg {
21110 rtx reg;
21111 bool saved;
21112 };
21113
21114 /* Return a short-lived scratch register for use as a 2nd scratch register on
21115 function entry after the registers are saved in the prologue. This register
21116 must be released by means of release_scratch_register_on_entry. IP is not
21117 considered since it is always used as the 1st scratch register if available.
21118
21119 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21120 mask of live registers. */
21121
21122 static void
21123 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21124 unsigned long live_regs)
21125 {
21126 int regno = -1;
21127
21128 sr->saved = false;
21129
21130 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21131 regno = LR_REGNUM;
21132 else
21133 {
21134 unsigned int i;
21135
21136 for (i = 4; i < 11; i++)
21137 if (regno1 != i && (live_regs & (1 << i)) != 0)
21138 {
21139 regno = i;
21140 break;
21141 }
21142
21143 if (regno < 0)
21144 {
21145 /* If IP is used as the 1st scratch register for a nested function,
21146 then either r3 wasn't available or it is used to preserve IP. */
21147 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21148 regno1 = 3;
21149 regno = (regno1 == 3 ? 2 : 3);
21150 sr->saved
21151 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21152 regno);
21153 }
21154 }
21155
21156 sr->reg = gen_rtx_REG (SImode, regno);
21157 if (sr->saved)
21158 {
21159 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21160 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21161 rtx x = gen_rtx_SET (stack_pointer_rtx,
21162 plus_constant (Pmode, stack_pointer_rtx, -4));
21163 RTX_FRAME_RELATED_P (insn) = 1;
21164 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21165 }
21166 }
21167
21168 /* Release a scratch register obtained from the preceding function. */
21169
21170 static void
21171 release_scratch_register_on_entry (struct scratch_reg *sr)
21172 {
21173 if (sr->saved)
21174 {
21175 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21176 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21177 rtx x = gen_rtx_SET (stack_pointer_rtx,
21178 plus_constant (Pmode, stack_pointer_rtx, 4));
21179 RTX_FRAME_RELATED_P (insn) = 1;
21180 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21181 }
21182 }
21183
21184 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21185
21186 #if PROBE_INTERVAL > 4096
21187 #error Cannot use indexed addressing mode for stack probing
21188 #endif
21189
21190 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21191 inclusive. These are offsets from the current stack pointer. REGNO1
21192 is the index number of the 1st scratch register and LIVE_REGS is the
21193 mask of live registers. */
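/* A worked example, assuming PROBE_INTERVAL is 4096: for FIRST = 4096 and
   SIZE = 12388 the middle branch below probes at SP - 8192, SP - 12288,
   SP - 16384 and finally SP - 16484 (that is, FIRST + SIZE below SP), so
   consecutive probes are never more than PROBE_INTERVAL apart.  */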
21194
21195 static void
21196 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21197 unsigned int regno1, unsigned long live_regs)
21198 {
21199 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21200
21201 /* See if we have a constant small number of probes to generate. If so,
21202 that's the easy case. */
21203 if (size <= PROBE_INTERVAL)
21204 {
21205 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21206 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21207 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21208 }
21209
21210 /* The run-time loop is made up of 10 insns in the generic case while the
21211 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21212 else if (size <= 5 * PROBE_INTERVAL)
21213 {
21214 HOST_WIDE_INT i, rem;
21215
21216 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21217 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21218 emit_stack_probe (reg1);
21219
21220 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21221 it exceeds SIZE. If only two probes are needed, this will not
21222 generate any code. Then probe at FIRST + SIZE. */
21223 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21224 {
21225 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21226 emit_stack_probe (reg1);
21227 }
21228
21229 rem = size - (i - PROBE_INTERVAL);
21230 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21231 {
21232 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21233 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21234 }
21235 else
21236 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21237 }
21238
21239 /* Otherwise, do the same as above, but in a loop. Note that we must be
21240 extra careful with variables wrapping around because we might be at
21241 the very top (or the very bottom) of the address space and we have
21242 to be able to handle this case properly; in particular, we use an
21243 equality test for the loop condition. */
21244 else
21245 {
21246 HOST_WIDE_INT rounded_size;
21247 struct scratch_reg sr;
21248
21249 get_scratch_register_on_entry (&sr, regno1, live_regs);
21250
21251 emit_move_insn (reg1, GEN_INT (first));
21252
21253
21254 /* Step 1: round SIZE to the previous multiple of the interval. */
21255
21256 rounded_size = size & -PROBE_INTERVAL;
21257 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21258
21259
21260 /* Step 2: compute initial and final value of the loop counter. */
21261
21262 /* TEST_ADDR = SP + FIRST. */
21263 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21264
21265 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21266 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21267
21268
21269 /* Step 3: the loop
21270
21271 do
21272 {
21273 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21274 probe at TEST_ADDR
21275 }
21276 while (TEST_ADDR != LAST_ADDR)
21277
21278 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21279 until it is equal to ROUNDED_SIZE. */
21280
21281 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21282
21283
21284 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21285 that SIZE is equal to ROUNDED_SIZE. */
21286
21287 if (size != rounded_size)
21288 {
21289 HOST_WIDE_INT rem = size - rounded_size;
21290
21291 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21292 {
21293 emit_set_insn (sr.reg,
21294 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21295 emit_stack_probe (plus_constant (Pmode, sr.reg,
21296 PROBE_INTERVAL - rem));
21297 }
21298 else
21299 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21300 }
21301
21302 release_scratch_register_on_entry (&sr);
21303 }
21304
21305 /* Make sure nothing is scheduled before we are done. */
21306 emit_insn (gen_blockage ());
21307 }
21308
21309 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21310 absolute addresses. */
21311
21312 const char *
21313 output_probe_stack_range (rtx reg1, rtx reg2)
21314 {
21315 static int labelno = 0;
21316 char loop_lab[32];
21317 rtx xops[2];
21318
21319 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21320
21321 /* Loop. */
21322 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21323
21324 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21325 xops[0] = reg1;
21326 xops[1] = GEN_INT (PROBE_INTERVAL);
21327 output_asm_insn ("sub\t%0, %0, %1", xops);
21328
21329 /* Probe at TEST_ADDR. */
21330 output_asm_insn ("str\tr0, [%0, #0]", xops);
21331
21332 /* Test if TEST_ADDR == LAST_ADDR. */
21333 xops[1] = reg2;
21334 output_asm_insn ("cmp\t%0, %1", xops);
21335
21336 /* Branch. */
21337 fputs ("\tbne\t", asm_out_file);
21338 assemble_name_raw (asm_out_file, loop_lab);
21339 fputc ('\n', asm_out_file);
21340
21341 return "";
21342 }
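/* For illustration: with REG1 = r4, REG2 = r5 and a PROBE_INTERVAL of 4096
   the routine above emits a loop of the form

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0

   where the exact label spelling comes from ASM_GENERATE_INTERNAL_LABEL.  */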
21343
21344 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21345 function. */
21346 void
21347 arm_expand_prologue (void)
21348 {
21349 rtx amount;
21350 rtx insn;
21351 rtx ip_rtx;
21352 unsigned long live_regs_mask;
21353 unsigned long func_type;
21354 int fp_offset = 0;
21355 int saved_pretend_args = 0;
21356 int saved_regs = 0;
21357 unsigned HOST_WIDE_INT args_to_push;
21358 HOST_WIDE_INT size;
21359 arm_stack_offsets *offsets;
21360 bool clobber_ip;
21361
21362 func_type = arm_current_func_type ();
21363
21364 /* Naked functions don't have prologues. */
21365 if (IS_NAKED (func_type))
21366 {
21367 if (flag_stack_usage_info)
21368 current_function_static_stack_size = 0;
21369 return;
21370 }
21371
21372 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21373 args_to_push = crtl->args.pretend_args_size;
21374
21375 /* Compute which registers we will have to save onto the stack. */
21376 offsets = arm_get_frame_offsets ();
21377 live_regs_mask = offsets->saved_regs_mask;
21378
21379 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21380
21381 if (IS_STACKALIGN (func_type))
21382 {
21383 rtx r0, r1;
21384
21385 /* Handle a word-aligned stack pointer. We generate the following:
21386
21387 mov r0, sp
21388 bic r1, r0, #7
21389 mov sp, r1
21390 <save and restore r0 in normal prologue/epilogue>
21391 mov sp, r0
21392 bx lr
21393
21394 The unwinder doesn't need to know about the stack realignment.
21395 Just tell it we saved SP in r0. */
21396 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21397
21398 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21399 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21400
21401 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21402 RTX_FRAME_RELATED_P (insn) = 1;
21403 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21404
21405 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21406
21407 /* ??? The CFA changes here, which may cause GDB to conclude that it
21408 has entered a different function. That said, the unwind info is
21409 correct, individually, before and after this instruction because
21410 we've described the save of SP, which will override the default
21411 handling of SP as restoring from the CFA. */
21412 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21413 }
21414
21415 /* The static chain register is the same as the IP register. If it is
21416 clobbered when creating the frame, we need to save and restore it. */
21417 clobber_ip = IS_NESTED (func_type)
21418 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21419 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21420 && !df_regs_ever_live_p (LR_REGNUM)
21421 && arm_r3_live_at_start_p ()));
21422
21423 /* Find somewhere to store IP whilst the frame is being created.
21424 We try the following places in order:
21425
21426 1. The last argument register r3 if it is available.
21427 2. A slot on the stack above the frame if there are no
21428 arguments to push onto the stack.
21429 3. Register r3 again, after pushing the argument registers
21430 onto the stack, if this is a varargs function.
21431 4. The last slot on the stack created for the arguments to
21432 push, if this isn't a varargs function.
21433
21434 Note - we only need to tell the dwarf2 backend about the SP
21435 adjustment in the second variant; the static chain register
21436 doesn't need to be unwound, as it doesn't contain a value
21437 inherited from the caller. */
21438 if (clobber_ip)
21439 {
21440 if (!arm_r3_live_at_start_p ())
21441 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21442 else if (args_to_push == 0)
21443 {
21444 rtx addr, dwarf;
21445
21446 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21447 saved_regs += 4;
21448
21449 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21450 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21451 fp_offset = 4;
21452
21453 /* Just tell the dwarf backend that we adjusted SP. */
21454 dwarf = gen_rtx_SET (stack_pointer_rtx,
21455 plus_constant (Pmode, stack_pointer_rtx,
21456 -fp_offset));
21457 RTX_FRAME_RELATED_P (insn) = 1;
21458 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21459 }
21460 else
21461 {
21462 /* Store the args on the stack. */
21463 if (cfun->machine->uses_anonymous_args)
21464 {
21465 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21466 (0xf0 >> (args_to_push / 4)) & 0xf);
21467 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21468 saved_pretend_args = 1;
21469 }
21470 else
21471 {
21472 rtx addr, dwarf;
21473
21474 if (args_to_push == 4)
21475 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21476 else
21477 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21478 plus_constant (Pmode,
21479 stack_pointer_rtx,
21480 -args_to_push));
21481
21482 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21483
21484 /* Just tell the dwarf backend that we adjusted SP. */
21485 dwarf = gen_rtx_SET (stack_pointer_rtx,
21486 plus_constant (Pmode, stack_pointer_rtx,
21487 -args_to_push));
21488 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21489 }
21490
21491 RTX_FRAME_RELATED_P (insn) = 1;
21492 fp_offset = args_to_push;
21493 args_to_push = 0;
21494 }
21495 }
21496
21497 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21498 {
21499 if (IS_INTERRUPT (func_type))
21500 {
21501 /* Interrupt functions must not corrupt any registers.
21502 Creating a frame pointer, however, corrupts the IP
21503 register, so we must push it first. */
21504 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21505
21506 /* Do not set RTX_FRAME_RELATED_P on this insn.
21507 The dwarf stack unwinding code only wants to see one
21508 stack decrement per function, and this is not it. If
21509 this instruction is labeled as being part of the frame
21510 creation sequence then dwarf2out_frame_debug_expr will
21511 die when it encounters the assignment of IP to FP
21512 later on, since the use of SP here establishes SP as
21513 the CFA register and not IP.
21514
21515 Anyway this instruction is not really part of the stack
21516 frame creation although it is part of the prologue. */
21517 }
21518
21519 insn = emit_set_insn (ip_rtx,
21520 plus_constant (Pmode, stack_pointer_rtx,
21521 fp_offset));
21522 RTX_FRAME_RELATED_P (insn) = 1;
21523 }
21524
21525 if (args_to_push)
21526 {
21527 /* Push the argument registers, or reserve space for them. */
21528 if (cfun->machine->uses_anonymous_args)
21529 insn = emit_multi_reg_push
21530 ((0xf0 >> (args_to_push / 4)) & 0xf,
21531 (0xf0 >> (args_to_push / 4)) & 0xf);
21532 else
21533 insn = emit_insn
21534 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21535 GEN_INT (- args_to_push)));
21536 RTX_FRAME_RELATED_P (insn) = 1;
21537 }
21538
21539 /* If this is an interrupt service routine, and the link register
21540 is going to be pushed, and we're not generating the extra
21541 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21542 then subtracting four from LR now will mean that the function return
21543 can be done with a single instruction. */
21544 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21545 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21546 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21547 && TARGET_ARM)
21548 {
21549 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21550
21551 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21552 }
21553
21554 if (live_regs_mask)
21555 {
21556 unsigned long dwarf_regs_mask = live_regs_mask;
21557
21558 saved_regs += bit_count (live_regs_mask) * 4;
21559 if (optimize_size && !frame_pointer_needed
21560 && saved_regs == offsets->saved_regs - offsets->saved_args)
21561 {
21562 /* If no coprocessor registers are being pushed and we don't have
21563 to worry about a frame pointer then push extra registers to
21564 create the stack frame. This is done in a way that does not
21565 alter the frame layout, so is independent of the epilogue. */
21566 int n;
21567 int frame;
21568 n = 0;
21569 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21570 n++;
21571 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21572 if (frame && n * 4 >= frame)
21573 {
21574 n = frame / 4;
21575 live_regs_mask |= (1 << n) - 1;
21576 saved_regs += frame;
21577 }
21578 }
21579
21580 if (TARGET_LDRD
21581 && current_tune->prefer_ldrd_strd
21582 && !optimize_function_for_size_p (cfun))
21583 {
21584 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21585 if (TARGET_THUMB2)
21586 thumb2_emit_strd_push (live_regs_mask);
21587 else if (TARGET_ARM
21588 && !TARGET_APCS_FRAME
21589 && !IS_INTERRUPT (func_type))
21590 arm_emit_strd_push (live_regs_mask);
21591 else
21592 {
21593 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21594 RTX_FRAME_RELATED_P (insn) = 1;
21595 }
21596 }
21597 else
21598 {
21599 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21600 RTX_FRAME_RELATED_P (insn) = 1;
21601 }
21602 }
21603
21604 if (! IS_VOLATILE (func_type))
21605 saved_regs += arm_save_coproc_regs ();
21606
21607 if (frame_pointer_needed && TARGET_ARM)
21608 {
21609 /* Create the new frame pointer. */
21610 if (TARGET_APCS_FRAME)
21611 {
21612 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21613 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21614 RTX_FRAME_RELATED_P (insn) = 1;
21615 }
21616 else
21617 {
21618 insn = GEN_INT (saved_regs - (4 + fp_offset));
21619 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21620 stack_pointer_rtx, insn));
21621 RTX_FRAME_RELATED_P (insn) = 1;
21622 }
21623 }
21624
21625 size = offsets->outgoing_args - offsets->saved_args;
21626 if (flag_stack_usage_info)
21627 current_function_static_stack_size = size;
21628
21629 /* If this isn't an interrupt service routine and we have a frame, then do
21630 stack checking. We use IP as the first scratch register, except for
21631 non-APCS nested functions where LR or r3 is available (see clobber_ip). */
21632 if (!IS_INTERRUPT (func_type)
21633 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21634 {
21635 unsigned int regno;
21636
21637 if (!IS_NESTED (func_type) || clobber_ip)
21638 regno = IP_REGNUM;
21639 else if (df_regs_ever_live_p (LR_REGNUM))
21640 regno = LR_REGNUM;
21641 else
21642 regno = 3;
21643
21644 if (crtl->is_leaf && !cfun->calls_alloca)
21645 {
21646 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21647 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21648 size - STACK_CHECK_PROTECT,
21649 regno, live_regs_mask);
21650 }
21651 else if (size > 0)
21652 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21653 regno, live_regs_mask);
21654 }
21655
21656 /* Recover the static chain register. */
21657 if (clobber_ip)
21658 {
21659 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21660 insn = gen_rtx_REG (SImode, 3);
21661 else
21662 {
21663 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21664 insn = gen_frame_mem (SImode, insn);
21665 }
21666 emit_set_insn (ip_rtx, insn);
21667 emit_insn (gen_force_register_use (ip_rtx));
21668 }
21669
21670 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21671 {
21672 /* This add can produce multiple insns for a large constant, so we
21673 need to get tricky. */
21674 rtx_insn *last = get_last_insn ();
21675
21676 amount = GEN_INT (offsets->saved_args + saved_regs
21677 - offsets->outgoing_args);
21678
21679 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21680 amount));
21681 do
21682 {
21683 last = last ? NEXT_INSN (last) : get_insns ();
21684 RTX_FRAME_RELATED_P (last) = 1;
21685 }
21686 while (last != insn);
21687
21688 /* If the frame pointer is needed, emit a special barrier that
21689 will prevent the scheduler from moving stores to the frame
21690 before the stack adjustment. */
21691 if (frame_pointer_needed)
21692 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21693 hard_frame_pointer_rtx));
21694 }
21695
21696
21697 if (frame_pointer_needed && TARGET_THUMB2)
21698 thumb_set_frame_pointer (offsets);
21699
21700 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21701 {
21702 unsigned long mask;
21703
21704 mask = live_regs_mask;
21705 mask &= THUMB2_WORK_REGS;
21706 if (!IS_NESTED (func_type))
21707 mask |= (1 << IP_REGNUM);
21708 arm_load_pic_register (mask);
21709 }
21710
21711 /* If we are profiling, make sure no instructions are scheduled before
21712 the call to mcount. Similarly if the user has requested no
21713 scheduling in the prolog. Similarly if we want non-call exceptions
21714 using the EABI unwinder, to prevent faulting instructions from being
21715 swapped with a stack adjustment. */
21716 if (crtl->profile || !TARGET_SCHED_PROLOG
21717 || (arm_except_unwind_info (&global_options) == UI_TARGET
21718 && cfun->can_throw_non_call_exceptions))
21719 emit_insn (gen_blockage ());
21720
21721 /* If the link register is being kept alive, with the return address in it,
21722 then make sure that it does not get reused by the ce2 pass. */
21723 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21724 cfun->machine->lr_save_eliminated = 1;
21725 }
21726 \f
21727 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21728 static void
21729 arm_print_condition (FILE *stream)
21730 {
21731 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21732 {
21733 /* Branch conversion is not implemented for Thumb-2. */
21734 if (TARGET_THUMB)
21735 {
21736 output_operand_lossage ("predicated Thumb instruction");
21737 return;
21738 }
21739 if (current_insn_predicate != NULL)
21740 {
21741 output_operand_lossage
21742 ("predicated instruction in conditional sequence");
21743 return;
21744 }
21745
21746 fputs (arm_condition_codes[arm_current_cc], stream);
21747 }
21748 else if (current_insn_predicate)
21749 {
21750 enum arm_cond_code code;
21751
21752 if (TARGET_THUMB1)
21753 {
21754 output_operand_lossage ("predicated Thumb instruction");
21755 return;
21756 }
21757
21758 code = get_arm_condition_code (current_insn_predicate);
21759 fputs (arm_condition_codes[code], stream);
21760 }
21761 }
21762
21763
21764 /* Globally reserved letters: acln
21765 Punctuation letters currently used: @_|?().!#
21766 Lower case letters currently used: bcdefhimpqtvwxyz
21767 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21768 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21769
21770 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21771
21772 If CODE is 'd', then X is a condition operand and the instruction
21773 should only be executed if the condition is true.
21774 If CODE is 'D', then X is a condition operand and the instruction
21775 should only be executed if the condition is false: however, if the mode
21776 of the comparison is CCFPEmode, then always execute the instruction -- we
21777 do this because in these circumstances !GE does not necessarily imply LT;
21778 in these cases the instruction pattern will take care to make sure that
21779 an instruction containing %d will follow, thereby undoing the effects of
21780 doing this instruction unconditionally.
21781 If CODE is 'N' then X is a floating point operand that must be negated
21782 before output.
21783 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21784 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21785 static void
21786 arm_print_operand (FILE *stream, rtx x, int code)
21787 {
21788 switch (code)
21789 {
21790 case '@':
21791 fputs (ASM_COMMENT_START, stream);
21792 return;
21793
21794 case '_':
21795 fputs (user_label_prefix, stream);
21796 return;
21797
21798 case '|':
21799 fputs (REGISTER_PREFIX, stream);
21800 return;
21801
21802 case '?':
21803 arm_print_condition (stream);
21804 return;
21805
21806 case '.':
21807 /* The current condition code for a condition code setting instruction.
21808 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21809 fputc ('s', stream);
21810 arm_print_condition (stream);
21811 return;
21812
21813 case '!':
21814 /* If the instruction is conditionally executed then print
21815 the current condition code, otherwise print 's'. */
21816 gcc_assert (TARGET_THUMB2);
21817 if (current_insn_predicate)
21818 arm_print_condition (stream);
21819 else
21820 fputc ('s', stream);
21821 break;
21822
21823 /* %# is a "break" sequence. It doesn't output anything, but is used to
21824 separate e.g. operand numbers from following text, if that text consists
21825 of further digits which we don't want to be part of the operand
21826 number. */
21827 case '#':
21828 return;
21829
21830 case 'N':
21831 {
21832 REAL_VALUE_TYPE r;
21833 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21834 fprintf (stream, "%s", fp_const_from_val (&r));
21835 }
21836 return;
21837
21838 /* An integer or symbol address without a preceding # sign. */
21839 case 'c':
21840 switch (GET_CODE (x))
21841 {
21842 case CONST_INT:
21843 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21844 break;
21845
21846 case SYMBOL_REF:
21847 output_addr_const (stream, x);
21848 break;
21849
21850 case CONST:
21851 if (GET_CODE (XEXP (x, 0)) == PLUS
21852 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21853 {
21854 output_addr_const (stream, x);
21855 break;
21856 }
21857 /* Fall through. */
21858
21859 default:
21860 output_operand_lossage ("Unsupported operand for code '%c'", code);
21861 }
21862 return;
21863
21864 /* An integer that we want to print in HEX. */
21865 case 'x':
21866 switch (GET_CODE (x))
21867 {
21868 case CONST_INT:
21869 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21870 break;
21871
21872 default:
21873 output_operand_lossage ("Unsupported operand for code '%c'", code);
21874 }
21875 return;
21876
21877 case 'B':
21878 if (CONST_INT_P (x))
21879 {
21880 HOST_WIDE_INT val;
21881 val = ARM_SIGN_EXTEND (~INTVAL (x));
21882 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21883 }
21884 else
21885 {
21886 putc ('~', stream);
21887 output_addr_const (stream, x);
21888 }
21889 return;
21890
21891 case 'b':
21892 /* Print the log2 of a CONST_INT. */
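       /* For example, a CONST_INT of 8 is printed as "#3".  */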
21893 {
21894 HOST_WIDE_INT val;
21895
21896 if (!CONST_INT_P (x)
21897 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21898 output_operand_lossage ("Unsupported operand for code '%c'", code);
21899 else
21900 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21901 }
21902 return;
21903
21904 case 'L':
21905 /* The low 16 bits of an immediate constant. */
21906 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
21907 return;
21908
21909 case 'i':
21910 fprintf (stream, "%s", arithmetic_instr (x, 1));
21911 return;
21912
21913 case 'I':
21914 fprintf (stream, "%s", arithmetic_instr (x, 0));
21915 return;
21916
21917 case 'S':
21918 {
21919 HOST_WIDE_INT val;
21920 const char *shift;
21921
21922 shift = shift_op (x, &val);
21923
21924 if (shift)
21925 {
21926 fprintf (stream, ", %s ", shift);
21927 if (val == -1)
21928 arm_print_operand (stream, XEXP (x, 1), 0);
21929 else
21930 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21931 }
21932 }
21933 return;
21934
21935 /* An explanation of the 'Q', 'R' and 'H' register operands:
21936
21937 In a pair of registers containing a DI or DF value the 'Q'
21938 operand returns the register number of the register containing
21939 the least significant part of the value. The 'R' operand returns
21940 the register number of the register containing the most
21941 significant part of the value.
21942
21943 The 'H' operand returns the higher of the two register numbers.
21944 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21945 same as the 'Q' operand, since the most significant part of the
21946 value is held in the lower number register. The reverse is true
21947 on systems where WORDS_BIG_ENDIAN is false.
21948
21949 The purpose of these operands is to distinguish between cases
21950 where the endian-ness of the values is important (for example
21951 when they are added together), and cases where the endian-ness
21952 is irrelevant, but the order of register operations is important.
21953 For example when loading a value from memory into a register
21954 pair, the endian-ness does not matter. Provided that the value
21955 from the lower memory address is put into the lower numbered
21956 register, and the value from the higher address is put into the
21957 higher numbered register, the load will work regardless of whether
21958 the value being loaded is big-wordian or little-wordian. The
21959 order of the two register loads can matter however, if the address
21960 of the memory location is actually held in one of the registers
21961 being overwritten by the load.
21962
21963 The 'Q' and 'R' constraints are also available for 64-bit
21964 constants. */
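    /* For example, with WORDS_BIG_ENDIAN false, a DImode value held in
       {r0, r1} prints r0 for 'Q', r1 for 'R' and r1 for 'H'; when
       WORDS_BIG_ENDIAN is true, 'Q' and 'R' swap while 'H' is unchanged.  */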
21965 case 'Q':
21966 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21967 {
21968 rtx part = gen_lowpart (SImode, x);
21969 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21970 return;
21971 }
21972
21973 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21974 {
21975 output_operand_lossage ("invalid operand for code '%c'", code);
21976 return;
21977 }
21978
21979 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21980 return;
21981
21982 case 'R':
21983 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21984 {
21985 machine_mode mode = GET_MODE (x);
21986 rtx part;
21987
21988 if (mode == VOIDmode)
21989 mode = DImode;
21990 part = gen_highpart_mode (SImode, mode, x);
21991 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21992 return;
21993 }
21994
21995 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21996 {
21997 output_operand_lossage ("invalid operand for code '%c'", code);
21998 return;
21999 }
22000
22001 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22002 return;
22003
22004 case 'H':
22005 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22006 {
22007 output_operand_lossage ("invalid operand for code '%c'", code);
22008 return;
22009 }
22010
22011 asm_fprintf (stream, "%r", REGNO (x) + 1);
22012 return;
22013
22014 case 'J':
22015 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22016 {
22017 output_operand_lossage ("invalid operand for code '%c'", code);
22018 return;
22019 }
22020
22021 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22022 return;
22023
22024 case 'K':
22025 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22026 {
22027 output_operand_lossage ("invalid operand for code '%c'", code);
22028 return;
22029 }
22030
22031 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22032 return;
22033
22034 case 'm':
22035 asm_fprintf (stream, "%r",
22036 REG_P (XEXP (x, 0))
22037 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22038 return;
22039
22040 case 'M':
22041 asm_fprintf (stream, "{%r-%r}",
22042 REGNO (x),
22043 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22044 return;
22045
22046 /* Like 'M', but writing doubleword vector registers, for use by Neon
22047 insns. */
22048 case 'h':
22049 {
22050 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22051 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22052 if (numregs == 1)
22053 asm_fprintf (stream, "{d%d}", regno);
22054 else
22055 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22056 }
22057 return;
22058
22059 case 'd':
22060 /* CONST_TRUE_RTX means always -- that's the default. */
22061 if (x == const_true_rtx)
22062 return;
22063
22064 if (!COMPARISON_P (x))
22065 {
22066 output_operand_lossage ("invalid operand for code '%c'", code);
22067 return;
22068 }
22069
22070 fputs (arm_condition_codes[get_arm_condition_code (x)],
22071 stream);
22072 return;
22073
22074 case 'D':
22075 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22076 want to do that. */
22077 if (x == const_true_rtx)
22078 {
22079 output_operand_lossage ("instruction never executed");
22080 return;
22081 }
22082 if (!COMPARISON_P (x))
22083 {
22084 output_operand_lossage ("invalid operand for code '%c'", code);
22085 return;
22086 }
22087
22088 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22089 (get_arm_condition_code (x))],
22090 stream);
22091 return;
22092
22093 case 's':
22094 case 'V':
22095 case 'W':
22096 case 'X':
22097 case 'Y':
22098 case 'Z':
22099 /* Former Maverick support, removed after GCC-4.7. */
22100 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22101 return;
22102
22103 case 'U':
22104 if (!REG_P (x)
22105 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22106 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22107 /* Bad value for wCG register number. */
22108 {
22109 output_operand_lossage ("invalid operand for code '%c'", code);
22110 return;
22111 }
22112
22113 else
22114 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22115 return;
22116
22117 /* Print an iWMMXt control register name. */
22118 case 'w':
22119 if (!CONST_INT_P (x)
22120 || INTVAL (x) < 0
22121 || INTVAL (x) >= 16)
22122 /* Bad value for wC register number. */
22123 {
22124 output_operand_lossage ("invalid operand for code '%c'", code);
22125 return;
22126 }
22127
22128 else
22129 {
22130 static const char * wc_reg_names [16] =
22131 {
22132 "wCID", "wCon", "wCSSF", "wCASF",
22133 "wC4", "wC5", "wC6", "wC7",
22134 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22135 "wC12", "wC13", "wC14", "wC15"
22136 };
22137
22138 fputs (wc_reg_names [INTVAL (x)], stream);
22139 }
22140 return;
22141
22142 /* Print the high single-precision register of a VFP double-precision
22143 register. */
22144 case 'p':
22145 {
22146 machine_mode mode = GET_MODE (x);
22147 int regno;
22148
22149 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22150 {
22151 output_operand_lossage ("invalid operand for code '%c'", code);
22152 return;
22153 }
22154
22155 regno = REGNO (x);
22156 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22157 {
22158 output_operand_lossage ("invalid operand for code '%c'", code);
22159 return;
22160 }
22161
22162 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22163 }
22164 return;
22165
22166 /* Print a VFP/Neon double precision or quad precision register name. */
22167 case 'P':
22168 case 'q':
22169 {
22170 machine_mode mode = GET_MODE (x);
22171 int is_quad = (code == 'q');
22172 int regno;
22173
22174 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22175 {
22176 output_operand_lossage ("invalid operand for code '%c'", code);
22177 return;
22178 }
22179
22180 if (!REG_P (x)
22181 || !IS_VFP_REGNUM (REGNO (x)))
22182 {
22183 output_operand_lossage ("invalid operand for code '%c'", code);
22184 return;
22185 }
22186
22187 regno = REGNO (x);
22188 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22189 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22190 {
22191 output_operand_lossage ("invalid operand for code '%c'", code);
22192 return;
22193 }
22194
22195 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22196 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22197 }
22198 return;
22199
22200 /* These two codes print the low/high doubleword register of a Neon quad
22201 register, respectively. For pair-structure types, can also print
22202 low/high quadword registers. */
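    /* For example, a 16-byte value held in q1 (that is, d2/d3) prints
       d2 for 'e' and d3 for 'f'.  */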
22203 case 'e':
22204 case 'f':
22205 {
22206 machine_mode mode = GET_MODE (x);
22207 int regno;
22208
22209 if ((GET_MODE_SIZE (mode) != 16
22210 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22211 {
22212 output_operand_lossage ("invalid operand for code '%c'", code);
22213 return;
22214 }
22215
22216 regno = REGNO (x);
22217 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22218 {
22219 output_operand_lossage ("invalid operand for code '%c'", code);
22220 return;
22221 }
22222
22223 if (GET_MODE_SIZE (mode) == 16)
22224 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22225 + (code == 'f' ? 1 : 0));
22226 else
22227 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22228 + (code == 'f' ? 1 : 0));
22229 }
22230 return;
22231
22232 /* Print a VFPv3 floating-point constant, represented as an integer
22233 index. */
22234 case 'G':
22235 {
22236 int index = vfp3_const_double_index (x);
22237 gcc_assert (index != -1);
22238 fprintf (stream, "%d", index);
22239 }
22240 return;
22241
22242 /* Print bits representing opcode features for Neon.
22243
22244 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22245 and polynomials as unsigned.
22246
22247 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22248
22249 Bit 2 is 1 for rounding functions, 0 otherwise. */
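    /* For example, a bits value of 3 (signed float) makes 'T', 'F' and 't'
       all print 'f'; a value of 2 (polynomial) prints 'p', 'p' and 'u'
       respectively.  */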
22250
22251 /* Identify the type as 's', 'u', 'p' or 'f'. */
22252 case 'T':
22253 {
22254 HOST_WIDE_INT bits = INTVAL (x);
22255 fputc ("uspf"[bits & 3], stream);
22256 }
22257 return;
22258
22259 /* Likewise, but signed and unsigned integers are both 'i'. */
22260 case 'F':
22261 {
22262 HOST_WIDE_INT bits = INTVAL (x);
22263 fputc ("iipf"[bits & 3], stream);
22264 }
22265 return;
22266
22267 /* As for 'T', but emit 'u' instead of 'p'. */
22268 case 't':
22269 {
22270 HOST_WIDE_INT bits = INTVAL (x);
22271 fputc ("usuf"[bits & 3], stream);
22272 }
22273 return;
22274
22275 /* Bit 2: rounding (vs none). */
22276 case 'O':
22277 {
22278 HOST_WIDE_INT bits = INTVAL (x);
22279 fputs ((bits & 4) != 0 ? "r" : "", stream);
22280 }
22281 return;
22282
22283 /* Memory operand for vld1/vst1 instruction. */
22284 case 'A':
22285 {
22286 rtx addr;
22287 bool postinc = false;
22288 rtx postinc_reg = NULL;
22289 unsigned align, memsize, align_bits;
22290
22291 gcc_assert (MEM_P (x));
22292 addr = XEXP (x, 0);
22293 if (GET_CODE (addr) == POST_INC)
22294 {
22295 postinc = true;
22296 addr = XEXP (addr, 0);
22297 }
22298 if (GET_CODE (addr) == POST_MODIFY)
22299 {
22300 postinc_reg = XEXP (XEXP (addr, 1), 1);
22301 addr = XEXP (addr, 0);
22302 }
22303 asm_fprintf (stream, "[%r", REGNO (addr));
22304
22305 /* We know the alignment of this access, so we can emit a hint in the
22306 instruction (for some alignments) as an aid to the memory subsystem
22307 of the target. */
22308 align = MEM_ALIGN (x) >> 3;
22309 memsize = MEM_SIZE (x);
22310
22311 /* Only certain alignment specifiers are supported by the hardware. */
22312 if (memsize == 32 && (align % 32) == 0)
22313 align_bits = 256;
22314 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22315 align_bits = 128;
22316 else if (memsize >= 8 && (align % 8) == 0)
22317 align_bits = 64;
22318 else
22319 align_bits = 0;
22320
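        /* For example, a 16-byte access through r0 that is known to be
           128-bit aligned is printed as "[r0:128]".  */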
22321 if (align_bits != 0)
22322 asm_fprintf (stream, ":%d", align_bits);
22323
22324 asm_fprintf (stream, "]");
22325
22326 if (postinc)
22327 fputs ("!", stream);
22328 if (postinc_reg)
22329 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22330 }
22331 return;
22332
22333 case 'C':
22334 {
22335 rtx addr;
22336
22337 gcc_assert (MEM_P (x));
22338 addr = XEXP (x, 0);
22339 gcc_assert (REG_P (addr));
22340 asm_fprintf (stream, "[%r]", REGNO (addr));
22341 }
22342 return;
22343
22344 /* Translate an S register number into a D register number and element index. */
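    /* For example, s1 is printed as "d0[1]"; in general sN maps to
       d(N/2)[N%2].  */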
22345 case 'y':
22346 {
22347 machine_mode mode = GET_MODE (x);
22348 int regno;
22349
22350 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22351 {
22352 output_operand_lossage ("invalid operand for code '%c'", code);
22353 return;
22354 }
22355
22356 regno = REGNO (x);
22357 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22358 {
22359 output_operand_lossage ("invalid operand for code '%c'", code);
22360 return;
22361 }
22362
22363 regno = regno - FIRST_VFP_REGNUM;
22364 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22365 }
22366 return;
22367
22368 case 'v':
22369 gcc_assert (CONST_DOUBLE_P (x));
22370 int result;
22371 result = vfp3_const_double_for_fract_bits (x);
22372 if (result == 0)
22373 result = vfp3_const_double_for_bits (x);
22374 fprintf (stream, "#%d", result);
22375 return;
22376
22377 /* Register specifier for vld1.16/vst1.16. Translate the S register
22378 number into a D register number and element index. */
22379 case 'z':
22380 {
22381 machine_mode mode = GET_MODE (x);
22382 int regno;
22383
22384 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22385 {
22386 output_operand_lossage ("invalid operand for code '%c'", code);
22387 return;
22388 }
22389
22390 regno = REGNO (x);
22391 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22392 {
22393 output_operand_lossage ("invalid operand for code '%c'", code);
22394 return;
22395 }
22396
22397 regno = regno - FIRST_VFP_REGNUM;
22398 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22399 }
22400 return;
22401
22402 default:
22403 if (x == 0)
22404 {
22405 output_operand_lossage ("missing operand");
22406 return;
22407 }
22408
22409 switch (GET_CODE (x))
22410 {
22411 case REG:
22412 asm_fprintf (stream, "%r", REGNO (x));
22413 break;
22414
22415 case MEM:
22416 output_address (GET_MODE (x), XEXP (x, 0));
22417 break;
22418
22419 case CONST_DOUBLE:
22420 {
22421 char fpstr[20];
22422 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22423 sizeof (fpstr), 0, 1);
22424 fprintf (stream, "#%s", fpstr);
22425 }
22426 break;
22427
22428 default:
22429 gcc_assert (GET_CODE (x) != NEG);
22430 fputc ('#', stream);
22431 if (GET_CODE (x) == HIGH)
22432 {
22433 fputs (":lower16:", stream);
22434 x = XEXP (x, 0);
22435 }
22436
22437 output_addr_const (stream, x);
22438 break;
22439 }
22440 }
22441 }
22442 \f
22443 /* Target hook for printing a memory address. */
22444 static void
22445 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22446 {
22447 if (TARGET_32BIT)
22448 {
22449 int is_minus = GET_CODE (x) == MINUS;
22450
22451 if (REG_P (x))
22452 asm_fprintf (stream, "[%r]", REGNO (x));
22453 else if (GET_CODE (x) == PLUS || is_minus)
22454 {
22455 rtx base = XEXP (x, 0);
22456 rtx index = XEXP (x, 1);
22457 HOST_WIDE_INT offset = 0;
22458 if (!REG_P (base)
22459 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22460 {
22461 /* Ensure that BASE is a register (one of them must be).
22462 Also ensure that SP is not used as an index
22463 register.  */
22464 std::swap (base, index);
22465 }
22466 switch (GET_CODE (index))
22467 {
22468 case CONST_INT:
22469 offset = INTVAL (index);
22470 if (is_minus)
22471 offset = -offset;
22472 asm_fprintf (stream, "[%r, #%wd]",
22473 REGNO (base), offset);
22474 break;
22475
22476 case REG:
22477 asm_fprintf (stream, "[%r, %s%r]",
22478 REGNO (base), is_minus ? "-" : "",
22479 REGNO (index));
22480 break;
22481
22482 case MULT:
22483 case ASHIFTRT:
22484 case LSHIFTRT:
22485 case ASHIFT:
22486 case ROTATERT:
22487 {
22488 asm_fprintf (stream, "[%r, %s%r",
22489 REGNO (base), is_minus ? "-" : "",
22490 REGNO (XEXP (index, 0)));
22491 arm_print_operand (stream, index, 'S');
22492 fputs ("]", stream);
22493 break;
22494 }
22495
22496 default:
22497 gcc_unreachable ();
22498 }
22499 }
22500 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22501 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22502 {
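          /* For example, a PRE_DEC of an SImode value through r3 prints
             "[r3, #-4]!", while a POST_INC prints "[r3], #4".  */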
22503 gcc_assert (REG_P (XEXP (x, 0)));
22504
22505 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22506 asm_fprintf (stream, "[%r, #%s%d]!",
22507 REGNO (XEXP (x, 0)),
22508 GET_CODE (x) == PRE_DEC ? "-" : "",
22509 GET_MODE_SIZE (mode));
22510 else
22511 asm_fprintf (stream, "[%r], #%s%d",
22512 REGNO (XEXP (x, 0)),
22513 GET_CODE (x) == POST_DEC ? "-" : "",
22514 GET_MODE_SIZE (mode));
22515 }
22516 else if (GET_CODE (x) == PRE_MODIFY)
22517 {
22518 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22519 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22520 asm_fprintf (stream, "#%wd]!",
22521 INTVAL (XEXP (XEXP (x, 1), 1)));
22522 else
22523 asm_fprintf (stream, "%r]!",
22524 REGNO (XEXP (XEXP (x, 1), 1)));
22525 }
22526 else if (GET_CODE (x) == POST_MODIFY)
22527 {
22528 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22529 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22530 asm_fprintf (stream, "#%wd",
22531 INTVAL (XEXP (XEXP (x, 1), 1)));
22532 else
22533 asm_fprintf (stream, "%r",
22534 REGNO (XEXP (XEXP (x, 1), 1)));
22535 }
22536 else output_addr_const (stream, x);
22537 }
22538 else
22539 {
22540 if (REG_P (x))
22541 asm_fprintf (stream, "[%r]", REGNO (x));
22542 else if (GET_CODE (x) == POST_INC)
22543 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22544 else if (GET_CODE (x) == PLUS)
22545 {
22546 gcc_assert (REG_P (XEXP (x, 0)));
22547 if (CONST_INT_P (XEXP (x, 1)))
22548 asm_fprintf (stream, "[%r, #%wd]",
22549 REGNO (XEXP (x, 0)),
22550 INTVAL (XEXP (x, 1)));
22551 else
22552 asm_fprintf (stream, "[%r, %r]",
22553 REGNO (XEXP (x, 0)),
22554 REGNO (XEXP (x, 1)));
22555 }
22556 else
22557 output_addr_const (stream, x);
22558 }
22559 }
22560 \f
22561 /* Target hook for indicating whether a punctuation character for
22562 TARGET_PRINT_OPERAND is valid. */
22563 static bool
22564 arm_print_operand_punct_valid_p (unsigned char code)
22565 {
22566 return (code == '@' || code == '|' || code == '.'
22567 || code == '(' || code == ')' || code == '#'
22568 || (TARGET_32BIT && (code == '?'))
22569 || (TARGET_THUMB2 && (code == '!'))
22570 || (TARGET_THUMB && (code == '_')));
22571 }
22572 \f
22573 /* Target hook for assembling integer objects. The ARM version needs to
22574 handle word-sized values specially. */
22575 static bool
22576 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22577 {
22578 machine_mode mode;
22579
22580 if (size == UNITS_PER_WORD && aligned_p)
22581 {
22582 fputs ("\t.word\t", asm_out_file);
22583 output_addr_const (asm_out_file, x);
22584
22585 /* Mark symbols as position independent. We only do this in the
22586 .text segment, not in the .data segment. */
22587 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22588 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22589 {
22590 /* See legitimize_pic_address for an explanation of the
22591 TARGET_VXWORKS_RTP check. */
22592 /* References to weak symbols cannot be resolved locally:
22593 they may be overridden by a non-weak definition at link
22594 time. */
22595 if (!arm_pic_data_is_text_relative
22596 || (GET_CODE (x) == SYMBOL_REF
22597 && (!SYMBOL_REF_LOCAL_P (x)
22598 || (SYMBOL_REF_DECL (x)
22599 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22600 fputs ("(GOT)", asm_out_file);
22601 else
22602 fputs ("(GOTOFF)", asm_out_file);
22603 }
22604 fputc ('\n', asm_out_file);
22605 return true;
22606 }
22607
22608 mode = GET_MODE (x);
22609
22610 if (arm_vector_mode_supported_p (mode))
22611 {
22612 int i, units;
22613
22614 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22615
22616 units = CONST_VECTOR_NUNITS (x);
22617 size = GET_MODE_UNIT_SIZE (mode);
22618
22619 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22620 for (i = 0; i < units; i++)
22621 {
22622 rtx elt = CONST_VECTOR_ELT (x, i);
22623 assemble_integer
22624 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22625 }
22626 else
22627 for (i = 0; i < units; i++)
22628 {
22629 rtx elt = CONST_VECTOR_ELT (x, i);
22630 assemble_real
22631 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22632 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22633 }
22634
22635 return true;
22636 }
22637
22638 return default_assemble_integer (x, size, aligned_p);
22639 }
22640
22641 static void
22642 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22643 {
22644 section *s;
22645
22646 if (!TARGET_AAPCS_BASED)
22647 {
22648 (is_ctor ?
22649 default_named_section_asm_out_constructor
22650 : default_named_section_asm_out_destructor) (symbol, priority);
22651 return;
22652 }
22653
22654 /* Put these in the .init_array section, using a special relocation. */
22655 if (priority != DEFAULT_INIT_PRIORITY)
22656 {
22657 char buf[18];
22658 sprintf (buf, "%s.%.5u",
22659 is_ctor ? ".init_array" : ".fini_array",
22660 priority);
22661 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22662 }
22663 else if (is_ctor)
22664 s = ctors_section;
22665 else
22666 s = dtors_section;
22667
22668 switch_to_section (s);
22669 assemble_align (POINTER_SIZE);
22670 fputs ("\t.word\t", asm_out_file);
22671 output_addr_const (asm_out_file, symbol);
22672 fputs ("(target1)\n", asm_out_file);
22673 }
22674
22675 /* Add a function to the list of static constructors. */
22676
22677 static void
22678 arm_elf_asm_constructor (rtx symbol, int priority)
22679 {
22680 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22681 }
22682
22683 /* Add a function to the list of static destructors. */
22684
22685 static void
22686 arm_elf_asm_destructor (rtx symbol, int priority)
22687 {
22688 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22689 }
22690 \f
22691 /* A finite state machine takes care of noticing whether or not instructions
22692 can be conditionally executed, and thus decrease execution time and code
22693 size by deleting branch instructions. The fsm is controlled by
22694 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22695
22696 /* The state of the fsm controlling condition codes are:
22697 0: normal, do nothing special
22698 1: make ASM_OUTPUT_OPCODE not output this instruction
22699 2: make ASM_OUTPUT_OPCODE not output this instruction
22700 3: make instructions conditional
22701 4: make instructions conditional
22702
22703 State transitions (state->state by whom under condition):
22704 0 -> 1 final_prescan_insn if the `target' is a label
22705 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22706 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22707 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22708 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22709 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22710 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22711 (the target insn is arm_target_insn).
22712
22713 If the jump clobbers the conditions then we use states 2 and 4.
22714
22715 A similar thing can be done with conditional return insns.
22716
22717 XXX In case the `target' is an unconditional branch, this conditionalising
22718 of the instructions always reduces code size, but not always execution
22719 time. But then, I want to reduce the code size to somewhere near what
22720 /bin/cc produces. */
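   /* An illustrative example (not taken from real compiler output): a
      branch that skips a single instruction, such as

          cmp   r0, #0
          beq   .L1
          add   r1, r1, #1
        .L1:

      is turned by this machinery into

          cmp   r0, #0
          addne r1, r1, #1

      i.e. the branch disappears and the skipped instruction becomes
      conditionally executed.  */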
22721
22722 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22723 instructions. When a COND_EXEC instruction is seen the subsequent
22724 instructions are scanned so that multiple conditional instructions can be
22725 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22726 specify the length and true/false mask for the IT block. These will be
22727 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22728
22729 /* Returns the index of the ARM condition code string in
22730 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22731 COMPARISON should be an rtx like `(eq (...) (...))'. */
22732
22733 enum arm_cond_code
22734 maybe_get_arm_condition_code (rtx comparison)
22735 {
22736 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22737 enum arm_cond_code code;
22738 enum rtx_code comp_code = GET_CODE (comparison);
22739
22740 if (GET_MODE_CLASS (mode) != MODE_CC)
22741 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22742 XEXP (comparison, 1));
22743
22744 switch (mode)
22745 {
22746 case CC_DNEmode: code = ARM_NE; goto dominance;
22747 case CC_DEQmode: code = ARM_EQ; goto dominance;
22748 case CC_DGEmode: code = ARM_GE; goto dominance;
22749 case CC_DGTmode: code = ARM_GT; goto dominance;
22750 case CC_DLEmode: code = ARM_LE; goto dominance;
22751 case CC_DLTmode: code = ARM_LT; goto dominance;
22752 case CC_DGEUmode: code = ARM_CS; goto dominance;
22753 case CC_DGTUmode: code = ARM_HI; goto dominance;
22754 case CC_DLEUmode: code = ARM_LS; goto dominance;
22755 case CC_DLTUmode: code = ARM_CC;
22756
22757 dominance:
22758 if (comp_code == EQ)
22759 return ARM_INVERSE_CONDITION_CODE (code);
22760 if (comp_code == NE)
22761 return code;
22762 return ARM_NV;
22763
22764 case CC_NOOVmode:
22765 switch (comp_code)
22766 {
22767 case NE: return ARM_NE;
22768 case EQ: return ARM_EQ;
22769 case GE: return ARM_PL;
22770 case LT: return ARM_MI;
22771 default: return ARM_NV;
22772 }
22773
22774 case CC_Zmode:
22775 switch (comp_code)
22776 {
22777 case NE: return ARM_NE;
22778 case EQ: return ARM_EQ;
22779 default: return ARM_NV;
22780 }
22781
22782 case CC_Nmode:
22783 switch (comp_code)
22784 {
22785 case NE: return ARM_MI;
22786 case EQ: return ARM_PL;
22787 default: return ARM_NV;
22788 }
22789
22790 case CCFPEmode:
22791 case CCFPmode:
22792 /* We can handle all cases except UNEQ and LTGT. */
22793 switch (comp_code)
22794 {
22795 case GE: return ARM_GE;
22796 case GT: return ARM_GT;
22797 case LE: return ARM_LS;
22798 case LT: return ARM_MI;
22799 case NE: return ARM_NE;
22800 case EQ: return ARM_EQ;
22801 case ORDERED: return ARM_VC;
22802 case UNORDERED: return ARM_VS;
22803 case UNLT: return ARM_LT;
22804 case UNLE: return ARM_LE;
22805 case UNGT: return ARM_HI;
22806 case UNGE: return ARM_PL;
22807 /* UNEQ and LTGT do not have a representation. */
22808 case UNEQ: /* Fall through. */
22809 case LTGT: /* Fall through. */
22810 default: return ARM_NV;
22811 }
22812
22813 case CC_SWPmode:
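       /* The flags were set by a comparison with its operands swapped, so
          each condition has to be tested in its swapped form, e.g. GE is
          tested as LE and GTU as CC (unsigned lower).  */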
22814 switch (comp_code)
22815 {
22816 case NE: return ARM_NE;
22817 case EQ: return ARM_EQ;
22818 case GE: return ARM_LE;
22819 case GT: return ARM_LT;
22820 case LE: return ARM_GE;
22821 case LT: return ARM_GT;
22822 case GEU: return ARM_LS;
22823 case GTU: return ARM_CC;
22824 case LEU: return ARM_CS;
22825 case LTU: return ARM_HI;
22826 default: return ARM_NV;
22827 }
22828
22829 case CC_Cmode:
22830 switch (comp_code)
22831 {
22832 case LTU: return ARM_CS;
22833 case GEU: return ARM_CC;
22834 case NE: return ARM_CS;
22835 case EQ: return ARM_CC;
22836 default: return ARM_NV;
22837 }
22838
22839 case CC_CZmode:
22840 switch (comp_code)
22841 {
22842 case NE: return ARM_NE;
22843 case EQ: return ARM_EQ;
22844 case GEU: return ARM_CS;
22845 case GTU: return ARM_HI;
22846 case LEU: return ARM_LS;
22847 case LTU: return ARM_CC;
22848 default: return ARM_NV;
22849 }
22850
22851 case CC_NCVmode:
22852 switch (comp_code)
22853 {
22854 case GE: return ARM_GE;
22855 case LT: return ARM_LT;
22856 case GEU: return ARM_CS;
22857 case LTU: return ARM_CC;
22858 default: return ARM_NV;
22859 }
22860
22861 case CC_Vmode:
22862 switch (comp_code)
22863 {
22864 case NE: return ARM_VS;
22865 case EQ: return ARM_VC;
22866 default: return ARM_NV;
22867 }
22868
22869 case CCmode:
22870 switch (comp_code)
22871 {
22872 case NE: return ARM_NE;
22873 case EQ: return ARM_EQ;
22874 case GE: return ARM_GE;
22875 case GT: return ARM_GT;
22876 case LE: return ARM_LE;
22877 case LT: return ARM_LT;
22878 case GEU: return ARM_CS;
22879 case GTU: return ARM_HI;
22880 case LEU: return ARM_LS;
22881 case LTU: return ARM_CC;
22882 default: return ARM_NV;
22883 }
22884
22885 default: gcc_unreachable ();
22886 }
22887 }
22888
22889 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22890 static enum arm_cond_code
22891 get_arm_condition_code (rtx comparison)
22892 {
22893 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22894 gcc_assert (code != ARM_NV);
22895 return code;
22896 }
22897
22898 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22899 instructions. */
22900 void
22901 thumb2_final_prescan_insn (rtx_insn *insn)
22902 {
22903 rtx_insn *first_insn = insn;
22904 rtx body = PATTERN (insn);
22905 rtx predicate;
22906 enum arm_cond_code code;
22907 int n;
22908 int mask;
22909 int max;
22910
22911 /* max_insns_skipped in the tune was already taken into account in the
22912 cost model of the ifcvt pass when generating COND_EXEC insns. At this
22913 stage just emit the IT blocks as they are; it does not make sense to
22914 split them.  */
22915 max = MAX_INSN_PER_IT_BLOCK;
22916
22917 /* Remove the previous insn from the count of insns to be output. */
22918 if (arm_condexec_count)
22919 arm_condexec_count--;
22920
22921 /* Nothing to do if we are already inside a conditional block. */
22922 if (arm_condexec_count)
22923 return;
22924
22925 if (GET_CODE (body) != COND_EXEC)
22926 return;
22927
22928 /* Conditional jumps are implemented directly. */
22929 if (JUMP_P (insn))
22930 return;
22931
22932 predicate = COND_EXEC_TEST (body);
22933 arm_current_cc = get_arm_condition_code (predicate);
22934
22935 n = get_attr_ce_count (insn);
22936 arm_condexec_count = 1;
22937 arm_condexec_mask = (1 << n) - 1;
22938 arm_condexec_masklen = n;
22939 /* See if subsequent instructions can be combined into the same block. */
22940 for (;;)
22941 {
22942 insn = next_nonnote_insn (insn);
22943
22944 /* Jumping into the middle of an IT block is illegal, so a label or
22945 barrier terminates the block. */
22946 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22947 break;
22948
22949 body = PATTERN (insn);
22950 /* USE and CLOBBER aren't really insns, so just skip them. */
22951 if (GET_CODE (body) == USE
22952 || GET_CODE (body) == CLOBBER)
22953 continue;
22954
22955 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22956 if (GET_CODE (body) != COND_EXEC)
22957 break;
22958 /* Maximum number of conditionally executed instructions in a block. */
22959 n = get_attr_ce_count (insn);
22960 if (arm_condexec_masklen + n > max)
22961 break;
22962
22963 predicate = COND_EXEC_TEST (body);
22964 code = get_arm_condition_code (predicate);
22965 mask = (1 << n) - 1;
22966 if (arm_current_cc == code)
22967 arm_condexec_mask |= (mask << arm_condexec_masklen);
22968 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
22969 break;
22970
22971 arm_condexec_count++;
22972 arm_condexec_masklen += n;
22973
22974 /* A jump must be the last instruction in a conditional block. */
22975 if (JUMP_P (insn))
22976 break;
22977 }
22978 /* Restore recog_data (getting the attributes of other insns can
22979 destroy this array, but final.c assumes that it remains intact
22980 across this call). */
22981 extract_constrain_insn_cached (first_insn);
22982 }
22983
22984 void
22985 arm_final_prescan_insn (rtx_insn *insn)
22986 {
22987 /* BODY will hold the body of INSN. */
22988 rtx body = PATTERN (insn);
22989
22990 /* This will be 1 if trying to repeat the trick, and things need to be
22991 reversed if it appears to fail. */
22992 int reverse = 0;
22993
22994 /* If we start with a return insn, we only succeed if we find another one. */
22995 int seeking_return = 0;
22996 enum rtx_code return_code = UNKNOWN;
22997
22998 /* START_INSN will hold the insn from where we start looking. This is the
22999 first insn after the following code_label if REVERSE is true. */
23000 rtx_insn *start_insn = insn;
23001
23002 /* If in state 4, check if the target branch is reached, in order to
23003 change back to state 0. */
23004 if (arm_ccfsm_state == 4)
23005 {
23006 if (insn == arm_target_insn)
23007 {
23008 arm_target_insn = NULL;
23009 arm_ccfsm_state = 0;
23010 }
23011 return;
23012 }
23013
23014 /* If in state 3, it is possible to repeat the trick, if this insn is an
23015 unconditional branch to a label, and immediately following this branch
23016 is the previous target label which is only used once, and the label this
23017 branch jumps to is not too far off. */
23018 if (arm_ccfsm_state == 3)
23019 {
23020 if (simplejump_p (insn))
23021 {
23022 start_insn = next_nonnote_insn (start_insn);
23023 if (BARRIER_P (start_insn))
23024 {
23025 /* XXX Isn't this always a barrier? */
23026 start_insn = next_nonnote_insn (start_insn);
23027 }
23028 if (LABEL_P (start_insn)
23029 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23030 && LABEL_NUSES (start_insn) == 1)
23031 reverse = TRUE;
23032 else
23033 return;
23034 }
23035 else if (ANY_RETURN_P (body))
23036 {
23037 start_insn = next_nonnote_insn (start_insn);
23038 if (BARRIER_P (start_insn))
23039 start_insn = next_nonnote_insn (start_insn);
23040 if (LABEL_P (start_insn)
23041 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23042 && LABEL_NUSES (start_insn) == 1)
23043 {
23044 reverse = TRUE;
23045 seeking_return = 1;
23046 return_code = GET_CODE (body);
23047 }
23048 else
23049 return;
23050 }
23051 else
23052 return;
23053 }
23054
23055 gcc_assert (!arm_ccfsm_state || reverse);
23056 if (!JUMP_P (insn))
23057 return;
23058
23059 /* This jump might be paralleled with a clobber of the condition codes;
23060 the jump should always come first.  */
23061 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23062 body = XVECEXP (body, 0, 0);
23063
23064 if (reverse
23065 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23066 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23067 {
23068 int insns_skipped;
23069 int fail = FALSE, succeed = FALSE;
23070 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23071 int then_not_else = TRUE;
23072 rtx_insn *this_insn = start_insn;
23073 rtx label = 0;
23074
23075 /* Register the insn jumped to. */
23076 if (reverse)
23077 {
23078 if (!seeking_return)
23079 label = XEXP (SET_SRC (body), 0);
23080 }
23081 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23082 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23083 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23084 {
23085 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23086 then_not_else = FALSE;
23087 }
23088 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23089 {
23090 seeking_return = 1;
23091 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23092 }
23093 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23094 {
23095 seeking_return = 1;
23096 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23097 then_not_else = FALSE;
23098 }
23099 else
23100 gcc_unreachable ();
23101
23102 /* See how many insns this branch skips, and what kind of insns. If all
23103 insns are okay, and the label or unconditional branch to the same
23104 label is not too far away, succeed. */
23105 for (insns_skipped = 0;
23106 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23107 {
23108 rtx scanbody;
23109
23110 this_insn = next_nonnote_insn (this_insn);
23111 if (!this_insn)
23112 break;
23113
23114 switch (GET_CODE (this_insn))
23115 {
23116 case CODE_LABEL:
23117 /* Succeed if it is the target label, otherwise fail since
23118 control falls in from somewhere else. */
23119 if (this_insn == label)
23120 {
23121 arm_ccfsm_state = 1;
23122 succeed = TRUE;
23123 }
23124 else
23125 fail = TRUE;
23126 break;
23127
23128 case BARRIER:
23129 /* Succeed if the following insn is the target label.
23130 Otherwise fail.
23131 If return insns are used then the last insn in a function
23132 will be a barrier. */
23133 this_insn = next_nonnote_insn (this_insn);
23134 if (this_insn && this_insn == label)
23135 {
23136 arm_ccfsm_state = 1;
23137 succeed = TRUE;
23138 }
23139 else
23140 fail = TRUE;
23141 break;
23142
23143 case CALL_INSN:
23144 /* The AAPCS says that conditional calls should not be
23145 used since they make interworking inefficient (the
23146 linker can't transform BL<cond> into BLX). That's
23147 only a problem if the machine has BLX. */
23148 if (arm_arch5)
23149 {
23150 fail = TRUE;
23151 break;
23152 }
23153
23154 /* Succeed if the following insn is the target label, or
23155 if the following two insns are a barrier and the
23156 target label. */
23157 this_insn = next_nonnote_insn (this_insn);
23158 if (this_insn && BARRIER_P (this_insn))
23159 this_insn = next_nonnote_insn (this_insn);
23160
23161 if (this_insn && this_insn == label
23162 && insns_skipped < max_insns_skipped)
23163 {
23164 arm_ccfsm_state = 1;
23165 succeed = TRUE;
23166 }
23167 else
23168 fail = TRUE;
23169 break;
23170
23171 case JUMP_INSN:
23172 /* If this is an unconditional branch to the same label, succeed.
23173 If it is to another label, do nothing. If it is conditional,
23174 fail. */
23175 /* XXX Probably, the tests for SET and the PC are
23176 unnecessary. */
23177
23178 scanbody = PATTERN (this_insn);
23179 if (GET_CODE (scanbody) == SET
23180 && GET_CODE (SET_DEST (scanbody)) == PC)
23181 {
23182 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23183 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23184 {
23185 arm_ccfsm_state = 2;
23186 succeed = TRUE;
23187 }
23188 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23189 fail = TRUE;
23190 }
23191 /* Fail if a conditional return is undesirable (e.g. on a
23192 StrongARM), but still allow this if optimizing for size. */
23193 else if (GET_CODE (scanbody) == return_code
23194 && !use_return_insn (TRUE, NULL)
23195 && !optimize_size)
23196 fail = TRUE;
23197 else if (GET_CODE (scanbody) == return_code)
23198 {
23199 arm_ccfsm_state = 2;
23200 succeed = TRUE;
23201 }
23202 else if (GET_CODE (scanbody) == PARALLEL)
23203 {
23204 switch (get_attr_conds (this_insn))
23205 {
23206 case CONDS_NOCOND:
23207 break;
23208 default:
23209 fail = TRUE;
23210 break;
23211 }
23212 }
23213 else
23214 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23215
23216 break;
23217
23218 case INSN:
23219 /* Instructions using or affecting the condition codes make it
23220 fail. */
23221 scanbody = PATTERN (this_insn);
23222 if (!(GET_CODE (scanbody) == SET
23223 || GET_CODE (scanbody) == PARALLEL)
23224 || get_attr_conds (this_insn) != CONDS_NOCOND)
23225 fail = TRUE;
23226 break;
23227
23228 default:
23229 break;
23230 }
23231 }
23232 if (succeed)
23233 {
23234 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23235 arm_target_label = CODE_LABEL_NUMBER (label);
23236 else
23237 {
23238 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23239
23240 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23241 {
23242 this_insn = next_nonnote_insn (this_insn);
23243 gcc_assert (!this_insn
23244 || (!BARRIER_P (this_insn)
23245 && !LABEL_P (this_insn)));
23246 }
23247 if (!this_insn)
23248 {
23249 /* Oh, dear! We ran off the end; give up.  */
23250 extract_constrain_insn_cached (insn);
23251 arm_ccfsm_state = 0;
23252 arm_target_insn = NULL;
23253 return;
23254 }
23255 arm_target_insn = this_insn;
23256 }
23257
23258 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23259 what it was. */
23260 if (!reverse)
23261 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23262
23263 if (reverse || then_not_else)
23264 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23265 }
23266
23267 /* Restore recog_data (getting the attributes of other insns can
23268 destroy this array, but final.c assumes that it remains intact
23269 across this call).  */
23270 extract_constrain_insn_cached (insn);
23271 }
23272 }
23273
23274 /* Output IT instructions. */
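/* For example, with arm_condexec_masklen == 3, arm_condexec_mask == 0x5
   (binary 101) and arm_current_cc == ARM_EQ, this prints "itet eq": the
   first and third instructions use the EQ condition, the second uses NE.  */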
23275 void
23276 thumb2_asm_output_opcode (FILE * stream)
23277 {
23278 char buff[5];
23279 int n;
23280
23281 if (arm_condexec_mask)
23282 {
23283 for (n = 0; n < arm_condexec_masklen; n++)
23284 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23285 buff[n] = 0;
23286 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23287 arm_condition_codes[arm_current_cc]);
23288 arm_condexec_mask = 0;
23289 }
23290 }
23291
23292 /* Returns true if REGNO is a valid register
23293 for holding a quantity of type MODE. */
23294 int
23295 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23296 {
23297 if (GET_MODE_CLASS (mode) == MODE_CC)
23298 return (regno == CC_REGNUM
23299 || (TARGET_HARD_FLOAT
23300 && regno == VFPCC_REGNUM));
23301
23302 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23303 return false;
23304
23305 if (TARGET_THUMB1)
23306 /* For the Thumb we only allow values bigger than SImode in
23307 registers 0 - 6, so that there is always a second low
23308 register available to hold the upper part of the value.
23309 We probably ought to ensure that the register is the
23310 start of an even-numbered register pair.  */
23311 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23312
23313 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23314 {
23315 if (mode == SFmode || mode == SImode)
23316 return VFP_REGNO_OK_FOR_SINGLE (regno);
23317
23318 if (mode == DFmode)
23319 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23320
23321 if (mode == HFmode)
23322 return VFP_REGNO_OK_FOR_SINGLE (regno);
23323
23324 /* VFP registers can hold HImode values. */
23325 if (mode == HImode)
23326 return VFP_REGNO_OK_FOR_SINGLE (regno);
23327
23328 if (TARGET_NEON)
23329 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23330 || (VALID_NEON_QREG_MODE (mode)
23331 && NEON_REGNO_OK_FOR_QUAD (regno))
23332 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23333 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23334 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23335 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23336 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23337
23338 return FALSE;
23339 }
23340
23341 if (TARGET_REALLY_IWMMXT)
23342 {
23343 if (IS_IWMMXT_GR_REGNUM (regno))
23344 return mode == SImode;
23345
23346 if (IS_IWMMXT_REGNUM (regno))
23347 return VALID_IWMMXT_REG_MODE (mode);
23348 }
23349
23350 /* We allow almost any value to be stored in the general registers.
23351 Restrict doubleword quantities to even register pairs in ARM state
23352 so that we can use ldrd. Do not allow very large Neon structure
23353 opaque modes in general registers; they would use too many. */
23354 if (regno <= LAST_ARM_REGNUM)
23355 {
23356 if (ARM_NUM_REGS (mode) > 4)
23357 return FALSE;
23358
23359 if (TARGET_THUMB2)
23360 return TRUE;
23361
23362 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23363 }
23364
23365 if (regno == FRAME_POINTER_REGNUM
23366 || regno == ARG_POINTER_REGNUM)
23367 /* We only allow integers in the fake hard registers. */
23368 return GET_MODE_CLASS (mode) == MODE_INT;
23369
23370 return FALSE;
23371 }
23372
23373 /* Implement MODES_TIEABLE_P. */
23374
23375 bool
23376 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23377 {
23378 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23379 return true;
23380
23381 /* We specifically want to allow elements of "structure" modes to
23382 be tieable to the structure. This more general condition allows
23383 other rarer situations too. */
23384 if (TARGET_NEON
23385 && (VALID_NEON_DREG_MODE (mode1)
23386 || VALID_NEON_QREG_MODE (mode1)
23387 || VALID_NEON_STRUCT_MODE (mode1))
23388 && (VALID_NEON_DREG_MODE (mode2)
23389 || VALID_NEON_QREG_MODE (mode2)
23390 || VALID_NEON_STRUCT_MODE (mode2)))
23391 return true;
23392
23393 return false;
23394 }
23395
23396 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23397 not used in arm mode. */
23398
23399 enum reg_class
23400 arm_regno_class (int regno)
23401 {
23402 if (regno == PC_REGNUM)
23403 return NO_REGS;
23404
23405 if (TARGET_THUMB1)
23406 {
23407 if (regno == STACK_POINTER_REGNUM)
23408 return STACK_REG;
23409 if (regno == CC_REGNUM)
23410 return CC_REG;
23411 if (regno < 8)
23412 return LO_REGS;
23413 return HI_REGS;
23414 }
23415
23416 if (TARGET_THUMB2 && regno < 8)
23417 return LO_REGS;
23418
23419 if ( regno <= LAST_ARM_REGNUM
23420 || regno == FRAME_POINTER_REGNUM
23421 || regno == ARG_POINTER_REGNUM)
23422 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23423
23424 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23425 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23426
23427 if (IS_VFP_REGNUM (regno))
23428 {
23429 if (regno <= D7_VFP_REGNUM)
23430 return VFP_D0_D7_REGS;
23431 else if (regno <= LAST_LO_VFP_REGNUM)
23432 return VFP_LO_REGS;
23433 else
23434 return VFP_HI_REGS;
23435 }
23436
23437 if (IS_IWMMXT_REGNUM (regno))
23438 return IWMMXT_REGS;
23439
23440 if (IS_IWMMXT_GR_REGNUM (regno))
23441 return IWMMXT_GR_REGS;
23442
23443 return NO_REGS;
23444 }
23445
23446 /* Handle a special case when computing the offset
23447 of an argument from the frame pointer. */
23448 int
23449 arm_debugger_arg_offset (int value, rtx addr)
23450 {
23451 rtx_insn *insn;
23452
23453 /* We are only interested if dbxout_parms() failed to compute the offset. */
23454 if (value != 0)
23455 return 0;
23456
23457 /* We can only cope with the case where the address is held in a register. */
23458 if (!REG_P (addr))
23459 return 0;
23460
23461 /* If we are using the frame pointer to point at the argument, then
23462 an offset of 0 is correct. */
23463 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23464 return 0;
23465
23466 /* If we are using the stack pointer to point at the
23467 argument, then an offset of 0 is correct. */
23468 /* ??? Check this is consistent with thumb2 frame layout. */
23469 if ((TARGET_THUMB || !frame_pointer_needed)
23470 && REGNO (addr) == SP_REGNUM)
23471 return 0;
23472
23473 /* Oh dear. The argument is pointed to by a register rather
23474 than being held in a register, or being stored at a known
23475 offset from the frame pointer. Since GDB only understands
23476 those two kinds of argument we must translate the address
23477 held in the register into an offset from the frame pointer.
23478 We do this by searching through the insns for the function
23479 looking to see where this register gets its value. If the
23480 register is initialized from the frame pointer plus an offset
23481 then we are in luck and we can continue, otherwise we give up.
23482
23483 This code is exercised by producing debugging information
23484 for a function with arguments like this:
23485
23486 double func (double a, double b, int c, double d) {return d;}
23487
23488 Without this code the stab for parameter 'd' will be set to
23489 an offset of 0 from the frame pointer, rather than 8. */
23490
23491 /* The if() statement says:
23492
23493 If the insn is a normal instruction
23494 and if the insn is setting the value in a register
23495 and if the register being set is the register holding the address of the argument
23496 and if the address is computed by an addition
23497 that involves adding to a register
23498 which is the frame pointer
23499 a constant integer
23500
23501 then... */
23502
23503 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23504 {
23505 if ( NONJUMP_INSN_P (insn)
23506 && GET_CODE (PATTERN (insn)) == SET
23507 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23508 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23509 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23510 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23511 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23512 )
23513 {
23514 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23515
23516 break;
23517 }
23518 }
23519
23520 if (value == 0)
23521 {
23522 debug_rtx (addr);
23523 warning (0, "unable to compute real location of stacked parameter");
23524 value = 8; /* XXX magic hack */
23525 }
23526
23527 return value;
23528 }
23529 \f
23530 /* Implement TARGET_PROMOTED_TYPE. */
23531
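/* A note on the hook below: arm_fp16_type_node is GCC's __fp16, the ARM
   half-precision type.  Promoting it to float means arithmetic on __fp16
   values is carried out in single precision, treating __fp16 as a
   storage-only format.  */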
23532 static tree
23533 arm_promoted_type (const_tree t)
23534 {
23535 if (SCALAR_FLOAT_TYPE_P (t)
23536 && TYPE_PRECISION (t) == 16
23537 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23538 return float_type_node;
23539 return NULL_TREE;
23540 }
23541
23542 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23543 This simply adds HFmode as a supported mode; even though we don't
23544 implement arithmetic on this type directly, it's supported by
23545 optabs conversions, much the way the double-word arithmetic is
23546 special-cased in the default hook. */
23547
23548 static bool
23549 arm_scalar_mode_supported_p (machine_mode mode)
23550 {
23551 if (mode == HFmode)
23552 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23553 else if (ALL_FIXED_POINT_MODE_P (mode))
23554 return true;
23555 else
23556 return default_scalar_mode_supported_p (mode);
23557 }
23558
23559 /* Set the value of FLT_EVAL_METHOD.
23560 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23561
23562 0: evaluate all operations and constants, whose semantic type has at
23563 most the range and precision of type float, to the range and
23564 precision of float; evaluate all other operations and constants to
23565 the range and precision of the semantic type;
23566
23567 N, where _FloatN is a supported interchange floating type:
23568 evaluate all operations and constants, whose semantic type has at
23569 most the range and precision of _FloatN type, to the range and
23570 precision of the _FloatN type; evaluate all other operations and
23571 constants to the range and precision of the semantic type;
23572
23573 If we have the ARMv8.2-A extensions then we support _Float16 in native
23574 precision, so we should set this to 16. Otherwise, we support the type,
23575 but want to evaluate expressions in float precision, so set this to
23576 0. */
23577
23578 static enum flt_eval_method
23579 arm_excess_precision (enum excess_precision_type type)
23580 {
23581 switch (type)
23582 {
23583 case EXCESS_PRECISION_TYPE_FAST:
23584 case EXCESS_PRECISION_TYPE_STANDARD:
23585 /* We can calculate either in 16-bit range and precision or
23586 32-bit range and precision. Make that decision based on whether
23587 we have native support for the ARMv8.2-A 16-bit floating-point
23588 instructions or not. */
23589 return (TARGET_VFP_FP16INST
23590 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23591 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23592 case EXCESS_PRECISION_TYPE_IMPLICIT:
23593 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23594 default:
23595 gcc_unreachable ();
23596 }
23597 return FLT_EVAL_METHOD_UNPREDICTABLE;
23598 }
23599
23600
23601 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23602 _Float16 if we are using anything other than ieee format for 16-bit
23603 floating point. Otherwise, punt to the default implementation. */
23604 static machine_mode
23605 arm_floatn_mode (int n, bool extended)
23606 {
23607 if (!extended && n == 16)
23608 return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;
23609
23610 return default_floatn_mode (n, extended);
23611 }
23612
23613
23614 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23615 not to early-clobber SRC registers in the process.
23616
23617 We assume that the operands described by SRC and DEST represent a
23618 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23619 number of components into which the copy has been decomposed. */
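/* If the destination overlaps the source and does not start at a lower
   register number, the component moves are emitted in reverse order so
   that no source register is clobbered before it has been read.  */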
23620 void
23621 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23622 {
23623 unsigned int i;
23624
23625 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23626 || REGNO (operands[0]) < REGNO (operands[1]))
23627 {
23628 for (i = 0; i < count; i++)
23629 {
23630 operands[2 * i] = dest[i];
23631 operands[2 * i + 1] = src[i];
23632 }
23633 }
23634 else
23635 {
23636 for (i = 0; i < count; i++)
23637 {
23638 operands[2 * i] = dest[count - i - 1];
23639 operands[2 * i + 1] = src[count - i - 1];
23640 }
23641 }
23642 }
23643
23644 /* Split operands into moves from op[1] + op[2] into op[0]. */
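/* op[0] is a double-width vector whose low half receives op[1] and whose
   high half receives op[2].  Halves that are already in the right place
   are skipped, and a swapped pair is emitted as a single parallel so that
   it can become a VSWP.  */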
23645
23646 void
23647 neon_split_vcombine (rtx operands[3])
23648 {
23649 unsigned int dest = REGNO (operands[0]);
23650 unsigned int src1 = REGNO (operands[1]);
23651 unsigned int src2 = REGNO (operands[2]);
23652 machine_mode halfmode = GET_MODE (operands[1]);
23653 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23654 rtx destlo, desthi;
23655
23656 if (src1 == dest && src2 == dest + halfregs)
23657 {
23658 /* No-op move. Can't split to nothing; emit something. */
23659 emit_note (NOTE_INSN_DELETED);
23660 return;
23661 }
23662
23663 /* Preserve register attributes for variable tracking. */
23664 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23665 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23666 GET_MODE_SIZE (halfmode));
23667
23668 /* Special case of reversed high/low parts. Use VSWP. */
23669 if (src2 == dest && src1 == dest + halfregs)
23670 {
23671 rtx x = gen_rtx_SET (destlo, operands[1]);
23672 rtx y = gen_rtx_SET (desthi, operands[2]);
23673 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23674 return;
23675 }
23676
23677 if (!reg_overlap_mentioned_p (operands[2], destlo))
23678 {
23679 /* Try to avoid unnecessary moves if part of the result
23680 is in the right place already. */
23681 if (src1 != dest)
23682 emit_move_insn (destlo, operands[1]);
23683 if (src2 != dest + halfregs)
23684 emit_move_insn (desthi, operands[2]);
23685 }
23686 else
23687 {
23688 if (src2 != dest + halfregs)
23689 emit_move_insn (desthi, operands[2]);
23690 if (src1 != dest)
23691 emit_move_insn (destlo, operands[1]);
23692 }
23693 }
23694 \f
23695 /* Return the number (counting from 0) of
23696 the least significant set bit in MASK. */
23697
23698 inline static int
23699 number_of_first_bit_set (unsigned mask)
23700 {
23701 return ctz_hwi (mask);
23702 }
23703
23704 /* Like emit_multi_reg_push, but allowing for a different set of
23705 registers to be described as saved. MASK is the set of registers
23706 to be saved; REAL_REGS is the set of registers to be described as
23707 saved. If REAL_REGS is 0, only describe the stack adjustment. */
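/* For example, when the Thumb-1 prologue saves high registers by first
   copying them into free low registers, MASK contains the low registers
   actually pushed while REAL_REGS contains the high registers whose
   values they hold, so the unwind information describes the real saves.  */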
23708
23709 static rtx_insn *
23710 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23711 {
23712 unsigned long regno;
23713 rtx par[10], tmp, reg;
23714 rtx_insn *insn;
23715 int i, j;
23716
23717 /* Build the parallel of the registers actually being stored. */
23718 for (i = 0; mask; ++i, mask &= mask - 1)
23719 {
23720 regno = ctz_hwi (mask);
23721 reg = gen_rtx_REG (SImode, regno);
23722
23723 if (i == 0)
23724 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23725 else
23726 tmp = gen_rtx_USE (VOIDmode, reg);
23727
23728 par[i] = tmp;
23729 }
23730
23731 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23732 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23733 tmp = gen_frame_mem (BLKmode, tmp);
23734 tmp = gen_rtx_SET (tmp, par[0]);
23735 par[0] = tmp;
23736
23737 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23738 insn = emit_insn (tmp);
23739
23740 /* Always build the stack adjustment note for unwind info. */
23741 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23742 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23743 par[0] = tmp;
23744
23745 /* Build the parallel of the registers recorded as saved for unwind. */
23746 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23747 {
23748 regno = ctz_hwi (real_regs);
23749 reg = gen_rtx_REG (SImode, regno);
23750
23751 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23752 tmp = gen_frame_mem (SImode, tmp);
23753 tmp = gen_rtx_SET (tmp, reg);
23754 RTX_FRAME_RELATED_P (tmp) = 1;
23755 par[j + 1] = tmp;
23756 }
23757
23758 if (j == 0)
23759 tmp = par[0];
23760 else
23761 {
23762 RTX_FRAME_RELATED_P (par[0]) = 1;
23763 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23764 }
23765
23766 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23767
23768 return insn;
23769 }
23770
23771 /* Emit code to pop registers from the stack.  F is the
23772 assembly file.  MASK is the registers to pop. */
23773 static void
23774 thumb_pop (FILE *f, unsigned long mask)
23775 {
23776 int regno;
23777 int lo_mask = mask & 0xFF;
23778 int pushed_words = 0;
23779
23780 gcc_assert (mask);
23781
23782 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23783 {
23784 /* Special case. Do not generate a POP PC statement here, do it in
23785 thumb_exit() */
23786 thumb_exit (f, -1);
23787 return;
23788 }
23789
23790 fprintf (f, "\tpop\t{");
23791
23792 /* Look at the low registers first. */
23793 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23794 {
23795 if (lo_mask & 1)
23796 {
23797 asm_fprintf (f, "%r", regno);
23798
23799 if ((lo_mask & ~1) != 0)
23800 fprintf (f, ", ");
23801
23802 pushed_words++;
23803 }
23804 }
23805
23806 if (mask & (1 << PC_REGNUM))
23807 {
23808 /* Catch popping the PC. */
23809 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23810 || IS_CMSE_ENTRY (arm_current_func_type ()))
23811 {
23812 /* The PC is never popped directly; instead
23813 it is popped into r3 and then BX is used. */
23814 fprintf (f, "}\n");
23815
23816 thumb_exit (f, -1);
23817
23818 return;
23819 }
23820 else
23821 {
23822 if (mask & 0xFF)
23823 fprintf (f, ", ");
23824
23825 asm_fprintf (f, "%r", PC_REGNUM);
23826 }
23827 }
23828
23829 fprintf (f, "}\n");
23830 }
23831
23832 /* Generate code to return from a thumb function.
23833 If 'reg_containing_return_addr' is -1, then the return address is
23834 actually on the stack, at the stack pointer. */
23835 static void
23836 thumb_exit (FILE *f, int reg_containing_return_addr)
23837 {
23838 unsigned regs_available_for_popping;
23839 unsigned regs_to_pop;
23840 int pops_needed;
23841 unsigned available;
23842 unsigned required;
23843 machine_mode mode;
23844 int size;
23845 int restore_a4 = FALSE;
23846
23847 /* Compute the registers we need to pop. */
23848 regs_to_pop = 0;
23849 pops_needed = 0;
23850
23851 if (reg_containing_return_addr == -1)
23852 {
23853 regs_to_pop |= 1 << LR_REGNUM;
23854 ++pops_needed;
23855 }
23856
23857 if (TARGET_BACKTRACE)
23858 {
23859 /* Restore the (ARM) frame pointer and stack pointer. */
23860 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23861 pops_needed += 2;
23862 }
23863
23864 /* If there is nothing to pop then just emit the BX instruction and
23865 return. */
23866 if (pops_needed == 0)
23867 {
23868 if (crtl->calls_eh_return)
23869 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23870
23871 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23872 {
23873 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23874 reg_containing_return_addr);
23875 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23876 }
23877 else
23878 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23879 return;
23880 }
23881 /* Otherwise if we are not supporting interworking and we have not created
23882 a backtrace structure and the function was not entered in ARM mode then
23883 just pop the return address straight into the PC. */
23884 else if (!TARGET_INTERWORK
23885 && !TARGET_BACKTRACE
23886 && !is_called_in_ARM_mode (current_function_decl)
23887 && !crtl->calls_eh_return
23888 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23889 {
23890 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23891 return;
23892 }
23893
23894 /* Find out how many of the (return) argument registers we can corrupt. */
23895 regs_available_for_popping = 0;
23896
23897 /* If returning via __builtin_eh_return, the bottom three registers
23898 all contain information needed for the return. */
23899 if (crtl->calls_eh_return)
23900 size = 12;
23901 else
23902 {
23903 /* If possible, deduce the registers used from the function's
23904 return value.  This is more reliable than examining
23905 df_regs_ever_live_p () because that will be set if the register is
23906 ever used in the function, not just if the register is used
23907 to hold a return value. */
23908
23909 if (crtl->return_rtx != 0)
23910 mode = GET_MODE (crtl->return_rtx);
23911 else
23912 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23913
23914 size = GET_MODE_SIZE (mode);
23915
23916 if (size == 0)
23917 {
23918 /* In a void function we can use any argument register.
23919 In a function that returns a structure on the stack
23920 we can use the second and third argument registers. */
23921 if (mode == VOIDmode)
23922 regs_available_for_popping =
23923 (1 << ARG_REGISTER (1))
23924 | (1 << ARG_REGISTER (2))
23925 | (1 << ARG_REGISTER (3));
23926 else
23927 regs_available_for_popping =
23928 (1 << ARG_REGISTER (2))
23929 | (1 << ARG_REGISTER (3));
23930 }
23931 else if (size <= 4)
23932 regs_available_for_popping =
23933 (1 << ARG_REGISTER (2))
23934 | (1 << ARG_REGISTER (3));
23935 else if (size <= 8)
23936 regs_available_for_popping =
23937 (1 << ARG_REGISTER (3));
23938 }
23939
23940 /* Match registers to be popped with registers into which we pop them. */
23941 for (available = regs_available_for_popping,
23942 required = regs_to_pop;
23943 required != 0 && available != 0;
23944 available &= ~(available & - available),
23945 required &= ~(required & - required))
23946 -- pops_needed;
23947
23948 /* If we have any popping registers left over, remove them. */
23949 if (available > 0)
23950 regs_available_for_popping &= ~available;
23951
23952 /* Otherwise if we need another popping register we can use
23953 the fourth argument register. */
23954 else if (pops_needed)
23955 {
23956 /* If we have not found any free argument registers and
23957 reg a4 contains the return address, we must move it. */
23958 if (regs_available_for_popping == 0
23959 && reg_containing_return_addr == LAST_ARG_REGNUM)
23960 {
23961 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23962 reg_containing_return_addr = LR_REGNUM;
23963 }
23964 else if (size > 12)
23965 {
23966 /* Register a4 is being used to hold part of the return value,
23967 but we have dire need of a free, low register. */
23968 restore_a4 = TRUE;
23969
23970 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23971 }
23972
23973 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23974 {
23975 /* The fourth argument register is available. */
23976 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23977
23978 --pops_needed;
23979 }
23980 }
23981
23982 /* Pop as many registers as we can. */
23983 thumb_pop (f, regs_available_for_popping);
23984
23985 /* Process the registers we popped. */
23986 if (reg_containing_return_addr == -1)
23987 {
23988 /* The return address was popped into the lowest numbered register. */
23989 regs_to_pop &= ~(1 << LR_REGNUM);
23990
23991 reg_containing_return_addr =
23992 number_of_first_bit_set (regs_available_for_popping);
23993
23994 /* Remove this register from the mask of available registers, so that
23995 the return address will not be corrupted by further pops. */
23996 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23997 }
23998
23999 /* If we popped other registers then handle them here. */
24000 if (regs_available_for_popping)
24001 {
24002 int frame_pointer;
24003
24004 /* Work out which register currently contains the frame pointer. */
24005 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24006
24007 /* Move it into the correct place. */
24008 asm_fprintf (f, "\tmov\t%r, %r\n",
24009 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24010
24011 /* (Temporarily) remove it from the mask of popped registers. */
24012 regs_available_for_popping &= ~(1 << frame_pointer);
24013 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24014
24015 if (regs_available_for_popping)
24016 {
24017 int stack_pointer;
24018
24019 /* We popped the stack pointer as well,
24020 find the register that contains it. */
24021 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24022
24023 /* Move it into the stack register. */
24024 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24025
24026 /* At this point we have popped all necessary registers, so
24027 do not worry about restoring regs_available_for_popping
24028 to its correct value:
24029
24030 assert (pops_needed == 0)
24031 assert (regs_available_for_popping == (1 << frame_pointer))
24032 assert (regs_to_pop == (1 << STACK_POINTER)) */
24033 }
24034 else
24035 {
24036 /* Since we have just moved the popped value into the frame
24037 pointer, the popping register is available for reuse, and
24038 we know that we still have the stack pointer left to pop. */
24039 regs_available_for_popping |= (1 << frame_pointer);
24040 }
24041 }
24042
24043 /* If we still have registers left on the stack, but we no longer have
24044 any registers into which we can pop them, then we must move the return
24045 address into the link register and make available the register that
24046 contained it. */
24047 if (regs_available_for_popping == 0 && pops_needed > 0)
24048 {
24049 regs_available_for_popping |= 1 << reg_containing_return_addr;
24050
24051 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24052 reg_containing_return_addr);
24053
24054 reg_containing_return_addr = LR_REGNUM;
24055 }
24056
24057 /* If we have registers left on the stack then pop some more.
24058 We know that at most we will want to pop FP and SP. */
24059 if (pops_needed > 0)
24060 {
24061 int popped_into;
24062 int move_to;
24063
24064 thumb_pop (f, regs_available_for_popping);
24065
24066 /* We have popped either FP or SP.
24067 Move whichever one it is into the correct register. */
24068 popped_into = number_of_first_bit_set (regs_available_for_popping);
24069 move_to = number_of_first_bit_set (regs_to_pop);
24070
24071 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24072
24073 regs_to_pop &= ~(1 << move_to);
24074
24075 --pops_needed;
24076 }
24077
24078 /* If we still have not popped everything then we must have only
24079 had one register available to us and we are now popping the SP. */
24080 if (pops_needed > 0)
24081 {
24082 int popped_into;
24083
24084 thumb_pop (f, regs_available_for_popping);
24085
24086 popped_into = number_of_first_bit_set (regs_available_for_popping);
24087
24088 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24089 /*
24090 assert (regs_to_pop == (1 << STACK_POINTER))
24091 assert (pops_needed == 1)
24092 */
24093 }
24094
24095 /* If necessary restore the a4 register. */
24096 if (restore_a4)
24097 {
24098 if (reg_containing_return_addr != LR_REGNUM)
24099 {
24100 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24101 reg_containing_return_addr = LR_REGNUM;
24102 }
24103
24104 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24105 }
24106
24107 if (crtl->calls_eh_return)
24108 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24109
24110 /* Return to caller. */
24111 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24112 {
24113 /* This is for the cases where LR is not being used to contain the return
24114 address. It may therefore contain information that we might not want
24115 to leak, hence it must be cleared. The value in R0 will never be a
24116 secret at this point, so it is safe to use it, see the clearing code
24117 in 'cmse_nonsecure_entry_clear_before_return'. */
24118 if (reg_containing_return_addr != LR_REGNUM)
24119 asm_fprintf (f, "\tmov\tlr, r0\n");
24120
24121 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24122 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24123 }
24124 else
24125 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24126 }
24127 \f
24128 /* Scan INSN just before assembler is output for it.
24129 For Thumb-1, we track the status of the condition codes; this
24130 information is used in the cbranchsi4_insn pattern. */
24131 void
24132 thumb1_final_prescan_insn (rtx_insn *insn)
24133 {
24134 if (flag_print_asm_name)
24135 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24136 INSN_ADDRESSES (INSN_UID (insn)));
24137 /* Don't overwrite the previous setter when we get to a cbranch. */
24138 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24139 {
24140 enum attr_conds conds;
24141
24142 if (cfun->machine->thumb1_cc_insn)
24143 {
24144 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24145 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24146 CC_STATUS_INIT;
24147 }
24148 conds = get_attr_conds (insn);
24149 if (conds == CONDS_SET)
24150 {
24151 rtx set = single_set (insn);
24152 cfun->machine->thumb1_cc_insn = insn;
24153 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24154 cfun->machine->thumb1_cc_op1 = const0_rtx;
24155 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24156 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24157 {
24158 rtx src1 = XEXP (SET_SRC (set), 1);
24159 if (src1 == const0_rtx)
24160 cfun->machine->thumb1_cc_mode = CCmode;
24161 }
24162 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24163 {
24164 /* Record the src register operand instead of dest because
24165 cprop_hardreg pass propagates src. */
24166 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24167 }
24168 }
24169 else if (conds != CONDS_NOCOND)
24170 cfun->machine->thumb1_cc_insn = NULL_RTX;
24171 }
24172
24173 /* Check if unexpected far jump is used. */
24174 if (cfun->machine->lr_save_eliminated
24175 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24176 internal_error("Unexpected thumb1 far jump");
24177 }
24178
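/* Return nonzero if VAL (viewed as a 32-bit value) is an 8-bit constant
   shifted left by between 0 and 24 bits, e.g. 0x00ff0000.  Such values
   can typically be synthesized with a move of the 8-bit constant followed
   by a left shift.  */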
24179 int
24180 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24181 {
24182 unsigned HOST_WIDE_INT mask = 0xff;
24183 int i;
24184
24185 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24186 if (val == 0) /* XXX */
24187 return 0;
24188
24189 for (i = 0; i < 25; i++)
24190 if ((val & (mask << i)) == val)
24191 return 1;
24192
24193 return 0;
24194 }
24195
24196 /* Returns nonzero if the current function contains,
24197 or might contain a far jump. */
24198 static int
24199 thumb_far_jump_used_p (void)
24200 {
24201 rtx_insn *insn;
24202 bool far_jump = false;
24203 unsigned int func_size = 0;
24204
24205 /* If we have already decided that far jumps may be used,
24206 do not bother checking again, and always return true even if
24207 it turns out that they are not being used. Once we have made
24208 the decision that far jumps are present (and that hence the link
24209 register will be pushed onto the stack) we cannot go back on it. */
24210 if (cfun->machine->far_jump_used)
24211 return 1;
24212
24213 /* If this function is not being called from the prologue/epilogue
24214 generation code then it must be being called from the
24215 INITIAL_ELIMINATION_OFFSET macro. */
24216 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24217 {
24218 /* In this case we know that we are being asked about the elimination
24219 of the arg pointer register. If that register is not being used,
24220 then there are no arguments on the stack, and we do not have to
24221 worry that a far jump might force the prologue to push the link
24222 register, changing the stack offsets. In this case we can just
24223 return false, since the presence of far jumps in the function will
24224 not affect stack offsets.
24225
24226 If the arg pointer is live (or if it was live, but has now been
24227 eliminated and so set to dead) then we do have to test to see if
24228 the function might contain a far jump. This test can lead to some
24229 false negatives, since before reload is completed, the length of
24230 branch instructions is not known, so gcc defaults to returning their
24231 longest length, which in turn sets the far jump attribute to true.
24232
24233 A false negative will not result in bad code being generated, but it
24234 will result in a needless push and pop of the link register. We
24235 hope that this does not occur too often.
24236
24237 If we need doubleword stack alignment this could affect the other
24238 elimination offsets so we can't risk getting it wrong. */
24239 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24240 cfun->machine->arg_pointer_live = 1;
24241 else if (!cfun->machine->arg_pointer_live)
24242 return 0;
24243 }
24244
24245 /* We should not change far_jump_used during or after reload, as there is
24246 no chance to change stack frame layout. */
24247 if (reload_in_progress || reload_completed)
24248 return 0;
24249
24250 /* Check to see if the function contains a branch
24251 insn with the far jump attribute set. */
24252 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24253 {
24254 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24255 {
24256 far_jump = true;
24257 }
24258 func_size += get_attr_length (insn);
24259 }
24260
24261 /* Attribute far_jump will always be true for thumb1 before
24262 shorten_branch pass.  So checking the far_jump attribute before
24263 shorten_branch isn't very useful.
24264
24265 Following heuristic tries to estimate more accurately if a far jump
24266 may finally be used. The heuristic is very conservative as there is
24267 no chance to roll-back the decision of not to use far jump.
24268
24269 Thumb1 long branch offset is -2048 to 2046.  In the worst case each
24270 2-byte insn is associated with a 4-byte constant pool entry, tripling
24271 the effective size, so function size 2048/3 is a conservative threshold. */
24272 if (far_jump)
24273 {
24274 if ((func_size * 3) >= 2048)
24275 {
24276 /* Record the fact that we have decided that
24277 the function does use far jumps. */
24278 cfun->machine->far_jump_used = 1;
24279 return 1;
24280 }
24281 }
24282
24283 return 0;
24284 }
24285
24286 /* Return nonzero if FUNC must be entered in ARM mode. */
24287 static bool
24288 is_called_in_ARM_mode (tree func)
24289 {
24290 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24291
24292 /* Ignore the problem about functions whose address is taken. */
24293 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24294 return true;
24295
24296 #ifdef ARM_PE
24297 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24298 #else
24299 return false;
24300 #endif
24301 }
24302
24303 /* Given the stack offsets and register mask in OFFSETS, decide how
24304 many additional registers to push instead of subtracting a constant
24305 from SP. For epilogues the principle is the same except we use pop.
24306 FOR_PROLOGUE indicates which we're generating. */
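/* For instance, with a 512-byte frame and one free low register, pushing
   that register reduces the remaining SP adjustment to 508 bytes, which
   fits the immediate of a single Thumb-1 SP-adjusting instruction and so
   avoids loading the offset from the constant pool.  */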
24307 static int
24308 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24309 {
24310 HOST_WIDE_INT amount;
24311 unsigned long live_regs_mask = offsets->saved_regs_mask;
24312 /* Extract a mask of the ones we can give to the Thumb's push/pop
24313 instruction. */
24314 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24315 /* Then count how many other high registers will need to be pushed. */
24316 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24317 int n_free, reg_base, size;
24318
24319 if (!for_prologue && frame_pointer_needed)
24320 amount = offsets->locals_base - offsets->saved_regs;
24321 else
24322 amount = offsets->outgoing_args - offsets->saved_regs;
24323
24324 /* If the stack frame size is 512 exactly, we can save one load
24325 instruction, which should make this a win even when optimizing
24326 for speed. */
24327 if (!optimize_size && amount != 512)
24328 return 0;
24329
24330 /* Can't do this if there are high registers to push. */
24331 if (high_regs_pushed != 0)
24332 return 0;
24333
24334 /* Shouldn't do it in the prologue if no registers would normally
24335 be pushed at all. In the epilogue, also allow it if we'll have
24336 a pop insn for the PC. */
24337 if (l_mask == 0
24338 && (for_prologue
24339 || TARGET_BACKTRACE
24340 || (live_regs_mask & 1 << LR_REGNUM) == 0
24341 || TARGET_INTERWORK
24342 || crtl->args.pretend_args_size != 0))
24343 return 0;
24344
24345 /* Don't do this if thumb_expand_prologue wants to emit instructions
24346 between the push and the stack frame allocation. */
24347 if (for_prologue
24348 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24349 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24350 return 0;
24351
24352 reg_base = 0;
24353 n_free = 0;
24354 if (!for_prologue)
24355 {
24356 size = arm_size_return_regs ();
24357 reg_base = ARM_NUM_INTS (size);
24358 live_regs_mask >>= reg_base;
24359 }
24360
24361 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24362 && (for_prologue || call_used_regs[reg_base + n_free]))
24363 {
24364 live_regs_mask >>= 1;
24365 n_free++;
24366 }
24367
24368 if (n_free == 0)
24369 return 0;
24370 gcc_assert (amount / 4 * 4 == amount);
24371
24372 if (amount >= 512 && (amount - n_free * 4) < 512)
24373 return (amount - 508) / 4;
24374 if (amount <= n_free * 4)
24375 return amount / 4;
24376 return 0;
24377 }
24378
24379 /* The bits which aren't usefully expanded as rtl. */
24380 const char *
24381 thumb1_unexpanded_epilogue (void)
24382 {
24383 arm_stack_offsets *offsets;
24384 int regno;
24385 unsigned long live_regs_mask = 0;
24386 int high_regs_pushed = 0;
24387 int extra_pop;
24388 int had_to_push_lr;
24389 int size;
24390
24391 if (cfun->machine->return_used_this_function != 0)
24392 return "";
24393
24394 if (IS_NAKED (arm_current_func_type ()))
24395 return "";
24396
24397 offsets = arm_get_frame_offsets ();
24398 live_regs_mask = offsets->saved_regs_mask;
24399 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24400
24401 /* If possible, deduce the registers used from the function's return value.
24402 This is more reliable than examining df_regs_ever_live_p () because that
24403 will be set if the register is ever used in the function, not just if
24404 the register is used to hold a return value. */
24405 size = arm_size_return_regs ();
24406
24407 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24408 if (extra_pop > 0)
24409 {
24410 unsigned long extra_mask = (1 << extra_pop) - 1;
24411 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24412 }
24413
24414 /* The prolog may have pushed some high registers to use as
24415 work registers. e.g. the testsuite file:
24416 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24417 compiles to produce:
24418 push {r4, r5, r6, r7, lr}
24419 mov r7, r9
24420 mov r6, r8
24421 push {r6, r7}
24422 as part of the prolog. We have to undo that pushing here. */
24423
24424 if (high_regs_pushed)
24425 {
24426 unsigned long mask = live_regs_mask & 0xff;
24427 int next_hi_reg;
24428
24429 /* The available low registers depend on the size of the value we are
24430 returning. */
24431 if (size <= 12)
24432 mask |= 1 << 3;
24433 if (size <= 8)
24434 mask |= 1 << 2;
24435
24436 if (mask == 0)
24437 /* Oh dear! We have no low registers into which we can pop
24438 high registers! */
24439 internal_error
24440 ("no low registers available for popping high registers");
24441
24442 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24443 if (live_regs_mask & (1 << next_hi_reg))
24444 break;
24445
24446 while (high_regs_pushed)
24447 {
24448 /* Find lo register(s) into which the high register(s) can
24449 be popped. */
24450 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24451 {
24452 if (mask & (1 << regno))
24453 high_regs_pushed--;
24454 if (high_regs_pushed == 0)
24455 break;
24456 }
24457
24458 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24459
24460 /* Pop the values into the low register(s). */
24461 thumb_pop (asm_out_file, mask);
24462
24463 /* Move the value(s) into the high registers. */
24464 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24465 {
24466 if (mask & (1 << regno))
24467 {
24468 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24469 regno);
24470
24471 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24472 if (live_regs_mask & (1 << next_hi_reg))
24473 break;
24474 }
24475 }
24476 }
24477 live_regs_mask &= ~0x0f00;
24478 }
24479
24480 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24481 live_regs_mask &= 0xff;
24482
24483 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24484 {
24485 /* Pop the return address into the PC. */
24486 if (had_to_push_lr)
24487 live_regs_mask |= 1 << PC_REGNUM;
24488
24489 /* Either no argument registers were pushed or a backtrace
24490 structure was created which includes an adjusted stack
24491 pointer, so just pop everything. */
24492 if (live_regs_mask)
24493 thumb_pop (asm_out_file, live_regs_mask);
24494
24495 /* We have either just popped the return address into the
24496 PC or it was kept in LR for the entire function.
24497 Note that thumb_pop has already called thumb_exit if the
24498 PC was in the list. */
24499 if (!had_to_push_lr)
24500 thumb_exit (asm_out_file, LR_REGNUM);
24501 }
24502 else
24503 {
24504 /* Pop everything but the return address. */
24505 if (live_regs_mask)
24506 thumb_pop (asm_out_file, live_regs_mask);
24507
24508 if (had_to_push_lr)
24509 {
24510 if (size > 12)
24511 {
24512 /* We have no free low regs, so save one. */
24513 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24514 LAST_ARG_REGNUM);
24515 }
24516
24517 /* Get the return address into a temporary register. */
24518 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24519
24520 if (size > 12)
24521 {
24522 /* Move the return address to lr. */
24523 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24524 LAST_ARG_REGNUM);
24525 /* Restore the low register. */
24526 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24527 IP_REGNUM);
24528 regno = LR_REGNUM;
24529 }
24530 else
24531 regno = LAST_ARG_REGNUM;
24532 }
24533 else
24534 regno = LR_REGNUM;
24535
24536 /* Remove the argument registers that were pushed onto the stack. */
24537 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24538 SP_REGNUM, SP_REGNUM,
24539 crtl->args.pretend_args_size);
24540
24541 thumb_exit (asm_out_file, regno);
24542 }
24543
24544 return "";
24545 }
24546
24547 /* Functions to save and restore machine-specific function data. */
24548 static struct machine_function *
24549 arm_init_machine_status (void)
24550 {
24551 struct machine_function *machine;
24552 machine = ggc_cleared_alloc<machine_function> ();
24553
24554 #if ARM_FT_UNKNOWN != 0
24555 machine->func_type = ARM_FT_UNKNOWN;
24556 #endif
24557 return machine;
24558 }
24559
24560 /* Return an RTX indicating where the return address to the
24561 calling function can be found. */
24562 rtx
24563 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24564 {
24565 if (count != 0)
24566 return NULL_RTX;
24567
24568 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24569 }
24570
24571 /* Do anything needed before RTL is emitted for each function. */
24572 void
24573 arm_init_expanders (void)
24574 {
24575 /* Arrange to initialize and mark the machine per-function status. */
24576 init_machine_status = arm_init_machine_status;
24577
24578 /* This is to stop the combine pass optimizing away the alignment
24579 adjustment of va_arg. */
24580 /* ??? It is claimed that this should not be necessary. */
24581 if (cfun)
24582 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24583 }
24584
24585 /* Check whether FUNC is compiled for a different instruction set (ARM vs Thumb) from the current mode. */
24586
24587 bool
24588 arm_change_mode_p (tree func)
24589 {
24590 if (TREE_CODE (func) != FUNCTION_DECL)
24591 return false;
24592
24593 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24594
24595 if (!callee_tree)
24596 callee_tree = target_option_default_node;
24597
24598 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24599 int flags = callee_opts->x_target_flags;
24600
24601 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24602 }
24603
24604 /* Like arm_compute_initial_elimination offset. Simpler because there
24605 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24606 to point at the base of the local variables after static stack
24607 space for a function has been allocated. */
24608
24609 HOST_WIDE_INT
24610 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24611 {
24612 arm_stack_offsets *offsets;
24613
24614 offsets = arm_get_frame_offsets ();
24615
24616 switch (from)
24617 {
24618 case ARG_POINTER_REGNUM:
24619 switch (to)
24620 {
24621 case STACK_POINTER_REGNUM:
24622 return offsets->outgoing_args - offsets->saved_args;
24623
24624 case FRAME_POINTER_REGNUM:
24625 return offsets->soft_frame - offsets->saved_args;
24626
24627 case ARM_HARD_FRAME_POINTER_REGNUM:
24628 return offsets->saved_regs - offsets->saved_args;
24629
24630 case THUMB_HARD_FRAME_POINTER_REGNUM:
24631 return offsets->locals_base - offsets->saved_args;
24632
24633 default:
24634 gcc_unreachable ();
24635 }
24636 break;
24637
24638 case FRAME_POINTER_REGNUM:
24639 switch (to)
24640 {
24641 case STACK_POINTER_REGNUM:
24642 return offsets->outgoing_args - offsets->soft_frame;
24643
24644 case ARM_HARD_FRAME_POINTER_REGNUM:
24645 return offsets->saved_regs - offsets->soft_frame;
24646
24647 case THUMB_HARD_FRAME_POINTER_REGNUM:
24648 return offsets->locals_base - offsets->soft_frame;
24649
24650 default:
24651 gcc_unreachable ();
24652 }
24653 break;
24654
24655 default:
24656 gcc_unreachable ();
24657 }
24658 }
24659
24660 /* Generate the function's prologue. */
24661
24662 void
24663 thumb1_expand_prologue (void)
24664 {
24665 rtx_insn *insn;
24666
24667 HOST_WIDE_INT amount;
24668 HOST_WIDE_INT size;
24669 arm_stack_offsets *offsets;
24670 unsigned long func_type;
24671 int regno;
24672 unsigned long live_regs_mask;
24673 unsigned long l_mask;
24674 unsigned high_regs_pushed = 0;
24675 bool lr_needs_saving;
24676
24677 func_type = arm_current_func_type ();
24678
24679 /* Naked functions don't have prologues. */
24680 if (IS_NAKED (func_type))
24681 {
24682 if (flag_stack_usage_info)
24683 current_function_static_stack_size = 0;
24684 return;
24685 }
24686
24687 if (IS_INTERRUPT (func_type))
24688 {
24689 error ("interrupt Service Routines cannot be coded in Thumb mode");
24690 return;
24691 }
24692
24693 if (is_called_in_ARM_mode (current_function_decl))
24694 emit_insn (gen_prologue_thumb1_interwork ());
24695
24696 offsets = arm_get_frame_offsets ();
24697 live_regs_mask = offsets->saved_regs_mask;
24698 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24699
24700 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24701 l_mask = live_regs_mask & 0x40ff;
24702 /* Then count how many other high registers will need to be pushed. */
24703 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24704
24705 if (crtl->args.pretend_args_size)
24706 {
24707 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24708
24709 if (cfun->machine->uses_anonymous_args)
24710 {
24711 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24712 unsigned long mask;
24713
24714 mask = 1ul << (LAST_ARG_REGNUM + 1);
24715 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24716
24717 insn = thumb1_emit_multi_reg_push (mask, 0);
24718 }
24719 else
24720 {
24721 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24722 stack_pointer_rtx, x));
24723 }
24724 RTX_FRAME_RELATED_P (insn) = 1;
24725 }
24726
24727 if (TARGET_BACKTRACE)
24728 {
24729 HOST_WIDE_INT offset = 0;
24730 unsigned work_register;
24731 rtx work_reg, x, arm_hfp_rtx;
24732
24733 /* We have been asked to create a stack backtrace structure.
24734 The code looks like this:
24735
24736 0 .align 2
24737 0 func:
24738 0 sub SP, #16 Reserve space for 4 registers.
24739 2 push {R7} Push low registers.
24740 4 add R7, SP, #20 Get the stack pointer before the push.
24741 6 str R7, [SP, #8] Store the stack pointer
24742 (before reserving the space).
24743 8 mov R7, PC Get hold of the start of this code + 12.
24744 10 str R7, [SP, #16] Store it.
24745 12 mov R7, FP Get hold of the current frame pointer.
24746 14 str R7, [SP, #4] Store it.
24747 16 mov R7, LR Get hold of the current return address.
24748 18 str R7, [SP, #12] Store it.
24749 20 add R7, SP, #16 Point at the start of the
24750 backtrace structure.
24751 22 mov FP, R7 Put this value into the frame pointer. */
24752
24753 work_register = thumb_find_work_register (live_regs_mask);
24754 work_reg = gen_rtx_REG (SImode, work_register);
24755 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24756
24757 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24758 stack_pointer_rtx, GEN_INT (-16)));
24759 RTX_FRAME_RELATED_P (insn) = 1;
24760
24761 if (l_mask)
24762 {
24763 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24764 RTX_FRAME_RELATED_P (insn) = 1;
24765 lr_needs_saving = false;
24766
24767 offset = bit_count (l_mask) * UNITS_PER_WORD;
24768 }
24769
24770 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24771 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24772
24773 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24774 x = gen_frame_mem (SImode, x);
24775 emit_move_insn (x, work_reg);
24776
24777 /* Make sure that the instruction fetching the PC is in the right place
24778 to calculate "start of backtrace creation code + 12". */
24779 /* ??? The stores using the common WORK_REG ought to be enough to
24780 prevent the scheduler from doing anything weird. Failing that
24781 we could always move all of the following into an UNSPEC_VOLATILE. */
24782 if (l_mask)
24783 {
24784 x = gen_rtx_REG (SImode, PC_REGNUM);
24785 emit_move_insn (work_reg, x);
24786
24787 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24788 x = gen_frame_mem (SImode, x);
24789 emit_move_insn (x, work_reg);
24790
24791 emit_move_insn (work_reg, arm_hfp_rtx);
24792
24793 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24794 x = gen_frame_mem (SImode, x);
24795 emit_move_insn (x, work_reg);
24796 }
24797 else
24798 {
24799 emit_move_insn (work_reg, arm_hfp_rtx);
24800
24801 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24802 x = gen_frame_mem (SImode, x);
24803 emit_move_insn (x, work_reg);
24804
24805 x = gen_rtx_REG (SImode, PC_REGNUM);
24806 emit_move_insn (work_reg, x);
24807
24808 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24809 x = gen_frame_mem (SImode, x);
24810 emit_move_insn (x, work_reg);
24811 }
24812
24813 x = gen_rtx_REG (SImode, LR_REGNUM);
24814 emit_move_insn (work_reg, x);
24815
24816 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24817 x = gen_frame_mem (SImode, x);
24818 emit_move_insn (x, work_reg);
24819
24820 x = GEN_INT (offset + 12);
24821 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24822
24823 emit_move_insn (arm_hfp_rtx, work_reg);
24824 }
24825 /* Optimization: If we are not pushing any low registers but we are going
24826 to push some high registers then delay our first push. This will just
24827 be a push of LR and we can combine it with the push of the first high
24828 register. */
24829 else if ((l_mask & 0xff) != 0
24830 || (high_regs_pushed == 0 && lr_needs_saving))
24831 {
24832 unsigned long mask = l_mask;
24833 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24834 insn = thumb1_emit_multi_reg_push (mask, mask);
24835 RTX_FRAME_RELATED_P (insn) = 1;
24836 lr_needs_saving = false;
24837 }
24838
24839 if (high_regs_pushed)
24840 {
24841 unsigned pushable_regs;
24842 unsigned next_hi_reg;
24843 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24844 : crtl->args.info.nregs;
24845 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24846
24847 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24848 if (live_regs_mask & (1 << next_hi_reg))
24849 break;
24850
24851 /* Here we need to mask out registers used for passing arguments
24852 even if they can be pushed.  This is to avoid using them to stash the
24853 high registers; such a stash may clobber the use of arguments. */
24854 pushable_regs = l_mask & (~arg_regs_mask);
24855 if (lr_needs_saving)
24856 pushable_regs &= ~(1 << LR_REGNUM);
24857
24858 if (pushable_regs == 0)
24859 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24860
24861 while (high_regs_pushed > 0)
24862 {
24863 unsigned long real_regs_mask = 0;
24864 unsigned long push_mask = 0;
24865
24866 for (regno = LR_REGNUM; regno >= 0; regno --)
24867 {
24868 if (pushable_regs & (1 << regno))
24869 {
24870 emit_move_insn (gen_rtx_REG (SImode, regno),
24871 gen_rtx_REG (SImode, next_hi_reg));
24872
24873 high_regs_pushed --;
24874 real_regs_mask |= (1 << next_hi_reg);
24875 push_mask |= (1 << regno);
24876
24877 if (high_regs_pushed)
24878 {
24879 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24880 next_hi_reg --)
24881 if (live_regs_mask & (1 << next_hi_reg))
24882 break;
24883 }
24884 else
24885 break;
24886 }
24887 }
24888
24889 /* If we had to find a work register and we have not yet
24890 saved the LR then add it to the list of regs to push. */
24891 if (lr_needs_saving)
24892 {
24893 push_mask |= 1 << LR_REGNUM;
24894 real_regs_mask |= 1 << LR_REGNUM;
24895 lr_needs_saving = false;
24896 }
24897
24898 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24899 RTX_FRAME_RELATED_P (insn) = 1;
24900 }
24901 }
24902
24903 /* Load the pic register before setting the frame pointer,
24904 so we can use r7 as a temporary work register. */
24905 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24906 arm_load_pic_register (live_regs_mask);
24907
24908 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24909 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24910 stack_pointer_rtx);
24911
24912 size = offsets->outgoing_args - offsets->saved_args;
24913 if (flag_stack_usage_info)
24914 current_function_static_stack_size = size;
24915
24916 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24917 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24918 sorry ("-fstack-check=specific for Thumb-1");
24919
24920 amount = offsets->outgoing_args - offsets->saved_regs;
24921 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24922 if (amount)
24923 {
24924 if (amount < 512)
24925 {
24926 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24927 GEN_INT (- amount)));
24928 RTX_FRAME_RELATED_P (insn) = 1;
24929 }
24930 else
24931 {
24932 rtx reg, dwarf;
24933
24934 /* The stack decrement is too big for an immediate value in a single
24935 insn. In theory we could issue multiple subtracts, but after
24936 three of them it becomes more space efficient to place the full
24937 value in the constant pool and load into a register. (Also the
24938 ARM debugger really likes to see only one stack decrement per
24939 function). So instead we look for a scratch register into which
24940 we can load the decrement, and then we subtract this from the
24941 stack pointer. Unfortunately on the thumb the only available
24942 scratch registers are the argument registers, and we cannot use
24943 these as they may hold arguments to the function. Instead we
24944 attempt to locate a call preserved register which is used by this
24945 function. If we can find one, then we know that it will have
24946 been pushed at the start of the prologue and so we can corrupt
24947 it now. */
24948 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24949 if (live_regs_mask & (1 << regno))
24950 break;
24951
24952 gcc_assert(regno <= LAST_LO_REGNUM);
24953
24954 reg = gen_rtx_REG (SImode, regno);
24955
24956 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24957
24958 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24959 stack_pointer_rtx, reg));
24960
24961 dwarf = gen_rtx_SET (stack_pointer_rtx,
24962 plus_constant (Pmode, stack_pointer_rtx,
24963 -amount));
24964 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24965 RTX_FRAME_RELATED_P (insn) = 1;
24966 }
24967 }
24968
24969 if (frame_pointer_needed)
24970 thumb_set_frame_pointer (offsets);
24971
24972 /* If we are profiling, make sure no instructions are scheduled before
24973 the call to mcount. Similarly if the user has requested no
24974 scheduling in the prolog. Similarly if we want non-call exceptions
24975 using the EABI unwinder, to prevent faulting instructions from being
24976 swapped with a stack adjustment. */
24977 if (crtl->profile || !TARGET_SCHED_PROLOG
24978 || (arm_except_unwind_info (&global_options) == UI_TARGET
24979 && cfun->can_throw_non_call_exceptions))
24980 emit_insn (gen_blockage ());
24981
24982 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24983 if (live_regs_mask & 0xff)
24984 cfun->machine->lr_save_eliminated = 0;
24985 }
24986
24987 /* Clear caller saved registers not used to pass return values and leaked
24988 condition flags before exiting a cmse_nonsecure_entry function. */
24989
24990 void
24991 cmse_nonsecure_entry_clear_before_return (void)
24992 {
24993 uint64_t to_clear_mask[2];
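  /* to_clear_mask is indexed by regno / 64: element 0 covers register
     numbers 0-63 (the core registers and the first VFP registers),
     element 1 covers register numbers 64 and above (the remaining VFP
     registers).  */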
24994 uint32_t padding_bits_to_clear = 0;
24995 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
24996 int regno, maxregno = IP_REGNUM;
24997 tree result_type;
24998 rtx result_rtl;
24999
25000 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25001 to_clear_mask[0] |= (1ULL << IP_REGNUM);
25002
25003 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25004 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25005 to make sure the instructions used to clear them are present. */
25006 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25007 {
25008 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25009 maxregno = LAST_VFP_REGNUM;
25010
25011 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25012 to_clear_mask[0] |= float_mask;
25013
25014 float_mask = (1ULL << (maxregno - 63)) - 1;
25015 to_clear_mask[1] = float_mask;
25016
25017 /* Make sure we don't clear the two scratch registers used to clear the
25018 relevant FPSCR bits in output_return_instruction. */
25019 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25020 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25021 emit_use (gen_rtx_REG (SImode, 4));
25022 to_clear_mask[0] &= ~(1ULL << 4);
25023 }
25024
25025 /* If the user has defined registers to be caller saved, these are no longer
25026 restored by the function before returning and must thus be cleared for
25027 security purposes. */
25028 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25029 {
25030 /* We do not touch registers that can be used to pass arguments as per
25031 the AAPCS, since these should never be made callee-saved by user
25032 options. */
25033 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25034 continue;
25035 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25036 continue;
25037 if (call_used_regs[regno])
25038 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25039 }
25040
25041 /* Make sure we do not clear the registers used to return the result in. */
25042 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25043 if (!VOID_TYPE_P (result_type))
25044 {
25045 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25046
25047 /* No need to check that we return in registers, because we don't
25048 support returning on stack yet. */
25049 to_clear_mask[0]
25050 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25051 padding_bits_to_clear_ptr);
25052 }
25053
25054 if (padding_bits_to_clear != 0)
25055 {
25056 rtx reg_rtx;
25057 /* Padding bits to clear is not 0, so we know we are dealing with
25058 returning a composite type, which only uses r0.  Make sure that
25059 r1-r3 are cleared too; we will use r1 as a scratch register. */
25060 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25061
25062 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25063
25064 /* Fill the lower half of the negated padding_bits_to_clear. */
25065 emit_move_insn (reg_rtx,
25066 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25067
25068 /* Also fill the top half of the negated padding_bits_to_clear. */
25069 if (((~padding_bits_to_clear) >> 16) > 0)
25070 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25071 GEN_INT (16),
25072 GEN_INT (16)),
25073 GEN_INT ((~padding_bits_to_clear) >> 16)));
25074
25075 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25076 gen_rtx_REG (SImode, R0_REGNUM),
25077 reg_rtx));
25078 }
25079
25080 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25081 {
25082 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25083 continue;
25084
25085 if (IS_VFP_REGNUM (regno))
25086 {
25087 /* If regno is an even vfp register and its successor is also to
25088 be cleared, use vmov. */
25089 if (TARGET_VFP_DOUBLE
25090 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25091 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25092 {
25093 emit_move_insn (gen_rtx_REG (DFmode, regno),
25094 CONST1_RTX (DFmode));
25095 emit_use (gen_rtx_REG (DFmode, regno));
25096 regno++;
25097 }
25098 else
25099 {
25100 emit_move_insn (gen_rtx_REG (SFmode, regno),
25101 CONST1_RTX (SFmode));
25102 emit_use (gen_rtx_REG (SFmode, regno));
25103 }
25104 }
25105 else
25106 {
25107 if (TARGET_THUMB1)
25108 {
25109 if (regno == R0_REGNUM)
25110 emit_move_insn (gen_rtx_REG (SImode, regno),
25111 const0_rtx);
25112 else
25113 /* R0 has either been cleared before (see code above) or it
25114 holds a return value; either way it is not secret
25115 information. */
25116 emit_move_insn (gen_rtx_REG (SImode, regno),
25117 gen_rtx_REG (SImode, R0_REGNUM));
25118 emit_use (gen_rtx_REG (SImode, regno));
25119 }
25120 else
25121 {
25122 emit_move_insn (gen_rtx_REG (SImode, regno),
25123 gen_rtx_REG (SImode, LR_REGNUM));
25124 emit_use (gen_rtx_REG (SImode, regno));
25125 }
25126 }
25127 }
25128 }
25129
25130 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25131 POP instruction can be generated.  LR should be replaced by PC.  All
25132 the checks required are already done by USE_RETURN_INSN ().  Hence,
25133 all we really need to check here is whether a single register or
25134 multiple registers are to be returned. */
25135 void
25136 thumb2_expand_return (bool simple_return)
25137 {
25138 int i, num_regs;
25139 unsigned long saved_regs_mask;
25140 arm_stack_offsets *offsets;
25141
25142 offsets = arm_get_frame_offsets ();
25143 saved_regs_mask = offsets->saved_regs_mask;
25144
25145 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25146 if (saved_regs_mask & (1 << i))
25147 num_regs++;
25148
25149 if (!simple_return && saved_regs_mask)
25150 {
25151 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25152 functions or adapt the code to handle it according to the ACLE. This
25153 path should not be reachable for cmse_nonsecure_entry functions, but we
25154 prefer to assert it for now to ensure that future code changes do not
25155 silently change this behavior. */
25156 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25157 if (num_regs == 1)
25158 {
25159 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25160 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25161 rtx addr = gen_rtx_MEM (SImode,
25162 gen_rtx_POST_INC (SImode,
25163 stack_pointer_rtx));
25164 set_mem_alias_set (addr, get_frame_alias_set ());
25165 XVECEXP (par, 0, 0) = ret_rtx;
25166 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25167 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25168 emit_jump_insn (par);
25169 }
25170 else
25171 {
25172 saved_regs_mask &= ~ (1 << LR_REGNUM);
25173 saved_regs_mask |= (1 << PC_REGNUM);
25174 arm_emit_multi_reg_pop (saved_regs_mask);
25175 }
25176 }
25177 else
25178 {
25179 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25180 cmse_nonsecure_entry_clear_before_return ();
25181 emit_jump_insn (simple_return_rtx);
25182 }
25183 }
25184
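/* For illustration only, the two shapes the non-simple path above expands
   to (exact register sets depend on what the prologue saved):

     pop     {r4, r5, r6, pc}   @ multi-register case: LR slot loaded into PC

   or, when LR is the only saved register, a single-register pop of the
   saved LR straight into PC (e.g. "ldr pc, [sp], #4").  The simple_return
   path emits no pop at all and falls back to a plain return such as
   "bx lr".  */
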
25185 void
25186 thumb1_expand_epilogue (void)
25187 {
25188 HOST_WIDE_INT amount;
25189 arm_stack_offsets *offsets;
25190 int regno;
25191
25192 /* Naked functions don't have prologues or epilogues. */
25193 if (IS_NAKED (arm_current_func_type ()))
25194 return;
25195
25196 offsets = arm_get_frame_offsets ();
25197 amount = offsets->outgoing_args - offsets->saved_regs;
25198
25199 if (frame_pointer_needed)
25200 {
25201 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25202 amount = offsets->locals_base - offsets->saved_regs;
25203 }
25204 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25205
25206 gcc_assert (amount >= 0);
25207 if (amount)
25208 {
25209 emit_insn (gen_blockage ());
25210
25211 if (amount < 512)
25212 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25213 GEN_INT (amount)));
25214 else
25215 {
25216 /* r3 is always free in the epilogue. */
25217 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25218
25219 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25220 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25221 }
25222 }
25223
25224 /* Emit a USE (stack_pointer_rtx), so that
25225 the stack adjustment will not be deleted. */
25226 emit_insn (gen_force_register_use (stack_pointer_rtx));
25227
25228 if (crtl->profile || !TARGET_SCHED_PROLOG)
25229 emit_insn (gen_blockage ());
25230
25231 /* Emit a clobber for each register that will be restored in the
25232 epilogue, so that flow2 will get register lifetimes correct. */
25233 for (regno = 0; regno < 13; regno++)
25234 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25235 emit_clobber (gen_rtx_REG (SImode, regno));
25236
25237 if (! df_regs_ever_live_p (LR_REGNUM))
25238 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25239
25240 /* Clear all caller-saved regs that are not used to return. */
25241 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25242 cmse_nonsecure_entry_clear_before_return ();
25243 }
25244
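/* A hypothetical example (not from the sources) of when the r3 path above
   is taken: a Thumb-1 function whose locals exceed 512 bytes, e.g.

     int
     use_big_buffer (int seed)
     {
       volatile int buf[200];
       buf[0] = seed;
       return buf[0];
     }

   needs roughly 800 bytes of frame, so the epilogue cannot release it with
   a single "add sp, sp, #imm" and instead materializes the adjustment in
   r3 (LAST_ARG_REGNUM) first.  */
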
25245 /* Epilogue code for APCS frame. */
25246 static void
25247 arm_expand_epilogue_apcs_frame (bool really_return)
25248 {
25249 unsigned long func_type;
25250 unsigned long saved_regs_mask;
25251 int num_regs = 0;
25252 int i;
25253 int floats_from_frame = 0;
25254 arm_stack_offsets *offsets;
25255
25256 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25257 func_type = arm_current_func_type ();
25258
25259 /* Get frame offsets for ARM. */
25260 offsets = arm_get_frame_offsets ();
25261 saved_regs_mask = offsets->saved_regs_mask;
25262
25263 /* Find the offset of the floating-point save area in the frame. */
25264 floats_from_frame
25265 = (offsets->saved_args
25266 + arm_compute_static_chain_stack_bytes ()
25267 - offsets->frame);
25268
25269 /* Compute how many core registers are saved and how far away the floats are. */
25270 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25271 if (saved_regs_mask & (1 << i))
25272 {
25273 num_regs++;
25274 floats_from_frame += 4;
25275 }
25276
25277 if (TARGET_HARD_FLOAT)
25278 {
25279 int start_reg;
25280 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25281
25282 /* The offset is from IP_REGNUM. */
25283 int saved_size = arm_get_vfp_saved_size ();
25284 if (saved_size > 0)
25285 {
25286 rtx_insn *insn;
25287 floats_from_frame += saved_size;
25288 insn = emit_insn (gen_addsi3 (ip_rtx,
25289 hard_frame_pointer_rtx,
25290 GEN_INT (-floats_from_frame)));
25291 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25292 ip_rtx, hard_frame_pointer_rtx);
25293 }
25294
25295 /* Generate VFP register multi-pop. */
25296 start_reg = FIRST_VFP_REGNUM;
25297
25298 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25299 /* Look for a case where a reg does not need restoring. */
25300 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25301 && (!df_regs_ever_live_p (i + 1)
25302 || call_used_regs[i + 1]))
25303 {
25304 if (start_reg != i)
25305 arm_emit_vfp_multi_reg_pop (start_reg,
25306 (i - start_reg) / 2,
25307 gen_rtx_REG (SImode,
25308 IP_REGNUM));
25309 start_reg = i + 2;
25310 }
25311
25312 /* Restore the remaining regs that we have discovered (or possibly
25313 even all of them, if the conditional in the for loop never
25314 fired). */
25315 if (start_reg != i)
25316 arm_emit_vfp_multi_reg_pop (start_reg,
25317 (i - start_reg) / 2,
25318 gen_rtx_REG (SImode, IP_REGNUM));
25319 }
25320
25321 if (TARGET_IWMMXT)
25322 {
25323 /* The frame pointer is guaranteed to be non-double-word aligned, as
25324 it is set to double-word-aligned old_stack_pointer - 4. */
25325 rtx_insn *insn;
25326 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25327
25328 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25329 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25330 {
25331 rtx addr = gen_frame_mem (V2SImode,
25332 plus_constant (Pmode, hard_frame_pointer_rtx,
25333 - lrm_count * 4));
25334 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25335 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25336 gen_rtx_REG (V2SImode, i),
25337 NULL_RTX);
25338 lrm_count += 2;
25339 }
25340 }
25341
25342 /* saved_regs_mask should contain IP, which held the old stack pointer
25343 at the time the activation record was created. Since SP and IP are
25344 adjacent registers, we can restore the value directly into SP. */
25345 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25346 saved_regs_mask &= ~(1 << IP_REGNUM);
25347 saved_regs_mask |= (1 << SP_REGNUM);
25348
25349 /* There are two registers left in saved_regs_mask - LR and PC. We
25350 only need to restore LR (the return address), but to
25351 save time we can load it directly into PC, unless we need a
25352 special function exit sequence, or we are not really returning. */
25353 if (really_return
25354 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25355 && !crtl->calls_eh_return)
25356 /* Delete LR from the register mask, so that LR on
25357 the stack is loaded into the PC in the register mask. */
25358 saved_regs_mask &= ~(1 << LR_REGNUM);
25359 else
25360 saved_regs_mask &= ~(1 << PC_REGNUM);
25361
25362 num_regs = bit_count (saved_regs_mask);
25363 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25364 {
25365 rtx_insn *insn;
25366 emit_insn (gen_blockage ());
25367 /* Unwind the stack to just below the saved registers. */
25368 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25369 hard_frame_pointer_rtx,
25370 GEN_INT (- 4 * num_regs)));
25371
25372 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25373 stack_pointer_rtx, hard_frame_pointer_rtx);
25374 }
25375
25376 arm_emit_multi_reg_pop (saved_regs_mask);
25377
25378 if (IS_INTERRUPT (func_type))
25379 {
25380 /* Interrupt handlers will have pushed the
25381 IP onto the stack, so restore it now. */
25382 rtx_insn *insn;
25383 rtx addr = gen_rtx_MEM (SImode,
25384 gen_rtx_POST_INC (SImode,
25385 stack_pointer_rtx));
25386 set_mem_alias_set (addr, get_frame_alias_set ());
25387 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25388 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25389 gen_rtx_REG (SImode, IP_REGNUM),
25390 NULL_RTX);
25391 }
25392
25393 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25394 return;
25395
25396 if (crtl->calls_eh_return)
25397 emit_insn (gen_addsi3 (stack_pointer_rtx,
25398 stack_pointer_rtx,
25399 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25400
25401 if (IS_STACKALIGN (func_type))
25402 /* Restore the original stack pointer. Before prologue, the stack was
25403 realigned and the original stack pointer saved in r0. For details,
25404 see comment in arm_expand_prologue. */
25405 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25406
25407 emit_jump_insn (simple_return_rtx);
25408 }
25409
25410 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25411 function is not a sibcall. */
25412 void
25413 arm_expand_epilogue (bool really_return)
25414 {
25415 unsigned long func_type;
25416 unsigned long saved_regs_mask;
25417 int num_regs = 0;
25418 int i;
25419 int amount;
25420 arm_stack_offsets *offsets;
25421
25422 func_type = arm_current_func_type ();
25423
25424 /* Naked functions don't have epilogues. Hence, generate a return pattern
25425 and let output_return_instruction take care of any instruction emission. */
25426 if (IS_NAKED (func_type)
25427 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25428 {
25429 if (really_return)
25430 emit_jump_insn (simple_return_rtx);
25431 return;
25432 }
25433
25434 /* If we are throwing an exception, then we really must be doing a
25435 return, so we can't tail-call. */
25436 gcc_assert (!crtl->calls_eh_return || really_return);
25437
25438 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25439 {
25440 arm_expand_epilogue_apcs_frame (really_return);
25441 return;
25442 }
25443
25444 /* Get frame offsets for ARM. */
25445 offsets = arm_get_frame_offsets ();
25446 saved_regs_mask = offsets->saved_regs_mask;
25447 num_regs = bit_count (saved_regs_mask);
25448
25449 if (frame_pointer_needed)
25450 {
25451 rtx_insn *insn;
25452 /* Restore stack pointer if necessary. */
25453 if (TARGET_ARM)
25454 {
25455 /* In ARM mode, the frame pointer points to the first saved register.
25456 Restore the stack pointer to the last saved register. */
25457 amount = offsets->frame - offsets->saved_regs;
25458
25459 /* Force out any pending memory operations that reference stacked data
25460 before stack de-allocation occurs. */
25461 emit_insn (gen_blockage ());
25462 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25463 hard_frame_pointer_rtx,
25464 GEN_INT (amount)));
25465 arm_add_cfa_adjust_cfa_note (insn, amount,
25466 stack_pointer_rtx,
25467 hard_frame_pointer_rtx);
25468
25469 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25470 deleted. */
25471 emit_insn (gen_force_register_use (stack_pointer_rtx));
25472 }
25473 else
25474 {
25475 /* In Thumb-2 mode, the frame pointer points to the last saved
25476 register. */
25477 amount = offsets->locals_base - offsets->saved_regs;
25478 if (amount)
25479 {
25480 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25481 hard_frame_pointer_rtx,
25482 GEN_INT (amount)));
25483 arm_add_cfa_adjust_cfa_note (insn, amount,
25484 hard_frame_pointer_rtx,
25485 hard_frame_pointer_rtx);
25486 }
25487
25488 /* Force out any pending memory operations that reference stacked data
25489 before stack de-allocation occurs. */
25490 emit_insn (gen_blockage ());
25491 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25492 hard_frame_pointer_rtx));
25493 arm_add_cfa_adjust_cfa_note (insn, 0,
25494 stack_pointer_rtx,
25495 hard_frame_pointer_rtx);
25496 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25497 deleted. */
25498 emit_insn (gen_force_register_use (stack_pointer_rtx));
25499 }
25500 }
25501 else
25502 {
25503 /* Pop off outgoing args and local frame to adjust stack pointer to
25504 last saved register. */
25505 amount = offsets->outgoing_args - offsets->saved_regs;
25506 if (amount)
25507 {
25508 rtx_insn *tmp;
25509 /* Force out any pending memory operations that reference stacked data
25510 before stack de-allocation occurs. */
25511 emit_insn (gen_blockage ());
25512 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25513 stack_pointer_rtx,
25514 GEN_INT (amount)));
25515 arm_add_cfa_adjust_cfa_note (tmp, amount,
25516 stack_pointer_rtx, stack_pointer_rtx);
25517 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25518 not deleted. */
25519 emit_insn (gen_force_register_use (stack_pointer_rtx));
25520 }
25521 }
25522
25523 if (TARGET_HARD_FLOAT)
25524 {
25525 /* Generate VFP register multi-pop. */
25526 int end_reg = LAST_VFP_REGNUM + 1;
25527
25528 /* Scan the registers in reverse order. We need to match
25529 any groupings made in the prologue and generate matching
25530 vldm operations. The need to match groups is because,
25531 unlike pop, vldm can only do consecutive regs. */
25532 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25533 /* Look for a case where a reg does not need restoring. */
25534 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25535 && (!df_regs_ever_live_p (i + 1)
25536 || call_used_regs[i + 1]))
25537 {
25538 /* Restore the regs discovered so far (from reg+2 to
25539 end_reg). */
25540 if (end_reg > i + 2)
25541 arm_emit_vfp_multi_reg_pop (i + 2,
25542 (end_reg - (i + 2)) / 2,
25543 stack_pointer_rtx);
25544 end_reg = i;
25545 }
25546
25547 /* Restore the remaining regs that we have discovered (or possibly
25548 even all of them, if the conditional in the for loop never
25549 fired). */
25550 if (end_reg > i + 2)
25551 arm_emit_vfp_multi_reg_pop (i + 2,
25552 (end_reg - (i + 2)) / 2,
25553 stack_pointer_rtx);
25554 }
25555
25556 if (TARGET_IWMMXT)
25557 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25558 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25559 {
25560 rtx_insn *insn;
25561 rtx addr = gen_rtx_MEM (V2SImode,
25562 gen_rtx_POST_INC (SImode,
25563 stack_pointer_rtx));
25564 set_mem_alias_set (addr, get_frame_alias_set ());
25565 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25566 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25567 gen_rtx_REG (V2SImode, i),
25568 NULL_RTX);
25569 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25570 stack_pointer_rtx, stack_pointer_rtx);
25571 }
25572
25573 if (saved_regs_mask)
25574 {
25575 rtx insn;
25576 bool return_in_pc = false;
25577
25578 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25579 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25580 && !IS_CMSE_ENTRY (func_type)
25581 && !IS_STACKALIGN (func_type)
25582 && really_return
25583 && crtl->args.pretend_args_size == 0
25584 && saved_regs_mask & (1 << LR_REGNUM)
25585 && !crtl->calls_eh_return)
25586 {
25587 saved_regs_mask &= ~(1 << LR_REGNUM);
25588 saved_regs_mask |= (1 << PC_REGNUM);
25589 return_in_pc = true;
25590 }
25591
25592 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25593 {
25594 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25595 if (saved_regs_mask & (1 << i))
25596 {
25597 rtx addr = gen_rtx_MEM (SImode,
25598 gen_rtx_POST_INC (SImode,
25599 stack_pointer_rtx));
25600 set_mem_alias_set (addr, get_frame_alias_set ());
25601
25602 if (i == PC_REGNUM)
25603 {
25604 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25605 XVECEXP (insn, 0, 0) = ret_rtx;
25606 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25607 addr);
25608 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25609 insn = emit_jump_insn (insn);
25610 }
25611 else
25612 {
25613 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25614 addr));
25615 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25616 gen_rtx_REG (SImode, i),
25617 NULL_RTX);
25618 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25619 stack_pointer_rtx,
25620 stack_pointer_rtx);
25621 }
25622 }
25623 }
25624 else
25625 {
25626 if (TARGET_LDRD
25627 && current_tune->prefer_ldrd_strd
25628 && !optimize_function_for_size_p (cfun))
25629 {
25630 if (TARGET_THUMB2)
25631 thumb2_emit_ldrd_pop (saved_regs_mask);
25632 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25633 arm_emit_ldrd_pop (saved_regs_mask);
25634 else
25635 arm_emit_multi_reg_pop (saved_regs_mask);
25636 }
25637 else
25638 arm_emit_multi_reg_pop (saved_regs_mask);
25639 }
25640
25641 if (return_in_pc)
25642 return;
25643 }
25644
25645 amount
25646 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25647 if (amount)
25648 {
25649 int i, j;
25650 rtx dwarf = NULL_RTX;
25651 rtx_insn *tmp =
25652 emit_insn (gen_addsi3 (stack_pointer_rtx,
25653 stack_pointer_rtx,
25654 GEN_INT (amount)));
25655
25656 RTX_FRAME_RELATED_P (tmp) = 1;
25657
25658 if (cfun->machine->uses_anonymous_args)
25659 {
25660 /* Restore pretend args. Refer to arm_expand_prologue for how the
25661 pretend args are saved on the stack. */
25662 int num_regs = crtl->args.pretend_args_size / 4;
25663 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25664 for (j = 0, i = 0; j < num_regs; i++)
25665 if (saved_regs_mask & (1 << i))
25666 {
25667 rtx reg = gen_rtx_REG (SImode, i);
25668 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25669 j++;
25670 }
25671 REG_NOTES (tmp) = dwarf;
25672 }
25673 arm_add_cfa_adjust_cfa_note (tmp, amount,
25674 stack_pointer_rtx, stack_pointer_rtx);
25675 }
25676
25677 /* Clear all caller-saved regs that are not used to return. */
25678 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25679 {
25680 /* CMSE_ENTRY always returns. */
25681 gcc_assert (really_return);
25682 cmse_nonsecure_entry_clear_before_return ();
25683 }
25684
25685 if (!really_return)
25686 return;
25687
25688 if (crtl->calls_eh_return)
25689 emit_insn (gen_addsi3 (stack_pointer_rtx,
25690 stack_pointer_rtx,
25691 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25692
25693 if (IS_STACKALIGN (func_type))
25694 /* Restore the original stack pointer. Before prologue, the stack was
25695 realigned and the original stack pointer saved in r0. For details,
25696 see comment in arm_expand_prologue. */
25697 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25698
25699 emit_jump_insn (simple_return_rtx);
25700 }
25701
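/* Illustrative only (assuming ARM state, no frame pointer, optimization
   enabled): for a simple non-leaf function like

     extern int helper (int);

     int
     wrapper (int x)
     {
       return helper (x) + 1;
     }

   the return_in_pc path above typically folds the saved LR into the return,
   giving an epilogue along the lines of

     pop     {r4, pc}

   where the exact register set depends on what the prologue had to save
   (r4 here only keeps the stack 8-byte aligned).  */
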
25702 /* Implementation of insn prologue_thumb1_interwork. This is the first
25703 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
25704
25705 const char *
25706 thumb1_output_interwork (void)
25707 {
25708 const char * name;
25709 FILE *f = asm_out_file;
25710
25711 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25712 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25713 == SYMBOL_REF);
25714 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25715
25716 /* Generate code sequence to switch us into Thumb mode. */
25717 /* The .code 32 directive has already been emitted by
25718 ASM_DECLARE_FUNCTION_NAME. */
25719 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25720 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25721
25722 /* Generate a label, so that the debugger will notice the
25723 change in instruction sets. This label is also used by
25724 the assembler to bypass the ARM code when this function
25725 is called from a Thumb encoded function elsewhere in the
25726 same file. Hence the definition of STUB_NAME here must
25727 agree with the definition in gas/config/tc-arm.c. */
25728
25729 #define STUB_NAME ".real_start_of"
25730
25731 fprintf (f, "\t.code\t16\n");
25732 #ifdef ARM_PE
25733 if (arm_dllexport_name_p (name))
25734 name = arm_strip_name_encoding (name);
25735 #endif
25736 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25737 fprintf (f, "\t.thumb_func\n");
25738 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25739
25740 return "";
25741 }
25742
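/* For reference, the text printed above for a function FOO looks roughly
   like this (label spelling depends on the user label prefix and any name
   encoding):

     orr     ip, pc, #1
     bx      ip
     .code   16
     .globl  .real_start_ofFOO
     .thumb_func
   .real_start_ofFOO:
   */
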
25743 /* Handle the case of a double word load into a low register from
25744 a computed memory address. The computed address may involve a
25745 register which is overwritten by the load. */
25746 const char *
25747 thumb_load_double_from_address (rtx *operands)
25748 {
25749 rtx addr;
25750 rtx base;
25751 rtx offset;
25752 rtx arg1;
25753 rtx arg2;
25754
25755 gcc_assert (REG_P (operands[0]));
25756 gcc_assert (MEM_P (operands[1]));
25757
25758 /* Get the memory address. */
25759 addr = XEXP (operands[1], 0);
25760
25761 /* Work out how the memory address is computed. */
25762 switch (GET_CODE (addr))
25763 {
25764 case REG:
25765 operands[2] = adjust_address (operands[1], SImode, 4);
25766
25767 if (REGNO (operands[0]) == REGNO (addr))
25768 {
25769 output_asm_insn ("ldr\t%H0, %2", operands);
25770 output_asm_insn ("ldr\t%0, %1", operands);
25771 }
25772 else
25773 {
25774 output_asm_insn ("ldr\t%0, %1", operands);
25775 output_asm_insn ("ldr\t%H0, %2", operands);
25776 }
25777 break;
25778
25779 case CONST:
25780 /* Compute <address> + 4 for the high order load. */
25781 operands[2] = adjust_address (operands[1], SImode, 4);
25782
25783 output_asm_insn ("ldr\t%0, %1", operands);
25784 output_asm_insn ("ldr\t%H0, %2", operands);
25785 break;
25786
25787 case PLUS:
25788 arg1 = XEXP (addr, 0);
25789 arg2 = XEXP (addr, 1);
25790
25791 if (CONSTANT_P (arg1))
25792 base = arg2, offset = arg1;
25793 else
25794 base = arg1, offset = arg2;
25795
25796 gcc_assert (REG_P (base));
25797
25798 /* Catch the case of <address> = <reg> + <reg> */
25799 if (REG_P (offset))
25800 {
25801 int reg_offset = REGNO (offset);
25802 int reg_base = REGNO (base);
25803 int reg_dest = REGNO (operands[0]);
25804
25805 /* Add the base and offset registers together into the
25806 higher destination register. */
25807 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25808 reg_dest + 1, reg_base, reg_offset);
25809
25810 /* Load the lower destination register from the address in
25811 the higher destination register. */
25812 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25813 reg_dest, reg_dest + 1);
25814
25815 /* Load the higher destination register from its own address
25816 plus 4. */
25817 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25818 reg_dest + 1, reg_dest + 1);
25819 }
25820 else
25821 {
25822 /* Compute <address> + 4 for the high order load. */
25823 operands[2] = adjust_address (operands[1], SImode, 4);
25824
25825 /* If the computed address is held in the low order register
25826 then load the high order register first, otherwise always
25827 load the low order register first. */
25828 if (REGNO (operands[0]) == REGNO (base))
25829 {
25830 output_asm_insn ("ldr\t%H0, %2", operands);
25831 output_asm_insn ("ldr\t%0, %1", operands);
25832 }
25833 else
25834 {
25835 output_asm_insn ("ldr\t%0, %1", operands);
25836 output_asm_insn ("ldr\t%H0, %2", operands);
25837 }
25838 }
25839 break;
25840
25841 case LABEL_REF:
25842 /* With no registers to worry about we can just load the value
25843 directly. */
25844 operands[2] = adjust_address (operands[1], SImode, 4);
25845
25846 output_asm_insn ("ldr\t%H0, %2", operands);
25847 output_asm_insn ("ldr\t%0, %1", operands);
25848 break;
25849
25850 default:
25851 gcc_unreachable ();
25852 }
25853
25854 return "";
25855 }
25856
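/* Worked example with illustrative register choices: for a doubleword load
   with operands[0] = r2 and operands[1] = [r0 + r1], the REG-plus-REG case
   above prints

     add     r3, r0, r1
     ldr     r2, [r3, #0]
     ldr     r3, [r3, #4]

   The high half of the destination doubles as the address register, so the
   base is safe even when it overlaps the low destination register.  */
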
25857 const char *
25858 thumb_output_move_mem_multiple (int n, rtx *operands)
25859 {
25860 switch (n)
25861 {
25862 case 2:
25863 if (REGNO (operands[4]) > REGNO (operands[5]))
25864 std::swap (operands[4], operands[5]);
25865
25866 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25867 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25868 break;
25869
25870 case 3:
25871 if (REGNO (operands[4]) > REGNO (operands[5]))
25872 std::swap (operands[4], operands[5]);
25873 if (REGNO (operands[5]) > REGNO (operands[6]))
25874 std::swap (operands[5], operands[6]);
25875 if (REGNO (operands[4]) > REGNO (operands[5]))
25876 std::swap (operands[4], operands[5]);
25877
25878 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25879 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25880 break;
25881
25882 default:
25883 gcc_unreachable ();
25884 }
25885
25886 return "";
25887 }
25888
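/* Illustrative output for the n == 3 case once operands[4..6] have been
   sorted into ascending order (say r3, r4, r5), with the source pointer in
   operand 1 and the destination pointer in operand 0:

     ldmia   r1!, {r3, r4, r5}
     stmia   r0!, {r3, r4, r5}
   */
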
25889 /* Output a call-via instruction for thumb state. */
25890 const char *
25891 thumb_call_via_reg (rtx reg)
25892 {
25893 int regno = REGNO (reg);
25894 rtx *labelp;
25895
25896 gcc_assert (regno < LR_REGNUM);
25897
25898 /* If we are in the normal text section we can use a single instance
25899 per compilation unit. If we are doing function sections, then we need
25900 an entry per section, since we can't rely on reachability. */
25901 if (in_section == text_section)
25902 {
25903 thumb_call_reg_needed = 1;
25904
25905 if (thumb_call_via_label[regno] == NULL)
25906 thumb_call_via_label[regno] = gen_label_rtx ();
25907 labelp = thumb_call_via_label + regno;
25908 }
25909 else
25910 {
25911 if (cfun->machine->call_via[regno] == NULL)
25912 cfun->machine->call_via[regno] = gen_label_rtx ();
25913 labelp = cfun->machine->call_via + regno;
25914 }
25915
25916 output_asm_insn ("bl\t%a0", labelp);
25917 return "";
25918 }
25919
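/* Sketch of the generated code for a call through r4 in the ordinary text
   section (label number invented for the example):

     bl      .L99            @ .L99 == thumb_call_via_label[4]
     ...
   .L99:
     bx      r4              @ emitted once per file by arm_file_end
   */
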
25920 /* Routines for generating rtl. */
25921 void
25922 thumb_expand_movmemqi (rtx *operands)
25923 {
25924 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25925 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25926 HOST_WIDE_INT len = INTVAL (operands[2]);
25927 HOST_WIDE_INT offset = 0;
25928
25929 while (len >= 12)
25930 {
25931 emit_insn (gen_movmem12b (out, in, out, in));
25932 len -= 12;
25933 }
25934
25935 if (len >= 8)
25936 {
25937 emit_insn (gen_movmem8b (out, in, out, in));
25938 len -= 8;
25939 }
25940
25941 if (len >= 4)
25942 {
25943 rtx reg = gen_reg_rtx (SImode);
25944 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25945 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25946 len -= 4;
25947 offset += 4;
25948 }
25949
25950 if (len >= 2)
25951 {
25952 rtx reg = gen_reg_rtx (HImode);
25953 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25954 plus_constant (Pmode, in,
25955 offset))));
25956 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25957 offset)),
25958 reg));
25959 len -= 2;
25960 offset += 2;
25961 }
25962
25963 if (len)
25964 {
25965 rtx reg = gen_reg_rtx (QImode);
25966 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25967 plus_constant (Pmode, in,
25968 offset))));
25969 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25970 offset)),
25971 reg));
25972 }
25973 }
25974
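/* Example decomposition (illustrative): a 15-byte copy becomes one 12-byte
   block move followed by a halfword and a byte for the 3-byte tail, i.e.
   15 = 12 (movmem12b) + 2 (HImode move) + 1 (QImode move); the block moves
   post-increment the pointer registers, while the tail uses OFFSET.  */
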
25975 void
25976 thumb_reload_out_hi (rtx *operands)
25977 {
25978 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25979 }
25980
25981 /* Return the length of a function name prefix
25982 that starts with the character 'c'. */
25983 static int
25984 arm_get_strip_length (int c)
25985 {
25986 switch (c)
25987 {
25988 ARM_NAME_ENCODING_LENGTHS
25989 default: return 0;
25990 }
25991 }
25992
25993 /* Return a pointer to a function's name with any
25994 and all prefix encodings stripped from it. */
25995 const char *
25996 arm_strip_name_encoding (const char *name)
25997 {
25998 int skip;
25999
26000 while ((skip = arm_get_strip_length (* name)))
26001 name += skip;
26002
26003 return name;
26004 }
26005
26006 /* If there is a '*' anywhere in the name's prefix, then
26007 emit the stripped name verbatim, otherwise prepend an
26008 underscore if leading underscores are being used. */
26009 void
26010 arm_asm_output_labelref (FILE *stream, const char *name)
26011 {
26012 int skip;
26013 int verbatim = 0;
26014
26015 while ((skip = arm_get_strip_length (* name)))
26016 {
26017 verbatim |= (*name == '*');
26018 name += skip;
26019 }
26020
26021 if (verbatim)
26022 fputs (name, stream);
26023 else
26024 asm_fprintf (stream, "%U%s", name);
26025 }
26026
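/* Small worked example of the two routines above, assuming the usual
   ARM_NAME_ENCODING_LENGTHS entry that maps '*' to a length of 1:

     arm_strip_name_encoding ("*foo")         returns "foo"
     arm_asm_output_labelref (stream, "*foo") prints  foo   (verbatim)
     arm_asm_output_labelref (stream, "foo")  prints  the user label prefix
                                              (often empty or "_"), then foo
   */
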
26027 /* This function is used to emit an EABI tag and its associated value.
26028 We emit the numerical value of the tag in case the assembler does not
26029 support textual tags (e.g. gas prior to 2.20). If requested we include
26030 the tag name in a comment so that anyone reading the assembler output
26031 will know which tag is being set.
26032
26033 This function is not static because arm-c.c needs it too. */
26034
26035 void
26036 arm_emit_eabi_attribute (const char *name, int num, int val)
26037 {
26038 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26039 if (flag_verbose_asm || flag_debug_asm)
26040 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26041 asm_fprintf (asm_out_file, "\n");
26042 }
26043
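/* Example of what this prints with -fverbose-asm, where ASM_COMMENT_START
   is "@" on ARM targets:

     .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals
   */
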
26044 /* This function is used to print CPU tuning information as comment
26045 in assembler file. Pointers are not printed for now. */
26046
26047 void
26048 arm_print_tune_info (void)
26049 {
26050 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26051 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26052 current_tune->constant_limit);
26053 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26054 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26055 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26056 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26057 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26058 "prefetch.l1_cache_size:\t%d\n",
26059 current_tune->prefetch.l1_cache_size);
26060 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26061 "prefetch.l1_cache_line_size:\t%d\n",
26062 current_tune->prefetch.l1_cache_line_size);
26063 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26064 "prefer_constant_pool:\t%d\n",
26065 (int) current_tune->prefer_constant_pool);
26066 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26067 "branch_cost:\t(s:speed, p:predictable)\n");
26068 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26069 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26070 current_tune->branch_cost (false, false));
26071 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26072 current_tune->branch_cost (false, true));
26073 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26074 current_tune->branch_cost (true, false));
26075 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26076 current_tune->branch_cost (true, true));
26077 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26078 "prefer_ldrd_strd:\t%d\n",
26079 (int) current_tune->prefer_ldrd_strd);
26080 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26081 "logical_op_non_short_circuit:\t[%d,%d]\n",
26082 (int) current_tune->logical_op_non_short_circuit_thumb,
26083 (int) current_tune->logical_op_non_short_circuit_arm);
26084 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26085 "prefer_neon_for_64bits:\t%d\n",
26086 (int) current_tune->prefer_neon_for_64bits);
26087 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26088 "disparage_flag_setting_t16_encodings:\t%d\n",
26089 (int) current_tune->disparage_flag_setting_t16_encodings);
26090 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26091 "string_ops_prefer_neon:\t%d\n",
26092 (int) current_tune->string_ops_prefer_neon);
26093 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26094 "max_insns_inline_memset:\t%d\n",
26095 current_tune->max_insns_inline_memset);
26096 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26097 current_tune->fusible_ops);
26098 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26099 (int) current_tune->sched_autopref);
26100 }
26101
26102 static void
26103 arm_file_start (void)
26104 {
26105 int val;
26106
26107 if (TARGET_BPABI)
26108 {
26109 /* We don't have a specified CPU. Use the architecture to
26110 generate the tags.
26111
26112 Note: it might be better to do this unconditionally, then the
26113 assembler would not need to know about all new CPU names as
26114 they are added. */
26115 if (!arm_active_target.core_name)
26116 {
26117 /* armv7ve doesn't support any extensions. */
26118 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26119 {
26120 /* Keep backward compatibility for assemblers
26121 which don't support armv7ve. */
26122 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26123 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26124 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26125 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26126 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26127 }
26128 else
26129 {
26130 const char* pos = strchr (arm_active_target.arch_name, '+');
26131 if (pos)
26132 {
26133 char buf[32];
26134 gcc_assert (strlen (arm_active_target.arch_name)
26135 <= sizeof (buf) / sizeof (*pos));
26136 strncpy (buf, arm_active_target.arch_name,
26137 (pos - arm_active_target.arch_name) * sizeof (*pos));
26138 buf[pos - arm_active_target.arch_name] = '\0';
26139 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26140 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26141 }
26142 else
26143 asm_fprintf (asm_out_file, "\t.arch %s\n",
26144 arm_active_target.arch_name);
26145 }
26146 }
26147 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26148 asm_fprintf (asm_out_file, "\t.arch %s\n",
26149 arm_active_target.core_name + 8);
26150 else
26151 {
26152 const char* truncated_name
26153 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26154 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26155 }
26156
26157 if (print_tune_info)
26158 arm_print_tune_info ();
26159
26160 if (! TARGET_SOFT_FLOAT)
26161 {
26162 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26163 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26164
26165 if (TARGET_HARD_FLOAT_ABI)
26166 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26167 }
26168
26169 /* Some of these attributes only apply when the corresponding features
26170 are used. However we don't have any easy way of figuring this out.
26171 Conservatively record the setting that would have been used. */
26172
26173 if (flag_rounding_math)
26174 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26175
26176 if (!flag_unsafe_math_optimizations)
26177 {
26178 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26179 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26180 }
26181 if (flag_signaling_nans)
26182 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26183
26184 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26185 flag_finite_math_only ? 1 : 3);
26186
26187 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26188 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26189 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26190 flag_short_enums ? 1 : 2);
26191
26192 /* Tag_ABI_optimization_goals. */
26193 if (optimize_size)
26194 val = 4;
26195 else if (optimize >= 2)
26196 val = 2;
26197 else if (optimize)
26198 val = 1;
26199 else
26200 val = 6;
26201 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26202
26203 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26204 unaligned_access);
26205
26206 if (arm_fp16_format)
26207 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26208 (int) arm_fp16_format);
26209
26210 if (arm_lang_output_object_attributes_hook)
26211 arm_lang_output_object_attributes_hook();
26212 }
26213
26214 default_file_start ();
26215 }
26216
26217 static void
26218 arm_file_end (void)
26219 {
26220 int regno;
26221
26222 if (NEED_INDICATE_EXEC_STACK)
26223 /* Add .note.GNU-stack. */
26224 file_end_indicate_exec_stack ();
26225
26226 if (! thumb_call_reg_needed)
26227 return;
26228
26229 switch_to_section (text_section);
26230 asm_fprintf (asm_out_file, "\t.code 16\n");
26231 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26232
26233 for (regno = 0; regno < LR_REGNUM; regno++)
26234 {
26235 rtx label = thumb_call_via_label[regno];
26236
26237 if (label != 0)
26238 {
26239 targetm.asm_out.internal_label (asm_out_file, "L",
26240 CODE_LABEL_NUMBER (label));
26241 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26242 }
26243 }
26244 }
26245
26246 #ifndef ARM_PE
26247 /* Symbols in the text segment can be accessed without indirecting via the
26248 constant pool; it may take an extra binary operation, but this is still
26249 faster than indirecting via memory. Don't do this when not optimizing,
26250 since we won't be calculating all of the offsets necessary to do this
26251 simplification. */
26252
26253 static void
26254 arm_encode_section_info (tree decl, rtx rtl, int first)
26255 {
26256 if (optimize > 0 && TREE_CONSTANT (decl))
26257 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26258
26259 default_encode_section_info (decl, rtl, first);
26260 }
26261 #endif /* !ARM_PE */
26262
26263 static void
26264 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26265 {
26266 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26267 && !strcmp (prefix, "L"))
26268 {
26269 arm_ccfsm_state = 0;
26270 arm_target_insn = NULL;
26271 }
26272 default_internal_label (stream, prefix, labelno);
26273 }
26274
26275 /* Output code to add DELTA to the first argument, and then jump
26276 to FUNCTION. Used for C++ multiple inheritance. */
26277
26278 static void
26279 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26280 HOST_WIDE_INT, tree function)
26281 {
26282 static int thunk_label = 0;
26283 char label[256];
26284 char labelpc[256];
26285 int mi_delta = delta;
26286 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26287 int shift = 0;
26288 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26289 ? 1 : 0);
26290 if (mi_delta < 0)
26291 mi_delta = - mi_delta;
26292
26293 final_start_function (emit_barrier (), file, 1);
26294
26295 if (TARGET_THUMB1)
26296 {
26297 int labelno = thunk_label++;
26298 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26299 /* Thunks are entered in arm mode when available. */
26300 if (TARGET_THUMB1_ONLY)
26301 {
26302 /* push r3 so we can use it as a temporary. */
26303 /* TODO: Omit this save if r3 is not used. */
26304 fputs ("\tpush {r3}\n", file);
26305 fputs ("\tldr\tr3, ", file);
26306 }
26307 else
26308 {
26309 fputs ("\tldr\tr12, ", file);
26310 }
26311 assemble_name (file, label);
26312 fputc ('\n', file);
26313 if (flag_pic)
26314 {
26315 /* If we are generating PIC, the ldr instruction below loads
26316 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26317 the address of the add + 8, so we have:
26318
26319 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26320 = target + 1.
26321
26322 Note that we have "+ 1" because some versions of GNU ld
26323 don't set the low bit of the result for R_ARM_REL32
26324 relocations against thumb function symbols.
26325 On ARMv6M this is +4, not +8. */
26326 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26327 assemble_name (file, labelpc);
26328 fputs (":\n", file);
26329 if (TARGET_THUMB1_ONLY)
26330 {
26331 /* This is 2 insns after the start of the thunk, so we know it
26332 is 4-byte aligned. */
26333 fputs ("\tadd\tr3, pc, r3\n", file);
26334 fputs ("\tmov r12, r3\n", file);
26335 }
26336 else
26337 fputs ("\tadd\tr12, pc, r12\n", file);
26338 }
26339 else if (TARGET_THUMB1_ONLY)
26340 fputs ("\tmov r12, r3\n", file);
26341 }
26342 if (TARGET_THUMB1_ONLY)
26343 {
26344 if (mi_delta > 255)
26345 {
26346 fputs ("\tldr\tr3, ", file);
26347 assemble_name (file, label);
26348 fputs ("+4\n", file);
26349 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26350 mi_op, this_regno, this_regno);
26351 }
26352 else if (mi_delta != 0)
26353 {
26354 /* Thumb1 unified syntax requires the 's' suffix in the instruction name
26355 when one of the operands is an immediate. */
26356 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26357 mi_op, this_regno, this_regno,
26358 mi_delta);
26359 }
26360 }
26361 else
26362 {
26363 /* TODO: Use movw/movt for large constants when available. */
26364 while (mi_delta != 0)
26365 {
26366 if ((mi_delta & (3 << shift)) == 0)
26367 shift += 2;
26368 else
26369 {
26370 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26371 mi_op, this_regno, this_regno,
26372 mi_delta & (0xff << shift));
26373 mi_delta &= ~(0xff << shift);
26374 shift += 8;
26375 }
26376 }
26377 }
26378 if (TARGET_THUMB1)
26379 {
26380 if (TARGET_THUMB1_ONLY)
26381 fputs ("\tpop\t{r3}\n", file);
26382
26383 fprintf (file, "\tbx\tr12\n");
26384 ASM_OUTPUT_ALIGN (file, 2);
26385 assemble_name (file, label);
26386 fputs (":\n", file);
26387 if (flag_pic)
26388 {
26389 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26390 rtx tem = XEXP (DECL_RTL (function), 0);
26391 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26392 pipeline offset is four rather than eight. Adjust the offset
26393 accordingly. */
26394 tem = plus_constant (GET_MODE (tem), tem,
26395 TARGET_THUMB1_ONLY ? -3 : -7);
26396 tem = gen_rtx_MINUS (GET_MODE (tem),
26397 tem,
26398 gen_rtx_SYMBOL_REF (Pmode,
26399 ggc_strdup (labelpc)));
26400 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26401 }
26402 else
26403 /* Output ".word .LTHUNKn". */
26404 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26405
26406 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26407 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26408 }
26409 else
26410 {
26411 fputs ("\tb\t", file);
26412 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26413 if (NEED_PLT_RELOC)
26414 fputs ("(PLT)", file);
26415 fputc ('\n', file);
26416 }
26417
26418 final_end_function ();
26419 }
26420
26421 /* MI thunk handling for TARGET_32BIT. */
26422
26423 static void
26424 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26425 HOST_WIDE_INT vcall_offset, tree function)
26426 {
26427 /* On ARM, this_regno is R0 or R1 depending on
26428 whether the function returns an aggregate or not.
26429 */
26430 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26431 function)
26432 ? R1_REGNUM : R0_REGNUM);
26433
26434 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26435 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26436 reload_completed = 1;
26437 emit_note (NOTE_INSN_PROLOGUE_END);
26438
26439 /* Add DELTA to THIS_RTX. */
26440 if (delta != 0)
26441 arm_split_constant (PLUS, Pmode, NULL_RTX,
26442 delta, this_rtx, this_rtx, false);
26443
26444 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26445 if (vcall_offset != 0)
26446 {
26447 /* Load *THIS_RTX. */
26448 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26449 /* Compute *THIS_RTX + VCALL_OFFSET. */
26450 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26451 false);
26452 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26453 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26454 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26455 }
26456
26457 /* Generate a tail call to the target function. */
26458 if (!TREE_USED (function))
26459 {
26460 assemble_external (function);
26461 TREE_USED (function) = 1;
26462 }
26463 rtx funexp = XEXP (DECL_RTL (function), 0);
26464 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26465 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26466 SIBLING_CALL_P (insn) = 1;
26467
26468 insn = get_insns ();
26469 shorten_branches (insn);
26470 final_start_function (insn, file, 1);
26471 final (insn, file, 1);
26472 final_end_function ();
26473
26474 /* Stop pretending this is a post-reload pass. */
26475 reload_completed = 0;
26476 }
26477
26478 /* Output code to add DELTA to the first argument, and then jump
26479 to FUNCTION. Used for C++ multiple inheritance. */
26480
26481 static void
26482 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26483 HOST_WIDE_INT vcall_offset, tree function)
26484 {
26485 if (TARGET_32BIT)
26486 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26487 else
26488 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26489 }
26490
26491 int
26492 arm_emit_vector_const (FILE *file, rtx x)
26493 {
26494 int i;
26495 const char * pattern;
26496
26497 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26498
26499 switch (GET_MODE (x))
26500 {
26501 case V2SImode: pattern = "%08x"; break;
26502 case V4HImode: pattern = "%04x"; break;
26503 case V8QImode: pattern = "%02x"; break;
26504 default: gcc_unreachable ();
26505 }
26506
26507 fprintf (file, "0x");
26508 for (i = CONST_VECTOR_NUNITS (x); i--;)
26509 {
26510 rtx element;
26511
26512 element = CONST_VECTOR_ELT (x, i);
26513 fprintf (file, pattern, INTVAL (element));
26514 }
26515
26516 return 1;
26517 }
26518
26519 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26520 HFmode constant pool entries are actually loaded with ldr. */
26521 void
26522 arm_emit_fp16_const (rtx c)
26523 {
26524 long bits;
26525
26526 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26527 if (WORDS_BIG_ENDIAN)
26528 assemble_zeros (2);
26529 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26530 if (!WORDS_BIG_ENDIAN)
26531 assemble_zeros (2);
26532 }
26533
26534 const char *
26535 arm_output_load_gr (rtx *operands)
26536 {
26537 rtx reg;
26538 rtx offset;
26539 rtx wcgr;
26540 rtx sum;
26541
26542 if (!MEM_P (operands [1])
26543 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26544 || !REG_P (reg = XEXP (sum, 0))
26545 || !CONST_INT_P (offset = XEXP (sum, 1))
26546 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26547 return "wldrw%?\t%0, %1";
26548
26549 /* Fix up an out-of-range load of a GR register. */
26550 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26551 wcgr = operands[0];
26552 operands[0] = reg;
26553 output_asm_insn ("ldr%?\t%0, %1", operands);
26554
26555 operands[0] = wcgr;
26556 operands[1] = reg;
26557 output_asm_insn ("tmcr%?\t%0, %1", operands);
26558 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26559
26560 return "";
26561 }
26562
26563 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26564
26565 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26566 named arg and all anonymous args onto the stack.
26567 XXX I know the prologue shouldn't be pushing registers, but it is faster
26568 that way. */
26569
26570 static void
26571 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26572 machine_mode mode,
26573 tree type,
26574 int *pretend_size,
26575 int second_time ATTRIBUTE_UNUSED)
26576 {
26577 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26578 int nregs;
26579
26580 cfun->machine->uses_anonymous_args = 1;
26581 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26582 {
26583 nregs = pcum->aapcs_ncrn;
26584 if (nregs & 1)
26585 {
26586 int res = arm_needs_doubleword_align (mode, type);
26587 if (res < 0 && warn_psabi)
26588 inform (input_location, "parameter passing for argument of "
26589 "type %qT changed in GCC 7.1", type);
26590 else if (res > 0)
26591 nregs++;
26592 }
26593 }
26594 else
26595 nregs = pcum->nregs;
26596
26597 if (nregs < NUM_ARG_REGS)
26598 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26599 }
26600
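/* Worked example (a sketch under the AAPCS rules above): for

     int sum_rest (int first, ...);

   the single named argument occupies r0, so nregs is 1; an int needs no
   doubleword alignment, so nregs stays 1 and *pretend_size becomes
   (NUM_ARG_REGS - 1) * UNITS_PER_WORD = 12, making the prologue push r1-r3
   next to any stacked anonymous arguments.  */
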
26601 /* We can't rely on the caller doing the proper promotion when
26602 using APCS or ATPCS. */
26603
26604 static bool
26605 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26606 {
26607 return !TARGET_AAPCS_BASED;
26608 }
26609
26610 static machine_mode
26611 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26612 machine_mode mode,
26613 int *punsignedp ATTRIBUTE_UNUSED,
26614 const_tree fntype ATTRIBUTE_UNUSED,
26615 int for_return ATTRIBUTE_UNUSED)
26616 {
26617 if (GET_MODE_CLASS (mode) == MODE_INT
26618 && GET_MODE_SIZE (mode) < 4)
26619 return SImode;
26620
26621 return mode;
26622 }
26623
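/* Consequence (illustrative): a narrow integer value such as the argument
   and result of

     short bump (short x) { return x + 1; }

   is promoted to SImode, so it travels in a full 32-bit register.  */
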
26624
26625 static bool
26626 arm_default_short_enums (void)
26627 {
26628 return ARM_DEFAULT_SHORT_ENUMS;
26629 }
26630
26631
26632 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26633
26634 static bool
26635 arm_align_anon_bitfield (void)
26636 {
26637 return TARGET_AAPCS_BASED;
26638 }
26639
26640
26641 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26642
26643 static tree
26644 arm_cxx_guard_type (void)
26645 {
26646 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26647 }
26648
26649
26650 /* The EABI says test the least significant bit of a guard variable. */
26651
26652 static bool
26653 arm_cxx_guard_mask_bit (void)
26654 {
26655 return TARGET_AAPCS_BASED;
26656 }
26657
26658
26659 /* The EABI specifies that all array cookies are 8 bytes long. */
26660
26661 static tree
26662 arm_get_cookie_size (tree type)
26663 {
26664 tree size;
26665
26666 if (!TARGET_AAPCS_BASED)
26667 return default_cxx_get_cookie_size (type);
26668
26669 size = build_int_cst (sizetype, 8);
26670 return size;
26671 }
26672
26673
26674 /* The EABI says that array cookies should also contain the element size. */
26675
26676 static bool
26677 arm_cookie_has_size (void)
26678 {
26679 return TARGET_AAPCS_BASED;
26680 }
26681
26682
26683 /* The EABI says constructors and destructors should return a pointer to
26684 the object constructed/destroyed. */
26685
26686 static bool
26687 arm_cxx_cdtor_returns_this (void)
26688 {
26689 return TARGET_AAPCS_BASED;
26690 }
26691
26692 /* The EABI says that an inline function may never be the key
26693 method. */
26694
26695 static bool
26696 arm_cxx_key_method_may_be_inline (void)
26697 {
26698 return !TARGET_AAPCS_BASED;
26699 }
26700
26701 static void
26702 arm_cxx_determine_class_data_visibility (tree decl)
26703 {
26704 if (!TARGET_AAPCS_BASED
26705 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26706 return;
26707
26708 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26709 is exported. However, on systems without dynamic vague linkage,
26710 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26711 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26712 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26713 else
26714 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26715 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26716 }
26717
26718 static bool
26719 arm_cxx_class_data_always_comdat (void)
26720 {
26721 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26722 vague linkage if the class has no key function. */
26723 return !TARGET_AAPCS_BASED;
26724 }
26725
26726
26727 /* The EABI says __aeabi_atexit should be used to register static
26728 destructors. */
26729
26730 static bool
26731 arm_cxx_use_aeabi_atexit (void)
26732 {
26733 return TARGET_AAPCS_BASED;
26734 }
26735
26736
26737 void
26738 arm_set_return_address (rtx source, rtx scratch)
26739 {
26740 arm_stack_offsets *offsets;
26741 HOST_WIDE_INT delta;
26742 rtx addr;
26743 unsigned long saved_regs;
26744
26745 offsets = arm_get_frame_offsets ();
26746 saved_regs = offsets->saved_regs_mask;
26747
26748 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26749 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26750 else
26751 {
26752 if (frame_pointer_needed)
26753 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26754 else
26755 {
26756 /* LR will be the first saved register. */
26757 delta = offsets->outgoing_args - (offsets->frame + 4);
26758
26759
26760 if (delta >= 4096)
26761 {
26762 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26763 GEN_INT (delta & ~4095)));
26764 addr = scratch;
26765 delta &= 4095;
26766 }
26767 else
26768 addr = stack_pointer_rtx;
26769
26770 addr = plus_constant (Pmode, addr, delta);
26771 }
26772 /* The store needs to be marked as frame related in order to prevent
26773 DSE from deleting it as dead if it is based on fp. */
26774 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26775 RTX_FRAME_RELATED_P (insn) = 1;
26776 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26777 }
26778 }
26779
26780
26781 void
26782 thumb_set_return_address (rtx source, rtx scratch)
26783 {
26784 arm_stack_offsets *offsets;
26785 HOST_WIDE_INT delta;
26786 HOST_WIDE_INT limit;
26787 int reg;
26788 rtx addr;
26789 unsigned long mask;
26790
26791 emit_use (source);
26792
26793 offsets = arm_get_frame_offsets ();
26794 mask = offsets->saved_regs_mask;
26795 if (mask & (1 << LR_REGNUM))
26796 {
26797 limit = 1024;
26798 /* Find the saved regs. */
26799 if (frame_pointer_needed)
26800 {
26801 delta = offsets->soft_frame - offsets->saved_args;
26802 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26803 if (TARGET_THUMB1)
26804 limit = 128;
26805 }
26806 else
26807 {
26808 delta = offsets->outgoing_args - offsets->saved_args;
26809 reg = SP_REGNUM;
26810 }
26811 /* Allow for the stack frame. */
26812 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26813 delta -= 16;
26814 /* The link register is always the first saved register. */
26815 delta -= 4;
26816
26817 /* Construct the address. */
26818 addr = gen_rtx_REG (SImode, reg);
26819 if (delta > limit)
26820 {
26821 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26822 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26823 addr = scratch;
26824 }
26825 else
26826 addr = plus_constant (Pmode, addr, delta);
26827
26828 /* The store needs to be marked as frame related in order to prevent
26829 DSE from deleting it as dead if it is based on fp. */
26830 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26831 RTX_FRAME_RELATED_P (insn) = 1;
26832 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26833 }
26834 else
26835 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26836 }
26837
26838 /* Implements target hook vector_mode_supported_p. */
26839 bool
26840 arm_vector_mode_supported_p (machine_mode mode)
26841 {
26842 /* Neon also supports V2SImode, etc. listed in the clause below. */
26843 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26844 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26845 || mode == V2DImode || mode == V8HFmode))
26846 return true;
26847
26848 if ((TARGET_NEON || TARGET_IWMMXT)
26849 && ((mode == V2SImode)
26850 || (mode == V4HImode)
26851 || (mode == V8QImode)))
26852 return true;
26853
26854 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26855 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26856 || mode == V2HAmode))
26857 return true;
26858
26859 return false;
26860 }
26861
26862 /* Implements target hook array_mode_supported_p. */
26863
26864 static bool
26865 arm_array_mode_supported_p (machine_mode mode,
26866 unsigned HOST_WIDE_INT nelems)
26867 {
26868 if (TARGET_NEON
26869 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26870 && (nelems >= 2 && nelems <= 4))
26871 return true;
26872
26873 return false;
26874 }
26875
26876 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26877 registers when autovectorizing for Neon, at least until multiple vector
26878 widths are supported properly by the middle-end. */
26879
26880 static machine_mode
26881 arm_preferred_simd_mode (machine_mode mode)
26882 {
26883 if (TARGET_NEON)
26884 switch (mode)
26885 {
26886 case SFmode:
26887 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26888 case SImode:
26889 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26890 case HImode:
26891 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26892 case QImode:
26893 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26894 case DImode:
26895 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26896 return V2DImode;
26897 break;
26898
26899 default:;
26900 }
26901
26902 if (TARGET_REALLY_IWMMXT)
26903 switch (mode)
26904 {
26905 case SImode:
26906 return V2SImode;
26907 case HImode:
26908 return V4HImode;
26909 case QImode:
26910 return V8QImode;
26911
26912 default:;
26913 }
26914
26915 return word_mode;
26916 }
26917
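/* Illustrative only: with Neon enabled (and -funsafe-math-optimizations,
   since Neon float arithmetic is not fully IEEE), a loop such as

     void
     scale (float *a, float k, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] *= k;
     }

   is normally vectorized in V4SFmode; -mvectorize-with-neon-double flips
   the preference to V2SFmode, and iWMMXt targets fall back to the
   doubleword integer modes above.  */
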
26918 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26919
26920 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26921 using r0-r4 for function arguments, r7 for the stack frame and don't have
26922 enough left over to do doubleword arithmetic. For Thumb-2 all the
26923 potentially problematic instructions accept high registers so this is not
26924 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26925 that require many low registers. */
26926 static bool
26927 arm_class_likely_spilled_p (reg_class_t rclass)
26928 {
26929 if ((TARGET_THUMB1 && rclass == LO_REGS)
26930 || rclass == CC_REG)
26931 return true;
26932
26933 return false;
26934 }
26935
26936 /* Implements target hook small_register_classes_for_mode_p. */
26937 bool
26938 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26939 {
26940 return TARGET_THUMB1;
26941 }
26942
26943 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26944 ARM insns and therefore guarantee that the shift count is modulo 256.
26945 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26946 guarantee no particular behavior for out-of-range counts. */
26947
26948 static unsigned HOST_WIDE_INT
26949 arm_shift_truncation_mask (machine_mode mode)
26950 {
26951 return mode == SImode ? 255 : 0;
26952 }
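
/* Illustrative note, not part of the original sources: because the SImode
   shift count is truncated modulo 256 by the hardware, an explicit AND of
   the count with 255 feeding an SImode shift is redundant and the
   middle-end is allowed to remove it.  A source-level sketch:

       unsigned int
       shift_sketch (unsigned int x, unsigned int n)
       {
         return x << (n & 255);
       }

   Here the "& 255" can be dropped once the truncation mask of 255 is
   reported.  For DImode shifts no such guarantee is given, so the mask of
   0 keeps any explicit AND in place.  */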
26953
26954
26955 /* Map internal gcc register numbers to DWARF2 register numbers. */
26956
26957 unsigned int
26958 arm_dbx_register_number (unsigned int regno)
26959 {
26960 if (regno < 16)
26961 return regno;
26962
26963 if (IS_VFP_REGNUM (regno))
26964 {
26965 /* See comment in arm_dwarf_register_span. */
26966 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26967 return 64 + regno - FIRST_VFP_REGNUM;
26968 else
26969 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26970 }
26971
26972 if (IS_IWMMXT_GR_REGNUM (regno))
26973 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26974
26975 if (IS_IWMMXT_REGNUM (regno))
26976 return 112 + regno - FIRST_IWMMXT_REGNUM;
26977
26978 return DWARF_FRAME_REGISTERS;
26979 }
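
/* Worked example, illustrative only and based on the mapping above: the
   core registers r0-r15 keep their own numbers; the first VFP register
   (s0) maps to DWARF number 64 under the legacy single-precision
   numbering, while a double-only register such as d16 falls into the
   256-based range, giving 256 + 16 = 272.  Registers with no DWARF
   equivalent return DWARF_FRAME_REGISTERS as an invalid marker.  */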
26980
26981 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26982 GCC models them as 64 32-bit registers, so we need to describe this to
26983 the DWARF generation code. Other registers can use the default. */
26984 static rtx
26985 arm_dwarf_register_span (rtx rtl)
26986 {
26987 machine_mode mode;
26988 unsigned regno;
26989 rtx parts[16];
26990 int nregs;
26991 int i;
26992
26993 regno = REGNO (rtl);
26994 if (!IS_VFP_REGNUM (regno))
26995 return NULL_RTX;
26996
26997 /* XXX FIXME: The EABI defines two VFP register ranges:
26998 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26999 256-287: D0-D31
27000 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27001 corresponding D register. Until GDB supports this, we shall use the
27002 legacy encodings. We also use these encodings for D0-D15 for
27003 compatibility with older debuggers. */
27004 mode = GET_MODE (rtl);
27005 if (GET_MODE_SIZE (mode) < 8)
27006 return NULL_RTX;
27007
27008 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27009 {
27010 nregs = GET_MODE_SIZE (mode) / 4;
27011 for (i = 0; i < nregs; i += 2)
27012 if (TARGET_BIG_END)
27013 {
27014 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27015 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27016 }
27017 else
27018 {
27019 parts[i] = gen_rtx_REG (SImode, regno + i);
27020 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27021 }
27022 }
27023 else
27024 {
27025 nregs = GET_MODE_SIZE (mode) / 8;
27026 for (i = 0; i < nregs; i++)
27027 parts[i] = gen_rtx_REG (DImode, regno + i);
27028 }
27029
27030 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27031 }
27032
27033 #if ARM_UNWIND_INFO
27034 /* Emit unwind directives for a store-multiple instruction or stack pointer
27035 push during alignment.
27036 These should only ever be generated by the function prologue code, so
27037 expect them to have a particular form.
27038 The store-multiple instruction sometimes pushes pc as the last register,
27039 although it should not be tracked in the unwind information, or for -Os
27040 sometimes pushes some dummy registers before the first register that needs
27041 to be tracked in the unwind information; such dummy registers are there just
27042 to avoid a separate stack adjustment, and will not be restored in the
27043 epilogue. */
27044
27045 static void
27046 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27047 {
27048 int i;
27049 HOST_WIDE_INT offset;
27050 HOST_WIDE_INT nregs;
27051 int reg_size;
27052 unsigned reg;
27053 unsigned lastreg;
27054 unsigned padfirst = 0, padlast = 0;
27055 rtx e;
27056
27057 e = XVECEXP (p, 0, 0);
27058 gcc_assert (GET_CODE (e) == SET);
27059
27060 /* First insn will adjust the stack pointer. */
27061 gcc_assert (GET_CODE (e) == SET
27062 && REG_P (SET_DEST (e))
27063 && REGNO (SET_DEST (e)) == SP_REGNUM
27064 && GET_CODE (SET_SRC (e)) == PLUS);
27065
27066 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27067 nregs = XVECLEN (p, 0) - 1;
27068 gcc_assert (nregs);
27069
27070 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27071 if (reg < 16)
27072 {
27073 /* For -Os dummy registers can be pushed at the beginning to
27074 avoid separate stack pointer adjustment. */
27075 e = XVECEXP (p, 0, 1);
27076 e = XEXP (SET_DEST (e), 0);
27077 if (GET_CODE (e) == PLUS)
27078 padfirst = INTVAL (XEXP (e, 1));
27079 gcc_assert (padfirst == 0 || optimize_size);
27080 /* The function prologue may also push pc, but not annotate it as it is
27081 never restored. We turn this into a stack pointer adjustment. */
27082 e = XVECEXP (p, 0, nregs);
27083 e = XEXP (SET_DEST (e), 0);
27084 if (GET_CODE (e) == PLUS)
27085 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27086 else
27087 padlast = offset - 4;
27088 gcc_assert (padlast == 0 || padlast == 4);
27089 if (padlast == 4)
27090 fprintf (asm_out_file, "\t.pad #4\n");
27091 reg_size = 4;
27092 fprintf (asm_out_file, "\t.save {");
27093 }
27094 else if (IS_VFP_REGNUM (reg))
27095 {
27096 reg_size = 8;
27097 fprintf (asm_out_file, "\t.vsave {");
27098 }
27099 else
27100 /* Unknown register type. */
27101 gcc_unreachable ();
27102
27103 /* If the stack increment doesn't match the size of the saved registers,
27104 something has gone horribly wrong. */
27105 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27106
27107 offset = padfirst;
27108 lastreg = 0;
27109 /* The remaining insns will describe the stores. */
27110 for (i = 1; i <= nregs; i++)
27111 {
27112 /* Expect (set (mem <addr>) (reg)).
27113 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27114 e = XVECEXP (p, 0, i);
27115 gcc_assert (GET_CODE (e) == SET
27116 && MEM_P (SET_DEST (e))
27117 && REG_P (SET_SRC (e)));
27118
27119 reg = REGNO (SET_SRC (e));
27120 gcc_assert (reg >= lastreg);
27121
27122 if (i != 1)
27123 fprintf (asm_out_file, ", ");
27124 /* We can't use %r for vfp because we need to use the
27125 double precision register names. */
27126 if (IS_VFP_REGNUM (reg))
27127 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27128 else
27129 asm_fprintf (asm_out_file, "%r", reg);
27130
27131 if (flag_checking)
27132 {
27133 /* Check that the addresses are consecutive. */
27134 e = XEXP (SET_DEST (e), 0);
27135 if (GET_CODE (e) == PLUS)
27136 gcc_assert (REG_P (XEXP (e, 0))
27137 && REGNO (XEXP (e, 0)) == SP_REGNUM
27138 && CONST_INT_P (XEXP (e, 1))
27139 && offset == INTVAL (XEXP (e, 1)));
27140 else
27141 gcc_assert (i == 1
27142 && REG_P (e)
27143 && REGNO (e) == SP_REGNUM);
27144 offset += reg_size;
27145 }
27146 }
27147 fprintf (asm_out_file, "}\n");
27148 if (padfirst)
27149 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27150 }
27151
27152 /* Emit unwind directives for a SET. */
27153
27154 static void
27155 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27156 {
27157 rtx e0;
27158 rtx e1;
27159 unsigned reg;
27160
27161 e0 = XEXP (p, 0);
27162 e1 = XEXP (p, 1);
27163 switch (GET_CODE (e0))
27164 {
27165 case MEM:
27166 /* Pushing a single register. */
27167 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27168 || !REG_P (XEXP (XEXP (e0, 0), 0))
27169 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27170 abort ();
27171
27172 asm_fprintf (asm_out_file, "\t.save ");
27173 if (IS_VFP_REGNUM (REGNO (e1)))
27174 asm_fprintf(asm_out_file, "{d%d}\n",
27175 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27176 else
27177 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27178 break;
27179
27180 case REG:
27181 if (REGNO (e0) == SP_REGNUM)
27182 {
27183 /* A stack increment. */
27184 if (GET_CODE (e1) != PLUS
27185 || !REG_P (XEXP (e1, 0))
27186 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27187 || !CONST_INT_P (XEXP (e1, 1)))
27188 abort ();
27189
27190 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27191 -INTVAL (XEXP (e1, 1)));
27192 }
27193 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27194 {
27195 HOST_WIDE_INT offset;
27196
27197 if (GET_CODE (e1) == PLUS)
27198 {
27199 if (!REG_P (XEXP (e1, 0))
27200 || !CONST_INT_P (XEXP (e1, 1)))
27201 abort ();
27202 reg = REGNO (XEXP (e1, 0));
27203 offset = INTVAL (XEXP (e1, 1));
27204 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27205 HARD_FRAME_POINTER_REGNUM, reg,
27206 offset);
27207 }
27208 else if (REG_P (e1))
27209 {
27210 reg = REGNO (e1);
27211 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27212 HARD_FRAME_POINTER_REGNUM, reg);
27213 }
27214 else
27215 abort ();
27216 }
27217 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27218 {
27219 /* Move from sp to reg. */
27220 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27221 }
27222 else if (GET_CODE (e1) == PLUS
27223 && REG_P (XEXP (e1, 0))
27224 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27225 && CONST_INT_P (XEXP (e1, 1)))
27226 {
27227 /* Set reg to offset from sp. */
27228 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27229 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27230 }
27231 else
27232 abort ();
27233 break;
27234
27235 default:
27236 abort ();
27237 }
27238 }
27239
27240
27241 /* Emit unwind directives for the given insn. */
27242
27243 static void
27244 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27245 {
27246 rtx note, pat;
27247 bool handled_one = false;
27248
27249 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27250 return;
27251
27252 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27253 && (TREE_NOTHROW (current_function_decl)
27254 || crtl->all_throwers_are_sibcalls))
27255 return;
27256
27257 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27258 return;
27259
27260 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27261 {
27262 switch (REG_NOTE_KIND (note))
27263 {
27264 case REG_FRAME_RELATED_EXPR:
27265 pat = XEXP (note, 0);
27266 goto found;
27267
27268 case REG_CFA_REGISTER:
27269 pat = XEXP (note, 0);
27270 if (pat == NULL)
27271 {
27272 pat = PATTERN (insn);
27273 if (GET_CODE (pat) == PARALLEL)
27274 pat = XVECEXP (pat, 0, 0);
27275 }
27276
27277 /* Only emitted for IS_STACKALIGN re-alignment. */
27278 {
27279 rtx dest, src;
27280 unsigned reg;
27281
27282 src = SET_SRC (pat);
27283 dest = SET_DEST (pat);
27284
27285 gcc_assert (src == stack_pointer_rtx);
27286 reg = REGNO (dest);
27287 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27288 reg + 0x90, reg);
27289 }
27290 handled_one = true;
27291 break;
27292
27293 /* The INSN is generated in the epilogue. It is marked RTX_FRAME_RELATED_P
27294 to get correct DWARF information for shrink-wrapping. We should not
27295 emit unwind information for it because these notes are used either for
27296 pretend arguments or to adjust sp and restore registers from the
27297 stack. */
27298 case REG_CFA_DEF_CFA:
27299 case REG_CFA_ADJUST_CFA:
27300 case REG_CFA_RESTORE:
27301 return;
27302
27303 case REG_CFA_EXPRESSION:
27304 case REG_CFA_OFFSET:
27305 /* ??? Only handling here what we actually emit. */
27306 gcc_unreachable ();
27307
27308 default:
27309 break;
27310 }
27311 }
27312 if (handled_one)
27313 return;
27314 pat = PATTERN (insn);
27315 found:
27316
27317 switch (GET_CODE (pat))
27318 {
27319 case SET:
27320 arm_unwind_emit_set (asm_out_file, pat);
27321 break;
27322
27323 case SEQUENCE:
27324 /* Store multiple. */
27325 arm_unwind_emit_sequence (asm_out_file, pat);
27326 break;
27327
27328 default:
27329 abort();
27330 }
27331 }
27332
27333
27334 /* Output a reference from a function exception table to the type_info
27335 object X. The EABI specifies that the symbol should be relocated by
27336 an R_ARM_TARGET2 relocation. */
27337
27338 static bool
27339 arm_output_ttype (rtx x)
27340 {
27341 fputs ("\t.word\t", asm_out_file);
27342 output_addr_const (asm_out_file, x);
27343 /* Use special relocations for symbol references. */
27344 if (!CONST_INT_P (x))
27345 fputs ("(TARGET2)", asm_out_file);
27346 fputc ('\n', asm_out_file);
27347
27348 return TRUE;
27349 }
27350
27351 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27352
27353 static void
27354 arm_asm_emit_except_personality (rtx personality)
27355 {
27356 fputs ("\t.personality\t", asm_out_file);
27357 output_addr_const (asm_out_file, personality);
27358 fputc ('\n', asm_out_file);
27359 }
27360 #endif /* ARM_UNWIND_INFO */
27361
27362 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27363
27364 static void
27365 arm_asm_init_sections (void)
27366 {
27367 #if ARM_UNWIND_INFO
27368 exception_section = get_unnamed_section (0, output_section_asm_op,
27369 "\t.handlerdata");
27370 #endif /* ARM_UNWIND_INFO */
27371
27372 #ifdef OBJECT_FORMAT_ELF
27373 if (target_pure_code)
27374 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27375 #endif
27376 }
27377
27378 /* Output unwind directives for the start/end of a function. */
27379
27380 void
27381 arm_output_fn_unwind (FILE * f, bool prologue)
27382 {
27383 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27384 return;
27385
27386 if (prologue)
27387 fputs ("\t.fnstart\n", f);
27388 else
27389 {
27390 /* If this function will never be unwound, then mark it as such.
27391 The same condition is used in arm_unwind_emit to suppress
27392 the frame annotations. */
27393 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27394 && (TREE_NOTHROW (current_function_decl)
27395 || crtl->all_throwers_are_sibcalls))
27396 fputs("\t.cantunwind\n", f);
27397
27398 fputs ("\t.fnend\n", f);
27399 }
27400 }
27401
27402 static bool
27403 arm_emit_tls_decoration (FILE *fp, rtx x)
27404 {
27405 enum tls_reloc reloc;
27406 rtx val;
27407
27408 val = XVECEXP (x, 0, 0);
27409 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27410
27411 output_addr_const (fp, val);
27412
27413 switch (reloc)
27414 {
27415 case TLS_GD32:
27416 fputs ("(tlsgd)", fp);
27417 break;
27418 case TLS_LDM32:
27419 fputs ("(tlsldm)", fp);
27420 break;
27421 case TLS_LDO32:
27422 fputs ("(tlsldo)", fp);
27423 break;
27424 case TLS_IE32:
27425 fputs ("(gottpoff)", fp);
27426 break;
27427 case TLS_LE32:
27428 fputs ("(tpoff)", fp);
27429 break;
27430 case TLS_DESCSEQ:
27431 fputs ("(tlsdesc)", fp);
27432 break;
27433 default:
27434 gcc_unreachable ();
27435 }
27436
27437 switch (reloc)
27438 {
27439 case TLS_GD32:
27440 case TLS_LDM32:
27441 case TLS_IE32:
27442 case TLS_DESCSEQ:
27443 fputs (" + (. - ", fp);
27444 output_addr_const (fp, XVECEXP (x, 0, 2));
27445 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27446 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27447 output_addr_const (fp, XVECEXP (x, 0, 3));
27448 fputc (')', fp);
27449 break;
27450 default:
27451 break;
27452 }
27453
27454 return TRUE;
27455 }
27456
27457 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27458
27459 static void
27460 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27461 {
27462 gcc_assert (size == 4);
27463 fputs ("\t.word\t", file);
27464 output_addr_const (file, x);
27465 fputs ("(tlsldo)", file);
27466 }
27467
27468 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27469
27470 static bool
27471 arm_output_addr_const_extra (FILE *fp, rtx x)
27472 {
27473 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27474 return arm_emit_tls_decoration (fp, x);
27475 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27476 {
27477 char label[256];
27478 int labelno = INTVAL (XVECEXP (x, 0, 0));
27479
27480 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27481 assemble_name_raw (fp, label);
27482
27483 return TRUE;
27484 }
27485 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27486 {
27487 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27488 if (GOT_PCREL)
27489 fputs ("+.", fp);
27490 fputs ("-(", fp);
27491 output_addr_const (fp, XVECEXP (x, 0, 0));
27492 fputc (')', fp);
27493 return TRUE;
27494 }
27495 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27496 {
27497 output_addr_const (fp, XVECEXP (x, 0, 0));
27498 if (GOT_PCREL)
27499 fputs ("+.", fp);
27500 fputs ("-(", fp);
27501 output_addr_const (fp, XVECEXP (x, 0, 1));
27502 fputc (')', fp);
27503 return TRUE;
27504 }
27505 else if (GET_CODE (x) == CONST_VECTOR)
27506 return arm_emit_vector_const (fp, x);
27507
27508 return FALSE;
27509 }
27510
27511 /* Output assembly for a shift instruction.
27512 SET_FLAGS determines how the instruction modifies the condition codes.
27513 0 - Do not set condition codes.
27514 1 - Set condition codes.
27515 2 - Use smallest instruction. */
27516 const char *
27517 arm_output_shift(rtx * operands, int set_flags)
27518 {
27519 char pattern[100];
27520 static const char flag_chars[3] = {'?', '.', '!'};
27521 const char *shift;
27522 HOST_WIDE_INT val;
27523 char c;
27524
27525 c = flag_chars[set_flags];
27526 shift = shift_op(operands[3], &val);
27527 if (shift)
27528 {
27529 if (val != -1)
27530 operands[2] = GEN_INT(val);
27531 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27532 }
27533 else
27534 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27535
27536 output_asm_insn (pattern, operands);
27537 return "";
27538 }
27539
27540 /* Output assembly for a WMMX immediate shift instruction. */
27541 const char *
27542 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27543 {
27544 int shift = INTVAL (operands[2]);
27545 char templ[50];
27546 machine_mode opmode = GET_MODE (operands[0]);
27547
27548 gcc_assert (shift >= 0);
27549
27550 /* Handle the case where the shift value exceeds 63 (for the D qualifier),
27551 31 (for the W qualifier) or 15 (for the H qualifier). */
27552 if (((opmode == V4HImode) && (shift > 15))
27553 || ((opmode == V2SImode) && (shift > 31))
27554 || ((opmode == DImode) && (shift > 63)))
27555 {
27556 if (wror_or_wsra)
27557 {
27558 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27559 output_asm_insn (templ, operands);
27560 if (opmode == DImode)
27561 {
27562 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27563 output_asm_insn (templ, operands);
27564 }
27565 }
27566 else
27567 {
27568 /* The destination register will contain all zeros. */
27569 sprintf (templ, "wzero\t%%0");
27570 output_asm_insn (templ, operands);
27571 }
27572 return "";
27573 }
27574
27575 if ((opmode == DImode) && (shift > 32))
27576 {
27577 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27578 output_asm_insn (templ, operands);
27579 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27580 output_asm_insn (templ, operands);
27581 }
27582 else
27583 {
27584 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27585 output_asm_insn (templ, operands);
27586 }
27587 return "";
27588 }
27589
27590 /* Output assembly for a WMMX tinsr instruction. */
27591 const char *
27592 arm_output_iwmmxt_tinsr (rtx *operands)
27593 {
27594 int mask = INTVAL (operands[3]);
27595 int i;
27596 char templ[50];
27597 int units = mode_nunits[GET_MODE (operands[0])];
27598 gcc_assert ((mask & (mask - 1)) == 0);
27599 for (i = 0; i < units; ++i)
27600 {
27601 if ((mask & 0x01) == 1)
27602 {
27603 break;
27604 }
27605 mask >>= 1;
27606 }
27607 gcc_assert (i < units);
27608 {
27609 switch (GET_MODE (operands[0]))
27610 {
27611 case V8QImode:
27612 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27613 break;
27614 case V4HImode:
27615 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27616 break;
27617 case V2SImode:
27618 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27619 break;
27620 default:
27621 gcc_unreachable ();
27622 break;
27623 }
27624 output_asm_insn (templ, operands);
27625 }
27626 return "";
27627 }
27628
27629 /* Output a Thumb-1 casesi dispatch sequence. */
27630 const char *
27631 thumb1_output_casesi (rtx *operands)
27632 {
27633 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27634
27635 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27636
27637 switch (GET_MODE(diff_vec))
27638 {
27639 case QImode:
27640 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27641 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27642 case HImode:
27643 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27644 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27645 case SImode:
27646 return "bl\t%___gnu_thumb1_case_si";
27647 default:
27648 gcc_unreachable ();
27649 }
27650 }
27651
27652 /* Output a Thumb-2 casesi instruction. */
27653 const char *
27654 thumb2_output_casesi (rtx *operands)
27655 {
27656 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27657
27658 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27659
27660 output_asm_insn ("cmp\t%0, %1", operands);
27661 output_asm_insn ("bhi\t%l3", operands);
27662 switch (GET_MODE(diff_vec))
27663 {
27664 case QImode:
27665 return "tbb\t[%|pc, %0]";
27666 case HImode:
27667 return "tbh\t[%|pc, %0, lsl #1]";
27668 case SImode:
27669 if (flag_pic)
27670 {
27671 output_asm_insn ("adr\t%4, %l2", operands);
27672 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27673 output_asm_insn ("add\t%4, %4, %5", operands);
27674 return "bx\t%4";
27675 }
27676 else
27677 {
27678 output_asm_insn ("adr\t%4, %l2", operands);
27679 return "ldr\t%|pc, [%4, %0, lsl #2]";
27680 }
27681 default:
27682 gcc_unreachable ();
27683 }
27684 }
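
/* Illustrative sketch, not the literal compiler output: for a QImode
   dispatch table the routine above emits a sequence along the lines of

       cmp   r0, r1         @ index against the number of cases
       bhi   .Ldefault      @ out of range, take the default label
       tbb   [pc, r0]       @ byte-offset table branch

   while the SImode, non-PIC case loads the jump target straight into the
   pc with an adr/ldr pair.  The register names here stand in for the
   operands of the pattern.  */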
27685
27686 /* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
27687 per-core tuning structs. */
27688 static int
27689 arm_issue_rate (void)
27690 {
27691 return current_tune->issue_rate;
27692 }
27693
27694 /* Return how many instructions the scheduler should look ahead to choose the
27695 best one. */
27696 static int
27697 arm_first_cycle_multipass_dfa_lookahead (void)
27698 {
27699 int issue_rate = arm_issue_rate ();
27700
27701 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27702 }
27703
27704 /* Enable modeling of L2 auto-prefetcher. */
27705 static int
27706 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27707 {
27708 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27709 }
27710
27711 const char *
27712 arm_mangle_type (const_tree type)
27713 {
27714 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27715 has to be mangled as if it is in the "std" namespace. */
27716 if (TARGET_AAPCS_BASED
27717 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27718 return "St9__va_list";
27719
27720 /* Half-precision float. */
27721 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27722 return "Dh";
27723
27724 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27725 builtin type. */
27726 if (TYPE_NAME (type) != NULL)
27727 return arm_mangle_builtin_type (type);
27728
27729 /* Use the default mangling. */
27730 return NULL;
27731 }
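
/* Example, illustrative only: on an AAPCS-based target a declaration such
   as

       void f (__builtin_va_list);

   is mangled as if va_list lived in namespace std, giving
   _Z1fSt9__va_list, and the half-precision floating-point type mangles
   to "Dh" as returned above.  */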
27732
27733 /* Order of allocation of core registers for Thumb: this allocation is
27734 written over the corresponding initial entries of the array
27735 initialized with REG_ALLOC_ORDER. We allocate all low registers
27736 first. Saving and restoring a low register is usually cheaper than
27737 using a call-clobbered high register. */
27738
27739 static const int thumb_core_reg_alloc_order[] =
27740 {
27741 3, 2, 1, 0, 4, 5, 6, 7,
27742 12, 14, 8, 9, 10, 11
27743 };
27744
27745 /* Adjust register allocation order when compiling for Thumb. */
27746
27747 void
27748 arm_order_regs_for_local_alloc (void)
27749 {
27750 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27751 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27752 if (TARGET_THUMB)
27753 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27754 sizeof (thumb_core_reg_alloc_order));
27755 }
27756
27757 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27758
27759 bool
27760 arm_frame_pointer_required (void)
27761 {
27762 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27763 return true;
27764
27765 /* If the function receives nonlocal gotos, it needs to save the frame
27766 pointer in the nonlocal_goto_save_area object. */
27767 if (cfun->has_nonlocal_label)
27768 return true;
27769
27770 /* The frame pointer is required for non-leaf APCS frames. */
27771 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27772 return true;
27773
27774 /* If we are probing the stack in the prologue, we will have a faulting
27775 instruction prior to the stack adjustment and this requires a frame
27776 pointer if we want to catch the exception using the EABI unwinder. */
27777 if (!IS_INTERRUPT (arm_current_func_type ())
27778 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27779 && arm_except_unwind_info (&global_options) == UI_TARGET
27780 && cfun->can_throw_non_call_exceptions)
27781 {
27782 HOST_WIDE_INT size = get_frame_size ();
27783
27784 /* That's irrelevant if there is no stack adjustment. */
27785 if (size <= 0)
27786 return false;
27787
27788 /* That's relevant only if there is a stack probe. */
27789 if (crtl->is_leaf && !cfun->calls_alloca)
27790 {
27791 /* We don't have the final size of the frame so adjust. */
27792 size += 32 * UNITS_PER_WORD;
27793 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27794 return true;
27795 }
27796 else
27797 return true;
27798 }
27799
27800 return false;
27801 }
27802
27803 /* Thumb-1 is the only target that cannot support conditional execution,
27804 so return true if the target is not Thumb-1. */
27805 static bool
27806 arm_have_conditional_execution (void)
27807 {
27808 return !TARGET_THUMB1;
27809 }
27810
27811 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27812 static HOST_WIDE_INT
27813 arm_vector_alignment (const_tree type)
27814 {
27815 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27816
27817 if (TARGET_AAPCS_BASED)
27818 align = MIN (align, 64);
27819
27820 return align;
27821 }
27822
27823 static unsigned int
27824 arm_autovectorize_vector_sizes (void)
27825 {
27826 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27827 }
27828
27829 static bool
27830 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27831 {
27832 /* Vectors which aren't in packed structures will not be less aligned than
27833 the natural alignment of their element type, so this is safe. */
27834 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27835 return !is_packed;
27836
27837 return default_builtin_vector_alignment_reachable (type, is_packed);
27838 }
27839
27840 static bool
27841 arm_builtin_support_vector_misalignment (machine_mode mode,
27842 const_tree type, int misalignment,
27843 bool is_packed)
27844 {
27845 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27846 {
27847 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27848
27849 if (is_packed)
27850 return align == 1;
27851
27852 /* If the misalignment is unknown, we should be able to handle the access
27853 so long as it is not to a member of a packed data structure. */
27854 if (misalignment == -1)
27855 return true;
27856
27857 /* Return true if the misalignment is a multiple of the natural alignment
27858 of the vector's element type. This is probably always going to be
27859 true in practice, since we've already established that this isn't a
27860 packed access. */
27861 return ((misalignment % align) == 0);
27862 }
27863
27864 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27865 is_packed);
27866 }
27867
27868 static void
27869 arm_conditional_register_usage (void)
27870 {
27871 int regno;
27872
27873 if (TARGET_THUMB1 && optimize_size)
27874 {
27875 /* When optimizing for size on Thumb-1, it's better not
27876 to use the HI regs, because of the overhead of
27877 stacking them. */
27878 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27879 fixed_regs[regno] = call_used_regs[regno] = 1;
27880 }
27881
27882 /* The link register can be clobbered by any branch insn,
27883 but we have no way to track that at present, so mark
27884 it as unavailable. */
27885 if (TARGET_THUMB1)
27886 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27887
27888 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27889 {
27890 /* VFPv3 registers are disabled when earlier VFP
27891 versions are selected due to the definition of
27892 LAST_VFP_REGNUM. */
27893 for (regno = FIRST_VFP_REGNUM;
27894 regno <= LAST_VFP_REGNUM; ++ regno)
27895 {
27896 fixed_regs[regno] = 0;
27897 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27898 || regno >= FIRST_VFP_REGNUM + 32;
27899 }
27900 }
27901
27902 if (TARGET_REALLY_IWMMXT)
27903 {
27904 regno = FIRST_IWMMXT_GR_REGNUM;
27905 /* The 2002/10/09 revision of the XScale ABI has wCG0
27906 and wCG1 as call-preserved registers. The 2002/11/21
27907 revision changed this so that all wCG registers are
27908 scratch registers. */
27909 for (regno = FIRST_IWMMXT_GR_REGNUM;
27910 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27911 fixed_regs[regno] = 0;
27912 /* The XScale ABI has wR0 - wR9 as scratch registers,
27913 the rest as call-preserved registers. */
27914 for (regno = FIRST_IWMMXT_REGNUM;
27915 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27916 {
27917 fixed_regs[regno] = 0;
27918 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27919 }
27920 }
27921
27922 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27923 {
27924 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27925 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27926 }
27927 else if (TARGET_APCS_STACK)
27928 {
27929 fixed_regs[10] = 1;
27930 call_used_regs[10] = 1;
27931 }
27932 /* -mcaller-super-interworking reserves r11 for calls to
27933 _interwork_r11_call_via_rN(). Making the register global
27934 is an easy way of ensuring that it remains valid for all
27935 calls. */
27936 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27937 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27938 {
27939 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27940 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27941 if (TARGET_CALLER_INTERWORKING)
27942 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27943 }
27944 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27945 }
27946
27947 static reg_class_t
27948 arm_preferred_rename_class (reg_class_t rclass)
27949 {
27950 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27951 using GENERAL_REGS. During the register rename pass we prefer LO_REGS
27952 so that code size can be reduced. */
27953 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27954 return LO_REGS;
27955 else
27956 return NO_REGS;
27957 }
27958
27959 /* Compute the attribute "length" of insn "*push_multi".
27960 This function MUST therefore be kept in sync with that insn pattern. */
27961 int
27962 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27963 {
27964 int i, regno, hi_reg;
27965 int num_saves = XVECLEN (parallel_op, 0);
27966
27967 /* ARM mode. */
27968 if (TARGET_ARM)
27969 return 4;
27970 /* Thumb1 mode. */
27971 if (TARGET_THUMB1)
27972 return 2;
27973
27974 /* Thumb2 mode. */
27975 regno = REGNO (first_op);
27976 /* For PUSH/STM under Thumb-2, we can use a 16-bit encoding if the register
27977 list fits in 8 bits. Normally this means all registers in the list must be
27978 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use the
27979 32-bit encoding. The one exception is PUSH, where LR (a HI_REG) can still
27980 be used with the 16-bit encoding. */
27981 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27982 for (i = 1; i < num_saves && !hi_reg; i++)
27983 {
27984 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27985 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27986 }
27987
27988 if (!hi_reg)
27989 return 2;
27990 return 4;
27991 }
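
/* Illustrative examples of the rule above, assuming Thumb-2 (ARM mode is
   always 4 bytes and Thumb-1 always 2):

       push {r0-r7}       -> 2 bytes, all LO_REGS
       push {r4-r7, lr}   -> 2 bytes, LR is the permitted high register
       push {r4, r8}      -> 4 bytes, r8 is a high register other than LR  */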
27992
27993 /* Compute the attribute "length" of insn. Currently, this function is used
27994 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27995 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27996 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
27997 true if OPERANDS contains an insn which explicitly updates the base register. */
27998
27999 int
28000 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28001 {
28002 /* ARM mode. */
28003 if (TARGET_ARM)
28004 return 4;
28005 /* Thumb1 mode. */
28006 if (TARGET_THUMB1)
28007 return 2;
28008
28009 rtx parallel_op = operands[0];
28010 /* Initialize to elements number of PARALLEL. */
28011 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28012 /* Initialize the value to base register. */
28013 unsigned regno = REGNO (operands[1]);
28014 /* Skip return and write back pattern.
28015 We only need register pop pattern for later analysis. */
28016 unsigned first_indx = 0;
28017 first_indx += return_pc ? 1 : 0;
28018 first_indx += write_back_p ? 1 : 0;
28019
28020 /* A pop operation can be done through LDM or POP. If the base register is SP
28021 and write-back is used, then LDM is an alias of POP. */
28022 bool pop_p = (regno == SP_REGNUM && write_back_p);
28023 bool ldm_p = !pop_p;
28024
28025 /* Check base register for LDM. */
28026 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28027 return 4;
28028
28029 /* Check each register in the list. */
28030 for (; indx >= first_indx; indx--)
28031 {
28032 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28033 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28034 comment in arm_attr_length_push_multi. */
28035 if (REGNO_REG_CLASS (regno) == HI_REGS
28036 && (regno != PC_REGNUM || ldm_p))
28037 return 4;
28038 }
28039
28040 return 2;
28041 }
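
/* Illustrative examples for the length computation above, assuming
   Thumb-2:

       pop {r4-r7, pc}       -> 2 bytes, PC is allowed in the 16-bit POP
       ldm r8!, {r0-r3}      -> 4 bytes, a high base register forces 32 bits
       ldm r0, {r4-r7, pc}   -> 4 bytes, PC only gets 16 bits with POP  */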
28042
28043 /* Compute the number of instructions emitted by output_move_double. */
28044 int
28045 arm_count_output_move_double_insns (rtx *operands)
28046 {
28047 int count;
28048 rtx ops[2];
28049 /* output_move_double may modify the operands array, so call it
28050 here on a copy of the array. */
28051 ops[0] = operands[0];
28052 ops[1] = operands[1];
28053 output_move_double (ops, false, &count);
28054 return count;
28055 }
28056
28057 int
28058 vfp3_const_double_for_fract_bits (rtx operand)
28059 {
28060 REAL_VALUE_TYPE r0;
28061
28062 if (!CONST_DOUBLE_P (operand))
28063 return 0;
28064
28065 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28066 if (exact_real_inverse (DFmode, &r0)
28067 && !REAL_VALUE_NEGATIVE (r0))
28068 {
28069 if (exact_real_truncate (DFmode, &r0))
28070 {
28071 HOST_WIDE_INT value = real_to_integer (&r0);
28072 value = value & 0xffffffff;
28073 if ((value != 0) && ( (value & (value - 1)) == 0))
28074 {
28075 int ret = exact_log2 (value);
28076 gcc_assert (IN_RANGE (ret, 0, 31));
28077 return ret;
28078 }
28079 }
28080 }
28081 return 0;
28082 }
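
/* Worked example, illustrative only: a constant of 0.125 has the exact
   reciprocal 8.0 = 2^3, so the function above returns 3, the number of
   fractional bits used by the fixed-point conversion patterns; any value
   whose reciprocal is not an exact power of two, or which is negative,
   yields 0.  */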
28083
28084 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28085 log2 is in [1, 32], return that log2. Otherwise return -1.
28086 This is used in the patterns for vcvt.s32.f32 floating-point to
28087 fixed-point conversions. */
28088
28089 int
28090 vfp3_const_double_for_bits (rtx x)
28091 {
28092 const REAL_VALUE_TYPE *r;
28093
28094 if (!CONST_DOUBLE_P (x))
28095 return -1;
28096
28097 r = CONST_DOUBLE_REAL_VALUE (x);
28098
28099 if (REAL_VALUE_NEGATIVE (*r)
28100 || REAL_VALUE_ISNAN (*r)
28101 || REAL_VALUE_ISINF (*r)
28102 || !real_isinteger (r, SFmode))
28103 return -1;
28104
28105 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28106
28107 /* The exact_log2 above will have returned -1 if this is
28108 not an exact log2. */
28109 if (!IN_RANGE (hwint, 1, 32))
28110 return -1;
28111
28112 return hwint;
28113 }
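
/* Worked example, illustrative only: for the constant 8.0 the integer
   value is 8 and exact_log2 gives 3, which lies in [1, 32], so 3 is
   returned; 1.0 gives a log2 of 0 and is rejected, as are negative
   values, NaNs, infinities and non-integers.  */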
28114
28115 \f
28116 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28117
28118 static void
28119 arm_pre_atomic_barrier (enum memmodel model)
28120 {
28121 if (need_atomic_barrier_p (model, true))
28122 emit_insn (gen_memory_barrier ());
28123 }
28124
28125 static void
28126 arm_post_atomic_barrier (enum memmodel model)
28127 {
28128 if (need_atomic_barrier_p (model, false))
28129 emit_insn (gen_memory_barrier ());
28130 }
28131
28132 /* Emit the load-exclusive and store-exclusive instructions.
28133 Use acquire and release versions if necessary. */
28134
28135 static void
28136 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28137 {
28138 rtx (*gen) (rtx, rtx);
28139
28140 if (acq)
28141 {
28142 switch (mode)
28143 {
28144 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28145 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28146 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28147 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28148 default:
28149 gcc_unreachable ();
28150 }
28151 }
28152 else
28153 {
28154 switch (mode)
28155 {
28156 case QImode: gen = gen_arm_load_exclusiveqi; break;
28157 case HImode: gen = gen_arm_load_exclusivehi; break;
28158 case SImode: gen = gen_arm_load_exclusivesi; break;
28159 case DImode: gen = gen_arm_load_exclusivedi; break;
28160 default:
28161 gcc_unreachable ();
28162 }
28163 }
28164
28165 emit_insn (gen (rval, mem));
28166 }
28167
28168 static void
28169 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28170 rtx mem, bool rel)
28171 {
28172 rtx (*gen) (rtx, rtx, rtx);
28173
28174 if (rel)
28175 {
28176 switch (mode)
28177 {
28178 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28179 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28180 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28181 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28182 default:
28183 gcc_unreachable ();
28184 }
28185 }
28186 else
28187 {
28188 switch (mode)
28189 {
28190 case QImode: gen = gen_arm_store_exclusiveqi; break;
28191 case HImode: gen = gen_arm_store_exclusivehi; break;
28192 case SImode: gen = gen_arm_store_exclusivesi; break;
28193 case DImode: gen = gen_arm_store_exclusivedi; break;
28194 default:
28195 gcc_unreachable ();
28196 }
28197 }
28198
28199 emit_insn (gen (bval, rval, mem));
28200 }
28201
28202 /* Emit the jump instruction INSN and mark it as very unlikely to be taken. */
28203
28204 static void
28205 emit_unlikely_jump (rtx insn)
28206 {
28207 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28208
28209 rtx_insn *jump = emit_jump_insn (insn);
28210 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
28211 }
28212
28213 /* Expand a compare and swap pattern. */
28214
28215 void
28216 arm_expand_compare_and_swap (rtx operands[])
28217 {
28218 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28219 machine_mode mode;
28220 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28221
28222 bval = operands[0];
28223 rval = operands[1];
28224 mem = operands[2];
28225 oldval = operands[3];
28226 newval = operands[4];
28227 is_weak = operands[5];
28228 mod_s = operands[6];
28229 mod_f = operands[7];
28230 mode = GET_MODE (mem);
28231
28232 /* Normally the succ memory model must be stronger than fail, but in the
28233 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28234 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28235
28236 if (TARGET_HAVE_LDACQ
28237 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28238 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28239 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28240
28241 switch (mode)
28242 {
28243 case QImode:
28244 case HImode:
28245 /* For narrow modes, we're going to perform the comparison in SImode,
28246 so do the zero-extension now. */
28247 rval = gen_reg_rtx (SImode);
28248 oldval = convert_modes (SImode, mode, oldval, true);
28249 /* FALLTHRU */
28250
28251 case SImode:
28252 /* Force the value into a register if needed. We waited until after
28253 the zero-extension above to do this properly. */
28254 if (!arm_add_operand (oldval, SImode))
28255 oldval = force_reg (SImode, oldval);
28256 break;
28257
28258 case DImode:
28259 if (!cmpdi_operand (oldval, mode))
28260 oldval = force_reg (mode, oldval);
28261 break;
28262
28263 default:
28264 gcc_unreachable ();
28265 }
28266
28267 if (TARGET_THUMB1)
28268 {
28269 switch (mode)
28270 {
28271 case QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28272 case HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28273 case SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28274 case DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28275 default:
28276 gcc_unreachable ();
28277 }
28278 }
28279 else
28280 {
28281 switch (mode)
28282 {
28283 case QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28284 case HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28285 case SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28286 case DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28287 default:
28288 gcc_unreachable ();
28289 }
28290 }
28291
28292 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28293 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28294
28295 if (mode == QImode || mode == HImode)
28296 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28297
28298 /* In all cases, we arrange for success to be signaled by Z set.
28299 This arrangement allows for the boolean result to be used directly
28300 in a subsequent branch, post optimization. For Thumb-1 targets, the
28301 boolean negation of the result is also stored in bval because the Thumb-1
28302 backend lacks dependency tracking for the CC flag, since flag setting is
28303 not represented at the RTL level. */
28304 if (TARGET_THUMB1)
28305 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28306 else
28307 {
28308 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28309 emit_insn (gen_rtx_SET (bval, x));
28310 }
28311 }
28312
28313 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28314 another memory store between the load-exclusive and store-exclusive can
28315 reset the monitor from Exclusive to Open state. This means we must wait
28316 until after reload to split the pattern, lest we get a register spill in
28317 the middle of the atomic sequence. Success of the compare and swap is
28318 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28319 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28320 the atomic_compare_and_swapmode standard pattern in operand 0). */
28321
28322 void
28323 arm_split_compare_and_swap (rtx operands[])
28324 {
28325 rtx rval, mem, oldval, newval, neg_bval;
28326 machine_mode mode;
28327 enum memmodel mod_s, mod_f;
28328 bool is_weak;
28329 rtx_code_label *label1, *label2;
28330 rtx x, cond;
28331
28332 rval = operands[1];
28333 mem = operands[2];
28334 oldval = operands[3];
28335 newval = operands[4];
28336 is_weak = (operands[5] != const0_rtx);
28337 mod_s = memmodel_from_int (INTVAL (operands[6]));
28338 mod_f = memmodel_from_int (INTVAL (operands[7]));
28339 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28340 mode = GET_MODE (mem);
28341
28342 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28343
28344 bool use_acquire = TARGET_HAVE_LDACQ
28345 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28346 || is_mm_release (mod_s));
28347
28348 bool use_release = TARGET_HAVE_LDACQ
28349 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28350 || is_mm_acquire (mod_s));
28351
28352 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28353 a full barrier is emitted after the store-release. */
28354 if (is_armv8_sync)
28355 use_acquire = false;
28356
28357 /* Checks whether a barrier is needed and emits one accordingly. */
28358 if (!(use_acquire || use_release))
28359 arm_pre_atomic_barrier (mod_s);
28360
28361 label1 = NULL;
28362 if (!is_weak)
28363 {
28364 label1 = gen_label_rtx ();
28365 emit_label (label1);
28366 }
28367 label2 = gen_label_rtx ();
28368
28369 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28370
28371 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28372 as required to communicate with arm_expand_compare_and_swap. */
28373 if (TARGET_32BIT)
28374 {
28375 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28376 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28377 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28378 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28379 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28380 }
28381 else
28382 {
28383 emit_move_insn (neg_bval, const1_rtx);
28384 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28385 if (thumb1_cmpneg_operand (oldval, SImode))
28386 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28387 label2, cond));
28388 else
28389 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28390 }
28391
28392 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28393
28394 /* Weak or strong, we want EQ to be true for success, so that we
28395 match the flags that we got from the compare above. */
28396 if (TARGET_32BIT)
28397 {
28398 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28399 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28400 emit_insn (gen_rtx_SET (cond, x));
28401 }
28402
28403 if (!is_weak)
28404 {
28405 /* Z is set to boolean value of !neg_bval, as required to communicate
28406 with arm_expand_compare_and_swap. */
28407 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28408 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28409 }
28410
28411 if (!is_mm_relaxed (mod_f))
28412 emit_label (label2);
28413
28414 /* Checks whether a barrier is needed and emits one accordingly. */
28415 if (is_armv8_sync
28416 || !(use_acquire || use_release))
28417 arm_post_atomic_barrier (mod_s);
28418
28419 if (is_mm_relaxed (mod_f))
28420 emit_label (label2);
28421 }
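
/* Illustrative sketch, not the literal output of the splitter above: for
   a strong SImode compare-and-swap with no acquire/release instructions,
   the generated sequence is conceptually

     .Lretry:
       ldrex   rval, [mem]
       cmp     rval, oldval
       bne     .Ldone                @ unlikely: comparison failed
       strex   neg_bval, newval, [mem]
       cmp     neg_bval, #0
       bne     .Lretry               @ unlikely: monitor cleared, retry
     .Ldone:

   with barriers added before and/or after according to the memory model,
   as handled by arm_pre_atomic_barrier and arm_post_atomic_barrier.  */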
28422
28423 /* Split an atomic operation pattern. Operation is given by CODE and is one
28424 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28425 operation). Operation is performed on the content at MEM and on VALUE
28426 following the memory model MODEL_RTX. The content at MEM before and after
28427 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28428 success of the operation is returned in COND. Using a scratch register or
28429 an operand register for these determines what result is returned for that
28430 pattern. */
28431
28432 void
28433 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28434 rtx value, rtx model_rtx, rtx cond)
28435 {
28436 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28437 machine_mode mode = GET_MODE (mem);
28438 machine_mode wmode = (mode == DImode ? DImode : SImode);
28439 rtx_code_label *label;
28440 bool all_low_regs, bind_old_new;
28441 rtx x;
28442
28443 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28444
28445 bool use_acquire = TARGET_HAVE_LDACQ
28446 && !(is_mm_relaxed (model) || is_mm_consume (model)
28447 || is_mm_release (model));
28448
28449 bool use_release = TARGET_HAVE_LDACQ
28450 && !(is_mm_relaxed (model) || is_mm_consume (model)
28451 || is_mm_acquire (model));
28452
28453 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28454 a full barrier is emitted after the store-release. */
28455 if (is_armv8_sync)
28456 use_acquire = false;
28457
28458 /* Checks whether a barrier is needed and emits one accordingly. */
28459 if (!(use_acquire || use_release))
28460 arm_pre_atomic_barrier (model);
28461
28462 label = gen_label_rtx ();
28463 emit_label (label);
28464
28465 if (new_out)
28466 new_out = gen_lowpart (wmode, new_out);
28467 if (old_out)
28468 old_out = gen_lowpart (wmode, old_out);
28469 else
28470 old_out = new_out;
28471 value = simplify_gen_subreg (wmode, value, mode, 0);
28472
28473 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28474
28475 /* Does the operation require the destination and the first operand to use
28476 the same register? This is decided by the register constraints of the
28477 relevant insn patterns in thumb1.md. */
28478 gcc_assert (!new_out || REG_P (new_out));
28479 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28480 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28481 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28482 bind_old_new =
28483 (TARGET_THUMB1
28484 && code != SET
28485 && code != MINUS
28486 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28487
28488 /* We want to return the old value while putting the result of the operation
28489 in the same register as the old value, so copy the old value over to the
28490 destination register and use that register for the operation. */
28491 if (old_out && bind_old_new)
28492 {
28493 emit_move_insn (new_out, old_out);
28494 old_out = new_out;
28495 }
28496
28497 switch (code)
28498 {
28499 case SET:
28500 new_out = value;
28501 break;
28502
28503 case NOT:
28504 x = gen_rtx_AND (wmode, old_out, value);
28505 emit_insn (gen_rtx_SET (new_out, x));
28506 x = gen_rtx_NOT (wmode, new_out);
28507 emit_insn (gen_rtx_SET (new_out, x));
28508 break;
28509
28510 case MINUS:
28511 if (CONST_INT_P (value))
28512 {
28513 value = GEN_INT (-INTVAL (value));
28514 code = PLUS;
28515 }
28516 /* FALLTHRU */
28517
28518 case PLUS:
28519 if (mode == DImode)
28520 {
28521 /* DImode plus/minus need to clobber flags. */
28522 /* The adddi3 and subdi3 patterns are incorrectly written so that
28523 they require matching operands, even when we could easily support
28524 three operands. Thankfully, this can be fixed up post-splitting,
28525 as the individual add+adc patterns do accept three operands and
28526 post-reload cprop can make these moves go away. */
28527 emit_move_insn (new_out, old_out);
28528 if (code == PLUS)
28529 x = gen_adddi3 (new_out, new_out, value);
28530 else
28531 x = gen_subdi3 (new_out, new_out, value);
28532 emit_insn (x);
28533 break;
28534 }
28535 /* FALLTHRU */
28536
28537 default:
28538 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28539 emit_insn (gen_rtx_SET (new_out, x));
28540 break;
28541 }
28542
28543 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28544 use_release);
28545
28546 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28547 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28548
28549 /* Checks whether a barrier is needed and emits one accordingly. */
28550 if (is_armv8_sync
28551 || !(use_acquire || use_release))
28552 arm_post_atomic_barrier (model);
28553 }
28554 \f
28555 #define MAX_VECT_LEN 16
28556
28557 struct expand_vec_perm_d
28558 {
28559 rtx target, op0, op1;
28560 unsigned char perm[MAX_VECT_LEN];
28561 machine_mode vmode;
28562 unsigned char nelt;
28563 bool one_vector_p;
28564 bool testing_p;
28565 };
28566
28567 /* Generate a variable permutation. */
28568
28569 static void
28570 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28571 {
28572 machine_mode vmode = GET_MODE (target);
28573 bool one_vector_p = rtx_equal_p (op0, op1);
28574
28575 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28576 gcc_checking_assert (GET_MODE (op0) == vmode);
28577 gcc_checking_assert (GET_MODE (op1) == vmode);
28578 gcc_checking_assert (GET_MODE (sel) == vmode);
28579 gcc_checking_assert (TARGET_NEON);
28580
28581 if (one_vector_p)
28582 {
28583 if (vmode == V8QImode)
28584 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28585 else
28586 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28587 }
28588 else
28589 {
28590 rtx pair;
28591
28592 if (vmode == V8QImode)
28593 {
28594 pair = gen_reg_rtx (V16QImode);
28595 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28596 pair = gen_lowpart (TImode, pair);
28597 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28598 }
28599 else
28600 {
28601 pair = gen_reg_rtx (OImode);
28602 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28603 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28604 }
28605 }
28606 }
28607
28608 void
28609 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28610 {
28611 machine_mode vmode = GET_MODE (target);
28612 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28613 bool one_vector_p = rtx_equal_p (op0, op1);
28614 rtx rmask[MAX_VECT_LEN], mask;
28615
28616 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28617 numbering of elements for big-endian, we must reverse the order. */
28618 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28619
28620 /* The VTBL instruction does not use a modulo index, so we must take care
28621 of that ourselves. */
28622 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28623 for (i = 0; i < nelt; ++i)
28624 rmask[i] = mask;
28625 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28626 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28627
28628 arm_expand_vec_perm_1 (target, op0, op1, sel);
28629 }
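
/* Worked example, illustrative only: for a two-operand V8QImode
   permutation nelt is 8, so the mask is 2 * 8 - 1 = 15 and a selector
   element of 19 is reduced to 19 & 15 = 3 before the VTBL is emitted.
   This matches the modulo semantics GCC expects, whereas VTBL itself
   would have produced 0 for the out-of-range index.  */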
28630
28631 /* Map lane ordering between the architectural lane order and GCC's lane order,
28632 taking the ABI into account. See the comment above output_move_neon for details. */
28633
28634 static int
28635 neon_endian_lane_map (machine_mode mode, int lane)
28636 {
28637 if (BYTES_BIG_ENDIAN)
28638 {
28639 int nelems = GET_MODE_NUNITS (mode);
28640 /* Reverse lane order. */
28641 lane = (nelems - 1 - lane);
28642 /* Reverse D register order, to match ABI. */
28643 if (GET_MODE_SIZE (mode) == 16)
28644 lane = lane ^ (nelems / 2);
28645 }
28646 return lane;
28647 }
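
/* Worked example, illustrative only: for V4SImode on a big-endian target
   the mapping above first reverses the lane order (0,1,2,3 becomes
   3,2,1,0) and then swaps the two D halves of the Q register by xor-ing
   with 2, giving the overall map 0->1, 1->0, 2->3, 3->2; on little-endian
   targets lanes are returned unchanged.  */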
28648
28649 /* Some permutations index into pairs of vectors; this is a helper function
28650 to map indices into those pairs of vectors. */
28651
28652 static int
28653 neon_pair_endian_lane_map (machine_mode mode, int lane)
28654 {
28655 int nelem = GET_MODE_NUNITS (mode);
28656 if (BYTES_BIG_ENDIAN)
28657 lane =
28658 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28659 return lane;
28660 }
28661
28662 /* Generate or test for an insn that supports a constant permutation. */
28663
28664 /* Recognize patterns for the VUZP insns. */
28665
28666 static bool
28667 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28668 {
28669 unsigned int i, odd, mask, nelt = d->nelt;
28670 rtx out0, out1, in0, in1;
28671 rtx (*gen)(rtx, rtx, rtx, rtx);
28672 int first_elem;
28673 int swap_nelt;
28674
28675 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28676 return false;
28677
28678 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28679 big-endian pattern on 64-bit vectors, so we correct for that. */
28680 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28681 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28682
28683 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28684
28685 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28686 odd = 0;
28687 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28688 odd = 1;
28689 else
28690 return false;
28691 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28692
28693 for (i = 0; i < nelt; i++)
28694 {
28695 unsigned elt =
28696 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28697 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28698 return false;
28699 }
28700
28701 /* Success! */
28702 if (d->testing_p)
28703 return true;
28704
28705 switch (d->vmode)
28706 {
28707 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28708 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28709 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28710 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28711 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28712 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28713 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28714 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28715 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28716 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28717 default:
28718 gcc_unreachable ();
28719 }
28720
28721 in0 = d->op0;
28722 in1 = d->op1;
28723 if (swap_nelt != 0)
28724 std::swap (in0, in1);
28725
28726 out0 = d->target;
28727 out1 = gen_reg_rtx (d->vmode);
28728 if (odd)
28729 std::swap (out0, out1);
28730
28731 emit_insn (gen (out0, in0, in1, out1));
28732 return true;
28733 }
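
/* Illustrative example of the permutations matched above: on a
   little-endian target, where the lane maps are the identity, a
   two-operand V4SImode VUZP corresponds to the even selector {0, 2, 4, 6}
   (odd == 0) or the odd selector {1, 3, 5, 7} (odd == 1), i.e. the
   classic unzip of interleaved data.  */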
28734
28735 /* Recognize patterns for the VZIP insns. */
28736
28737 static bool
28738 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28739 {
28740 unsigned int i, high, mask, nelt = d->nelt;
28741 rtx out0, out1, in0, in1;
28742 rtx (*gen)(rtx, rtx, rtx, rtx);
28743 int first_elem;
28744 bool is_swapped;
28745
28746 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28747 return false;
28748
28749 is_swapped = BYTES_BIG_ENDIAN;
28750
28751 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28752
28753 high = nelt / 2;
28754 if (first_elem == neon_endian_lane_map (d->vmode, high))
28755 ;
28756 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28757 high = 0;
28758 else
28759 return false;
28760 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28761
28762 for (i = 0; i < nelt / 2; i++)
28763 {
28764 unsigned elt =
28765 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28766 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28767 != elt)
28768 return false;
28769 elt =
28770 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28771 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28772 != elt)
28773 return false;
28774 }
28775
28776 /* Success! */
28777 if (d->testing_p)
28778 return true;
28779
28780 switch (d->vmode)
28781 {
28782 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28783 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28784 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28785 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28786 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28787 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28788 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28789 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28790 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28791 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28792 default:
28793 gcc_unreachable ();
28794 }
28795
28796 in0 = d->op0;
28797 in1 = d->op1;
28798 if (is_swapped)
28799 std::swap (in0, in1);
28800
28801 out0 = d->target;
28802 out1 = gen_reg_rtx (d->vmode);
28803 if (high)
28804 std::swap (out0, out1);
28805
28806 emit_insn (gen (out0, in0, in1, out1));
28807 return true;
28808 }
28809
28810 /* Recognize patterns for the VREV insns. */
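/* For example (illustrative): a single-operand V8HImode selector of
   {3 2 1 0 7 6 5 4} reverses the half-words within each 64-bit group,
   which is VREV64.16, and {1 0 3 2} on V4SImode maps to VREV64.32.
   The value of d->perm[0] (7, 3 or 1) selects the reversal width
   checked below.  */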
28811
28812 static bool
28813 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28814 {
28815 unsigned int i, j, diff, nelt = d->nelt;
28816 rtx (*gen)(rtx, rtx);
28817
28818 if (!d->one_vector_p)
28819 return false;
28820
28821 diff = d->perm[0];
28822 switch (diff)
28823 {
28824 case 7:
28825 switch (d->vmode)
28826 {
28827 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28828 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28829 default:
28830 return false;
28831 }
28832 break;
28833 case 3:
28834 switch (d->vmode)
28835 {
28836 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28837 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28838 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28839 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28840 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
28841 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
28842 default:
28843 return false;
28844 }
28845 break;
28846 case 1:
28847 switch (d->vmode)
28848 {
28849 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28850 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28851 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28852 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28853 case V4SImode: gen = gen_neon_vrev64v4si; break;
28854 case V2SImode: gen = gen_neon_vrev64v2si; break;
28855 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28856 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28857 default:
28858 return false;
28859 }
28860 break;
28861 default:
28862 return false;
28863 }
28864
28865 for (i = 0; i < nelt ; i += diff + 1)
28866 for (j = 0; j <= diff; j += 1)
28867 {
28868 /* This is guaranteed to be true as the value of diff
28869 is 7, 3, 1 and we should have enough elements in the
28870 queue to generate this. Getting a vector mask with a
28871 value of diff other than these values implies that
28872 something is wrong by the time we get here. */
28873 gcc_assert (i + j < nelt);
28874 if (d->perm[i + j] != i + diff - j)
28875 return false;
28876 }
28877
28878 /* Success! */
28879 if (d->testing_p)
28880 return true;
28881
28882 emit_insn (gen (d->target, d->op0));
28883 return true;
28884 }
28885
28886 /* Recognize patterns for the VTRN insns. */
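/* For example (illustrative, little-endian lane numbering): with V4SImode
   operands OP0 = {a0 a1 a2 a3} and OP1 = {b0 b1 b2 b3}, the selector
   {0 4 2 6} yields {a0 b0 a2 b2} and {1 5 3 7} yields {a1 b1 a3 b3};
   these are the two results of a single VTRN (odd == 0 and odd == 1
   below).  */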
28887
28888 static bool
28889 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28890 {
28891 unsigned int i, odd, mask, nelt = d->nelt;
28892 rtx out0, out1, in0, in1;
28893 rtx (*gen)(rtx, rtx, rtx, rtx);
28894
28895 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28896 return false;
28897
28898 /* Note that these are little-endian tests. Adjust for big-endian later. */
28899 if (d->perm[0] == 0)
28900 odd = 0;
28901 else if (d->perm[0] == 1)
28902 odd = 1;
28903 else
28904 return false;
28905 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28906
28907 for (i = 0; i < nelt; i += 2)
28908 {
28909 if (d->perm[i] != i + odd)
28910 return false;
28911 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28912 return false;
28913 }
28914
28915 /* Success! */
28916 if (d->testing_p)
28917 return true;
28918
28919 switch (d->vmode)
28920 {
28921 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28922 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28923 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28924 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28925 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28926 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28927 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28928 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28929 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28930 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28931 default:
28932 gcc_unreachable ();
28933 }
28934
28935 in0 = d->op0;
28936 in1 = d->op1;
28937 if (BYTES_BIG_ENDIAN)
28938 {
28939 std::swap (in0, in1);
28940 odd = !odd;
28941 }
28942
28943 out0 = d->target;
28944 out1 = gen_reg_rtx (d->vmode);
28945 if (odd)
28946 std::swap (out0, out1);
28947
28948 emit_insn (gen (out0, in0, in1, out1));
28949 return true;
28950 }
28951
28952 /* Recognize patterns for the VEXT insns. */
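/* For example (illustrative): with V8QImode operands, the selector
   {3 4 5 6 7 8 9 10} takes a contiguous run starting at byte 3 of the
   concatenation of OP0 and OP1, i.e. VEXT.8 with #3.  A rotation of a
   single operand such as {2 3 0 1} on V4HImode is the one_vector_p
   case, where NEXT wraps back to 0.  */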
28953
28954 static bool
28955 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28956 {
28957 unsigned int i, nelt = d->nelt;
28958 rtx (*gen) (rtx, rtx, rtx, rtx);
28959 rtx offset;
28960
28961 unsigned int location;
28962
28963 unsigned int next = d->perm[0] + 1;
28964
28965 /* TODO: Handle GCC's numbering of elements for big-endian. */
28966 if (BYTES_BIG_ENDIAN)
28967 return false;
28968
28969 /* Check if the extracted indexes are increasing by one. */
28970 for (i = 1; i < nelt; next++, i++)
28971 {
28972 /* If we hit the most significant element of the 2nd vector in
28973 the previous iteration, no need to test further. */
28974 if (next == 2 * nelt)
28975 return false;
28976
28977 /* If we are operating on only one vector: it could be a
28978 rotation. If there are only two elements of size < 64, let
28979 arm_evpc_neon_vrev catch it. */
28980 if (d->one_vector_p && (next == nelt))
28981 {
28982 if ((nelt == 2) && (d->vmode != V2DImode))
28983 return false;
28984 else
28985 next = 0;
28986 }
28987
28988 if (d->perm[i] != next)
28989 return false;
28990 }
28991
28992 location = d->perm[0];
28993
28994 switch (d->vmode)
28995 {
28996 case V16QImode: gen = gen_neon_vextv16qi; break;
28997 case V8QImode: gen = gen_neon_vextv8qi; break;
28998 case V4HImode: gen = gen_neon_vextv4hi; break;
28999 case V8HImode: gen = gen_neon_vextv8hi; break;
29000 case V2SImode: gen = gen_neon_vextv2si; break;
29001 case V4SImode: gen = gen_neon_vextv4si; break;
29002 case V4HFmode: gen = gen_neon_vextv4hf; break;
29003 case V8HFmode: gen = gen_neon_vextv8hf; break;
29004 case V2SFmode: gen = gen_neon_vextv2sf; break;
29005 case V4SFmode: gen = gen_neon_vextv4sf; break;
29006 case V2DImode: gen = gen_neon_vextv2di; break;
29007 default:
29008 return false;
29009 }
29010
29011 /* Success! */
29012 if (d->testing_p)
29013 return true;
29014
29015 offset = GEN_INT (location);
29016 emit_insn (gen (d->target, d->op0, d->op1, offset));
29017 return true;
29018 }
29019
29020 /* The NEON VTBL instruction is a fully variable permutation that's even
29021 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29022 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29023 can do slightly better by expanding this as a constant where we don't
29024 have to apply a mask. */
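/* For example (illustrative): an arbitrary V8QImode selector such as
   {0 9 2 11 4 13 6 15} has no matching structured permute, but its
   indices have already been reduced modulo 2 * nelt by the caller, so
   it can simply be materialized as a constant vector and used directly
   as the VTBL index register.  */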
29025
29026 static bool
29027 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29028 {
29029 rtx rperm[MAX_VECT_LEN], sel;
29030 machine_mode vmode = d->vmode;
29031 unsigned int i, nelt = d->nelt;
29032
29033 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29034 numbering of elements for big-endian, we must reverse the order. */
29035 if (BYTES_BIG_ENDIAN)
29036 return false;
29037
29038 if (d->testing_p)
29039 return true;
29040
29041 /* Generic code will try constant permutation twice. Once with the
29042 original mode and again with the elements lowered to QImode.
29043 So wait and don't do the selector expansion ourselves. */
29044 if (vmode != V8QImode && vmode != V16QImode)
29045 return false;
29046
29047 for (i = 0; i < nelt; ++i)
29048 rperm[i] = GEN_INT (d->perm[i]);
29049 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29050 sel = force_reg (vmode, sel);
29051
29052 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29053 return true;
29054 }
29055
29056 static bool
29057 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29058 {
29059 /* Check if the input mask matches vext before reordering the
29060 operands. */
29061 if (TARGET_NEON)
29062 if (arm_evpc_neon_vext (d))
29063 return true;
29064
29065 /* The pattern matching functions above are written to look for a small
29066 number to begin the sequence (0, 1, N/2). If we begin with an index
29067 from the second operand, we can swap the operands. */
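  /* For example (illustrative): with nelt == 4, the selector {4 0 5 1}
     starts with an index from the second operand; adding nelt modulo
     2 * nelt turns it into {0 4 1 5} with OP0 and OP1 swapped, which
     the VZIP recognizer can then match.  */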
29068 if (d->perm[0] >= d->nelt)
29069 {
29070 unsigned i, nelt = d->nelt;
29071
29072 for (i = 0; i < nelt; ++i)
29073 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29074
29075 std::swap (d->op0, d->op1);
29076 }
29077
29078 if (TARGET_NEON)
29079 {
29080 if (arm_evpc_neon_vuzp (d))
29081 return true;
29082 if (arm_evpc_neon_vzip (d))
29083 return true;
29084 if (arm_evpc_neon_vrev (d))
29085 return true;
29086 if (arm_evpc_neon_vtrn (d))
29087 return true;
29088 return arm_evpc_neon_vtbl (d);
29089 }
29090 return false;
29091 }
29092
29093 /* Expand a vec_perm_const pattern. */
29094
29095 bool
29096 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29097 {
29098 struct expand_vec_perm_d d;
29099 int i, nelt, which;
29100
29101 d.target = target;
29102 d.op0 = op0;
29103 d.op1 = op1;
29104
29105 d.vmode = GET_MODE (target);
29106 gcc_assert (VECTOR_MODE_P (d.vmode));
29107 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29108 d.testing_p = false;
29109
29110 for (i = which = 0; i < nelt; ++i)
29111 {
29112 rtx e = XVECEXP (sel, 0, i);
29113 int ei = INTVAL (e) & (2 * nelt - 1);
29114 which |= (ei < nelt ? 1 : 2);
29115 d.perm[i] = ei;
29116 }
29117
29118 switch (which)
29119 {
29120 default:
29121 gcc_unreachable();
29122
29123 case 3:
29124 d.one_vector_p = false;
29125 if (!rtx_equal_p (op0, op1))
29126 break;
29127
29128 /* The elements of PERM do not suggest that only the first operand
29129 is used, but both operands are identical. Allow easier matching
29130 of the permutation by folding the permutation into the single
29131 input vector. */
29132 /* FALLTHRU */
29133 case 2:
29134 for (i = 0; i < nelt; ++i)
29135 d.perm[i] &= nelt - 1;
29136 d.op0 = op1;
29137 d.one_vector_p = true;
29138 break;
29139
29140 case 1:
29141 d.op1 = op0;
29142 d.one_vector_p = true;
29143 break;
29144 }
29145
29146 return arm_expand_vec_perm_const_1 (&d);
29147 }
29148
29149 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29150
29151 static bool
29152 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29153 const unsigned char *sel)
29154 {
29155 struct expand_vec_perm_d d;
29156 unsigned int i, nelt, which;
29157 bool ret;
29158
29159 d.vmode = vmode;
29160 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29161 d.testing_p = true;
29162 memcpy (d.perm, sel, nelt);
29163
29164 /* Categorize the set of elements in the selector. */
29165 for (i = which = 0; i < nelt; ++i)
29166 {
29167 unsigned char e = d.perm[i];
29168 gcc_assert (e < 2 * nelt);
29169 which |= (e < nelt ? 1 : 2);
29170 }
29171
29172 /* For all elements from second vector, fold the elements to first. */
29173 if (which == 2)
29174 for (i = 0; i < nelt; ++i)
29175 d.perm[i] -= nelt;
29176
29177 /* Check whether the mask can be applied to the vector type. */
29178 d.one_vector_p = (which != 3);
29179
29180 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29181 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29182 if (!d.one_vector_p)
29183 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29184
29185 start_sequence ();
29186 ret = arm_expand_vec_perm_const_1 (&d);
29187 end_sequence ();
29188
29189 return ret;
29190 }
29191
29192 bool
29193 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29194 {
29195 /* If we are soft float and either have ldrd or the mode is no wider
29196 than a word, then all auto increment forms are ok. */
29197 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29198 return true;
29199
29200 switch (code)
29201 {
29202 /* Post-increment and pre-decrement are supported for all instruction
29203 forms, except that pre-decrement is not supported for vector modes. */
29204 case ARM_POST_INC:
29205 case ARM_PRE_DEC:
29206 if (VECTOR_MODE_P (mode))
29207 {
29208 if (code != ARM_PRE_DEC)
29209 return true;
29210 else
29211 return false;
29212 }
29213
29214 return true;
29215
29216 case ARM_POST_DEC:
29217 case ARM_PRE_INC:
29218 /* Without LDRD and mode size greater than
29219 word size, there is no point in auto-incrementing
29220 because ldm and stm will not have these forms. */
29221 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29222 return false;
29223
29224 /* Vector and floating point modes do not support
29225 these auto increment forms. */
29226 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29227 return false;
29228
29229 return true;
29230
29231 default:
29232 return false;
29233
29234 }
29235
29236 return false;
29237 }
29238
29239 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29240 on ARM, since we know that shifts by negative amounts are no-ops.
29241 Additionally, the default expansion code is not available or suitable
29242 for post-reload insn splits (this can occur when the register allocator
29243 chooses not to do a shift in NEON).
29244
29245 This function is used in both initial expand and post-reload splits, and
29246 handles all kinds of 64-bit shifts.
29247
29248 Input requirements:
29249 - It is safe for the input and output to be the same register, but
29250 early-clobber rules apply for the shift amount and scratch registers.
29251 - Shift by register requires both scratch registers. In all other cases
29252 the scratch registers may be NULL.
29253 - Ashiftrt by a register also clobbers the CC register. */
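/* For example (illustrative register names): a constant left shift of a
   DImode value by 5 is emitted roughly as
       lsl  out_hi, in_hi, #5
       orr  out_hi, out_hi, in_lo, lsr #27
       lsl  out_lo, in_lo, #5
   with the bits that cross the word boundary OR-ed into the high word.
   Shifts by register instead use the scratch registers to build the
   "amount - 32" and "32 - amount" values.  */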
29254 void
29255 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29256 rtx amount, rtx scratch1, rtx scratch2)
29257 {
29258 rtx out_high = gen_highpart (SImode, out);
29259 rtx out_low = gen_lowpart (SImode, out);
29260 rtx in_high = gen_highpart (SImode, in);
29261 rtx in_low = gen_lowpart (SImode, in);
29262
29263 /* Terminology:
29264 in = the register pair containing the input value.
29265 out = the destination register pair.
29266 up = the high- or low-part of each pair.
29267 down = the opposite part to "up".
29268 In a shift, we can consider bits to shift from "up"-stream to
29269 "down"-stream, so in a left-shift "up" is the low-part and "down"
29270 is the high-part of each register pair. */
29271
29272 rtx out_up = code == ASHIFT ? out_low : out_high;
29273 rtx out_down = code == ASHIFT ? out_high : out_low;
29274 rtx in_up = code == ASHIFT ? in_low : in_high;
29275 rtx in_down = code == ASHIFT ? in_high : in_low;
29276
29277 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29278 gcc_assert (out
29279 && (REG_P (out) || GET_CODE (out) == SUBREG)
29280 && GET_MODE (out) == DImode);
29281 gcc_assert (in
29282 && (REG_P (in) || GET_CODE (in) == SUBREG)
29283 && GET_MODE (in) == DImode);
29284 gcc_assert (amount
29285 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29286 && GET_MODE (amount) == SImode)
29287 || CONST_INT_P (amount)));
29288 gcc_assert (scratch1 == NULL
29289 || (GET_CODE (scratch1) == SCRATCH)
29290 || (GET_MODE (scratch1) == SImode
29291 && REG_P (scratch1)));
29292 gcc_assert (scratch2 == NULL
29293 || (GET_CODE (scratch2) == SCRATCH)
29294 || (GET_MODE (scratch2) == SImode
29295 && REG_P (scratch2)));
29296 gcc_assert (!REG_P (out) || !REG_P (amount)
29297 || !HARD_REGISTER_P (out)
29298 || (REGNO (out) != REGNO (amount)
29299 && REGNO (out) + 1 != REGNO (amount)));
29300
29301 /* Macros to make the following code more readable. */
29302 #define SUB_32(DEST,SRC) \
29303 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29304 #define RSB_32(DEST,SRC) \
29305 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29306 #define SUB_S_32(DEST,SRC) \
29307 gen_addsi3_compare0 ((DEST), (SRC), \
29308 GEN_INT (-32))
29309 #define SET(DEST,SRC) \
29310 gen_rtx_SET ((DEST), (SRC))
29311 #define SHIFT(CODE,SRC,AMOUNT) \
29312 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29313 #define LSHIFT(CODE,SRC,AMOUNT) \
29314 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29315 SImode, (SRC), (AMOUNT))
29316 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29317 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29318 SImode, (SRC), (AMOUNT))
29319 #define ORR(A,B) \
29320 gen_rtx_IOR (SImode, (A), (B))
29321 #define BRANCH(COND,LABEL) \
29322 gen_arm_cond_branch ((LABEL), \
29323 gen_rtx_ ## COND (CCmode, cc_reg, \
29324 const0_rtx), \
29325 cc_reg)
29326
29327 /* Shifts by register and shifts by constant are handled separately. */
29328 if (CONST_INT_P (amount))
29329 {
29330 /* We have a shift-by-constant. */
29331
29332 /* First, handle out-of-range shift amounts.
29333 In both cases we try to match the result that an ARM instruction in a
29334 shift-by-register would give. This helps reduce execution
29335 differences between optimization levels, but it won't stop other
29336 parts of the compiler doing different things. This is undefined
29337 behavior, in any case. */
29338 if (INTVAL (amount) <= 0)
29339 emit_insn (gen_movdi (out, in));
29340 else if (INTVAL (amount) >= 64)
29341 {
29342 if (code == ASHIFTRT)
29343 {
29344 rtx const31_rtx = GEN_INT (31);
29345 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29346 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29347 }
29348 else
29349 emit_insn (gen_movdi (out, const0_rtx));
29350 }
29351
29352 /* Now handle valid shifts. */
29353 else if (INTVAL (amount) < 32)
29354 {
29355 /* Shifts by a constant less than 32. */
29356 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29357
29358 /* Clearing the out register in DImode first avoids lots
29359 of spilling and results in less stack usage.
29360 Later this redundant insn is completely removed.
29361 Do that only if "in" and "out" are different registers. */
29362 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29363 emit_insn (SET (out, const0_rtx));
29364 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29365 emit_insn (SET (out_down,
29366 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29367 out_down)));
29368 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29369 }
29370 else
29371 {
29372 /* Shifts by a constant greater than 31. */
29373 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29374
29375 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29376 emit_insn (SET (out, const0_rtx));
29377 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29378 if (code == ASHIFTRT)
29379 emit_insn (gen_ashrsi3 (out_up, in_up,
29380 GEN_INT (31)));
29381 else
29382 emit_insn (SET (out_up, const0_rtx));
29383 }
29384 }
29385 else
29386 {
29387 /* We have a shift-by-register. */
29388 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29389
29390 /* This alternative requires the scratch registers. */
29391 gcc_assert (scratch1 && REG_P (scratch1));
29392 gcc_assert (scratch2 && REG_P (scratch2));
29393
29394 /* We will need the values "amount-32" and "32-amount" later.
29395 Swapping them around now allows the later code to be more general. */
29396 switch (code)
29397 {
29398 case ASHIFT:
29399 emit_insn (SUB_32 (scratch1, amount));
29400 emit_insn (RSB_32 (scratch2, amount));
29401 break;
29402 case ASHIFTRT:
29403 emit_insn (RSB_32 (scratch1, amount));
29404 /* Also set CC = amount > 32. */
29405 emit_insn (SUB_S_32 (scratch2, amount));
29406 break;
29407 case LSHIFTRT:
29408 emit_insn (RSB_32 (scratch1, amount));
29409 emit_insn (SUB_32 (scratch2, amount));
29410 break;
29411 default:
29412 gcc_unreachable ();
29413 }
29414
29415 /* Emit code like this:
29416
29417 arithmetic-left:
29418 out_down = in_down << amount;
29419 out_down = (in_up << (amount - 32)) | out_down;
29420 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29421 out_up = in_up << amount;
29422
29423 arithmetic-right:
29424 out_down = in_down >> amount;
29425 out_down = (in_up << (32 - amount)) | out_down;
29426 if (amount < 32)
29427 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29428 out_up = in_up << amount;
29429
29430 logical-right:
29431 out_down = in_down >> amount;
29432 out_down = (in_up << (32 - amount)) | out_down;
29433 if (amount < 32)
29434 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29435 out_up = in_up << amount;
29436
29437 The ARM and Thumb2 variants are the same but implemented slightly
29438 differently. If this were only called during expand we could just
29439 use the Thumb2 case and let combine do the right thing, but this
29440 can also be called from post-reload splitters. */
29441
29442 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29443
29444 if (!TARGET_THUMB2)
29445 {
29446 /* Emit code for ARM mode. */
29447 emit_insn (SET (out_down,
29448 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29449 if (code == ASHIFTRT)
29450 {
29451 rtx_code_label *done_label = gen_label_rtx ();
29452 emit_jump_insn (BRANCH (LT, done_label));
29453 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29454 out_down)));
29455 emit_label (done_label);
29456 }
29457 else
29458 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29459 out_down)));
29460 }
29461 else
29462 {
29463 /* Emit code for Thumb2 mode.
29464 Thumb2 can't do shift and or in one insn. */
29465 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29466 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29467
29468 if (code == ASHIFTRT)
29469 {
29470 rtx_code_label *done_label = gen_label_rtx ();
29471 emit_jump_insn (BRANCH (LT, done_label));
29472 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29473 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29474 emit_label (done_label);
29475 }
29476 else
29477 {
29478 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29479 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29480 }
29481 }
29482
29483 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29484 }
29485
29486 #undef SUB_32
29487 #undef RSB_32
29488 #undef SUB_S_32
29489 #undef SET
29490 #undef SHIFT
29491 #undef LSHIFT
29492 #undef REV_LSHIFT
29493 #undef ORR
29494 #undef BRANCH
29495 }
29496
29497 /* Returns true if the pattern is a valid symbolic address, which is either a
29498 symbol_ref or (symbol_ref + addend).
29499
29500 According to the ARM ELF ABI, the initial addend of REL-type relocations
29501 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29502 literal field of the instruction as a 16-bit signed value in the range
29503 -32768 <= A < 32768. */
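/* For example (illustrative): (symbol_ref "x") and
   (const (plus (symbol_ref "x") (const_int 16))) are both accepted,
   whereas (const (plus (symbol_ref "x") (const_int 65536))) is not,
   since 65536 lies outside the representable addend range.  */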
29504
29505 bool
29506 arm_valid_symbolic_address_p (rtx addr)
29507 {
29508 rtx xop0, xop1 = NULL_RTX;
29509 rtx tmp = addr;
29510
29511 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29512 return true;
29513
29514 /* (const (plus: symbol_ref const_int)) */
29515 if (GET_CODE (addr) == CONST)
29516 tmp = XEXP (addr, 0);
29517
29518 if (GET_CODE (tmp) == PLUS)
29519 {
29520 xop0 = XEXP (tmp, 0);
29521 xop1 = XEXP (tmp, 1);
29522
29523 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29524 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29525 }
29526
29527 return false;
29528 }
29529
29530 /* Returns true if *COMPARISON is a valid comparison operation and puts
29531 the operands into a form that is valid. */
29532 bool
29533 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29534 {
29535 enum rtx_code code = GET_CODE (*comparison);
29536 int code_int;
29537 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29538 ? GET_MODE (*op2) : GET_MODE (*op1);
29539
29540 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29541
29542 if (code == UNEQ || code == LTGT)
29543 return false;
29544
29545 code_int = (int)code;
29546 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29547 PUT_CODE (*comparison, (enum rtx_code)code_int);
29548
29549 switch (mode)
29550 {
29551 case SImode:
29552 if (!arm_add_operand (*op1, mode))
29553 *op1 = force_reg (mode, *op1);
29554 if (!arm_add_operand (*op2, mode))
29555 *op2 = force_reg (mode, *op2);
29556 return true;
29557
29558 case DImode:
29559 if (!cmpdi_operand (*op1, mode))
29560 *op1 = force_reg (mode, *op1);
29561 if (!cmpdi_operand (*op2, mode))
29562 *op2 = force_reg (mode, *op2);
29563 return true;
29564
29565 case HFmode:
29566 if (!TARGET_VFP_FP16INST)
29567 break;
29568 /* FP16 comparisons are done in SF mode. */
29569 mode = SFmode;
29570 *op1 = convert_to_mode (mode, *op1, 1);
29571 *op2 = convert_to_mode (mode, *op2, 1);
29572 /* Fall through. */
29573 case SFmode:
29574 case DFmode:
29575 if (!vfp_compare_operand (*op1, mode))
29576 *op1 = force_reg (mode, *op1);
29577 if (!vfp_compare_operand (*op2, mode))
29578 *op2 = force_reg (mode, *op2);
29579 return true;
29580 default:
29581 break;
29582 }
29583
29584 return false;
29585
29586 }
29587
29588 /* Maximum number of instructions to set a block of memory. */
29589 static int
29590 arm_block_set_max_insns (void)
29591 {
29592 if (optimize_function_for_size_p (cfun))
29593 return 4;
29594 else
29595 return current_tune->max_insns_inline_memset;
29596 }
29597
29598 /* Return TRUE if it's profitable to set a block of memory for the
29599 non-vectorized case. VAL is the value to set the memory
29600 with. LENGTH is the number of bytes to set. ALIGN is the
29601 alignment of the destination memory in bytes. UNALIGNED_P
29602 is TRUE if we can only set the memory with instructions
29603 meeting alignment requirements. USE_STRD_P is TRUE if we
29604 can use strd to set the memory. */
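/* As a worked example (assuming VAL can be built with a single
   instruction): setting 14 word-aligned bytes without strd costs
   1 + (14 >> 2) + leftover[14 & 3] = 1 + 3 + 1 = 5 instructions, which
   exceeds the limit of 4 used when optimizing for size and is otherwise
   compared against the tuning's max_insns_inline_memset.  */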
29605 static bool
29606 arm_block_set_non_vect_profit_p (rtx val,
29607 unsigned HOST_WIDE_INT length,
29608 unsigned HOST_WIDE_INT align,
29609 bool unaligned_p, bool use_strd_p)
29610 {
29611 int num = 0;
29612 /* For leftovers of 0-7 bytes, we can set the memory block using
29613 strb/strh/str with the minimum number of instructions. */
29614 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29615
29616 if (unaligned_p)
29617 {
29618 num = arm_const_inline_cost (SET, val);
29619 num += length / align + length % align;
29620 }
29621 else if (use_strd_p)
29622 {
29623 num = arm_const_double_inline_cost (val);
29624 num += (length >> 3) + leftover[length & 7];
29625 }
29626 else
29627 {
29628 num = arm_const_inline_cost (SET, val);
29629 num += (length >> 2) + leftover[length & 3];
29630 }
29631
29632 /* We may be able to combine last pair STRH/STRB into a single STR
29633 by shifting one byte back. */
29634 if (unaligned_access && length > 3 && (length & 3) == 3)
29635 num--;
29636
29637 return (num <= arm_block_set_max_insns ());
29638 }
29639
29640 /* Return TRUE if it's profitable to set a block of memory for the
29641 vectorized case. LENGTH is the number of bytes to set.
29642 ALIGN is the alignment of destination memory in bytes.
29643 MODE is the vector mode used to set the memory. */
29644 static bool
29645 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29646 unsigned HOST_WIDE_INT align,
29647 machine_mode mode)
29648 {
29649 int num;
29650 bool unaligned_p = ((align & 3) != 0);
29651 unsigned int nelt = GET_MODE_NUNITS (mode);
29652
29653 /* Instruction loading constant value. */
29654 num = 1;
29655 /* Instructions storing the memory. */
29656 num += (length + nelt - 1) / nelt;
29657 /* Instructions adjusting the address expression. We only need to adjust
29658 the address expression if it's 4-byte aligned and the leftover bytes
29659 can only be stored by a misaligned store instruction. */
29660 if (!unaligned_p && (length & 3) != 0)
29661 num++;
29662
29663 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29664 if (!unaligned_p && mode == V16QImode)
29665 num--;
29666
29667 return (num <= arm_block_set_max_insns ());
29668 }
29669
29670 /* Set a block of memory using vectorization instructions for the
29671 unaligned case. We fill the first LENGTH bytes of the memory
29672 area starting from DSTBASE with byte constant VALUE. ALIGN is
29673 the alignment requirement of memory. Return TRUE if succeeded. */
29674 static bool
29675 arm_block_set_unaligned_vect (rtx dstbase,
29676 unsigned HOST_WIDE_INT length,
29677 unsigned HOST_WIDE_INT value,
29678 unsigned HOST_WIDE_INT align)
29679 {
29680 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29681 rtx dst, mem;
29682 rtx val_elt, val_vec, reg;
29683 rtx rval[MAX_VECT_LEN];
29684 rtx (*gen_func) (rtx, rtx);
29685 machine_mode mode;
29686 unsigned HOST_WIDE_INT v = value;
29687 unsigned int offset = 0;
29688 gcc_assert ((align & 0x3) != 0);
29689 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29690 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29691 if (length >= nelt_v16)
29692 {
29693 mode = V16QImode;
29694 gen_func = gen_movmisalignv16qi;
29695 }
29696 else
29697 {
29698 mode = V8QImode;
29699 gen_func = gen_movmisalignv8qi;
29700 }
29701 nelt_mode = GET_MODE_NUNITS (mode);
29702 gcc_assert (length >= nelt_mode);
29703 /* Skip if it isn't profitable. */
29704 if (!arm_block_set_vect_profit_p (length, align, mode))
29705 return false;
29706
29707 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29708 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29709
29710 v = sext_hwi (v, BITS_PER_WORD);
29711 val_elt = GEN_INT (v);
29712 for (j = 0; j < nelt_mode; j++)
29713 rval[j] = val_elt;
29714
29715 reg = gen_reg_rtx (mode);
29716 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29717 /* Emit instruction loading the constant value. */
29718 emit_move_insn (reg, val_vec);
29719
29720 /* Handle nelt_mode bytes in a vector. */
29721 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29722 {
29723 emit_insn ((*gen_func) (mem, reg));
29724 if (i + 2 * nelt_mode <= length)
29725 {
29726 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29727 offset += nelt_mode;
29728 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29729 }
29730 }
29731
29732 /* If at least nelt_v8 bytes are left over, we must be in
29733 V16QI mode. */
29734 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29735
29736 /* Handle (8, 16) bytes leftover. */
29737 if (i + nelt_v8 < length)
29738 {
29739 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29740 offset += length - i;
29741 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29742
29743 /* We are shifting bytes back, set the alignment accordingly. */
29744 if ((length & 1) != 0 && align >= 2)
29745 set_mem_align (mem, BITS_PER_UNIT);
29746
29747 emit_insn (gen_movmisalignv16qi (mem, reg));
29748 }
29749 /* Handle (0, 8] bytes leftover. */
29750 else if (i < length && i + nelt_v8 >= length)
29751 {
29752 if (mode == V16QImode)
29753 reg = gen_lowpart (V8QImode, reg);
29754
29755 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29756 + (nelt_mode - nelt_v8))));
29757 offset += (length - i) + (nelt_mode - nelt_v8);
29758 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29759
29760 /* We are shifting bytes back, set the alignment accordingly. */
29761 if ((length & 1) != 0 && align >= 2)
29762 set_mem_align (mem, BITS_PER_UNIT);
29763
29764 emit_insn (gen_movmisalignv8qi (mem, reg));
29765 }
29766
29767 return true;
29768 }
29769
29770 /* Set a block of memory using vectorization instructions for the
29771 aligned case. We fill the first LENGTH bytes of the memory area
29772 starting from DSTBASE with byte constant VALUE. ALIGN is the
29773 alignment requirement of memory. Return TRUE if succeeded. */
29774 static bool
29775 arm_block_set_aligned_vect (rtx dstbase,
29776 unsigned HOST_WIDE_INT length,
29777 unsigned HOST_WIDE_INT value,
29778 unsigned HOST_WIDE_INT align)
29779 {
29780 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29781 rtx dst, addr, mem;
29782 rtx val_elt, val_vec, reg;
29783 rtx rval[MAX_VECT_LEN];
29784 machine_mode mode;
29785 unsigned HOST_WIDE_INT v = value;
29786 unsigned int offset = 0;
29787
29788 gcc_assert ((align & 0x3) == 0);
29789 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29790 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29791 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29792 mode = V16QImode;
29793 else
29794 mode = V8QImode;
29795
29796 nelt_mode = GET_MODE_NUNITS (mode);
29797 gcc_assert (length >= nelt_mode);
29798 /* Skip if it isn't profitable. */
29799 if (!arm_block_set_vect_profit_p (length, align, mode))
29800 return false;
29801
29802 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29803
29804 v = sext_hwi (v, BITS_PER_WORD);
29805 val_elt = GEN_INT (v);
29806 for (j = 0; j < nelt_mode; j++)
29807 rval[j] = val_elt;
29808
29809 reg = gen_reg_rtx (mode);
29810 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29811 /* Emit instruction loading the constant value. */
29812 emit_move_insn (reg, val_vec);
29813
29814 i = 0;
29815 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29816 if (mode == V16QImode)
29817 {
29818 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29819 emit_insn (gen_movmisalignv16qi (mem, reg));
29820 i += nelt_mode;
29821 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29822 if (i + nelt_v8 < length && i + nelt_v16 > length)
29823 {
29824 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29825 offset += length - nelt_mode;
29826 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29827 /* We are shifting bytes back, set the alignment accordingly. */
29828 if ((length & 0x3) == 0)
29829 set_mem_align (mem, BITS_PER_UNIT * 4);
29830 else if ((length & 0x1) == 0)
29831 set_mem_align (mem, BITS_PER_UNIT * 2);
29832 else
29833 set_mem_align (mem, BITS_PER_UNIT);
29834
29835 emit_insn (gen_movmisalignv16qi (mem, reg));
29836 return true;
29837 }
29838 /* Fall through for bytes leftover. */
29839 mode = V8QImode;
29840 nelt_mode = GET_MODE_NUNITS (mode);
29841 reg = gen_lowpart (V8QImode, reg);
29842 }
29843
29844 /* Handle 8 bytes in a vector. */
29845 for (; (i + nelt_mode <= length); i += nelt_mode)
29846 {
29847 addr = plus_constant (Pmode, dst, i);
29848 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29849 emit_move_insn (mem, reg);
29850 }
29851
29852 /* Handle single word leftover by shifting 4 bytes back. We can
29853 use aligned access for this case. */
29854 if (i + UNITS_PER_WORD == length)
29855 {
29856 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29857 offset += i - UNITS_PER_WORD;
29858 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29859 /* We are shifting 4 bytes back, set the alignment accordingly. */
29860 if (align > UNITS_PER_WORD)
29861 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29862
29863 emit_move_insn (mem, reg);
29864 }
29865 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29866 We have to use unaligned access for this case. */
29867 else if (i < length)
29868 {
29869 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29870 offset += length - nelt_mode;
29871 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29872 /* We are shifting bytes back, set the alignment accordingly. */
29873 if ((length & 1) == 0)
29874 set_mem_align (mem, BITS_PER_UNIT * 2);
29875 else
29876 set_mem_align (mem, BITS_PER_UNIT);
29877
29878 emit_insn (gen_movmisalignv8qi (mem, reg));
29879 }
29880
29881 return true;
29882 }
29883
29884 /* Set a block of memory using plain strh/strb instructions, only
29885 using instructions allowed by ALIGN on the processor. We fill the
29886 first LENGTH bytes of the memory area starting from DSTBASE
29887 with byte constant VALUE. ALIGN is the alignment requirement
29888 of memory. */
29889 static bool
29890 arm_block_set_unaligned_non_vect (rtx dstbase,
29891 unsigned HOST_WIDE_INT length,
29892 unsigned HOST_WIDE_INT value,
29893 unsigned HOST_WIDE_INT align)
29894 {
29895 unsigned int i;
29896 rtx dst, addr, mem;
29897 rtx val_exp, val_reg, reg;
29898 machine_mode mode;
29899 HOST_WIDE_INT v = value;
29900
29901 gcc_assert (align == 1 || align == 2);
29902
29903 if (align == 2)
29904 v |= (value << BITS_PER_UNIT);
29905
29906 v = sext_hwi (v, BITS_PER_WORD);
29907 val_exp = GEN_INT (v);
29908 /* Skip if it isn't profitable. */
29909 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29910 align, true, false))
29911 return false;
29912
29913 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29914 mode = (align == 2 ? HImode : QImode);
29915 val_reg = force_reg (SImode, val_exp);
29916 reg = gen_lowpart (mode, val_reg);
29917
29918 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29919 {
29920 addr = plus_constant (Pmode, dst, i);
29921 mem = adjust_automodify_address (dstbase, mode, addr, i);
29922 emit_move_insn (mem, reg);
29923 }
29924
29925 /* Handle single byte leftover. */
29926 if (i + 1 == length)
29927 {
29928 reg = gen_lowpart (QImode, val_reg);
29929 addr = plus_constant (Pmode, dst, i);
29930 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29931 emit_move_insn (mem, reg);
29932 i++;
29933 }
29934
29935 gcc_assert (i == length);
29936 return true;
29937 }
29938
29939 /* Set a block of memory using plain strd/str/strh/strb instructions,
29940 to permit unaligned copies on processors which support unaligned
29941 semantics for those instructions. We fill the first LENGTH bytes
29942 of the memory area starting from DSTBASE with byte constant VALUE.
29943 ALIGN is the alignment requirement of memory. */
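/* For example (illustrative): for VALUE == 0xab the constant is
   replicated to 0xabababab for word stores, and further to
   0xabababababababab when strd (double-word store) is used.  */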
29944 static bool
29945 arm_block_set_aligned_non_vect (rtx dstbase,
29946 unsigned HOST_WIDE_INT length,
29947 unsigned HOST_WIDE_INT value,
29948 unsigned HOST_WIDE_INT align)
29949 {
29950 unsigned int i;
29951 rtx dst, addr, mem;
29952 rtx val_exp, val_reg, reg;
29953 unsigned HOST_WIDE_INT v;
29954 bool use_strd_p;
29955
29956 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29957 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29958
29959 v = (value | (value << 8) | (value << 16) | (value << 24));
29960 if (length < UNITS_PER_WORD)
29961 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29962
29963 if (use_strd_p)
29964 v |= (v << BITS_PER_WORD);
29965 else
29966 v = sext_hwi (v, BITS_PER_WORD);
29967
29968 val_exp = GEN_INT (v);
29969 /* Skip if it isn't profitable. */
29970 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29971 align, false, use_strd_p))
29972 {
29973 if (!use_strd_p)
29974 return false;
29975
29976 /* Try without strd. */
29977 v = (v >> BITS_PER_WORD);
29978 v = sext_hwi (v, BITS_PER_WORD);
29979 val_exp = GEN_INT (v);
29980 use_strd_p = false;
29981 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29982 align, false, use_strd_p))
29983 return false;
29984 }
29985
29986 i = 0;
29987 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29988 /* Handle double words using strd if possible. */
29989 if (use_strd_p)
29990 {
29991 val_reg = force_reg (DImode, val_exp);
29992 reg = val_reg;
29993 for (; (i + 8 <= length); i += 8)
29994 {
29995 addr = plus_constant (Pmode, dst, i);
29996 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29997 emit_move_insn (mem, reg);
29998 }
29999 }
30000 else
30001 val_reg = force_reg (SImode, val_exp);
30002
30003 /* Handle words. */
30004 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30005 for (; (i + 4 <= length); i += 4)
30006 {
30007 addr = plus_constant (Pmode, dst, i);
30008 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30009 if ((align & 3) == 0)
30010 emit_move_insn (mem, reg);
30011 else
30012 emit_insn (gen_unaligned_storesi (mem, reg));
30013 }
30014
30015 /* Merge last pair of STRH and STRB into a STR if possible. */
30016 if (unaligned_access && i > 0 && (i + 3) == length)
30017 {
30018 addr = plus_constant (Pmode, dst, i - 1);
30019 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30020 /* We are shifting one byte back, set the alignment accordingly. */
30021 if ((align & 1) == 0)
30022 set_mem_align (mem, BITS_PER_UNIT);
30023
30024 /* Most likely this is an unaligned access, and we can't tell at
30025 compilation time. */
30026 emit_insn (gen_unaligned_storesi (mem, reg));
30027 return true;
30028 }
30029
30030 /* Handle half word leftover. */
30031 if (i + 2 <= length)
30032 {
30033 reg = gen_lowpart (HImode, val_reg);
30034 addr = plus_constant (Pmode, dst, i);
30035 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30036 if ((align & 1) == 0)
30037 emit_move_insn (mem, reg);
30038 else
30039 emit_insn (gen_unaligned_storehi (mem, reg));
30040
30041 i += 2;
30042 }
30043
30044 /* Handle single byte leftover. */
30045 if (i + 1 == length)
30046 {
30047 reg = gen_lowpart (QImode, val_reg);
30048 addr = plus_constant (Pmode, dst, i);
30049 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30050 emit_move_insn (mem, reg);
30051 }
30052
30053 return true;
30054 }
30055
30056 /* Set a block of memory using vectorization instructions for both
30057 aligned and unaligned cases. We fill the first LENGTH bytes of
30058 the memory area starting from DSTBASE with byte constant VALUE.
30059 ALIGN is the alignment requirement of memory. */
30060 static bool
30061 arm_block_set_vect (rtx dstbase,
30062 unsigned HOST_WIDE_INT length,
30063 unsigned HOST_WIDE_INT value,
30064 unsigned HOST_WIDE_INT align)
30065 {
30066 /* Check whether we need to use unaligned store instruction. */
30067 if (((align & 3) != 0 || (length & 3) != 0)
30068 /* Check whether unaligned store instruction is available. */
30069 && (!unaligned_access || BYTES_BIG_ENDIAN))
30070 return false;
30071
30072 if ((align & 3) == 0)
30073 return arm_block_set_aligned_vect (dstbase, length, value, align);
30074 else
30075 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30076 }
30077
30078 /* Expand a string store operation. First we try to do that by using
30079 vectorization instructions, then try with ARM unaligned access and
30080 double-word store if profitable. OPERANDS[0] is the destination,
30081 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30082 initialize the memory, OPERANDS[3] is the known alignment of the
30083 destination. */
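/* For example (illustrative): a 24-byte memset of value 0xab to a
   word-aligned destination on a little-endian NEON target is typically
   expanded inline as one instruction splatting the value into a vector
   register followed by a couple of vector stores, rather than a call
   to memset.  */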
30084 bool
30085 arm_gen_setmem (rtx *operands)
30086 {
30087 rtx dstbase = operands[0];
30088 unsigned HOST_WIDE_INT length;
30089 unsigned HOST_WIDE_INT value;
30090 unsigned HOST_WIDE_INT align;
30091
30092 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30093 return false;
30094
30095 length = UINTVAL (operands[1]);
30096 if (length > 64)
30097 return false;
30098
30099 value = (UINTVAL (operands[2]) & 0xFF);
30100 align = UINTVAL (operands[3]);
30101 if (TARGET_NEON && length >= 8
30102 && current_tune->string_ops_prefer_neon
30103 && arm_block_set_vect (dstbase, length, value, align))
30104 return true;
30105
30106 if (!unaligned_access && (align & 3) != 0)
30107 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30108
30109 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30110 }
30111
30112
30113 static bool
30114 arm_macro_fusion_p (void)
30115 {
30116 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30117 }
30118
30119 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30120 for MOVW / MOVT macro fusion. */
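/* For example (illustrative), the fused pair corresponds to assembly of
   the form
       movw  r0, #:lower16:sym   (or a 16-bit immediate)
       movt  r0, #:upper16:sym
   which together materialize a 32-bit value in a single register.  */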
30121
30122 static bool
30123 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30124 {
30125 /* We are trying to fuse
30126 movw imm / movt imm
30127 instructions as a group that gets scheduled together. */
30128
30129 rtx set_dest = SET_DEST (curr_set);
30130
30131 if (GET_MODE (set_dest) != SImode)
30132 return false;
30133
30134 /* We are trying to match:
30135 prev (movw) == (set (reg r0) (const_int imm16))
30136 curr (movt) == (set (zero_extract (reg r0)
30137 (const_int 16)
30138 (const_int 16))
30139 (const_int imm16_1))
30140 or
30141 prev (movw) == (set (reg r1)
30142 (high (symbol_ref ("SYM"))))
30143 curr (movt) == (set (reg r0)
30144 (lo_sum (reg r1)
30145 (symbol_ref ("SYM")))) */
30146
30147 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30148 {
30149 if (CONST_INT_P (SET_SRC (curr_set))
30150 && CONST_INT_P (SET_SRC (prev_set))
30151 && REG_P (XEXP (set_dest, 0))
30152 && REG_P (SET_DEST (prev_set))
30153 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30154 return true;
30155
30156 }
30157 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30158 && REG_P (SET_DEST (curr_set))
30159 && REG_P (SET_DEST (prev_set))
30160 && GET_CODE (SET_SRC (prev_set)) == HIGH
30161 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30162 return true;
30163
30164 return false;
30165 }
30166
30167 static bool
30168 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30169 {
30170 rtx prev_set = single_set (prev);
30171 rtx curr_set = single_set (curr);
30172
30173 if (!prev_set
30174 || !curr_set)
30175 return false;
30176
30177 if (any_condjump_p (curr))
30178 return false;
30179
30180 if (!arm_macro_fusion_p ())
30181 return false;
30182
30183 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30184 && aarch_crypto_can_dual_issue (prev, curr))
30185 return true;
30186
30187 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30188 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30189 return true;
30190
30191 return false;
30192 }
30193
30194 /* Return true iff the instruction fusion described by OP is enabled. */
30195 bool
30196 arm_fusion_enabled_p (tune_params::fuse_ops op)
30197 {
30198 return current_tune->fusible_ops & op;
30199 }
30200
30201 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30202 scheduled for speculative execution. Reject the long-running division
30203 and square-root instructions. */
30204
30205 static bool
30206 arm_sched_can_speculate_insn (rtx_insn *insn)
30207 {
30208 switch (get_attr_type (insn))
30209 {
30210 case TYPE_SDIV:
30211 case TYPE_UDIV:
30212 case TYPE_FDIVS:
30213 case TYPE_FDIVD:
30214 case TYPE_FSQRTS:
30215 case TYPE_FSQRTD:
30216 case TYPE_NEON_FP_SQRT_S:
30217 case TYPE_NEON_FP_SQRT_D:
30218 case TYPE_NEON_FP_SQRT_S_Q:
30219 case TYPE_NEON_FP_SQRT_D_Q:
30220 case TYPE_NEON_FP_DIV_S:
30221 case TYPE_NEON_FP_DIV_D:
30222 case TYPE_NEON_FP_DIV_S_Q:
30223 case TYPE_NEON_FP_DIV_D_Q:
30224 return false;
30225 default:
30226 return true;
30227 }
30228 }
30229
30230 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
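/* AddressSanitizer forms shadow addresses roughly as
   (address >> 3) + offset; on 32-bit ARM the offset returned below is
   1 << 29 (0x20000000).  */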
30231
30232 static unsigned HOST_WIDE_INT
30233 arm_asan_shadow_offset (void)
30234 {
30235 return HOST_WIDE_INT_1U << 29;
30236 }
30237
30238
30239 /* This is a temporary fix for PR60655. Ideally we need
30240 to handle most of these cases in the generic part but
30241 currently we reject minus (..) (sym_ref). We try to
30242 ameliorate the case with minus (sym_ref1) (sym_ref2)
30243 where they are in the same section. */
30244
30245 static bool
30246 arm_const_not_ok_for_debug_p (rtx p)
30247 {
30248 tree decl_op0 = NULL;
30249 tree decl_op1 = NULL;
30250
30251 if (GET_CODE (p) == MINUS)
30252 {
30253 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30254 {
30255 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30256 if (decl_op1
30257 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30258 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30259 {
30260 if ((VAR_P (decl_op1)
30261 || TREE_CODE (decl_op1) == CONST_DECL)
30262 && (VAR_P (decl_op0)
30263 || TREE_CODE (decl_op0) == CONST_DECL))
30264 return (get_variable_section (decl_op1, false)
30265 != get_variable_section (decl_op0, false));
30266
30267 if (TREE_CODE (decl_op1) == LABEL_DECL
30268 && TREE_CODE (decl_op0) == LABEL_DECL)
30269 return (DECL_CONTEXT (decl_op1)
30270 != DECL_CONTEXT (decl_op0));
30271 }
30272
30273 return true;
30274 }
30275 }
30276
30277 return false;
30278 }
30279
30280 /* Return TRUE if X is a reference to a value in a constant pool. */
30281 extern bool
30282 arm_is_constant_pool_ref (rtx x)
30283 {
30284 return (MEM_P (x)
30285 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30286 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30287 }
30288
30289 /* Remember the last target of arm_set_current_function. */
30290 static GTY(()) tree arm_previous_fndecl;
30291
30292 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30293
30294 void
30295 save_restore_target_globals (tree new_tree)
30296 {
30297 /* If we have a previous state, use it. */
30298 if (TREE_TARGET_GLOBALS (new_tree))
30299 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30300 else if (new_tree == target_option_default_node)
30301 restore_target_globals (&default_target_globals);
30302 else
30303 {
30304 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30305 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30306 }
30307
30308 arm_option_params_internal ();
30309 }
30310
30311 /* Invalidate arm_previous_fndecl. */
30312
30313 void
30314 arm_reset_previous_fndecl (void)
30315 {
30316 arm_previous_fndecl = NULL_TREE;
30317 }
30318
30319 /* Establish appropriate back-end context for processing the function
30320 FNDECL. The argument might be NULL to indicate processing at top
30321 level, outside of any function scope. */
30322
30323 static void
30324 arm_set_current_function (tree fndecl)
30325 {
30326 if (!fndecl || fndecl == arm_previous_fndecl)
30327 return;
30328
30329 tree old_tree = (arm_previous_fndecl
30330 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30331 : NULL_TREE);
30332
30333 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30334
30335 /* If current function has no attributes but previous one did,
30336 use the default node. */
30337 if (! new_tree && old_tree)
30338 new_tree = target_option_default_node;
30339
30340 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30341 the default have been handled by save_restore_target_globals from
30342 arm_pragma_target_parse. */
30343 if (old_tree == new_tree)
30344 return;
30345
30346 arm_previous_fndecl = fndecl;
30347
30348 /* First set the target options. */
30349 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30350
30351 save_restore_target_globals (new_tree);
30352 }
30353
30354 /* Implement TARGET_OPTION_PRINT. */
30355
30356 static void
30357 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30358 {
30359 int flags = ptr->x_target_flags;
30360 const char *fpu_name;
30361
30362 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30363 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30364
30365 fprintf (file, "%*sselected arch %s\n", indent, "",
30366 TARGET_THUMB2_P (flags) ? "thumb2" :
30367 TARGET_THUMB_P (flags) ? "thumb1" :
30368 "arm");
30369
30370 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30371 }
30372
30373 /* Hook to determine if one function can safely inline another. */
30374
30375 static bool
30376 arm_can_inline_p (tree caller, tree callee)
30377 {
30378 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30379 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30380 bool can_inline = true;
30381
30382 struct cl_target_option *caller_opts
30383 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30384 : target_option_default_node);
30385
30386 struct cl_target_option *callee_opts
30387 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30388 : target_option_default_node);
30389
30390 if (callee_opts == caller_opts)
30391 return true;
30392
30393 /* Callee's ISA features should be a subset of the caller's. */
30394 struct arm_build_target caller_target;
30395 struct arm_build_target callee_target;
30396 caller_target.isa = sbitmap_alloc (isa_num_bits);
30397 callee_target.isa = sbitmap_alloc (isa_num_bits);
30398
30399 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30400 false);
30401 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30402 false);
30403 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30404 can_inline = false;
30405
30406 sbitmap_free (caller_target.isa);
30407 sbitmap_free (callee_target.isa);
30408
30409 /* OK to inline between different modes.
30410 Functions with mode-specific instructions, e.g. using asm,
30411 must be explicitly protected with noinline. */
30412 return can_inline;
30413 }
30414
30415 /* Hook to fix a function's alignment when affected by the target attribute. */
30416
30417 static void
30418 arm_relayout_function (tree fndecl)
30419 {
30420 if (DECL_USER_ALIGN (fndecl))
30421 return;
30422
30423 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30424
30425 if (!callee_tree)
30426 callee_tree = target_option_default_node;
30427
30428 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30429 SET_DECL_ALIGN
30430 (fndecl,
30431 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30432 }
30433
30434 /* Inner function to process the attribute((target(...))): take an argument
30435 and set the current options from it. If we have a list, recursively
30436 go over the list. */
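/* For example (illustrative), a declaration such as
       __attribute__((target("thumb,fpu=neon"))) int f (void);
   selects Thumb code generation and the "neon" FPU for F alone, while
   target("arm") switches a single function back to ARM state.  */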
30437
30438 static bool
30439 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30440 {
30441 if (TREE_CODE (args) == TREE_LIST)
30442 {
30443 bool ret = true;
30444
30445 for (; args; args = TREE_CHAIN (args))
30446 if (TREE_VALUE (args)
30447 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30448 ret = false;
30449 return ret;
30450 }
30451
30452 else if (TREE_CODE (args) != STRING_CST)
30453 {
30454 error ("attribute %<target%> argument not a string");
30455 return false;
30456 }
30457
30458 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30459 char *q;
30460
30461 while ((q = strtok (argstr, ",")) != NULL)
30462 {
30463 while (ISSPACE (*q)) ++q;
30464
30465 argstr = NULL;
30466 if (!strncmp (q, "thumb", 5))
30467 opts->x_target_flags |= MASK_THUMB;
30468
30469 else if (!strncmp (q, "arm", 3))
30470 opts->x_target_flags &= ~MASK_THUMB;
30471
30472 else if (!strncmp (q, "fpu=", 4))
30473 {
30474 int fpu_index;
30475 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30476 &fpu_index, CL_TARGET))
30477 {
30478 error ("invalid fpu for attribute(target(\"%s\"))", q);
30479 return false;
30480 }
30481 if (fpu_index == TARGET_FPU_auto)
30482 {
30483 /* This doesn't really make sense until we support
30484 general dynamic selection of the architecture and all
30485 sub-features. */
30486 sorry ("auto fpu selection not currently permitted here");
30487 return false;
30488 }
30489 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30490 }
30491 else
30492 {
30493 error ("attribute(target(\"%s\")) is unknown", q);
30494 return false;
30495 }
30496 }
30497
30498 return true;
30499 }
30500
30501 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30502
30503 tree
30504 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30505 struct gcc_options *opts_set)
30506 {
30507 struct cl_target_option cl_opts;
30508
30509 if (!arm_valid_target_attribute_rec (args, opts))
30510 return NULL_TREE;
30511
30512 cl_target_option_save (&cl_opts, opts);
30513 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30514 arm_option_check_internal (opts);
30515 /* Do any overrides, such as global options arch=xxx. */
30516 arm_option_override_internal (opts, opts_set);
30517
30518 return build_target_option_node (opts);
30519 }
30520
30521 static void
30522 add_attribute (const char * mode, tree *attributes)
30523 {
30524 size_t len = strlen (mode);
30525 tree value = build_string (len, mode);
30526
30527 TREE_TYPE (value) = build_array_type (char_type_node,
30528 build_index_type (size_int (len)));
30529
30530 *attributes = tree_cons (get_identifier ("target"),
30531 build_tree_list (NULL_TREE, value),
30532 *attributes);
30533 }
30534
30535 /* For testing. Insert thumb or arm modes alternately on functions. */
30536
30537 static void
30538 arm_insert_attributes (tree fndecl, tree * attributes)
30539 {
30540 const char *mode;
30541
30542 if (! TARGET_FLIP_THUMB)
30543 return;
30544
30545 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30546 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30547 return;
30548
30549 /* Nested definitions must inherit mode. */
30550 if (current_function_decl)
30551 {
30552 mode = TARGET_THUMB ? "thumb" : "arm";
30553 add_attribute (mode, attributes);
30554 return;
30555 }
30556
30557 /* If there is already a setting don't change it. */
30558 if (lookup_attribute ("target", *attributes) != NULL)
30559 return;
30560
30561 mode = thumb_flipper ? "thumb" : "arm";
30562 add_attribute (mode, attributes);
30563
30564 thumb_flipper = !thumb_flipper;
30565 }
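
/* Editor's note -- illustrative sketch of the behaviour above: with the
   (testing-only) flip-thumb mode enabled, successive top-level function
   definitions are compiled alternately as if they carried
   __attribute__ ((target ("thumb"))) and __attribute__ ((target ("arm"))),
   while nested functions inherit the enclosing mode and functions that
   already carry a "target" attribute are left alone.  */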
30566
30567 /* Hook to validate attribute((target("string"))). */
30568
30569 static bool
30570 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30571 tree args, int ARG_UNUSED (flags))
30572 {
30573 bool ret = true;
30574 struct gcc_options func_options;
30575 tree cur_tree, new_optimize;
30576 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30577
30578 /* Get the optimization options of the current function. */
30579 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30580
30581 /* If the function changed the optimization levels as well as setting target
30582 options, start with the optimizations specified. */
30583 if (!func_optimize)
30584 func_optimize = optimization_default_node;
30585
30586 /* Init func_options. */
30587 memset (&func_options, 0, sizeof (func_options));
30588 init_options_struct (&func_options, NULL);
30589 lang_hooks.init_options_struct (&func_options);
30590
30591 /* Initialize func_options to the defaults. */
30592 cl_optimization_restore (&func_options,
30593 TREE_OPTIMIZATION (func_optimize));
30594
30595 cl_target_option_restore (&func_options,
30596 TREE_TARGET_OPTION (target_option_default_node));
30597
30598 /* Set func_options flags with new target mode. */
30599 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30600 &global_options_set);
30601
30602 if (cur_tree == NULL_TREE)
30603 ret = false;
30604
30605 new_optimize = build_optimization_node (&func_options);
30606
30607 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30608
30609 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30610
30611 finalize_options_struct (&func_options);
30612
30613 return ret;
30614 }
30615
30616 /* Match an ISA feature bitmap to a named FPU. We always use the
30617 first entry that exactly matches the feature set, so that we
30618 effectively canonicalize the FPU name for the assembler. */
30619 static const char*
30620 arm_identify_fpu_from_isa (sbitmap isa)
30621 {
30622 auto_sbitmap fpubits (isa_num_bits);
30623 auto_sbitmap cand_fpubits (isa_num_bits);
30624
30625 bitmap_and (fpubits, isa, isa_all_fpubits);
30626
30627 /* If there are no ISA feature bits relating to the FPU, we must be
30628 doing soft-float. */
30629 if (bitmap_empty_p (fpubits))
30630 return "softvfp";
30631
30632 for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
30633 {
30634 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30635 if (bitmap_equal_p (fpubits, cand_fpubits))
30636 return all_fpus[i].name;
30637 }
30638 /* We must find an entry, or things have gone wrong. */
30639 gcc_unreachable ();
30640 }
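
/* Editor's note -- illustrative: if the FPU-related bits in ISA match,
   say, the entry that -mfpu=vfpv3-d16 would select, the loop above
   returns that entry's canonical name ("vfpv3-d16"), or the name of an
   earlier all_fpus entry with an identical feature set.  An empty FPU
   feature set always yields "softvfp".  The FPU named here is just an
   example of a table entry.  */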
30641
30642 void
30643 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30644 {
30645
30646 fprintf (stream, "\t.syntax unified\n");
30647
30648 if (TARGET_THUMB)
30649 {
30650 if (is_called_in_ARM_mode (decl)
30651 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30652 && cfun->is_thunk))
30653 fprintf (stream, "\t.code 32\n");
30654 else if (TARGET_THUMB1)
30655 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30656 else
30657 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30658 }
30659 else
30660 fprintf (stream, "\t.arm\n");
30661
30662 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30663 (TARGET_SOFT_FLOAT
30664 ? "softvfp"
30665 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30666
30667 if (TARGET_POKE_FUNCTION_NAME)
30668 arm_poke_function_name (stream, (const char *) name);
30669 }
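
/* Editor's note -- illustrative output only: for a Thumb-2 function
   compiled for a VFPv4 unit the code above emits something like

       .syntax unified
       .thumb
       .thumb_func
       .fpu vfpv4

   whereas a soft-float build emits ".fpu softvfp"; the exact .fpu
   operand comes from arm_identify_fpu_from_isa.  */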
30670
30671 /* If MEM is in the form [base+offset], extract the two parts of the
30672 address and store them in BASE and OFFSET; otherwise return false
30673 after clearing BASE and OFFSET. */
30674
30675 static bool
30676 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30677 {
30678 rtx addr;
30679
30680 gcc_assert (MEM_P (mem));
30681
30682 addr = XEXP (mem, 0);
30683
30684 /* Strip off const from addresses like (const (addr)). */
30685 if (GET_CODE (addr) == CONST)
30686 addr = XEXP (addr, 0);
30687
30688 if (GET_CODE (addr) == REG)
30689 {
30690 *base = addr;
30691 *offset = const0_rtx;
30692 return true;
30693 }
30694
30695 if (GET_CODE (addr) == PLUS
30696 && GET_CODE (XEXP (addr, 0)) == REG
30697 && CONST_INT_P (XEXP (addr, 1)))
30698 {
30699 *base = XEXP (addr, 0);
30700 *offset = XEXP (addr, 1);
30701 return true;
30702 }
30703
30704 *base = NULL_RTX;
30705 *offset = NULL_RTX;
30706
30707 return false;
30708 }
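
/* Editor's note -- illustrative RTL: the helper above accepts addresses
   such as

     (reg:SI 3)                           base = r3, offset = 0
     (plus:SI (reg:SI 3) (const_int 8))   base = r3, offset = 8

   and rejects anything else, e.g. register+register or auto-modify
   addresses.  */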
30709
30710 /* If INSN is a load or store of an address in the form [base+offset],
30711 extract the two parts and store them in BASE and OFFSET. IS_LOAD is
30712 set to TRUE if it is a load. Return TRUE if INSN is such an
30713 instruction, otherwise return FALSE. */
30714
30715 static bool
30716 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30717 {
30718 rtx x, dest, src;
30719
30720 gcc_assert (INSN_P (insn));
30721 x = PATTERN (insn);
30722 if (GET_CODE (x) != SET)
30723 return false;
30724
30725 src = SET_SRC (x);
30726 dest = SET_DEST (x);
30727 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30728 {
30729 *is_load = false;
30730 extract_base_offset_in_addr (dest, base, offset);
30731 }
30732 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30733 {
30734 *is_load = true;
30735 extract_base_offset_in_addr (src, base, offset);
30736 }
30737 else
30738 return false;
30739
30740 return (*base != NULL_RTX && *offset != NULL_RTX);
30741 }
30742
30743 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30744
30745 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30746 and PRI are only calculated for these instructions. For other instructions,
30747 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
30748 of instruction fusion can be supported by returning different priorities.
30749
30750 It's important that irrelevant instructions get the largest FUSION_PRI. */
30751
30752 static void
30753 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30754 int *fusion_pri, int *pri)
30755 {
30756 int tmp, off_val;
30757 bool is_load;
30758 rtx base, offset;
30759
30760 gcc_assert (INSN_P (insn));
30761
30762 tmp = max_pri - 1;
30763 if (!fusion_load_store (insn, &base, &offset, &is_load))
30764 {
30765 *pri = tmp;
30766 *fusion_pri = tmp;
30767 return;
30768 }
30769
30770 /* Load goes first. */
30771 if (is_load)
30772 *fusion_pri = tmp - 1;
30773 else
30774 *fusion_pri = tmp - 2;
30775
30776 tmp /= 2;
30777
30778 /* INSN with smaller base register goes first. */
30779 tmp -= ((REGNO (base) & 0xff) << 20);
30780
30781 /* INSN with smaller offset goes first. */
30782 off_val = (int)(INTVAL (offset));
30783 if (off_val >= 0)
30784 tmp -= (off_val & 0xfffff);
30785 else
30786 tmp += ((- off_val) & 0xfffff);
30787
30788 *pri = tmp;
30789 return;
30790 }
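
/* Editor's note -- worked example with hypothetical values: for a load
   from [r1, #4] the code above computes

     fusion_pri = (max_pri - 1) - 1
     pri        = (max_pri - 1) / 2 - (1 << 20) - 4

   (REGNO (r1) == 1, offset == 4), so loads and stores that share a base
   register receive nearby priorities and are further ordered by their
   offsets, which is what the fusion pass relies on.  */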
30791
30792
30793 /* Construct and return a PARALLEL RTX vector with elements numbering the
30794 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30795 the vector - from the perspective of the architecture. This does not
30796 line up with GCC's perspective on lane numbers, so we end up with
30797 different masks depending on our target endianness. The diagram
30798 below may help. We must draw the distinction when building masks
30799 which select one half of the vector. An instruction selecting
30800 architectural low-lanes for a big-endian target must be described using
30801 a mask selecting GCC high-lanes.
30802
30803 Big-Endian Little-Endian
30804
30805 GCC 0 1 2 3 3 2 1 0
30806 | x | x | x | x | | x | x | x | x |
30807 Architecture 3 2 1 0 3 2 1 0
30808
30809 Low Mask: { 2, 3 } { 0, 1 }
30810 High Mask: { 0, 1 } { 2, 3 }
30811 */
30812
30813 rtx
30814 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30815 {
30816 int nunits = GET_MODE_NUNITS (mode);
30817 rtvec v = rtvec_alloc (nunits / 2);
30818 int high_base = nunits / 2;
30819 int low_base = 0;
30820 int base;
30821 rtx t1;
30822 int i;
30823
30824 if (BYTES_BIG_ENDIAN)
30825 base = high ? low_base : high_base;
30826 else
30827 base = high ? high_base : low_base;
30828
30829 for (i = 0; i < nunits / 2; i++)
30830 RTVEC_ELT (v, i) = GEN_INT (base + i);
30831
30832 t1 = gen_rtx_PARALLEL (mode, v);
30833 return t1;
30834 }
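
/* Editor's note -- illustrative: for V4SImode with HIGH == true the
   function above returns

     (parallel [(const_int 2) (const_int 3)])   on little-endian
     (parallel [(const_int 0) (const_int 1)])   on big-endian

   matching the Low/High mask table in the comment above.  */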
30835
30836 /* Check OP for validity as a PARALLEL RTX vector with elements
30837 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30838 from the perspective of the architecture. See the diagram above
30839 arm_simd_vect_par_cnst_half_p for more details. */
30840
30841 bool
30842 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30843 bool high)
30844 {
30845 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30846 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30847 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30848 int i = 0;
30849
30850 if (!VECTOR_MODE_P (mode))
30851 return false;
30852
30853 if (count_op != count_ideal)
30854 return false;
30855
30856 for (i = 0; i < count_ideal; i++)
30857 {
30858 rtx elt_op = XVECEXP (op, 0, i);
30859 rtx elt_ideal = XVECEXP (ideal, 0, i);
30860
30861 if (!CONST_INT_P (elt_op)
30862 || INTVAL (elt_ideal) != INTVAL (elt_op))
30863 return false;
30864 }
30865 return true;
30866 }
30867
30868 /* We can output an mi_thunk for all cases except for a non-zero
30869 vcall_offset in Thumb1. */
30870 static bool
30871 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30872 const_tree)
30873 {
30874 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30875 if (vcall_offset && TARGET_THUMB1)
30876 return false;
30877
30878 /* Otherwise ok. */
30879 return true;
30880 }
30881
30882 /* Generate RTL for a conditional branch with rtx comparison CODE in
30883 mode CC_MODE. The destination of the unlikely conditional branch
30884 is LABEL_REF. */
30885
30886 void
30887 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30888 rtx label_ref)
30889 {
30890 rtx x;
30891 x = gen_rtx_fmt_ee (code, VOIDmode,
30892 gen_rtx_REG (cc_mode, CC_REGNUM),
30893 const0_rtx);
30894
30895 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30896 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30897 pc_rtx);
30898 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30899 }
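
/* Editor's note -- illustrative RTL: for CODE == NE the sequence above
   emits roughly

     (set (pc) (if_then_else (ne (reg CC_REGNUM) (const_int 0))
                             (label_ref LABEL_REF) (pc)))

   and emit_unlikely_jump marks the jump as very unlikely to be taken.  */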
30900
30901 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30902
30903 For pure-code sections there is no letter code for this attribute, so
30904 output all the section flags numerically when this is needed. */
30905
30906 static bool
30907 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30908 {
30909
30910 if (flags & SECTION_ARM_PURECODE)
30911 {
30912 *num = 0x20000000;
30913
30914 if (!(flags & SECTION_DEBUG))
30915 *num |= 0x2;
30916 if (flags & SECTION_EXCLUDE)
30917 *num |= 0x80000000;
30918 if (flags & SECTION_WRITE)
30919 *num |= 0x1;
30920 if (flags & SECTION_CODE)
30921 *num |= 0x4;
30922 if (flags & SECTION_MERGE)
30923 *num |= 0x10;
30924 if (flags & SECTION_STRINGS)
30925 *num |= 0x20;
30926 if (flags & SECTION_TLS)
30927 *num |= 0x400;
30928 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
30929 *num |= 0x200;
30930
30931 return true;
30932 }
30933
30934 return false;
30935 }
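
/* Editor's note -- worked example: a pure-code section holding code and
   not marked as a debug section yields

     0x20000000 | 0x2 | 0x4 == 0x20000006

   i.e. SHF_ARM_PURECODE | SHF_ALLOC | SHF_EXECINSTR, emitted as a
   numeric flags operand of the .section directive.  */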
30936
30937 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30938
30939 If pure-code is passed as an option, make sure all functions are in
30940 sections that have the SHF_ARM_PURECODE attribute. */
30941
30942 static section *
30943 arm_function_section (tree decl, enum node_frequency freq,
30944 bool startup, bool exit)
30945 {
30946 const char * section_name;
30947 section * sec;
30948
30949 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
30950 return default_function_section (decl, freq, startup, exit);
30951
30952 if (!target_pure_code)
30953 return default_function_section (decl, freq, startup, exit);
30954
30955
30956 section_name = DECL_SECTION_NAME (decl);
30957
30958 /* If a function is not in a named section then it falls under the 'default'
30959 text section, also known as '.text'. We can preserve previous behavior as
30960 the default text section already has the SHF_ARM_PURECODE section
30961 attribute. */
30962 if (!section_name)
30963 {
30964 section *default_sec = default_function_section (decl, freq, startup,
30965 exit);
30966
30967 /* If default_sec is not null, then it must be a special section like for
30968 example .text.startup. We set the pure-code attribute and return the
30969 same section to preserve existing behavior. */
30970 if (default_sec)
30971 default_sec->common.flags |= SECTION_ARM_PURECODE;
30972 return default_sec;
30973 }
30974
30975 /* Otherwise look whether a section has already been created with
30976 'section_name'. */
30977 sec = get_named_section (decl, section_name, 0);
30978 if (!sec)
30979 /* If that is not the case passing NULL as the section's name to
30980 'get_named_section' will create a section with the declaration's
30981 section name. */
30982 sec = get_named_section (decl, NULL, 0);
30983
30984 /* Set the SHF_ARM_PURECODE attribute. */
30985 sec->common.flags |= SECTION_ARM_PURECODE;
30986
30987 return sec;
30988 }
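
/* Editor's note -- illustrative, section name is made up: with
   -mpure-code a function placed in a named section, e.g.

     void f (void) __attribute__ ((section (".ramfuncs")));

   still has SECTION_ARM_PURECODE set on that section by the code above,
   so it is emitted with the SHF_ARM_PURECODE flag just like the default
   text section.  */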
30989
30990 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
30991
30992 If DECL is a function declaration and pure-code is passed as an option
30993 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
30994 section's name and RELOC indicates whether the declaration's initializer may
30995 contain runtime relocations. */
30996
30997 static unsigned int
30998 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
30999 {
31000 unsigned int flags = default_section_type_flags (decl, name, reloc);
31001
31002 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31003 flags |= SECTION_ARM_PURECODE;
31004
31005 return flags;
31006 }
31007
31008 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31009
31010 static void
31011 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31012 rtx op0, rtx op1,
31013 rtx *quot_p, rtx *rem_p)
31014 {
31015 if (mode == SImode)
31016 gcc_assert (!TARGET_IDIV);
31017
31018 machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
31019 MODE_INT);
31020
31021 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31022 libval_mode, 2,
31023 op0, GET_MODE (op0),
31024 op1, GET_MODE (op1));
31025
31026 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31027 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31028 GET_MODE_SIZE (mode));
31029
31030 gcc_assert (quotient);
31031 gcc_assert (remainder);
31032
31033 *quot_p = quotient;
31034 *rem_p = remainder;
31035 }
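
/* Editor's note -- illustrative: for SImode operands LIBVAL_MODE is
   DImode and LIBFUNC is an AEABI helper such as __aeabi_idivmod or
   __aeabi_uidivmod, which return the quotient and remainder pair in
   {r0, r1}; the two simplify_gen_subreg calls above pick those halves
   out of the combined return value.  */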
31036
31037 /* This function checks for the availability of the coprocessor builtin passed
31038 in BUILTIN for the current target. Returns true if it is available and
31039 false otherwise. If a BUILTIN is passed for which this function has not
31040 been implemented it will trigger an internal compiler error (gcc_unreachable). */
31041
31042 bool
31043 arm_coproc_builtin_available (enum unspecv builtin)
31044 {
31045 /* None of these builtins are available in Thumb mode if the target only
31046 supports Thumb-1. */
31047 if (TARGET_THUMB1)
31048 return false;
31049
31050 switch (builtin)
31051 {
31052 case VUNSPEC_CDP:
31053 case VUNSPEC_LDC:
31054 case VUNSPEC_LDCL:
31055 case VUNSPEC_STC:
31056 case VUNSPEC_STCL:
31057 case VUNSPEC_MCR:
31058 case VUNSPEC_MRC:
31059 if (arm_arch4)
31060 return true;
31061 break;
31062 case VUNSPEC_CDP2:
31063 case VUNSPEC_LDC2:
31064 case VUNSPEC_LDC2L:
31065 case VUNSPEC_STC2:
31066 case VUNSPEC_STC2L:
31067 case VUNSPEC_MCR2:
31068 case VUNSPEC_MRC2:
31069 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31070 ARMv8-{A,M}. */
31071 if (arm_arch5)
31072 return true;
31073 break;
31074 case VUNSPEC_MCRR:
31075 case VUNSPEC_MRRC:
31076 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31077 ARMv8-{A,M}. */
31078 if (arm_arch6 || arm_arch5te)
31079 return true;
31080 break;
31081 case VUNSPEC_MCRR2:
31082 case VUNSPEC_MRRC2:
31083 if (arm_arch6)
31084 return true;
31085 break;
31086 default:
31087 gcc_unreachable ();
31088 }
31089 return false;
31090 }
31091
31092 /* This function returns true if OP is a valid memory operand for the ldc and
31093 stc coprocessor instructions and false otherwise. */
31094
31095 bool
31096 arm_coproc_ldc_stc_legitimate_address (rtx op)
31097 {
31098 HOST_WIDE_INT range;
31099 /* Has to be a memory operand. */
31100 if (!MEM_P (op))
31101 return false;
31102
31103 op = XEXP (op, 0);
31104
31105 /* We accept registers. */
31106 if (REG_P (op))
31107 return true;
31108
31109 switch (GET_CODE (op))
31110 {
31111 case PLUS:
31112 {
31113 /* Or registers with an offset. */
31114 if (!REG_P (XEXP (op, 0)))
31115 return false;
31116
31117 op = XEXP (op, 1);
31118
31119 /* The offset must be an immediate though. */
31120 if (!CONST_INT_P (op))
31121 return false;
31122
31123 range = INTVAL (op);
31124
31125 /* Within the range of [-1020,1020]. */
31126 if (!IN_RANGE (range, -1020, 1020))
31127 return false;
31128
31129 /* And a multiple of 4. */
31130 return (range % 4) == 0;
31131 }
31132 case PRE_INC:
31133 case POST_INC:
31134 case PRE_DEC:
31135 case POST_DEC:
31136 return REG_P (XEXP (op, 0));
31137 default:
31138 gcc_unreachable ();
31139 }
31140 return false;
31141 }
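
/* Editor's note -- illustrative: the predicate above accepts memory
   operands such as

     (mem (reg r4))
     (mem (plus (reg r4) (const_int -1020)))
     (mem (post_inc (reg r4)))

   but rejects (mem (plus (reg r4) (const_int 2))) because the offset is
   not a multiple of 4.  */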
31142 #include "gt-arm.h"