gcc/config/arm/arm.c (thirdparty/gcc.git, at commit "[arm] PR target/88469 fix incorrect argument passing with 64-bit bitfields")
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
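/* A group of four integer values, used by optimal_immediate_sequence
   below to return a sequence of up to four immediate operands.  */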
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
191 tree, bool);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 const_tree type,
286 int misalignment,
287 bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 const vec_perm_indices &);
299
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 \f
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
343 NULL },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
346 NULL },
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
356
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
360 */
361 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
362 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute, NULL },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
367 NULL },
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute, NULL },
372 #endif
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry, NULL },
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call, NULL },
378 { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
380 \f
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
481 #endif
482
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
525
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
605
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631
632 #endif /* ARM_UNWIND_INFO */
633
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659
660 /* The minimum is set such that the total size of the block
661 for a particular anchor is -4088 + 1 + 4095 bytes, which is
662 divisible by eight, ensuring natural spacing of anchors. */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
665
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
672
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
676
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
697
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
728
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
735
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
739
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
743
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
756
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759
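/* The maximum number of insns that may be placed in an IT block:
   one when arm_restrict_it is in effect, otherwise four.  */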
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
762
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
794
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 \f
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack;
814 static char * minipool_startobj;
815
816 /* The maximum number of insns skipped which
817 will be conditionalised if possible. */
818 static int max_insns_skipped = 5;
819
820 extern FILE * asm_out_file;
821
822 /* True if we are currently building a constant table. */
823 int making_const_table;
824
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827
828 /* The current tuning set. */
829 const struct tune_params *current_tune;
830
831 /* Which floating point hardware to schedule for. */
832 int arm_fpu_attr;
833
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837
838 /* The bits in this mask specify which instruction scheduling options should
839 be used. */
840 unsigned int tune_flags = 0;
841
842 /* The highest ARM architecture version supported by the
843 target. */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845
846 /* Active target architecture and tuning. */
847
848 struct arm_build_target arm_active_target;
849
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
852
853 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
854 int arm_arch4 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
857 int arm_arch4t = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
860 int arm_arch5t = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
863 int arm_arch5te = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
866 int arm_arch6 = 0;
867
868 /* Nonzero if this chip supports the ARM 6K extensions. */
869 int arm_arch6k = 0;
870
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
872 int arm_arch6kz = 0;
873
874 /* Nonzero if instructions present in ARMv6-M can be used. */
875 int arm_arch6m = 0;
876
877 /* Nonzero if this chip supports the ARM 7 extensions. */
878 int arm_arch7 = 0;
879
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae = 0;
882
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm = 0;
885
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
887 int arm_arch7em = 0;
888
889 /* Nonzero if instructions present in ARMv8 can be used. */
890 int arm_arch8 = 0;
891
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
893 int arm_arch8_1 = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
896 int arm_arch8_2 = 0;
897
898 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
899 int arm_arch8_3 = 0;
900
901 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
902 int arm_arch8_4 = 0;
903
904 /* Nonzero if this chip supports the FP16 instructions extension of ARM
905 Architecture 8.2. */
906 int arm_fp16_inst = 0;
907
908 /* Nonzero if this chip can benefit from load scheduling. */
909 int arm_ld_sched = 0;
910
911 /* Nonzero if this chip is a StrongARM. */
912 int arm_tune_strongarm = 0;
913
914 /* Nonzero if this chip supports Intel Wireless MMX technology. */
915 int arm_arch_iwmmxt = 0;
916
917 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
918 int arm_arch_iwmmxt2 = 0;
919
920 /* Nonzero if this chip is an XScale. */
921 int arm_arch_xscale = 0;
922
923 /* Nonzero if tuning for XScale */
924 int arm_tune_xscale = 0;
925
926 /* Nonzero if we want to tune for stores that access the write-buffer.
927 This typically means an ARM6 or ARM7 with MMU or MPU. */
928 int arm_tune_wbuf = 0;
929
930 /* Nonzero if tuning for Cortex-A9. */
931 int arm_tune_cortex_a9 = 0;
932
933 /* Nonzero if we should define __THUMB_INTERWORK__ in the
934 preprocessor.
935 XXX This is a bit of a hack, it's intended to help work around
936 problems in GLD which doesn't understand that armv5t code is
937 interworking clean. */
938 int arm_cpp_interwork = 0;
939
940 /* Nonzero if chip supports Thumb 1. */
941 int arm_arch_thumb1;
942
943 /* Nonzero if chip supports Thumb 2. */
944 int arm_arch_thumb2;
945
946 /* Nonzero if chip supports integer division instruction. */
947 int arm_arch_arm_hwdiv;
948 int arm_arch_thumb_hwdiv;
949
950 /* Nonzero if chip disallows volatile memory access in IT block. */
951 int arm_arch_no_volatile_ce;
952
953 /* Nonzero if we should use Neon to handle 64-bits operations rather
954 than core registers. */
955 int prefer_neon_for_64bits = 0;
956
957 /* Nonzero if we shouldn't use literal pools. */
958 bool arm_disable_literal_pool = false;
959
960 /* The register number to be used for the PIC offset register. */
961 unsigned arm_pic_register = INVALID_REGNUM;
962
963 enum arm_pcs arm_pcs_default;
964
965 /* For an explanation of these variables, see final_prescan_insn below. */
966 int arm_ccfsm_state;
967 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
968 enum arm_cond_code arm_current_cc;
969
970 rtx arm_target_insn;
971 int arm_target_label;
972 /* The number of conditionally executed insns, including the current insn. */
973 int arm_condexec_count = 0;
974 /* A bitmask specifying the patterns for the IT block.
975 Zero means do not output an IT block before this insn. */
976 int arm_condexec_mask = 0;
977 /* The number of bits used in arm_condexec_mask. */
978 int arm_condexec_masklen = 0;
979
980 /* Nonzero if chip supports the ARMv8 CRC instructions. */
981 int arm_arch_crc = 0;
982
983 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
984 int arm_arch_dotprod = 0;
985
986 /* Nonzero if chip supports the ARMv8-M security extensions. */
987 int arm_arch_cmse = 0;
988
989 /* Nonzero if the core has a very small, high-latency, multiply unit. */
990 int arm_m_profile_small_mul = 0;
991
992 /* The condition codes of the ARM, and the inverse function. */
993 static const char * const arm_condition_codes[] =
994 {
995 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
996 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
997 };
998
999 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1000 int arm_regs_in_sequence[] =
1001 {
1002 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1003 };
1004
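/* The assembler name used for a logical shift left, and a shorthand
   test for string equality.  */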
1005 #define ARM_LSL_NAME "lsl"
1006 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1007
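/* The low registers (r0-r7) available as work registers in Thumb-2
   code, excluding the hard frame pointer, the stack pointer, the
   program counter and the PIC offset table register.  */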
1008 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1009 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1010 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1011 \f
1012 /* Initialization code. */
1013
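/* Tuning information for one CPU: the scheduling model to use, the
   tuning flags and the tuning parameters.  */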
1014 struct cpu_tune
1015 {
1016 enum processor_type scheduler;
1017 unsigned int tune_flags;
1018 const struct tune_params *tune;
1019 };
1020
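/* Initializers for the prefetch tuning fields: either prefetching is
   not beneficial, or it is described by the number of prefetch slots
   together with the L1 cache size and L1 cache line size.  */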
1021 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1022 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1023 { \
1024 num_slots, \
1025 l1_size, \
1026 l1_line_size \
1027 }
1028
1029 /* arm generic vectorizer costs. */
1030 static const
1031 struct cpu_vec_costs arm_default_vec_cost = {
1032 1, /* scalar_stmt_cost. */
1033 1, /* scalar load_cost. */
1034 1, /* scalar_store_cost. */
1035 1, /* vec_stmt_cost. */
1036 1, /* vec_to_scalar_cost. */
1037 1, /* scalar_to_vec_cost. */
1038 1, /* vec_align_load_cost. */
1039 1, /* vec_unalign_load_cost. */
1040 1, /* vec_unalign_store_cost. */
1041 1, /* vec_store_cost. */
1042 3, /* cond_taken_branch_cost. */
1043 1, /* cond_not_taken_branch_cost. */
1044 };
1045
1046 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1047 #include "aarch-cost-tables.h"
1048
1049
1050
1051 const struct cpu_cost_table cortexa9_extra_costs =
1052 {
1053 /* ALU */
1054 {
1055 0, /* arith. */
1056 0, /* logical. */
1057 0, /* shift. */
1058 COSTS_N_INSNS (1), /* shift_reg. */
1059 COSTS_N_INSNS (1), /* arith_shift. */
1060 COSTS_N_INSNS (2), /* arith_shift_reg. */
1061 0, /* log_shift. */
1062 COSTS_N_INSNS (1), /* log_shift_reg. */
1063 COSTS_N_INSNS (1), /* extend. */
1064 COSTS_N_INSNS (2), /* extend_arith. */
1065 COSTS_N_INSNS (1), /* bfi. */
1066 COSTS_N_INSNS (1), /* bfx. */
1067 0, /* clz. */
1068 0, /* rev. */
1069 0, /* non_exec. */
1070 true /* non_exec_costs_exec. */
1071 },
1072 {
1073 /* MULT SImode */
1074 {
1075 COSTS_N_INSNS (3), /* simple. */
1076 COSTS_N_INSNS (3), /* flag_setting. */
1077 COSTS_N_INSNS (2), /* extend. */
1078 COSTS_N_INSNS (3), /* add. */
1079 COSTS_N_INSNS (2), /* extend_add. */
1080 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1081 },
1082 /* MULT DImode */
1083 {
1084 0, /* simple (N/A). */
1085 0, /* flag_setting (N/A). */
1086 COSTS_N_INSNS (4), /* extend. */
1087 0, /* add (N/A). */
1088 COSTS_N_INSNS (4), /* extend_add. */
1089 0 /* idiv (N/A). */
1090 }
1091 },
1092 /* LD/ST */
1093 {
1094 COSTS_N_INSNS (2), /* load. */
1095 COSTS_N_INSNS (2), /* load_sign_extend. */
1096 COSTS_N_INSNS (2), /* ldrd. */
1097 COSTS_N_INSNS (2), /* ldm_1st. */
1098 1, /* ldm_regs_per_insn_1st. */
1099 2, /* ldm_regs_per_insn_subsequent. */
1100 COSTS_N_INSNS (5), /* loadf. */
1101 COSTS_N_INSNS (5), /* loadd. */
1102 COSTS_N_INSNS (1), /* load_unaligned. */
1103 COSTS_N_INSNS (2), /* store. */
1104 COSTS_N_INSNS (2), /* strd. */
1105 COSTS_N_INSNS (2), /* stm_1st. */
1106 1, /* stm_regs_per_insn_1st. */
1107 2, /* stm_regs_per_insn_subsequent. */
1108 COSTS_N_INSNS (1), /* storef. */
1109 COSTS_N_INSNS (1), /* stored. */
1110 COSTS_N_INSNS (1), /* store_unaligned. */
1111 COSTS_N_INSNS (1), /* loadv. */
1112 COSTS_N_INSNS (1) /* storev. */
1113 },
1114 {
1115 /* FP SFmode */
1116 {
1117 COSTS_N_INSNS (14), /* div. */
1118 COSTS_N_INSNS (4), /* mult. */
1119 COSTS_N_INSNS (7), /* mult_addsub. */
1120 COSTS_N_INSNS (30), /* fma. */
1121 COSTS_N_INSNS (3), /* addsub. */
1122 COSTS_N_INSNS (1), /* fpconst. */
1123 COSTS_N_INSNS (1), /* neg. */
1124 COSTS_N_INSNS (3), /* compare. */
1125 COSTS_N_INSNS (3), /* widen. */
1126 COSTS_N_INSNS (3), /* narrow. */
1127 COSTS_N_INSNS (3), /* toint. */
1128 COSTS_N_INSNS (3), /* fromint. */
1129 COSTS_N_INSNS (3) /* roundint. */
1130 },
1131 /* FP DFmode */
1132 {
1133 COSTS_N_INSNS (24), /* div. */
1134 COSTS_N_INSNS (5), /* mult. */
1135 COSTS_N_INSNS (8), /* mult_addsub. */
1136 COSTS_N_INSNS (30), /* fma. */
1137 COSTS_N_INSNS (3), /* addsub. */
1138 COSTS_N_INSNS (1), /* fpconst. */
1139 COSTS_N_INSNS (1), /* neg. */
1140 COSTS_N_INSNS (3), /* compare. */
1141 COSTS_N_INSNS (3), /* widen. */
1142 COSTS_N_INSNS (3), /* narrow. */
1143 COSTS_N_INSNS (3), /* toint. */
1144 COSTS_N_INSNS (3), /* fromint. */
1145 COSTS_N_INSNS (3) /* roundint. */
1146 }
1147 },
1148 /* Vector */
1149 {
1150 COSTS_N_INSNS (1) /* alu. */
1151 }
1152 };
1153
1154 const struct cpu_cost_table cortexa8_extra_costs =
1155 {
1156 /* ALU */
1157 {
1158 0, /* arith. */
1159 0, /* logical. */
1160 COSTS_N_INSNS (1), /* shift. */
1161 0, /* shift_reg. */
1162 COSTS_N_INSNS (1), /* arith_shift. */
1163 0, /* arith_shift_reg. */
1164 COSTS_N_INSNS (1), /* log_shift. */
1165 0, /* log_shift_reg. */
1166 0, /* extend. */
1167 0, /* extend_arith. */
1168 0, /* bfi. */
1169 0, /* bfx. */
1170 0, /* clz. */
1171 0, /* rev. */
1172 0, /* non_exec. */
1173 true /* non_exec_costs_exec. */
1174 },
1175 {
1176 /* MULT SImode */
1177 {
1178 COSTS_N_INSNS (1), /* simple. */
1179 COSTS_N_INSNS (1), /* flag_setting. */
1180 COSTS_N_INSNS (1), /* extend. */
1181 COSTS_N_INSNS (1), /* add. */
1182 COSTS_N_INSNS (1), /* extend_add. */
1183 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1184 },
1185 /* MULT DImode */
1186 {
1187 0, /* simple (N/A). */
1188 0, /* flag_setting (N/A). */
1189 COSTS_N_INSNS (2), /* extend. */
1190 0, /* add (N/A). */
1191 COSTS_N_INSNS (2), /* extend_add. */
1192 0 /* idiv (N/A). */
1193 }
1194 },
1195 /* LD/ST */
1196 {
1197 COSTS_N_INSNS (1), /* load. */
1198 COSTS_N_INSNS (1), /* load_sign_extend. */
1199 COSTS_N_INSNS (1), /* ldrd. */
1200 COSTS_N_INSNS (1), /* ldm_1st. */
1201 1, /* ldm_regs_per_insn_1st. */
1202 2, /* ldm_regs_per_insn_subsequent. */
1203 COSTS_N_INSNS (1), /* loadf. */
1204 COSTS_N_INSNS (1), /* loadd. */
1205 COSTS_N_INSNS (1), /* load_unaligned. */
1206 COSTS_N_INSNS (1), /* store. */
1207 COSTS_N_INSNS (1), /* strd. */
1208 COSTS_N_INSNS (1), /* stm_1st. */
1209 1, /* stm_regs_per_insn_1st. */
1210 2, /* stm_regs_per_insn_subsequent. */
1211 COSTS_N_INSNS (1), /* storef. */
1212 COSTS_N_INSNS (1), /* stored. */
1213 COSTS_N_INSNS (1), /* store_unaligned. */
1214 COSTS_N_INSNS (1), /* loadv. */
1215 COSTS_N_INSNS (1) /* storev. */
1216 },
1217 {
1218 /* FP SFmode */
1219 {
1220 COSTS_N_INSNS (36), /* div. */
1221 COSTS_N_INSNS (11), /* mult. */
1222 COSTS_N_INSNS (20), /* mult_addsub. */
1223 COSTS_N_INSNS (30), /* fma. */
1224 COSTS_N_INSNS (9), /* addsub. */
1225 COSTS_N_INSNS (3), /* fpconst. */
1226 COSTS_N_INSNS (3), /* neg. */
1227 COSTS_N_INSNS (6), /* compare. */
1228 COSTS_N_INSNS (4), /* widen. */
1229 COSTS_N_INSNS (4), /* narrow. */
1230 COSTS_N_INSNS (8), /* toint. */
1231 COSTS_N_INSNS (8), /* fromint. */
1232 COSTS_N_INSNS (8) /* roundint. */
1233 },
1234 /* FP DFmode */
1235 {
1236 COSTS_N_INSNS (64), /* div. */
1237 COSTS_N_INSNS (16), /* mult. */
1238 COSTS_N_INSNS (25), /* mult_addsub. */
1239 COSTS_N_INSNS (30), /* fma. */
1240 COSTS_N_INSNS (9), /* addsub. */
1241 COSTS_N_INSNS (3), /* fpconst. */
1242 COSTS_N_INSNS (3), /* neg. */
1243 COSTS_N_INSNS (6), /* compare. */
1244 COSTS_N_INSNS (6), /* widen. */
1245 COSTS_N_INSNS (6), /* narrow. */
1246 COSTS_N_INSNS (8), /* toint. */
1247 COSTS_N_INSNS (8), /* fromint. */
1248 COSTS_N_INSNS (8) /* roundint. */
1249 }
1250 },
1251 /* Vector */
1252 {
1253 COSTS_N_INSNS (1) /* alu. */
1254 }
1255 };
1256
1257 const struct cpu_cost_table cortexa5_extra_costs =
1258 {
1259 /* ALU */
1260 {
1261 0, /* arith. */
1262 0, /* logical. */
1263 COSTS_N_INSNS (1), /* shift. */
1264 COSTS_N_INSNS (1), /* shift_reg. */
1265 COSTS_N_INSNS (1), /* arith_shift. */
1266 COSTS_N_INSNS (1), /* arith_shift_reg. */
1267 COSTS_N_INSNS (1), /* log_shift. */
1268 COSTS_N_INSNS (1), /* log_shift_reg. */
1269 COSTS_N_INSNS (1), /* extend. */
1270 COSTS_N_INSNS (1), /* extend_arith. */
1271 COSTS_N_INSNS (1), /* bfi. */
1272 COSTS_N_INSNS (1), /* bfx. */
1273 COSTS_N_INSNS (1), /* clz. */
1274 COSTS_N_INSNS (1), /* rev. */
1275 0, /* non_exec. */
1276 true /* non_exec_costs_exec. */
1277 },
1278
1279 {
1280 /* MULT SImode */
1281 {
1282 0, /* simple. */
1283 COSTS_N_INSNS (1), /* flag_setting. */
1284 COSTS_N_INSNS (1), /* extend. */
1285 COSTS_N_INSNS (1), /* add. */
1286 COSTS_N_INSNS (1), /* extend_add. */
1287 COSTS_N_INSNS (7) /* idiv. */
1288 },
1289 /* MULT DImode */
1290 {
1291 0, /* simple (N/A). */
1292 0, /* flag_setting (N/A). */
1293 COSTS_N_INSNS (1), /* extend. */
1294 0, /* add. */
1295 COSTS_N_INSNS (2), /* extend_add. */
1296 0 /* idiv (N/A). */
1297 }
1298 },
1299 /* LD/ST */
1300 {
1301 COSTS_N_INSNS (1), /* load. */
1302 COSTS_N_INSNS (1), /* load_sign_extend. */
1303 COSTS_N_INSNS (6), /* ldrd. */
1304 COSTS_N_INSNS (1), /* ldm_1st. */
1305 1, /* ldm_regs_per_insn_1st. */
1306 2, /* ldm_regs_per_insn_subsequent. */
1307 COSTS_N_INSNS (2), /* loadf. */
1308 COSTS_N_INSNS (4), /* loadd. */
1309 COSTS_N_INSNS (1), /* load_unaligned. */
1310 COSTS_N_INSNS (1), /* store. */
1311 COSTS_N_INSNS (3), /* strd. */
1312 COSTS_N_INSNS (1), /* stm_1st. */
1313 1, /* stm_regs_per_insn_1st. */
1314 2, /* stm_regs_per_insn_subsequent. */
1315 COSTS_N_INSNS (2), /* storef. */
1316 COSTS_N_INSNS (2), /* stored. */
1317 COSTS_N_INSNS (1), /* store_unaligned. */
1318 COSTS_N_INSNS (1), /* loadv. */
1319 COSTS_N_INSNS (1) /* storev. */
1320 },
1321 {
1322 /* FP SFmode */
1323 {
1324 COSTS_N_INSNS (15), /* div. */
1325 COSTS_N_INSNS (3), /* mult. */
1326 COSTS_N_INSNS (7), /* mult_addsub. */
1327 COSTS_N_INSNS (7), /* fma. */
1328 COSTS_N_INSNS (3), /* addsub. */
1329 COSTS_N_INSNS (3), /* fpconst. */
1330 COSTS_N_INSNS (3), /* neg. */
1331 COSTS_N_INSNS (3), /* compare. */
1332 COSTS_N_INSNS (3), /* widen. */
1333 COSTS_N_INSNS (3), /* narrow. */
1334 COSTS_N_INSNS (3), /* toint. */
1335 COSTS_N_INSNS (3), /* fromint. */
1336 COSTS_N_INSNS (3) /* roundint. */
1337 },
1338 /* FP DFmode */
1339 {
1340 COSTS_N_INSNS (30), /* div. */
1341 COSTS_N_INSNS (6), /* mult. */
1342 COSTS_N_INSNS (10), /* mult_addsub. */
1343 COSTS_N_INSNS (7), /* fma. */
1344 COSTS_N_INSNS (3), /* addsub. */
1345 COSTS_N_INSNS (3), /* fpconst. */
1346 COSTS_N_INSNS (3), /* neg. */
1347 COSTS_N_INSNS (3), /* compare. */
1348 COSTS_N_INSNS (3), /* widen. */
1349 COSTS_N_INSNS (3), /* narrow. */
1350 COSTS_N_INSNS (3), /* toint. */
1351 COSTS_N_INSNS (3), /* fromint. */
1352 COSTS_N_INSNS (3) /* roundint. */
1353 }
1354 },
1355 /* Vector */
1356 {
1357 COSTS_N_INSNS (1) /* alu. */
1358 }
1359 };
1360
1361
1362 const struct cpu_cost_table cortexa7_extra_costs =
1363 {
1364 /* ALU */
1365 {
1366 0, /* arith. */
1367 0, /* logical. */
1368 COSTS_N_INSNS (1), /* shift. */
1369 COSTS_N_INSNS (1), /* shift_reg. */
1370 COSTS_N_INSNS (1), /* arith_shift. */
1371 COSTS_N_INSNS (1), /* arith_shift_reg. */
1372 COSTS_N_INSNS (1), /* log_shift. */
1373 COSTS_N_INSNS (1), /* log_shift_reg. */
1374 COSTS_N_INSNS (1), /* extend. */
1375 COSTS_N_INSNS (1), /* extend_arith. */
1376 COSTS_N_INSNS (1), /* bfi. */
1377 COSTS_N_INSNS (1), /* bfx. */
1378 COSTS_N_INSNS (1), /* clz. */
1379 COSTS_N_INSNS (1), /* rev. */
1380 0, /* non_exec. */
1381 true /* non_exec_costs_exec. */
1382 },
1383
1384 {
1385 /* MULT SImode */
1386 {
1387 0, /* simple. */
1388 COSTS_N_INSNS (1), /* flag_setting. */
1389 COSTS_N_INSNS (1), /* extend. */
1390 COSTS_N_INSNS (1), /* add. */
1391 COSTS_N_INSNS (1), /* extend_add. */
1392 COSTS_N_INSNS (7) /* idiv. */
1393 },
1394 /* MULT DImode */
1395 {
1396 0, /* simple (N/A). */
1397 0, /* flag_setting (N/A). */
1398 COSTS_N_INSNS (1), /* extend. */
1399 0, /* add. */
1400 COSTS_N_INSNS (2), /* extend_add. */
1401 0 /* idiv (N/A). */
1402 }
1403 },
1404 /* LD/ST */
1405 {
1406 COSTS_N_INSNS (1), /* load. */
1407 COSTS_N_INSNS (1), /* load_sign_extend. */
1408 COSTS_N_INSNS (3), /* ldrd. */
1409 COSTS_N_INSNS (1), /* ldm_1st. */
1410 1, /* ldm_regs_per_insn_1st. */
1411 2, /* ldm_regs_per_insn_subsequent. */
1412 COSTS_N_INSNS (2), /* loadf. */
1413 COSTS_N_INSNS (2), /* loadd. */
1414 COSTS_N_INSNS (1), /* load_unaligned. */
1415 COSTS_N_INSNS (1), /* store. */
1416 COSTS_N_INSNS (3), /* strd. */
1417 COSTS_N_INSNS (1), /* stm_1st. */
1418 1, /* stm_regs_per_insn_1st. */
1419 2, /* stm_regs_per_insn_subsequent. */
1420 COSTS_N_INSNS (2), /* storef. */
1421 COSTS_N_INSNS (2), /* stored. */
1422 COSTS_N_INSNS (1), /* store_unaligned. */
1423 COSTS_N_INSNS (1), /* loadv. */
1424 COSTS_N_INSNS (1) /* storev. */
1425 },
1426 {
1427 /* FP SFmode */
1428 {
1429 COSTS_N_INSNS (15), /* div. */
1430 COSTS_N_INSNS (3), /* mult. */
1431 COSTS_N_INSNS (7), /* mult_addsub. */
1432 COSTS_N_INSNS (7), /* fma. */
1433 COSTS_N_INSNS (3), /* addsub. */
1434 COSTS_N_INSNS (3), /* fpconst. */
1435 COSTS_N_INSNS (3), /* neg. */
1436 COSTS_N_INSNS (3), /* compare. */
1437 COSTS_N_INSNS (3), /* widen. */
1438 COSTS_N_INSNS (3), /* narrow. */
1439 COSTS_N_INSNS (3), /* toint. */
1440 COSTS_N_INSNS (3), /* fromint. */
1441 COSTS_N_INSNS (3) /* roundint. */
1442 },
1443 /* FP DFmode */
1444 {
1445 COSTS_N_INSNS (30), /* div. */
1446 COSTS_N_INSNS (6), /* mult. */
1447 COSTS_N_INSNS (10), /* mult_addsub. */
1448 COSTS_N_INSNS (7), /* fma. */
1449 COSTS_N_INSNS (3), /* addsub. */
1450 COSTS_N_INSNS (3), /* fpconst. */
1451 COSTS_N_INSNS (3), /* neg. */
1452 COSTS_N_INSNS (3), /* compare. */
1453 COSTS_N_INSNS (3), /* widen. */
1454 COSTS_N_INSNS (3), /* narrow. */
1455 COSTS_N_INSNS (3), /* toint. */
1456 COSTS_N_INSNS (3), /* fromint. */
1457 COSTS_N_INSNS (3) /* roundint. */
1458 }
1459 },
1460 /* Vector */
1461 {
1462 COSTS_N_INSNS (1) /* alu. */
1463 }
1464 };
1465
1466 const struct cpu_cost_table cortexa12_extra_costs =
1467 {
1468 /* ALU */
1469 {
1470 0, /* arith. */
1471 0, /* logical. */
1472 0, /* shift. */
1473 COSTS_N_INSNS (1), /* shift_reg. */
1474 COSTS_N_INSNS (1), /* arith_shift. */
1475 COSTS_N_INSNS (1), /* arith_shift_reg. */
1476 COSTS_N_INSNS (1), /* log_shift. */
1477 COSTS_N_INSNS (1), /* log_shift_reg. */
1478 0, /* extend. */
1479 COSTS_N_INSNS (1), /* extend_arith. */
1480 0, /* bfi. */
1481 COSTS_N_INSNS (1), /* bfx. */
1482 COSTS_N_INSNS (1), /* clz. */
1483 COSTS_N_INSNS (1), /* rev. */
1484 0, /* non_exec. */
1485 true /* non_exec_costs_exec. */
1486 },
1487 /* MULT SImode */
1488 {
1489 {
1490 COSTS_N_INSNS (2), /* simple. */
1491 COSTS_N_INSNS (3), /* flag_setting. */
1492 COSTS_N_INSNS (2), /* extend. */
1493 COSTS_N_INSNS (3), /* add. */
1494 COSTS_N_INSNS (2), /* extend_add. */
1495 COSTS_N_INSNS (18) /* idiv. */
1496 },
1497 /* MULT DImode */
1498 {
1499 0, /* simple (N/A). */
1500 0, /* flag_setting (N/A). */
1501 COSTS_N_INSNS (3), /* extend. */
1502 0, /* add (N/A). */
1503 COSTS_N_INSNS (3), /* extend_add. */
1504 0 /* idiv (N/A). */
1505 }
1506 },
1507 /* LD/ST */
1508 {
1509 COSTS_N_INSNS (3), /* load. */
1510 COSTS_N_INSNS (3), /* load_sign_extend. */
1511 COSTS_N_INSNS (3), /* ldrd. */
1512 COSTS_N_INSNS (3), /* ldm_1st. */
1513 1, /* ldm_regs_per_insn_1st. */
1514 2, /* ldm_regs_per_insn_subsequent. */
1515 COSTS_N_INSNS (3), /* loadf. */
1516 COSTS_N_INSNS (3), /* loadd. */
1517 0, /* load_unaligned. */
1518 0, /* store. */
1519 0, /* strd. */
1520 0, /* stm_1st. */
1521 1, /* stm_regs_per_insn_1st. */
1522 2, /* stm_regs_per_insn_subsequent. */
1523 COSTS_N_INSNS (2), /* storef. */
1524 COSTS_N_INSNS (2), /* stored. */
1525 0, /* store_unaligned. */
1526 COSTS_N_INSNS (1), /* loadv. */
1527 COSTS_N_INSNS (1) /* storev. */
1528 },
1529 {
1530 /* FP SFmode */
1531 {
1532 COSTS_N_INSNS (17), /* div. */
1533 COSTS_N_INSNS (4), /* mult. */
1534 COSTS_N_INSNS (8), /* mult_addsub. */
1535 COSTS_N_INSNS (8), /* fma. */
1536 COSTS_N_INSNS (4), /* addsub. */
1537 COSTS_N_INSNS (2), /* fpconst. */
1538 COSTS_N_INSNS (2), /* neg. */
1539 COSTS_N_INSNS (2), /* compare. */
1540 COSTS_N_INSNS (4), /* widen. */
1541 COSTS_N_INSNS (4), /* narrow. */
1542 COSTS_N_INSNS (4), /* toint. */
1543 COSTS_N_INSNS (4), /* fromint. */
1544 COSTS_N_INSNS (4) /* roundint. */
1545 },
1546 /* FP DFmode */
1547 {
1548 COSTS_N_INSNS (31), /* div. */
1549 COSTS_N_INSNS (4), /* mult. */
1550 COSTS_N_INSNS (8), /* mult_addsub. */
1551 COSTS_N_INSNS (8), /* fma. */
1552 COSTS_N_INSNS (4), /* addsub. */
1553 COSTS_N_INSNS (2), /* fpconst. */
1554 COSTS_N_INSNS (2), /* neg. */
1555 COSTS_N_INSNS (2), /* compare. */
1556 COSTS_N_INSNS (4), /* widen. */
1557 COSTS_N_INSNS (4), /* narrow. */
1558 COSTS_N_INSNS (4), /* toint. */
1559 COSTS_N_INSNS (4), /* fromint. */
1560 COSTS_N_INSNS (4) /* roundint. */
1561 }
1562 },
1563 /* Vector */
1564 {
1565 COSTS_N_INSNS (1) /* alu. */
1566 }
1567 };
1568
1569 const struct cpu_cost_table cortexa15_extra_costs =
1570 {
1571 /* ALU */
1572 {
1573 0, /* arith. */
1574 0, /* logical. */
1575 0, /* shift. */
1576 0, /* shift_reg. */
1577 COSTS_N_INSNS (1), /* arith_shift. */
1578 COSTS_N_INSNS (1), /* arith_shift_reg. */
1579 COSTS_N_INSNS (1), /* log_shift. */
1580 COSTS_N_INSNS (1), /* log_shift_reg. */
1581 0, /* extend. */
1582 COSTS_N_INSNS (1), /* extend_arith. */
1583 COSTS_N_INSNS (1), /* bfi. */
1584 0, /* bfx. */
1585 0, /* clz. */
1586 0, /* rev. */
1587 0, /* non_exec. */
1588 true /* non_exec_costs_exec. */
1589 },
1590 /* MULT SImode */
1591 {
1592 {
1593 COSTS_N_INSNS (2), /* simple. */
1594 COSTS_N_INSNS (3), /* flag_setting. */
1595 COSTS_N_INSNS (2), /* extend. */
1596 COSTS_N_INSNS (2), /* add. */
1597 COSTS_N_INSNS (2), /* extend_add. */
1598 COSTS_N_INSNS (18) /* idiv. */
1599 },
1600 /* MULT DImode */
1601 {
1602 0, /* simple (N/A). */
1603 0, /* flag_setting (N/A). */
1604 COSTS_N_INSNS (3), /* extend. */
1605 0, /* add (N/A). */
1606 COSTS_N_INSNS (3), /* extend_add. */
1607 0 /* idiv (N/A). */
1608 }
1609 },
1610 /* LD/ST */
1611 {
1612 COSTS_N_INSNS (3), /* load. */
1613 COSTS_N_INSNS (3), /* load_sign_extend. */
1614 COSTS_N_INSNS (3), /* ldrd. */
1615 COSTS_N_INSNS (4), /* ldm_1st. */
1616 1, /* ldm_regs_per_insn_1st. */
1617 2, /* ldm_regs_per_insn_subsequent. */
1618 COSTS_N_INSNS (4), /* loadf. */
1619 COSTS_N_INSNS (4), /* loadd. */
1620 0, /* load_unaligned. */
1621 0, /* store. */
1622 0, /* strd. */
1623 COSTS_N_INSNS (1), /* stm_1st. */
1624 1, /* stm_regs_per_insn_1st. */
1625 2, /* stm_regs_per_insn_subsequent. */
1626 0, /* storef. */
1627 0, /* stored. */
1628 0, /* store_unaligned. */
1629 COSTS_N_INSNS (1), /* loadv. */
1630 COSTS_N_INSNS (1) /* storev. */
1631 },
1632 {
1633 /* FP SFmode */
1634 {
1635 COSTS_N_INSNS (17), /* div. */
1636 COSTS_N_INSNS (4), /* mult. */
1637 COSTS_N_INSNS (8), /* mult_addsub. */
1638 COSTS_N_INSNS (8), /* fma. */
1639 COSTS_N_INSNS (4), /* addsub. */
1640 COSTS_N_INSNS (2), /* fpconst. */
1641 COSTS_N_INSNS (2), /* neg. */
1642 COSTS_N_INSNS (5), /* compare. */
1643 COSTS_N_INSNS (4), /* widen. */
1644 COSTS_N_INSNS (4), /* narrow. */
1645 COSTS_N_INSNS (4), /* toint. */
1646 COSTS_N_INSNS (4), /* fromint. */
1647 COSTS_N_INSNS (4) /* roundint. */
1648 },
1649 /* FP DFmode */
1650 {
1651 COSTS_N_INSNS (31), /* div. */
1652 COSTS_N_INSNS (4), /* mult. */
1653 COSTS_N_INSNS (8), /* mult_addsub. */
1654 COSTS_N_INSNS (8), /* fma. */
1655 COSTS_N_INSNS (4), /* addsub. */
1656 COSTS_N_INSNS (2), /* fpconst. */
1657 COSTS_N_INSNS (2), /* neg. */
1658 COSTS_N_INSNS (2), /* compare. */
1659 COSTS_N_INSNS (4), /* widen. */
1660 COSTS_N_INSNS (4), /* narrow. */
1661 COSTS_N_INSNS (4), /* toint. */
1662 COSTS_N_INSNS (4), /* fromint. */
1663 COSTS_N_INSNS (4) /* roundint. */
1664 }
1665 },
1666 /* Vector */
1667 {
1668 COSTS_N_INSNS (1) /* alu. */
1669 }
1670 };
1671
1672 const struct cpu_cost_table v7m_extra_costs =
1673 {
1674 /* ALU */
1675 {
1676 0, /* arith. */
1677 0, /* logical. */
1678 0, /* shift. */
1679 0, /* shift_reg. */
1680 0, /* arith_shift. */
1681 COSTS_N_INSNS (1), /* arith_shift_reg. */
1682 0, /* log_shift. */
1683 COSTS_N_INSNS (1), /* log_shift_reg. */
1684 0, /* extend. */
1685 COSTS_N_INSNS (1), /* extend_arith. */
1686 0, /* bfi. */
1687 0, /* bfx. */
1688 0, /* clz. */
1689 0, /* rev. */
1690 COSTS_N_INSNS (1), /* non_exec. */
1691 false /* non_exec_costs_exec. */
1692 },
1693 {
1694 /* MULT SImode */
1695 {
1696 COSTS_N_INSNS (1), /* simple. */
1697 COSTS_N_INSNS (1), /* flag_setting. */
1698 COSTS_N_INSNS (2), /* extend. */
1699 COSTS_N_INSNS (1), /* add. */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 COSTS_N_INSNS (8) /* idiv. */
1702 },
1703 /* MULT DImode */
1704 {
1705 0, /* simple (N/A). */
1706 0, /* flag_setting (N/A). */
1707 COSTS_N_INSNS (2), /* extend. */
1708 0, /* add (N/A). */
1709 COSTS_N_INSNS (3), /* extend_add. */
1710 0 /* idiv (N/A). */
1711 }
1712 },
1713 /* LD/ST */
1714 {
1715 COSTS_N_INSNS (2), /* load. */
1716 0, /* load_sign_extend. */
1717 COSTS_N_INSNS (3), /* ldrd. */
1718 COSTS_N_INSNS (2), /* ldm_1st. */
1719 1, /* ldm_regs_per_insn_1st. */
1720 1, /* ldm_regs_per_insn_subsequent. */
1721 COSTS_N_INSNS (2), /* loadf. */
1722 COSTS_N_INSNS (3), /* loadd. */
1723 COSTS_N_INSNS (1), /* load_unaligned. */
1724 COSTS_N_INSNS (2), /* store. */
1725 COSTS_N_INSNS (3), /* strd. */
1726 COSTS_N_INSNS (2), /* stm_1st. */
1727 1, /* stm_regs_per_insn_1st. */
1728 1, /* stm_regs_per_insn_subsequent. */
1729 COSTS_N_INSNS (2), /* storef. */
1730 COSTS_N_INSNS (3), /* stored. */
1731 COSTS_N_INSNS (1), /* store_unaligned. */
1732 COSTS_N_INSNS (1), /* loadv. */
1733 COSTS_N_INSNS (1) /* storev. */
1734 },
1735 {
1736 /* FP SFmode */
1737 {
1738 COSTS_N_INSNS (7), /* div. */
1739 COSTS_N_INSNS (2), /* mult. */
1740 COSTS_N_INSNS (5), /* mult_addsub. */
1741 COSTS_N_INSNS (3), /* fma. */
1742 COSTS_N_INSNS (1), /* addsub. */
1743 0, /* fpconst. */
1744 0, /* neg. */
1745 0, /* compare. */
1746 0, /* widen. */
1747 0, /* narrow. */
1748 0, /* toint. */
1749 0, /* fromint. */
1750 0 /* roundint. */
1751 },
1752 /* FP DFmode */
1753 {
1754 COSTS_N_INSNS (15), /* div. */
1755 COSTS_N_INSNS (5), /* mult. */
1756 COSTS_N_INSNS (7), /* mult_addsub. */
1757 COSTS_N_INSNS (7), /* fma. */
1758 COSTS_N_INSNS (3), /* addsub. */
1759 0, /* fpconst. */
1760 0, /* neg. */
1761 0, /* compare. */
1762 0, /* widen. */
1763 0, /* narrow. */
1764 0, /* toint. */
1765 0, /* fromint. */
1766 0 /* roundint. */
1767 }
1768 },
1769 /* Vector */
1770 {
1771 COSTS_N_INSNS (1) /* alu. */
1772 }
1773 };
1774
1775 const struct addr_mode_cost_table generic_addr_mode_costs =
1776 {
1777 /* int. */
1778 {
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1782 },
1783 /* float. */
1784 {
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1788 },
1789 /* vector. */
1790 {
1791 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1792 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1793 COSTS_N_INSNS (0) /* AMO_WB. */
1794 }
1795 };
1796
1797 const struct tune_params arm_slowmul_tune =
1798 {
1799 &generic_extra_costs, /* Insn extra costs. */
1800 &generic_addr_mode_costs, /* Addressing mode costs. */
1801 NULL, /* Sched adj cost. */
1802 arm_default_branch_cost,
1803 &arm_default_vec_cost,
1804 3, /* Constant limit. */
1805 5, /* Max cond insns. */
1806 8, /* Memset max inline. */
1807 1, /* Issue rate. */
1808 ARM_PREFETCH_NOT_BENEFICIAL,
1809 tune_params::PREF_CONST_POOL_TRUE,
1810 tune_params::PREF_LDRD_FALSE,
1811 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1812 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1813 tune_params::DISPARAGE_FLAGS_NEITHER,
1814 tune_params::PREF_NEON_64_FALSE,
1815 tune_params::PREF_NEON_STRINGOPS_FALSE,
1816 tune_params::FUSE_NOTHING,
1817 tune_params::SCHED_AUTOPREF_OFF
1818 };
1819
1820 const struct tune_params arm_fastmul_tune =
1821 {
1822 &generic_extra_costs, /* Insn extra costs. */
1823 &generic_addr_mode_costs, /* Addressing mode costs. */
1824 NULL, /* Sched adj cost. */
1825 arm_default_branch_cost,
1826 &arm_default_vec_cost,
1827 1, /* Constant limit. */
1828 5, /* Max cond insns. */
1829 8, /* Memset max inline. */
1830 1, /* Issue rate. */
1831 ARM_PREFETCH_NOT_BENEFICIAL,
1832 tune_params::PREF_CONST_POOL_TRUE,
1833 tune_params::PREF_LDRD_FALSE,
1834 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1835 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1836 tune_params::DISPARAGE_FLAGS_NEITHER,
1837 tune_params::PREF_NEON_64_FALSE,
1838 tune_params::PREF_NEON_STRINGOPS_FALSE,
1839 tune_params::FUSE_NOTHING,
1840 tune_params::SCHED_AUTOPREF_OFF
1841 };
1842
1843 /* StrongARM has early execution of branches, so a sequence that is worth
1844 skipping is shorter. Set max_insns_skipped to a lower value. */
1845
1846 const struct tune_params arm_strongarm_tune =
1847 {
1848 &generic_extra_costs, /* Insn extra costs. */
1849 &generic_addr_mode_costs, /* Addressing mode costs. */
1850 NULL, /* Sched adj cost. */
1851 arm_default_branch_cost,
1852 &arm_default_vec_cost,
1853 1, /* Constant limit. */
1854 3, /* Max cond insns. */
1855 8, /* Memset max inline. */
1856 1, /* Issue rate. */
1857 ARM_PREFETCH_NOT_BENEFICIAL,
1858 tune_params::PREF_CONST_POOL_TRUE,
1859 tune_params::PREF_LDRD_FALSE,
1860 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1862 tune_params::DISPARAGE_FLAGS_NEITHER,
1863 tune_params::PREF_NEON_64_FALSE,
1864 tune_params::PREF_NEON_STRINGOPS_FALSE,
1865 tune_params::FUSE_NOTHING,
1866 tune_params::SCHED_AUTOPREF_OFF
1867 };
1868
1869 const struct tune_params arm_xscale_tune =
1870 {
1871 &generic_extra_costs, /* Insn extra costs. */
1872 &generic_addr_mode_costs, /* Addressing mode costs. */
1873 xscale_sched_adjust_cost,
1874 arm_default_branch_cost,
1875 &arm_default_vec_cost,
1876 2, /* Constant limit. */
1877 3, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 1, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL,
1881 tune_params::PREF_CONST_POOL_TRUE,
1882 tune_params::PREF_LDRD_FALSE,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER,
1886 tune_params::PREF_NEON_64_FALSE,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE,
1888 tune_params::FUSE_NOTHING,
1889 tune_params::SCHED_AUTOPREF_OFF
1890 };
1891
1892 const struct tune_params arm_9e_tune =
1893 {
1894 &generic_extra_costs, /* Insn extra costs. */
1895 &generic_addr_mode_costs, /* Addressing mode costs. */
1896 NULL, /* Sched adj cost. */
1897 arm_default_branch_cost,
1898 &arm_default_vec_cost,
1899 1, /* Constant limit. */
1900 5, /* Max cond insns. */
1901 8, /* Memset max inline. */
1902 1, /* Issue rate. */
1903 ARM_PREFETCH_NOT_BENEFICIAL,
1904 tune_params::PREF_CONST_POOL_TRUE,
1905 tune_params::PREF_LDRD_FALSE,
1906 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1908 tune_params::DISPARAGE_FLAGS_NEITHER,
1909 tune_params::PREF_NEON_64_FALSE,
1910 tune_params::PREF_NEON_STRINGOPS_FALSE,
1911 tune_params::FUSE_NOTHING,
1912 tune_params::SCHED_AUTOPREF_OFF
1913 };
1914
1915 const struct tune_params arm_marvell_pj4_tune =
1916 {
1917 &generic_extra_costs, /* Insn extra costs. */
1918 &generic_addr_mode_costs, /* Addressing mode costs. */
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_TRUE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_64_FALSE,
1933 tune_params::PREF_NEON_STRINGOPS_FALSE,
1934 tune_params::FUSE_NOTHING,
1935 tune_params::SCHED_AUTOPREF_OFF
1936 };
1937
1938 const struct tune_params arm_v6t2_tune =
1939 {
1940 &generic_extra_costs, /* Insn extra costs. */
1941 &generic_addr_mode_costs, /* Addressing mode costs. */
1942 NULL, /* Sched adj cost. */
1943 arm_default_branch_cost,
1944 &arm_default_vec_cost,
1945 1, /* Constant limit. */
1946 5, /* Max cond insns. */
1947 8, /* Memset max inline. */
1948 1, /* Issue rate. */
1949 ARM_PREFETCH_NOT_BENEFICIAL,
1950 tune_params::PREF_CONST_POOL_FALSE,
1951 tune_params::PREF_LDRD_FALSE,
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1954 tune_params::DISPARAGE_FLAGS_NEITHER,
1955 tune_params::PREF_NEON_64_FALSE,
1956 tune_params::PREF_NEON_STRINGOPS_FALSE,
1957 tune_params::FUSE_NOTHING,
1958 tune_params::SCHED_AUTOPREF_OFF
1959 };
1960
1961
1962 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1963 const struct tune_params arm_cortex_tune =
1964 {
1965 &generic_extra_costs,
1966 &generic_addr_mode_costs, /* Addressing mode costs. */
1967 NULL, /* Sched adj cost. */
1968 arm_default_branch_cost,
1969 &arm_default_vec_cost,
1970 1, /* Constant limit. */
1971 5, /* Max cond insns. */
1972 8, /* Memset max inline. */
1973 2, /* Issue rate. */
1974 ARM_PREFETCH_NOT_BENEFICIAL,
1975 tune_params::PREF_CONST_POOL_FALSE,
1976 tune_params::PREF_LDRD_FALSE,
1977 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1979 tune_params::DISPARAGE_FLAGS_NEITHER,
1980 tune_params::PREF_NEON_64_FALSE,
1981 tune_params::PREF_NEON_STRINGOPS_FALSE,
1982 tune_params::FUSE_NOTHING,
1983 tune_params::SCHED_AUTOPREF_OFF
1984 };
1985
1986 const struct tune_params arm_cortex_a8_tune =
1987 {
1988 &cortexa8_extra_costs,
1989 &generic_addr_mode_costs, /* Addressing mode costs. */
1990 NULL, /* Sched adj cost. */
1991 arm_default_branch_cost,
1992 &arm_default_vec_cost,
1993 1, /* Constant limit. */
1994 5, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 2, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL,
1998 tune_params::PREF_CONST_POOL_FALSE,
1999 tune_params::PREF_LDRD_FALSE,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_NEITHER,
2003 tune_params::PREF_NEON_64_FALSE,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE,
2005 tune_params::FUSE_NOTHING,
2006 tune_params::SCHED_AUTOPREF_OFF
2007 };
2008
2009 const struct tune_params arm_cortex_a7_tune =
2010 {
2011 &cortexa7_extra_costs,
2012 &generic_addr_mode_costs, /* Addressing mode costs. */
2013 NULL, /* Sched adj cost. */
2014 arm_default_branch_cost,
2015 &arm_default_vec_cost,
2016 1, /* Constant limit. */
2017 5, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 2, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL,
2021 tune_params::PREF_CONST_POOL_FALSE,
2022 tune_params::PREF_LDRD_FALSE,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_NEITHER,
2026 tune_params::PREF_NEON_64_FALSE,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032 const struct tune_params arm_cortex_a15_tune =
2033 {
2034 &cortexa15_extra_costs,
2035 &generic_addr_mode_costs, /* Addressing mode costs. */
2036 NULL, /* Sched adj cost. */
2037 arm_default_branch_cost,
2038 &arm_default_vec_cost,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 8, /* Memset max inline. */
2042 3, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL,
2044 tune_params::PREF_CONST_POOL_FALSE,
2045 tune_params::PREF_LDRD_TRUE,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL,
2049 tune_params::PREF_NEON_64_FALSE,
2050 tune_params::PREF_NEON_STRINGOPS_TRUE,
2051 tune_params::FUSE_NOTHING,
2052 tune_params::SCHED_AUTOPREF_FULL
2053 };
2054
2055 const struct tune_params arm_cortex_a35_tune =
2056 {
2057 &cortexa53_extra_costs,
2058 &generic_addr_mode_costs, /* Addressing mode costs. */
2059 NULL, /* Sched adj cost. */
2060 arm_default_branch_cost,
2061 &arm_default_vec_cost,
2062 1, /* Constant limit. */
2063 5, /* Max cond insns. */
2064 8, /* Memset max inline. */
2065 1, /* Issue rate. */
2066 ARM_PREFETCH_NOT_BENEFICIAL,
2067 tune_params::PREF_CONST_POOL_FALSE,
2068 tune_params::PREF_LDRD_FALSE,
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2071 tune_params::DISPARAGE_FLAGS_NEITHER,
2072 tune_params::PREF_NEON_64_FALSE,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2075 tune_params::SCHED_AUTOPREF_OFF
2076 };
2077
2078 const struct tune_params arm_cortex_a53_tune =
2079 {
2080 &cortexa53_extra_costs,
2081 &generic_addr_mode_costs, /* Addressing mode costs. */
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 2, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_FALSE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_NEITHER,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2098 tune_params::SCHED_AUTOPREF_OFF
2099 };
2100
2101 const struct tune_params arm_cortex_a57_tune =
2102 {
2103 &cortexa57_extra_costs,
2104 &generic_addr_mode_costs, /* Addressing mode costs. */
2105 NULL, /* Sched adj cost. */
2106 arm_default_branch_cost,
2107 &arm_default_vec_cost,
2108 1, /* Constant limit. */
2109 2, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 3, /* Issue rate. */
2112 ARM_PREFETCH_NOT_BENEFICIAL,
2113 tune_params::PREF_CONST_POOL_FALSE,
2114 tune_params::PREF_LDRD_TRUE,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_ALL,
2118 tune_params::PREF_NEON_64_FALSE,
2119 tune_params::PREF_NEON_STRINGOPS_TRUE,
2120 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2121 tune_params::SCHED_AUTOPREF_FULL
2122 };
2123
2124 const struct tune_params arm_exynosm1_tune =
2125 {
2126 &exynosm1_extra_costs,
2127 &generic_addr_mode_costs, /* Addressing mode costs. */
2128 NULL, /* Sched adj cost. */
2129 arm_default_branch_cost,
2130 &arm_default_vec_cost,
2131 1, /* Constant limit. */
2132 2, /* Max cond insns. */
2133 8, /* Memset max inline. */
2134 3, /* Issue rate. */
2135 ARM_PREFETCH_NOT_BENEFICIAL,
2136 tune_params::PREF_CONST_POOL_FALSE,
2137 tune_params::PREF_LDRD_TRUE,
2138 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2140 tune_params::DISPARAGE_FLAGS_ALL,
2141 tune_params::PREF_NEON_64_FALSE,
2142 tune_params::PREF_NEON_STRINGOPS_TRUE,
2143 tune_params::FUSE_NOTHING,
2144 tune_params::SCHED_AUTOPREF_OFF
2145 };
2146
2147 const struct tune_params arm_xgene1_tune =
2148 {
2149 &xgene1_extra_costs,
2150 &generic_addr_mode_costs, /* Addressing mode costs. */
2151 NULL, /* Sched adj cost. */
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost,
2154 1, /* Constant limit. */
2155 2, /* Max cond insns. */
2156 32, /* Memset max inline. */
2157 4, /* Issue rate. */
2158 ARM_PREFETCH_NOT_BENEFICIAL,
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_TRUE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_ALL,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE,
2166 tune_params::FUSE_NOTHING,
2167 tune_params::SCHED_AUTOPREF_OFF
2168 };
2169
2170 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2171 less appealing. Set max_insns_skipped to a low value. */
2172
2173 const struct tune_params arm_cortex_a5_tune =
2174 {
2175 &cortexa5_extra_costs,
2176 &generic_addr_mode_costs, /* Addressing mode costs. */
2177 NULL, /* Sched adj cost. */
2178 arm_cortex_a5_branch_cost,
2179 &arm_default_vec_cost,
2180 1, /* Constant limit. */
2181 1, /* Max cond insns. */
2182 8, /* Memset max inline. */
2183 2, /* Issue rate. */
2184 ARM_PREFETCH_NOT_BENEFICIAL,
2185 tune_params::PREF_CONST_POOL_FALSE,
2186 tune_params::PREF_LDRD_FALSE,
2187 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2189 tune_params::DISPARAGE_FLAGS_NEITHER,
2190 tune_params::PREF_NEON_64_FALSE,
2191 tune_params::PREF_NEON_STRINGOPS_TRUE,
2192 tune_params::FUSE_NOTHING,
2193 tune_params::SCHED_AUTOPREF_OFF
2194 };
2195
2196 const struct tune_params arm_cortex_a9_tune =
2197 {
2198 &cortexa9_extra_costs,
2199 &generic_addr_mode_costs, /* Addressing mode costs. */
2200 cortex_a9_sched_adjust_cost,
2201 arm_default_branch_cost,
2202 &arm_default_vec_cost,
2203 1, /* Constant limit. */
2204 5, /* Max cond insns. */
2205 8, /* Memset max inline. */
2206 2, /* Issue rate. */
2207 ARM_PREFETCH_BENEFICIAL(4,32,32),
2208 tune_params::PREF_CONST_POOL_FALSE,
2209 tune_params::PREF_LDRD_FALSE,
2210 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2211 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2212 tune_params::DISPARAGE_FLAGS_NEITHER,
2213 tune_params::PREF_NEON_64_FALSE,
2214 tune_params::PREF_NEON_STRINGOPS_FALSE,
2215 tune_params::FUSE_NOTHING,
2216 tune_params::SCHED_AUTOPREF_OFF
2217 };
2218
2219 const struct tune_params arm_cortex_a12_tune =
2220 {
2221 &cortexa12_extra_costs,
2222 &generic_addr_mode_costs, /* Addressing mode costs. */
2223 NULL, /* Sched adj cost. */
2224 arm_default_branch_cost,
2225 &arm_default_vec_cost, /* Vectorizer costs. */
2226 1, /* Constant limit. */
2227 2, /* Max cond insns. */
2228 8, /* Memset max inline. */
2229 2, /* Issue rate. */
2230 ARM_PREFETCH_NOT_BENEFICIAL,
2231 tune_params::PREF_CONST_POOL_FALSE,
2232 tune_params::PREF_LDRD_TRUE,
2233 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2235 tune_params::DISPARAGE_FLAGS_ALL,
2236 tune_params::PREF_NEON_64_FALSE,
2237 tune_params::PREF_NEON_STRINGOPS_TRUE,
2238 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2239 tune_params::SCHED_AUTOPREF_OFF
2240 };
2241
2242 const struct tune_params arm_cortex_a73_tune =
2243 {
2244 &cortexa57_extra_costs,
2245 &generic_addr_mode_costs, /* Addressing mode costs. */
2246 NULL, /* Sched adj cost. */
2247 arm_default_branch_cost,
2248 &arm_default_vec_cost, /* Vectorizer costs. */
2249 1, /* Constant limit. */
2250 2, /* Max cond insns. */
2251 8, /* Memset max inline. */
2252 2, /* Issue rate. */
2253 ARM_PREFETCH_NOT_BENEFICIAL,
2254 tune_params::PREF_CONST_POOL_FALSE,
2255 tune_params::PREF_LDRD_TRUE,
2256 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2257 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2258 tune_params::DISPARAGE_FLAGS_ALL,
2259 tune_params::PREF_NEON_64_FALSE,
2260 tune_params::PREF_NEON_STRINGOPS_TRUE,
2261 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2262 tune_params::SCHED_AUTOPREF_FULL
2263 };
2264
2265 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2266 single cycle, so materializing a constant with a MOVW/MOVT pair costs two
2267 cycles. An LDR from the constant pool likewise takes two cycles to execute,
2268 but mildly increases pipelining opportunity (consecutive loads/stores can be
2269 pipelined together, saving one cycle), and may also improve icache
2270 utilisation. Hence we prefer the constant pool for such processors. */
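/* Editorial illustration (added), taking the cycle counts stated above at
   face value: to materialize 0x12345678 the compiler can emit either

       movw    r0, #0x5678        @ 1 cycle
       movt    r0, #0x1234        @ 1 cycle

   or a single literal-pool load

       ldr     r0, =0x12345678    @ 2 cycles, constant kept in the pool

   Both forms cost two cycles on such a core, so the pool form is preferred
   for the secondary benefits (pipelining of adjacent memory accesses,
   icache utilisation) described above.  */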
2271
2272 const struct tune_params arm_v7m_tune =
2273 {
2274 &v7m_extra_costs,
2275 &generic_addr_mode_costs, /* Addressing mode costs. */
2276 NULL, /* Sched adj cost. */
2277 arm_cortex_m_branch_cost,
2278 &arm_default_vec_cost,
2279 1, /* Constant limit. */
2280 2, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 1, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL,
2284 tune_params::PREF_CONST_POOL_TRUE,
2285 tune_params::PREF_LDRD_FALSE,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_NEITHER,
2289 tune_params::PREF_NEON_64_FALSE,
2290 tune_params::PREF_NEON_STRINGOPS_FALSE,
2291 tune_params::FUSE_NOTHING,
2292 tune_params::SCHED_AUTOPREF_OFF
2293 };
2294
2295 /* Cortex-M7 tuning. */
2296
2297 const struct tune_params arm_cortex_m7_tune =
2298 {
2299 &v7m_extra_costs,
2300 &generic_addr_mode_costs, /* Addressing mode costs. */
2301 NULL, /* Sched adj cost. */
2302 arm_cortex_m7_branch_cost,
2303 &arm_default_vec_cost,
2304 0, /* Constant limit. */
2305 1, /* Max cond insns. */
2306 8, /* Memset max inline. */
2307 2, /* Issue rate. */
2308 ARM_PREFETCH_NOT_BENEFICIAL,
2309 tune_params::PREF_CONST_POOL_TRUE,
2310 tune_params::PREF_LDRD_FALSE,
2311 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2312 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2313 tune_params::DISPARAGE_FLAGS_NEITHER,
2314 tune_params::PREF_NEON_64_FALSE,
2315 tune_params::PREF_NEON_STRINGOPS_FALSE,
2316 tune_params::FUSE_NOTHING,
2317 tune_params::SCHED_AUTOPREF_OFF
2318 };
2319
2320 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2321 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2322 cortex-m23. */
2323 const struct tune_params arm_v6m_tune =
2324 {
2325 &generic_extra_costs, /* Insn extra costs. */
2326 &generic_addr_mode_costs, /* Addressing mode costs. */
2327 NULL, /* Sched adj cost. */
2328 arm_default_branch_cost,
2329 &arm_default_vec_cost, /* Vectorizer costs. */
2330 1, /* Constant limit. */
2331 5, /* Max cond insns. */
2332 8, /* Memset max inline. */
2333 1, /* Issue rate. */
2334 ARM_PREFETCH_NOT_BENEFICIAL,
2335 tune_params::PREF_CONST_POOL_FALSE,
2336 tune_params::PREF_LDRD_FALSE,
2337 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2338 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2339 tune_params::DISPARAGE_FLAGS_NEITHER,
2340 tune_params::PREF_NEON_64_FALSE,
2341 tune_params::PREF_NEON_STRINGOPS_FALSE,
2342 tune_params::FUSE_NOTHING,
2343 tune_params::SCHED_AUTOPREF_OFF
2344 };
2345
2346 const struct tune_params arm_fa726te_tune =
2347 {
2348 &generic_extra_costs, /* Insn extra costs. */
2349 &generic_addr_mode_costs, /* Addressing mode costs. */
2350 fa726te_sched_adjust_cost,
2351 arm_default_branch_cost,
2352 &arm_default_vec_cost,
2353 1, /* Constant limit. */
2354 5, /* Max cond insns. */
2355 8, /* Memset max inline. */
2356 2, /* Issue rate. */
2357 ARM_PREFETCH_NOT_BENEFICIAL,
2358 tune_params::PREF_CONST_POOL_TRUE,
2359 tune_params::PREF_LDRD_FALSE,
2360 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2362 tune_params::DISPARAGE_FLAGS_NEITHER,
2363 tune_params::PREF_NEON_64_FALSE,
2364 tune_params::PREF_NEON_STRINGOPS_FALSE,
2365 tune_params::FUSE_NOTHING,
2366 tune_params::SCHED_AUTOPREF_OFF
2367 };
2368
2369 /* Auto-generated CPU, FPU and architecture tables. */
2370 #include "arm-cpu-data.h"
2371
2372 /* The name of the preprocessor macro to define for this architecture. PROFILE
2373 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2374 is thus chosen to be big enough to hold the longest architecture name. */
2375
2376 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2377
2378 /* Supported TLS relocations. */
2379
2380 enum tls_reloc {
2381 TLS_GD32,
2382 TLS_LDM32,
2383 TLS_LDO32,
2384 TLS_IE32,
2385 TLS_LE32,
2386 TLS_DESCSEQ /* GNU scheme */
2387 };
2388
2389 /* The maximum number of insns to be used when loading a constant. */
2390 inline static int
2391 arm_constant_limit (bool size_p)
2392 {
2393 return size_p ? 1 : current_tune->constant_limit;
2394 }
2395
2396 /* Emit an insn that's a simple single-set. Both the operands must be known
2397 to be valid. */
2398 inline static rtx_insn *
2399 emit_set_insn (rtx x, rtx y)
2400 {
2401 return emit_insn (gen_rtx_SET (x, y));
2402 }
2403
2404 /* Return the number of bits set in VALUE. */
2405 static unsigned
2406 bit_count (unsigned long value)
2407 {
2408 unsigned long count = 0;
2409
2410 while (value)
2411 {
2412 count++;
2413 value &= value - 1; /* Clear the least-significant set bit. */
2414 }
2415
2416 return count;
2417 }
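/* Editorial example (added): the loop in bit_count clears one set bit per
   iteration (Kernighan's trick).  For VALUE == 0x29 (binary 101001):
     0x29 & 0x28 -> 0x28
     0x28 & 0x27 -> 0x20
     0x20 & 0x1f -> 0x00
   so bit_count (0x29) returns 3.  */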
2418
2419 /* Return the number of bits set in BMAP. */
2420 static unsigned
2421 bitmap_popcount (const sbitmap bmap)
2422 {
2423 unsigned int count = 0;
2424 unsigned int n = 0;
2425 sbitmap_iterator sbi;
2426
2427 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2428 count++;
2429 return count;
2430 }
2431
2432 typedef struct
2433 {
2434 machine_mode mode;
2435 const char *name;
2436 } arm_fixed_mode_set;
2437
2438 /* A small helper for setting the fixed-point libfuncs. */
2439
2440 static void
2441 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2442 const char *funcname, const char *modename,
2443 int num_suffix)
2444 {
2445 char buffer[50];
2446
2447 if (num_suffix == 0)
2448 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2449 else
2450 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2451
2452 set_optab_libfunc (optable, mode, buffer);
2453 }
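/* Editorial example (added): with the arguments used later in
   arm_init_libfuncs,
     arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3);
   registers "__gnu_addqq3", and
     arm_set_fixed_optab_libfunc (neg_optab, E_QQmode, "neg", "qq", 2);
   registers "__gnu_negqq2"; a NUM_SUFFIX of 0 simply omits the digit.  */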
2454
2455 static void
2456 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2457 machine_mode from, const char *funcname,
2458 const char *toname, const char *fromname)
2459 {
2460 char buffer[50];
2461 const char *maybe_suffix_2 = "";
2462
2463 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2464 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2465 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2466 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2467 maybe_suffix_2 = "2";
2468
2469 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2470 maybe_suffix_2);
2471
2472 set_conv_libfunc (optable, to, from, buffer);
2473 }
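/* Editorial example (added): for a call such as
     arm_set_fixed_conv_libfunc (fract_optab, E_SQmode, E_HQmode,
                                 "fract", "sq", "hq");
   both modes are signed fractional modes, so the fixed-bit.h rule above
   appends the "2" suffix and the libfunc is named "__gnu_fracthqsq2" (note
   that FROM precedes TO in the name).  Converting to a non-fixed-point mode
   such as SImode drops the suffix, giving e.g. "__gnu_fractqqsi".  */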
2474
2475 static GTY(()) rtx speculation_barrier_libfunc;
2476
2477 /* Set up library functions unique to ARM. */
2478 static void
2479 arm_init_libfuncs (void)
2480 {
2481 /* For Linux, we have access to kernel support for atomic operations. */
2482 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2483 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2484
2485 /* There are no special library functions unless we are using the
2486 ARM BPABI. */
2487 if (!TARGET_BPABI)
2488 return;
2489
2490 /* The functions below are described in Section 4 of the "Run-Time
2491 ABI for the ARM architecture", Version 1.0. */
2492
2493 /* Double-precision floating-point arithmetic. Table 2. */
2494 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2495 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2496 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2497 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2498 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2499
2500 /* Double-precision comparisons. Table 3. */
2501 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2502 set_optab_libfunc (ne_optab, DFmode, NULL);
2503 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2504 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2505 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2506 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2507 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2508
2509 /* Single-precision floating-point arithmetic. Table 4. */
2510 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2511 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2512 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2513 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2514 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2515
2516 /* Single-precision comparisons. Table 5. */
2517 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2518 set_optab_libfunc (ne_optab, SFmode, NULL);
2519 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2520 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2521 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2522 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2523 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2524
2525 /* Floating-point to integer conversions. Table 6. */
2526 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2527 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2528 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2529 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2530 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2531 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2532 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2533 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2534
2535 /* Conversions between floating types. Table 7. */
2536 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2537 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2538
2539 /* Integer to floating-point conversions. Table 8. */
2540 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2541 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2542 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2543 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2544 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2545 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2546 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2547 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2548
2549 /* Long long. Table 9. */
2550 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2551 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2552 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2553 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2554 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2555 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2556 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2557 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2558
2559 /* Integer (32/32->32) division. \S 4.3.1. */
2560 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2561 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2562
2563 /* The divmod functions are designed so that they can be used for
2564 plain division, even though they return both the quotient and the
2565 remainder. The quotient is returned in the usual location (i.e.,
2566 r0 for SImode, {r0, r1} for DImode), just as would be expected
2567 for an ordinary division routine. Because the AAPCS calling
2568 conventions specify that all of { r0, r1, r2, r3 } are
2569 call-clobbered registers, there is no need to tell the compiler
2570 explicitly that those registers are clobbered by these
2571 routines. */
2572 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2573 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
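/* Editorial note (added): per the run-time ABI, __aeabi_idivmod returns the
   quotient in r0 and the remainder in r1, and __aeabi_ldivmod returns the
   quotient in {r0, r1} and the remainder in {r2, r3}; when used for plain
   division the remainder half of the result is simply ignored.  */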
2574
2575 /* For SImode division the ABI provides div-without-mod routines,
2576 which are faster. */
2577 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2578 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2579
2580 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2581 divmod libcalls instead. */
2582 set_optab_libfunc (smod_optab, DImode, NULL);
2583 set_optab_libfunc (umod_optab, DImode, NULL);
2584 set_optab_libfunc (smod_optab, SImode, NULL);
2585 set_optab_libfunc (umod_optab, SImode, NULL);
2586
2587 /* Half-precision float operations. The compiler handles all operations
2588 with NULL libfuncs by converting to SFmode. */
2589 switch (arm_fp16_format)
2590 {
2591 case ARM_FP16_FORMAT_IEEE:
2592 case ARM_FP16_FORMAT_ALTERNATIVE:
2593
2594 /* Conversions. */
2595 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2596 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2597 ? "__gnu_f2h_ieee"
2598 : "__gnu_f2h_alternative"));
2599 set_conv_libfunc (sext_optab, SFmode, HFmode,
2600 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2601 ? "__gnu_h2f_ieee"
2602 : "__gnu_h2f_alternative"));
2603
2604 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2605 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2606 ? "__gnu_d2h_ieee"
2607 : "__gnu_d2h_alternative"));
2608
2609 /* Arithmetic. */
2610 set_optab_libfunc (add_optab, HFmode, NULL);
2611 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2612 set_optab_libfunc (smul_optab, HFmode, NULL);
2613 set_optab_libfunc (neg_optab, HFmode, NULL);
2614 set_optab_libfunc (sub_optab, HFmode, NULL);
2615
2616 /* Comparisons. */
2617 set_optab_libfunc (eq_optab, HFmode, NULL);
2618 set_optab_libfunc (ne_optab, HFmode, NULL);
2619 set_optab_libfunc (lt_optab, HFmode, NULL);
2620 set_optab_libfunc (le_optab, HFmode, NULL);
2621 set_optab_libfunc (ge_optab, HFmode, NULL);
2622 set_optab_libfunc (gt_optab, HFmode, NULL);
2623 set_optab_libfunc (unord_optab, HFmode, NULL);
2624 break;
2625
2626 default:
2627 break;
2628 }
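/* Editorial illustration (added): because the arithmetic and comparison
   libfuncs are left as NULL above, an HFmode operation such as a + b is not
   emitted as a half-precision libcall; the operands are widened to SFmode
   (via the __gnu_h2f_* helpers registered above when no hardware conversion
   is available), the operation is performed in SFmode, and the result is
   narrowed back to HFmode.  */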
2629
2630 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2631 {
2632 const arm_fixed_mode_set fixed_arith_modes[] =
2633 {
2634 { E_QQmode, "qq" },
2635 { E_UQQmode, "uqq" },
2636 { E_HQmode, "hq" },
2637 { E_UHQmode, "uhq" },
2638 { E_SQmode, "sq" },
2639 { E_USQmode, "usq" },
2640 { E_DQmode, "dq" },
2641 { E_UDQmode, "udq" },
2642 { E_TQmode, "tq" },
2643 { E_UTQmode, "utq" },
2644 { E_HAmode, "ha" },
2645 { E_UHAmode, "uha" },
2646 { E_SAmode, "sa" },
2647 { E_USAmode, "usa" },
2648 { E_DAmode, "da" },
2649 { E_UDAmode, "uda" },
2650 { E_TAmode, "ta" },
2651 { E_UTAmode, "uta" }
2652 };
2653 const arm_fixed_mode_set fixed_conv_modes[] =
2654 {
2655 { E_QQmode, "qq" },
2656 { E_UQQmode, "uqq" },
2657 { E_HQmode, "hq" },
2658 { E_UHQmode, "uhq" },
2659 { E_SQmode, "sq" },
2660 { E_USQmode, "usq" },
2661 { E_DQmode, "dq" },
2662 { E_UDQmode, "udq" },
2663 { E_TQmode, "tq" },
2664 { E_UTQmode, "utq" },
2665 { E_HAmode, "ha" },
2666 { E_UHAmode, "uha" },
2667 { E_SAmode, "sa" },
2668 { E_USAmode, "usa" },
2669 { E_DAmode, "da" },
2670 { E_UDAmode, "uda" },
2671 { E_TAmode, "ta" },
2672 { E_UTAmode, "uta" },
2673 { E_QImode, "qi" },
2674 { E_HImode, "hi" },
2675 { E_SImode, "si" },
2676 { E_DImode, "di" },
2677 { E_TImode, "ti" },
2678 { E_SFmode, "sf" },
2679 { E_DFmode, "df" }
2680 };
2681 unsigned int i, j;
2682
2683 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2684 {
2685 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2686 "add", fixed_arith_modes[i].name, 3);
2687 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2688 "ssadd", fixed_arith_modes[i].name, 3);
2689 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2690 "usadd", fixed_arith_modes[i].name, 3);
2691 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2692 "sub", fixed_arith_modes[i].name, 3);
2693 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2694 "sssub", fixed_arith_modes[i].name, 3);
2695 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2696 "ussub", fixed_arith_modes[i].name, 3);
2697 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2698 "mul", fixed_arith_modes[i].name, 3);
2699 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2700 "ssmul", fixed_arith_modes[i].name, 3);
2701 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2702 "usmul", fixed_arith_modes[i].name, 3);
2703 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2704 "div", fixed_arith_modes[i].name, 3);
2705 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2706 "udiv", fixed_arith_modes[i].name, 3);
2707 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2708 "ssdiv", fixed_arith_modes[i].name, 3);
2709 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2710 "usdiv", fixed_arith_modes[i].name, 3);
2711 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2712 "neg", fixed_arith_modes[i].name, 2);
2713 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2714 "ssneg", fixed_arith_modes[i].name, 2);
2715 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2716 "usneg", fixed_arith_modes[i].name, 2);
2717 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2718 "ashl", fixed_arith_modes[i].name, 3);
2719 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2720 "ashr", fixed_arith_modes[i].name, 3);
2721 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2722 "lshr", fixed_arith_modes[i].name, 3);
2723 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2724 "ssashl", fixed_arith_modes[i].name, 3);
2725 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2726 "usashl", fixed_arith_modes[i].name, 3);
2727 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2728 "cmp", fixed_arith_modes[i].name, 2);
2729 }
2730
2731 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2732 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2733 {
2734 if (i == j
2735 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2736 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2737 continue;
2738
2739 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2740 fixed_conv_modes[j].mode, "fract",
2741 fixed_conv_modes[i].name,
2742 fixed_conv_modes[j].name);
2743 arm_set_fixed_conv_libfunc (satfract_optab,
2744 fixed_conv_modes[i].mode,
2745 fixed_conv_modes[j].mode, "satfract",
2746 fixed_conv_modes[i].name,
2747 fixed_conv_modes[j].name);
2748 arm_set_fixed_conv_libfunc (fractuns_optab,
2749 fixed_conv_modes[i].mode,
2750 fixed_conv_modes[j].mode, "fractuns",
2751 fixed_conv_modes[i].name,
2752 fixed_conv_modes[j].name);
2753 arm_set_fixed_conv_libfunc (satfractuns_optab,
2754 fixed_conv_modes[i].mode,
2755 fixed_conv_modes[j].mode, "satfractuns",
2756 fixed_conv_modes[i].name,
2757 fixed_conv_modes[j].name);
2758 }
2759 }
2760
2761 if (TARGET_AAPCS_BASED)
2762 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2763
2764 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2765 }
2766
2767 /* On AAPCS systems, this is the "struct __va_list". */
2768 static GTY(()) tree va_list_type;
2769
2770 /* Return the type to use as __builtin_va_list. */
2771 static tree
2772 arm_build_builtin_va_list (void)
2773 {
2774 tree va_list_name;
2775 tree ap_field;
2776
2777 if (!TARGET_AAPCS_BASED)
2778 return std_build_builtin_va_list ();
2779
2780 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2781 defined as:
2782
2783 struct __va_list
2784 {
2785 void *__ap;
2786 };
2787
2788 The C Library ABI further reinforces this definition in \S
2789 4.1.
2790
2791 We must follow this definition exactly. The structure tag
2792 name is visible in C++ mangled names, and thus forms a part
2793 of the ABI. The field name may be used by people who
2794 #include <stdarg.h>. */
2795 /* Create the type. */
2796 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2797 /* Give it the required name. */
2798 va_list_name = build_decl (BUILTINS_LOCATION,
2799 TYPE_DECL,
2800 get_identifier ("__va_list"),
2801 va_list_type);
2802 DECL_ARTIFICIAL (va_list_name) = 1;
2803 TYPE_NAME (va_list_type) = va_list_name;
2804 TYPE_STUB_DECL (va_list_type) = va_list_name;
2805 /* Create the __ap field. */
2806 ap_field = build_decl (BUILTINS_LOCATION,
2807 FIELD_DECL,
2808 get_identifier ("__ap"),
2809 ptr_type_node);
2810 DECL_ARTIFICIAL (ap_field) = 1;
2811 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2812 TYPE_FIELDS (va_list_type) = ap_field;
2813 /* Compute its layout. */
2814 layout_type (va_list_type);
2815
2816 return va_list_type;
2817 }
2818
2819 /* Return an expression of type "void *" pointing to the next
2820 available argument in a variable-argument list. VALIST is the
2821 user-level va_list object, of type __builtin_va_list. */
2822 static tree
2823 arm_extract_valist_ptr (tree valist)
2824 {
2825 if (TREE_TYPE (valist) == error_mark_node)
2826 return error_mark_node;
2827
2828 /* On an AAPCS target, the pointer is stored within "struct
2829 va_list". */
2830 if (TARGET_AAPCS_BASED)
2831 {
2832 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2833 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2834 valist, ap_field, NULL_TREE);
2835 }
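/* Editorial note (added): in C terms the COMPONENT_REF built above rewrites
   the user-level object AP into AP.__ap, so the standard va_start/va_arg
   expanders below only ever see a plain pointer.  */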
2836
2837 return valist;
2838 }
2839
2840 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2841 static void
2842 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2843 {
2844 valist = arm_extract_valist_ptr (valist);
2845 std_expand_builtin_va_start (valist, nextarg);
2846 }
2847
2848 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2849 static tree
2850 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2851 gimple_seq *post_p)
2852 {
2853 valist = arm_extract_valist_ptr (valist);
2854 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2855 }
2856
2857 /* Check any incompatible options that the user has specified. */
2858 static void
2859 arm_option_check_internal (struct gcc_options *opts)
2860 {
2861 int flags = opts->x_target_flags;
2862
2863 /* iWMMXt and NEON are incompatible. */
2864 if (TARGET_IWMMXT
2865 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2866 error ("iWMMXt and NEON are incompatible");
2867
2868 /* Make sure that the processor choice does not conflict with any of the
2869 other command line choices. */
2870 if (TARGET_ARM_P (flags)
2871 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2872 error ("target CPU does not support ARM mode");
2873
2874 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2875 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2876 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2877
2878 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2879 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2880
2881 /* If this target is normally configured to use APCS frames, warn if they
2882 are turned off and debugging is turned on. */
2883 if (TARGET_ARM_P (flags)
2884 && write_symbols != NO_DEBUG
2885 && !TARGET_APCS_FRAME
2886 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2887 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2888
2889 /* iWMMXt unsupported under Thumb mode. */
2890 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2891 error ("iWMMXt unsupported under Thumb mode");
2892
2893 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2894 error ("cannot use -mtp=cp15 with 16-bit Thumb");
2895
2896 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2897 {
2898 error ("RTP PIC is incompatible with Thumb");
2899 flag_pic = 0;
2900 }
2901
2902 if (target_pure_code || target_slow_flash_data)
2903 {
2904 const char *flag = (target_pure_code ? "-mpure-code" :
2905 "-mslow-flash-data");
2906
2907 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2908 with MOVT. */
2909 if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
2910 error ("%s only supports non-pic code on M-profile targets with the "
2911 "MOVT instruction", flag);
2912
2913 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2914 -mword-relocations forbids relocation of MOVT/MOVW. */
2915 if (target_word_relocations)
2916 error ("%s incompatible with -mword-relocations", flag);
2917 }
2918 }
2919
2920 /* Recompute the global settings depending on target attribute options. */
2921
2922 static void
2923 arm_option_params_internal (void)
2924 {
2925 /* If we are not using the default (ARM mode) section anchor offset
2926 ranges, then set the correct ranges now. */
2927 if (TARGET_THUMB1)
2928 {
2929 /* Thumb-1 LDR instructions cannot have negative offsets.
2930 Permissible positive offset ranges are 5-bit (for byte loads),
2931 6-bit (for halfword loads), or 7-bit (for word loads).
2932 Empirical results suggest a 7-bit anchor range gives the best
2933 overall code size. */
2934 targetm.min_anchor_offset = 0;
2935 targetm.max_anchor_offset = 127;
2936 }
2937 else if (TARGET_THUMB2)
2938 {
2939 /* The minimum is set such that the total size of the block
2940 for a particular anchor is 248 + 1 + 4095 bytes, which is
2941 divisible by eight, ensuring natural spacing of anchors. */
2942 targetm.min_anchor_offset = -248;
2943 targetm.max_anchor_offset = 4095;
2944 }
2945 else
2946 {
2947 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2948 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2949 }
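/* Editorial arithmetic (added), spelling out the figures used above: the
   Thumb-1 limit of 127 matches the 7-bit byte range reachable by word loads
   (LDR Rt, [Rn, #imm5 * 4] covers offsets 0, 4, ..., 124), while the
   Thumb-2 block of 248 + 1 + 4095 bytes (offsets below the anchor, the
   anchor byte itself, and offsets above it) totals 4344 = 8 * 543 bytes,
   hence "divisible by eight" above.  */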
2950
2951 /* Increase the number of conditional instructions with -Os. */
2952 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2953
2954 /* For THUMB2, we limit the conditional sequence to one IT block. */
2955 if (TARGET_THUMB2)
2956 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2957 }
2958
2959 /* True if -mflip-thumb should next add an attribute for the default
2960 mode, false if it should next add an attribute for the opposite mode. */
2961 static GTY(()) bool thumb_flipper;
2962
2963 /* Options after initial target override. */
2964 static GTY(()) tree init_optimize;
2965
2966 static void
2967 arm_override_options_after_change_1 (struct gcc_options *opts)
2968 {
2969 /* -falign-functions without argument: supply one. */
2970 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
2971 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2972 && opts->x_optimize_size ? "2" : "4";
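/* Editorial example (added): if function alignment is enabled but no count
   was supplied (a bare -falign-functions), a Thumb build optimizing for
   size gets the equivalent of -falign-functions=2 (the minimum Thumb
   instruction size); every other combination gets -falign-functions=4.  */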
2973 }
2974
2975 /* Implement targetm.override_options_after_change. */
2976
2977 static void
2978 arm_override_options_after_change (void)
2979 {
2980 arm_configure_build_target (&arm_active_target,
2981 TREE_TARGET_OPTION (target_option_default_node),
2982 &global_options_set, false);
2983
2984 arm_override_options_after_change_1 (&global_options);
2985 }
2986
2987 /* Implement TARGET_OPTION_SAVE. */
2988 static void
2989 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2990 {
2991 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2992 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2993 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2994 }
2995
2996 /* Implement TARGET_OPTION_RESTORE. */
2997 static void
2998 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2999 {
3000 opts->x_arm_arch_string = ptr->x_arm_arch_string;
3001 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
3002 opts->x_arm_tune_string = ptr->x_arm_tune_string;
3003 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
3004 false);
3005 }
3006
3007 /* Reset options between modes that the user has specified. */
3008 static void
3009 arm_option_override_internal (struct gcc_options *opts,
3010 struct gcc_options *opts_set)
3011 {
3012 arm_override_options_after_change_1 (opts);
3013
3014 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3015 {
3016 /* The default is to enable interworking, so this warning message would
3017 be confusing to users who have just compiled with
3018 e.g. -march=armv4. */
3019 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3020 opts->x_target_flags &= ~MASK_INTERWORK;
3021 }
3022
3023 if (TARGET_THUMB_P (opts->x_target_flags)
3024 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3025 {
3026 warning (0, "target CPU does not support THUMB instructions");
3027 opts->x_target_flags &= ~MASK_THUMB;
3028 }
3029
3030 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3031 {
3032 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3033 opts->x_target_flags &= ~MASK_APCS_FRAME;
3034 }
3035
3036 /* Callee super interworking implies thumb interworking. Adding
3037 this to the flags here simplifies the logic elsewhere. */
3038 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3039 opts->x_target_flags |= MASK_INTERWORK;
3040
3041 /* We need to remember the initial values so that combinations of options like
3042 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3043 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3044
3045 if (! opts_set->x_arm_restrict_it)
3046 opts->x_arm_restrict_it = arm_arch8;
3047
3048 /* ARM execution state and M profile don't have [restrict] IT. */
3049 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3050 opts->x_arm_restrict_it = 0;
3051
3052 /* Enable -munaligned-access by default for
3053 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3054 i.e. Thumb2 and ARM state only.
3055 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3056 - ARMv8 architecture-based processors.
3057
3058 Disable -munaligned-access by default for
3059 - all pre-ARMv6 architecture-based processors
3060 - ARMv6-M architecture-based processors
3061 - ARMv8-M Baseline processors. */
3062
3063 if (! opts_set->x_unaligned_access)
3064 {
3065 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3066 && arm_arch6 && (arm_arch_notm || arm_arch7));
3067 }
3068 else if (opts->x_unaligned_access == 1
3069 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3070 {
3071 warning (0, "target CPU does not support unaligned accesses");
3072 opts->x_unaligned_access = 0;
3073 }
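/* Editorial illustration (added): under the test above, -march=armv7-a
   (arm_arch6 and arm_arch_notm both set) defaults to -munaligned-access,
   while -march=armv6-m (M profile and not ARMv7) and all pre-ARMv6 targets
   default to -mno-unaligned-access, matching the list in the comment
   above.  */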
3074
3075 /* Don't warn since it's on by default in -O2. */
3076 if (TARGET_THUMB1_P (opts->x_target_flags))
3077 opts->x_flag_schedule_insns = 0;
3078 else
3079 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3080
3081 /* Disable shrink-wrap when optimizing function for size, since it tends to
3082 generate additional returns. */
3083 if (optimize_function_for_size_p (cfun)
3084 && TARGET_THUMB2_P (opts->x_target_flags))
3085 opts->x_flag_shrink_wrap = false;
3086 else
3087 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3088
3089 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3090 - epilogue_insns - does not accurately model the corresponding insns
3091 emitted in the asm file. In particular, see the comment in thumb_exit
3092 'Find out how many of the (return) argument registers we can corrupt'.
3093 As a consequence, the epilogue may clobber registers without fipa-ra
3094 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3095 TODO: Accurately model clobbers for epilogue_insns and reenable
3096 fipa-ra. */
3097 if (TARGET_THUMB1_P (opts->x_target_flags))
3098 opts->x_flag_ipa_ra = 0;
3099 else
3100 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3101
3102 /* Thumb2 inline assembly code should always use unified syntax.
3103 This will apply to ARM and Thumb1 eventually. */
3104 if (TARGET_THUMB2_P (opts->x_target_flags))
3105 opts->x_inline_asm_unified = true;
3106
3107 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3108 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3109 #endif
3110 }
3111
3112 static sbitmap isa_all_fpubits;
3113 static sbitmap isa_quirkbits;
3114
3115 /* Configure a build target TARGET from the user-specified options OPTS and
3116 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3117 architecture have been specified, but the two are not identical. */
3118 void
3119 arm_configure_build_target (struct arm_build_target *target,
3120 struct cl_target_option *opts,
3121 struct gcc_options *opts_set,
3122 bool warn_compatible)
3123 {
3124 const cpu_option *arm_selected_tune = NULL;
3125 const arch_option *arm_selected_arch = NULL;
3126 const cpu_option *arm_selected_cpu = NULL;
3127 const arm_fpu_desc *arm_selected_fpu = NULL;
3128 const char *tune_opts = NULL;
3129 const char *arch_opts = NULL;
3130 const char *cpu_opts = NULL;
3131
3132 bitmap_clear (target->isa);
3133 target->core_name = NULL;
3134 target->arch_name = NULL;
3135
3136 if (opts_set->x_arm_arch_string)
3137 {
3138 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3139 "-march",
3140 opts->x_arm_arch_string);
3141 arch_opts = strchr (opts->x_arm_arch_string, '+');
3142 }
3143
3144 if (opts_set->x_arm_cpu_string)
3145 {
3146 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3147 opts->x_arm_cpu_string);
3148 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3149 arm_selected_tune = arm_selected_cpu;
3150 /* If taking the tuning from -mcpu, we don't need to rescan the
3151 options for tuning. */
3152 }
3153
3154 if (opts_set->x_arm_tune_string)
3155 {
3156 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3157 opts->x_arm_tune_string);
3158 tune_opts = strchr (opts->x_arm_tune_string, '+');
3159 }
3160
3161 if (arm_selected_arch)
3162 {
3163 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3164 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3165 arch_opts);
3166
3167 if (arm_selected_cpu)
3168 {
3169 auto_sbitmap cpu_isa (isa_num_bits);
3170 auto_sbitmap isa_delta (isa_num_bits);
3171
3172 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3173 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3174 cpu_opts);
3175 bitmap_xor (isa_delta, cpu_isa, target->isa);
3176 /* Ignore any bits that are quirk bits. */
3177 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3178 /* Ignore (for now) any bits that might be set by -mfpu. */
3179 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3180
3181 if (!bitmap_empty_p (isa_delta))
3182 {
3183 if (warn_compatible)
3184 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3185 arm_selected_cpu->common.name,
3186 arm_selected_arch->common.name);
3187 /* -march wins for code generation.
3188 -mcpu wins for default tuning. */
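		  /* Illustrative example (added annotation, not from the
		     original sources): with -mcpu=cortex-m3 -march=armv7-a
		     the two ISA bit sets differ, so the warning above fires,
		     code is generated for armv7-a and the default tuning
		     remains cortex-m3.  */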
3189 if (!arm_selected_tune)
3190 arm_selected_tune = arm_selected_cpu;
3191
3192 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3193 target->arch_name = arm_selected_arch->common.name;
3194 }
3195 else
3196 {
3197 /* Architecture and CPU are essentially the same.
3198 Prefer the CPU setting. */
3199 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3200 target->core_name = arm_selected_cpu->common.name;
3201 /* Copy the CPU's capabilities, so that we inherit the
3202 appropriate extensions and quirks. */
3203 bitmap_copy (target->isa, cpu_isa);
3204 }
3205 }
3206 else
3207 {
3208 /* Pick a CPU based on the architecture. */
3209 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3210 target->arch_name = arm_selected_arch->common.name;
3211 /* Note: target->core_name is left unset in this path. */
3212 }
3213 }
3214 else if (arm_selected_cpu)
3215 {
3216 target->core_name = arm_selected_cpu->common.name;
3217 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3218 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3219 cpu_opts);
3220 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3221 }
3222 /* If the user did not specify a processor or architecture, choose
3223 one for them. */
3224 else
3225 {
3226 const cpu_option *sel;
3227 auto_sbitmap sought_isa (isa_num_bits);
3228 bitmap_clear (sought_isa);
3229 auto_sbitmap default_isa (isa_num_bits);
3230
3231 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3232 TARGET_CPU_DEFAULT);
3233 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3234 gcc_assert (arm_selected_cpu->common.name);
3235
3236 /* RWE: All of the selection logic below (to the end of this
3237 'if' clause) looks somewhat suspect. It appears to be mostly
3238 there to support forcing thumb support when the default CPU
3239 does not have thumb (somewhat dubious in terms of what the
3240 user might be expecting). I think it should be removed once
3241 support for the pre-thumb era cores is removed. */
3242 sel = arm_selected_cpu;
3243 arm_initialize_isa (default_isa, sel->common.isa_bits);
3244 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3245 cpu_opts);
3246
3247 /* Now check to see if the user has specified any command line
3248 switches that require certain abilities from the cpu. */
3249
3250 if (TARGET_INTERWORK || TARGET_THUMB)
3251 bitmap_set_bit (sought_isa, isa_bit_thumb);
3252
3253 /* If there are such requirements and the default CPU does not
3254 satisfy them, we need to run over the complete list of
3255 cores looking for one that is satisfactory. */
3256 if (!bitmap_empty_p (sought_isa)
3257 && !bitmap_subset_p (sought_isa, default_isa))
3258 {
3259 auto_sbitmap candidate_isa (isa_num_bits);
3260 /* We're only interested in a CPU with at least the
3261 capabilities of the default CPU and the required
3262 additional features. */
3263 bitmap_ior (default_isa, default_isa, sought_isa);
3264
3265 /* Try to locate a CPU type that supports all of the abilities
3266 of the default CPU, plus the extra abilities requested by
3267 the user. */
3268 for (sel = all_cores; sel->common.name != NULL; sel++)
3269 {
3270 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3271 /* An exact match? */
3272 if (bitmap_equal_p (default_isa, candidate_isa))
3273 break;
3274 }
3275
3276 if (sel->common.name == NULL)
3277 {
3278 unsigned current_bit_count = isa_num_bits;
3279 const cpu_option *best_fit = NULL;
3280
3281 /* Ideally we would like to issue an error message here
3282 saying that it was not possible to find a CPU compatible
3283 with the default CPU, but which also supports the command
3284 line options specified by the programmer, and so they
3285 ought to use the -mcpu=<name> command line option to
3286 override the default CPU type.
3287
3288 If we cannot find a CPU that has exactly the
3289 characteristics of the default CPU and the given
3290 command line options we scan the array again looking
3291 for a best match. The best match must have at least
3292 the capabilities of the perfect match. */
3293 for (sel = all_cores; sel->common.name != NULL; sel++)
3294 {
3295 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3296
3297 if (bitmap_subset_p (default_isa, candidate_isa))
3298 {
3299 unsigned count;
3300
3301 bitmap_and_compl (candidate_isa, candidate_isa,
3302 default_isa);
3303 count = bitmap_popcount (candidate_isa);
3304
3305 if (count < current_bit_count)
3306 {
3307 best_fit = sel;
3308 current_bit_count = count;
3309 }
3310 } /* if (bitmap_subset_p ...) */
3311 } /* for (sel = all_cores; ...) */
3312 
3313 gcc_assert (best_fit);
3314 sel = best_fit;
3315 } /* if (sel->common.name == NULL) */
3316 arm_selected_cpu = sel;
3317 }
3318
3319 /* Now we know the CPU, we can finally initialize the target
3320 structure. */
3321 target->core_name = arm_selected_cpu->common.name;
3322 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3323 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3324 cpu_opts);
3325 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3326 }
3327
3328 gcc_assert (arm_selected_cpu);
3329 gcc_assert (arm_selected_arch);
3330
3331 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3332 {
3333 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3334 auto_sbitmap fpu_bits (isa_num_bits);
3335
3336 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3337 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3338 bitmap_ior (target->isa, target->isa, fpu_bits);
3339 }
3340
3341 if (!arm_selected_tune)
3342 arm_selected_tune = arm_selected_cpu;
3343 else /* Validate the features passed to -mtune. */
3344 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3345
3346 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3347
3348 /* Finish initializing the target structure. */
3349 target->arch_pp_name = arm_selected_arch->arch;
3350 target->base_arch = arm_selected_arch->base_arch;
3351 target->profile = arm_selected_arch->profile;
3352
3353 target->tune_flags = tune_data->tune_flags;
3354 target->tune = tune_data->tune;
3355 target->tune_core = tune_data->scheduler;
3356 arm_option_reconfigure_globals ();
3357 }
3358
3359 /* Fix up any incompatible options that the user has specified. */
3360 static void
3361 arm_option_override (void)
3362 {
3363 static const enum isa_feature fpu_bitlist[]
3364 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3365 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3366 cl_target_option opts;
3367
3368 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3369 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3370
3371 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3372 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3373
3374 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3375
3376 if (!global_options_set.x_arm_fpu_index)
3377 {
3378 bool ok;
3379 int fpu_index;
3380
3381 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3382 CL_TARGET);
3383 gcc_assert (ok);
3384 arm_fpu_index = (enum fpu_type) fpu_index;
3385 }
3386
3387 cl_target_option_save (&opts, &global_options);
3388 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3389 true);
3390
3391 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3392 SUBTARGET_OVERRIDE_OPTIONS;
3393 #endif
3394
3395 /* Initialize boolean versions of the architectural flags, for use
3396 in the arm.md file and for enabling feature flags. */
3397 arm_option_reconfigure_globals ();
3398
3399 arm_tune = arm_active_target.tune_core;
3400 tune_flags = arm_active_target.tune_flags;
3401 current_tune = arm_active_target.tune;
3402
3403 /* TBD: Dwarf info for apcs frame is not handled yet. */
3404 if (TARGET_APCS_FRAME)
3405 flag_shrink_wrap = false;
3406
3407 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3408 {
3409 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3410 target_flags |= MASK_APCS_FRAME;
3411 }
3412
3413 if (TARGET_POKE_FUNCTION_NAME)
3414 target_flags |= MASK_APCS_FRAME;
3415
3416 if (TARGET_APCS_REENT && flag_pic)
3417 error ("-fpic and -mapcs-reent are incompatible");
3418
3419 if (TARGET_APCS_REENT)
3420 warning (0, "APCS reentrant code not supported. Ignored");
3421
3422 /* Set up some tuning parameters. */
3423 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3424 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3425 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3426 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3427 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3428 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3429
3430 /* For arm2/3 there is no need to do any scheduling if we are doing
3431 software floating-point. */
3432 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3433 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3434
3435 /* Override the default structure alignment for AAPCS ABI. */
3436 if (!global_options_set.x_arm_structure_size_boundary)
3437 {
3438 if (TARGET_AAPCS_BASED)
3439 arm_structure_size_boundary = 8;
3440 }
3441 else
3442 {
3443 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3444
3445 if (arm_structure_size_boundary != 8
3446 && arm_structure_size_boundary != 32
3447 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3448 {
3449 if (ARM_DOUBLEWORD_ALIGN)
3450 warning (0,
3451 "structure size boundary can only be set to 8, 32 or 64");
3452 else
3453 warning (0, "structure size boundary can only be set to 8 or 32");
3454 arm_structure_size_boundary
3455 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3456 }
3457 }
3458
3459 if (TARGET_VXWORKS_RTP)
3460 {
3461 if (!global_options_set.x_arm_pic_data_is_text_relative)
3462 arm_pic_data_is_text_relative = 0;
3463 }
3464 else if (flag_pic
3465 && !arm_pic_data_is_text_relative
3466 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3467 /* When text & data segments don't have a fixed displacement, the
3468 intended use is with a single, read only, pic base register.
3469 Unless the user explicitly requested not to do that, set
3470 it. */
3471 target_flags |= MASK_SINGLE_PIC_BASE;
3472
3473 /* If stack checking is disabled, we can use r10 as the PIC register,
3474 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3475 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3476 {
3477 if (TARGET_VXWORKS_RTP)
3478 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3479 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3480 }
3481
3482 if (flag_pic && TARGET_VXWORKS_RTP)
3483 arm_pic_register = 9;
3484
3485 if (arm_pic_register_string != NULL)
3486 {
3487 int pic_register = decode_reg_name (arm_pic_register_string);
3488
3489 if (!flag_pic)
3490 warning (0, "-mpic-register= is useless without -fpic");
3491
3492 /* Prevent the user from choosing an obviously stupid PIC register. */
3493 else if (pic_register < 0 || call_used_regs[pic_register]
3494 || pic_register == HARD_FRAME_POINTER_REGNUM
3495 || pic_register == STACK_POINTER_REGNUM
3496 || pic_register >= PC_REGNUM
3497 || (TARGET_VXWORKS_RTP
3498 && (unsigned int) pic_register != arm_pic_register))
3499 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3500 else
3501 arm_pic_register = pic_register;
3502 }
3503
3504 if (flag_pic)
3505 target_word_relocations = 1;
3506
3507 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3508 if (fix_cm3_ldrd == 2)
3509 {
3510 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3511 fix_cm3_ldrd = 1;
3512 else
3513 fix_cm3_ldrd = 0;
3514 }
3515
3516 /* Hot/Cold partitioning is not currently supported, since we can't
3517 handle literal pool placement in that case. */
3518 if (flag_reorder_blocks_and_partition)
3519 {
3520 inform (input_location,
3521 "-freorder-blocks-and-partition not supported on this architecture");
3522 flag_reorder_blocks_and_partition = 0;
3523 flag_reorder_blocks = 1;
3524 }
3525
3526 if (flag_pic)
3527 /* Hoisting PIC address calculations more aggressively provides a small,
3528 but measurable, size reduction for PIC code. Therefore, we decrease
3529 the bar for unrestricted expression hoisting to the cost of PIC address
3530 calculation, which is 2 instructions. */
3531 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3532 global_options.x_param_values,
3533 global_options_set.x_param_values);
3534
3535 /* ARM EABI defaults to strict volatile bitfields. */
3536 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3537 && abi_version_at_least(2))
3538 flag_strict_volatile_bitfields = 1;
3539
3540 /* Enable software prefetching at -O3 for CPUs that have a prefetcher
3541 and for which we have deemed it beneficial (signified by setting
3542 prefetch.num_slots to 1 or more). */
3543 if (flag_prefetch_loop_arrays < 0
3544 && HAVE_prefetch
3545 && optimize >= 3
3546 && current_tune->prefetch.num_slots > 0)
3547 flag_prefetch_loop_arrays = 1;
3548
3549 /* Set up parameters to be used in prefetching algorithm. Do not
3550 override the defaults unless we are tuning for a core we have
3551 researched values for. */
3552 if (current_tune->prefetch.num_slots > 0)
3553 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3554 current_tune->prefetch.num_slots,
3555 global_options.x_param_values,
3556 global_options_set.x_param_values);
3557 if (current_tune->prefetch.l1_cache_line_size >= 0)
3558 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3559 current_tune->prefetch.l1_cache_line_size,
3560 global_options.x_param_values,
3561 global_options_set.x_param_values);
3562 if (current_tune->prefetch.l1_cache_size >= 0)
3563 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3564 current_tune->prefetch.l1_cache_size,
3565 global_options.x_param_values,
3566 global_options_set.x_param_values);
3567
3568 /* Use Neon, rather than core registers, to perform 64-bit
3569 operations. */
3570 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3571 if (use_neon_for_64bits == 1)
3572 prefer_neon_for_64bits = true;
3573
3574 /* Use the alternative scheduling-pressure algorithm by default. */
3575 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3576 global_options.x_param_values,
3577 global_options_set.x_param_values);
3578
3579 /* Look through ready list and all of queue for instructions
3580 relevant for L2 auto-prefetcher. */
3581 int param_sched_autopref_queue_depth;
3582
3583 switch (current_tune->sched_autopref)
3584 {
3585 case tune_params::SCHED_AUTOPREF_OFF:
3586 param_sched_autopref_queue_depth = -1;
3587 break;
3588
3589 case tune_params::SCHED_AUTOPREF_RANK:
3590 param_sched_autopref_queue_depth = 0;
3591 break;
3592
3593 case tune_params::SCHED_AUTOPREF_FULL:
3594 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3595 break;
3596
3597 default:
3598 gcc_unreachable ();
3599 }
3600
3601 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3602 param_sched_autopref_queue_depth,
3603 global_options.x_param_values,
3604 global_options_set.x_param_values);
3605
3606 /* Currently, for slow flash data, we just disable literal pools. We also
3607 disable them for pure-code. */
3608 if (target_slow_flash_data || target_pure_code)
3609 arm_disable_literal_pool = true;
3610
3611 /* Disable scheduling fusion by default unless the target is an ARMv7
3612 (or later) processor that prefers ldrd/strd. */
3613 if (flag_schedule_fusion == 2
3614 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3615 flag_schedule_fusion = 0;
3616
3617 /* Need to remember the initial options before they are overridden. */
3618 init_optimize = build_optimization_node (&global_options);
3619
3620 arm_options_perform_arch_sanity_checks ();
3621 arm_option_override_internal (&global_options, &global_options_set);
3622 arm_option_check_internal (&global_options);
3623 arm_option_params_internal ();
3624
3625 /* Create the default target_options structure. */
3626 target_option_default_node = target_option_current_node
3627 = build_target_option_node (&global_options);
3628
3629 /* Register global variables with the garbage collector. */
3630 arm_add_gc_roots ();
3631
3632 /* Init initial mode for testing. */
3633 thumb_flipper = TARGET_THUMB;
3634 }
3635
3636
3637 /* Reconfigure global status flags from the active_target.isa. */
3638 void
3639 arm_option_reconfigure_globals (void)
3640 {
3641 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3642 arm_base_arch = arm_active_target.base_arch;
3643
3644 /* Initialize boolean versions of the architectural flags, for use
3645 in the arm.md file. */
3646 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3647 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3648 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3649 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3650 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3651 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3652 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3653 arm_arch6m = arm_arch6 && !arm_arch_notm;
3654 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3655 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3656 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3657 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3658 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3659 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3660 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3661 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3662 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3663 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3664 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3665 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3666 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3667 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3668 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3669 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3670 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3671 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3672 if (arm_fp16_inst)
3673 {
3674 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3675 error ("selected fp16 options are incompatible");
3676 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3677 }
3678
3679 /* And finally, set up some quirks. */
3680 arm_arch_no_volatile_ce
3681 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3682 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3683 isa_bit_quirk_armv6kz);
3684
3685 /* Use the cp15 method if it is available. */
3686 if (target_thread_pointer == TP_AUTO)
3687 {
3688 if (arm_arch6k && !TARGET_THUMB1)
3689 target_thread_pointer = TP_CP15;
3690 else
3691 target_thread_pointer = TP_SOFT;
3692 }
3693 }
3694
3695 /* Perform some validation between the desired architecture and the rest of the
3696 options. */
3697 void
3698 arm_options_perform_arch_sanity_checks (void)
3699 {
3700 /* V5T code we generate is completely interworking capable, so we turn off
3701 TARGET_INTERWORK here to avoid many tests later on. */
3702
3703 /* XXX However, we must pass the right pre-processor defines to CPP
3704 or GLD can get confused. This is a hack. */
3705 if (TARGET_INTERWORK)
3706 arm_cpp_interwork = 1;
3707
3708 if (arm_arch5t)
3709 target_flags &= ~MASK_INTERWORK;
3710
3711 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3712 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3713
3714 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3715 error ("iwmmxt abi requires an iwmmxt capable cpu");
3716
3717 /* BPABI targets use linker tricks to allow interworking on cores
3718 without thumb support. */
3719 if (TARGET_INTERWORK
3720 && !TARGET_BPABI
3721 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3722 {
3723 warning (0, "target CPU does not support interworking" );
3724 target_flags &= ~MASK_INTERWORK;
3725 }
3726
3727 /* If soft-float is specified then don't use FPU. */
3728 if (TARGET_SOFT_FLOAT)
3729 arm_fpu_attr = FPU_NONE;
3730 else
3731 arm_fpu_attr = FPU_VFP;
3732
3733 if (TARGET_AAPCS_BASED)
3734 {
3735 if (TARGET_CALLER_INTERWORKING)
3736 error ("AAPCS does not support -mcaller-super-interworking");
3737 else
3738 if (TARGET_CALLEE_INTERWORKING)
3739 error ("AAPCS does not support -mcallee-super-interworking");
3740 }
3741
3742 /* __fp16 support currently assumes the core has ldrh. */
3743 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3744 sorry ("__fp16 and no ldrh");
3745
3746 if (use_cmse && !arm_arch_cmse)
3747 error ("target CPU does not support ARMv8-M Security Extensions");
3748
3749 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3750 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3751 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3752 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3753
3754
3755 if (TARGET_AAPCS_BASED)
3756 {
3757 if (arm_abi == ARM_ABI_IWMMXT)
3758 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3759 else if (TARGET_HARD_FLOAT_ABI)
3760 {
3761 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3762 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3763 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3764 }
3765 else
3766 arm_pcs_default = ARM_PCS_AAPCS;
3767 }
3768 else
3769 {
3770 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3771 sorry ("-mfloat-abi=hard and VFP");
3772
3773 if (arm_abi == ARM_ABI_APCS)
3774 arm_pcs_default = ARM_PCS_APCS;
3775 else
3776 arm_pcs_default = ARM_PCS_ATPCS;
3777 }
3778 }
3779
3780 static void
3781 arm_add_gc_roots (void)
3782 {
3783 gcc_obstack_init(&minipool_obstack);
3784 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3785 }
3786 \f
3787 /* A table of known ARM exception types.
3788 For use with the interrupt function attribute. */
3789
3790 typedef struct
3791 {
3792 const char *const arg;
3793 const unsigned long return_value;
3794 }
3795 isr_attribute_arg;
3796
3797 static const isr_attribute_arg isr_attribute_args [] =
3798 {
3799 { "IRQ", ARM_FT_ISR },
3800 { "irq", ARM_FT_ISR },
3801 { "FIQ", ARM_FT_FIQ },
3802 { "fiq", ARM_FT_FIQ },
3803 { "ABORT", ARM_FT_ISR },
3804 { "abort", ARM_FT_ISR },
3805 { "ABORT", ARM_FT_ISR },
3806 { "abort", ARM_FT_ISR },
3807 { "UNDEF", ARM_FT_EXCEPTION },
3808 { "undef", ARM_FT_EXCEPTION },
3809 { "SWI", ARM_FT_EXCEPTION },
3810 { "swi", ARM_FT_EXCEPTION },
3811 { NULL, ARM_FT_NORMAL }
3812 };
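/* Illustrative usage of the table above (added annotation only): a handler
   declared as
     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);
   has its attribute argument matched against this table by arm_isr_value
   below and is given the type ARM_FT_FIQ.  */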
3813
3814 /* Returns the (interrupt) function type of the current
3815 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3816
3817 static unsigned long
3818 arm_isr_value (tree argument)
3819 {
3820 const isr_attribute_arg * ptr;
3821 const char * arg;
3822
3823 if (!arm_arch_notm)
3824 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3825
3826 /* No argument - default to IRQ. */
3827 if (argument == NULL_TREE)
3828 return ARM_FT_ISR;
3829
3830 /* Get the value of the argument. */
3831 if (TREE_VALUE (argument) == NULL_TREE
3832 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3833 return ARM_FT_UNKNOWN;
3834
3835 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3836
3837 /* Check it against the list of known arguments. */
3838 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3839 if (streq (arg, ptr->arg))
3840 return ptr->return_value;
3841
3842 /* An unrecognized interrupt type. */
3843 return ARM_FT_UNKNOWN;
3844 }
3845
3846 /* Computes the type of the current function. */
3847
3848 static unsigned long
3849 arm_compute_func_type (void)
3850 {
3851 unsigned long type = ARM_FT_UNKNOWN;
3852 tree a;
3853 tree attr;
3854
3855 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3856
3857 /* Decide if the current function is volatile. Such functions
3858 never return, and many memory cycles can be saved by not storing
3859 register values that will never be needed again. This optimization
3860 was added to speed up context switching in a kernel application. */
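  /* Illustrative example (added annotation only): a function declared with
     __attribute__ ((noreturn)) has TREE_THIS_VOLATILE set on its
     FUNCTION_DECL, so it is treated as volatile here.  */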
3861 if (optimize > 0
3862 && (TREE_NOTHROW (current_function_decl)
3863 || !(flag_unwind_tables
3864 || (flag_exceptions
3865 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3866 && TREE_THIS_VOLATILE (current_function_decl))
3867 type |= ARM_FT_VOLATILE;
3868
3869 if (cfun->static_chain_decl != NULL)
3870 type |= ARM_FT_NESTED;
3871
3872 attr = DECL_ATTRIBUTES (current_function_decl);
3873
3874 a = lookup_attribute ("naked", attr);
3875 if (a != NULL_TREE)
3876 type |= ARM_FT_NAKED;
3877
3878 a = lookup_attribute ("isr", attr);
3879 if (a == NULL_TREE)
3880 a = lookup_attribute ("interrupt", attr);
3881
3882 if (a == NULL_TREE)
3883 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3884 else
3885 type |= arm_isr_value (TREE_VALUE (a));
3886
3887 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3888 type |= ARM_FT_CMSE_ENTRY;
3889
3890 return type;
3891 }
3892
3893 /* Returns the type of the current function. */
3894
3895 unsigned long
3896 arm_current_func_type (void)
3897 {
3898 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3899 cfun->machine->func_type = arm_compute_func_type ();
3900
3901 return cfun->machine->func_type;
3902 }
3903
3904 bool
3905 arm_allocate_stack_slots_for_args (void)
3906 {
3907 /* Naked functions should not allocate stack slots for arguments. */
3908 return !IS_NAKED (arm_current_func_type ());
3909 }
3910
3911 static bool
3912 arm_warn_func_return (tree decl)
3913 {
3914 /* Naked functions are implemented entirely in assembly, including the
3915 return sequence, so suppress warnings about this. */
3916 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3917 }
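/* Illustrative example (added annotation only): a function declared as
     void __attribute__ ((naked)) reset_handler (void);  /* hypothetical name */
   supplies its own return sequence in inline assembly, so the
   missing-return warning is suppressed for it by the hook above.  */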
3918
3919 \f
3920 /* Output assembler code for a block containing the constant parts
3921 of a trampoline, leaving space for the variable parts.
3922
3923 On the ARM, (if r8 is the static chain regnum, and remembering that
3924 referencing pc adds an offset of 8) the trampoline looks like:
3925 ldr r8, [pc, #0]
3926 ldr pc, [pc]
3927 .word static chain value
3928 .word function's address
3929 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3930
3931 static void
3932 arm_asm_trampoline_template (FILE *f)
3933 {
3934 fprintf (f, "\t.syntax unified\n");
3935
3936 if (TARGET_ARM)
3937 {
3938 fprintf (f, "\t.arm\n");
3939 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3940 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3941 }
3942 else if (TARGET_THUMB2)
3943 {
3944 fprintf (f, "\t.thumb\n");
3945 /* The Thumb-2 trampoline is similar to the arm implementation.
3946 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3947 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3948 STATIC_CHAIN_REGNUM, PC_REGNUM);
3949 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3950 }
3951 else
3952 {
3953 ASM_OUTPUT_ALIGN (f, 2);
3954 fprintf (f, "\t.code\t16\n");
3955 fprintf (f, ".Ltrampoline_start:\n");
3956 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3957 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3958 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3959 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3960 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3961 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3962 }
3963 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3964 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3965 }
3966
3967 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3968
3969 static void
3970 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3971 {
3972 rtx fnaddr, mem, a_tramp;
3973
3974 emit_block_move (m_tramp, assemble_trampoline_template (),
3975 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3976
3977 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3978 emit_move_insn (mem, chain_value);
3979
3980 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3981 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3982 emit_move_insn (mem, fnaddr);
3983
3984 a_tramp = XEXP (m_tramp, 0);
3985 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3986 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3987 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3988 }
3989
3990 /* Thumb trampolines should be entered in thumb mode, so set
3991 the bottom bit of the address. */
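/* (Illustrative note, added annotation only: BX/BLX to an address with
   bit 0 set switches the core to Thumb state, so ORing in 1 here makes
   calls through the trampoline enter it as Thumb code.)  */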
3992
3993 static rtx
3994 arm_trampoline_adjust_address (rtx addr)
3995 {
3996 if (TARGET_THUMB)
3997 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3998 NULL, 0, OPTAB_LIB_WIDEN);
3999 return addr;
4000 }
4001 \f
4002 /* Return 1 if it is possible to return using a single instruction.
4003 If SIBLING is non-null, this is a test for a return before a sibling
4004 call. SIBLING is the call insn, so we can examine its register usage. */
4005
4006 int
4007 use_return_insn (int iscond, rtx sibling)
4008 {
4009 int regno;
4010 unsigned int func_type;
4011 unsigned long saved_int_regs;
4012 unsigned HOST_WIDE_INT stack_adjust;
4013 arm_stack_offsets *offsets;
4014
4015 /* Never use a return instruction before reload has run. */
4016 if (!reload_completed)
4017 return 0;
4018
4019 func_type = arm_current_func_type ();
4020
4021 /* Naked, volatile and stack alignment functions need special
4022 consideration. */
4023 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4024 return 0;
4025
4026 /* So do interrupt functions that use the frame pointer and Thumb
4027 interrupt functions. */
4028 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4029 return 0;
4030
4031 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4032 && !optimize_function_for_size_p (cfun))
4033 return 0;
4034
4035 offsets = arm_get_frame_offsets ();
4036 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4037
4038 /* As do variadic functions. */
4039 if (crtl->args.pretend_args_size
4040 || cfun->machine->uses_anonymous_args
4041 /* Or if the function calls __builtin_eh_return () */
4042 || crtl->calls_eh_return
4043 /* Or if the function calls alloca */
4044 || cfun->calls_alloca
4045 /* Or if there is a stack adjustment. However, if the stack pointer
4046 is saved on the stack, we can use a pre-incrementing stack load. */
4047 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4048 && stack_adjust == 4))
4049 /* Or if the static chain register was saved above the frame, under the
4050 assumption that the stack pointer isn't saved on the stack. */
4051 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4052 && arm_compute_static_chain_stack_bytes() != 0))
4053 return 0;
4054
4055 saved_int_regs = offsets->saved_regs_mask;
4056
4057 /* Unfortunately, the insn
4058
4059 ldmib sp, {..., sp, ...}
4060
4061 triggers a bug on most SA-110 based devices, such that the stack
4062 pointer won't be correctly restored if the instruction takes a
4063 page fault. We work around this problem by popping r3 along with
4064 the other registers, since that is never slower than executing
4065 another instruction.
4066
4067 We test for !arm_arch5t here, because code for any architecture
4068 less than this could potentially be run on one of the buggy
4069 chips. */
4070 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4071 {
4072 /* Validate that r3 is a call-clobbered register (always true in
4073 the default abi) ... */
4074 if (!call_used_regs[3])
4075 return 0;
4076
4077 /* ... that it isn't being used for a return value ... */
4078 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4079 return 0;
4080
4081 /* ... or for a tail-call argument ... */
4082 if (sibling)
4083 {
4084 gcc_assert (CALL_P (sibling));
4085
4086 if (find_regno_fusage (sibling, USE, 3))
4087 return 0;
4088 }
4089
4090 /* ... and that there are no call-saved registers in r0-r2
4091 (always true in the default ABI). */
4092 if (saved_int_regs & 0x7)
4093 return 0;
4094 }
4095
4096 /* Can't be done if interworking with Thumb, and any registers have been
4097 stacked. */
4098 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4099 return 0;
4100
4101 /* On StrongARM, conditional returns are expensive if they aren't
4102 taken and multiple registers have been stacked. */
4103 if (iscond && arm_tune_strongarm)
4104 {
4105 /* Conditional return when just the LR is stored is a simple
4106 conditional-load instruction, that's not expensive. */
4107 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4108 return 0;
4109
4110 if (flag_pic
4111 && arm_pic_register != INVALID_REGNUM
4112 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4113 return 0;
4114 }
4115
4116 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus
4117 need several instructions if anything needs to be popped. */
4118 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4119 return 0;
4120
4121 /* If there are saved registers but the LR isn't saved, then we need
4122 two instructions for the return. */
4123 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4124 return 0;
4125
4126 /* Can't be done if any of the VFP regs are pushed,
4127 since this also requires an insn. */
4128 if (TARGET_HARD_FLOAT)
4129 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4130 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4131 return 0;
4132
4133 if (TARGET_REALLY_IWMMXT)
4134 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4135 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4136 return 0;
4137
4138 return 1;
4139 }
4140
4141 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4142 shrink-wrapping if possible. This is the case if we need to emit a
4143 prologue, which we can test by looking at the offsets. */
4144 bool
4145 use_simple_return_p (void)
4146 {
4147 arm_stack_offsets *offsets;
4148
4149 /* Note this function can be called before or after reload. */
4150 if (!reload_completed)
4151 arm_compute_frame_layout ();
4152
4153 offsets = arm_get_frame_offsets ();
4154 return offsets->outgoing_args != 0;
4155 }
4156
4157 /* Return TRUE if int I is a valid immediate ARM constant. */
4158
4159 int
4160 const_ok_for_arm (HOST_WIDE_INT i)
4161 {
4162 int lowbit;
4163
4164 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4165 be all zero, or all one. */
4166 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4167 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4168 != ((~(unsigned HOST_WIDE_INT) 0)
4169 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4170 return FALSE;
4171
4172 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4173
4174 /* Fast return for 0 and small values. We must do this for zero, since
4175 the code below can't handle that one case. */
4176 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4177 return TRUE;
4178
4179 /* Get the number of trailing zeros. */
4180 lowbit = ffs((int) i) - 1;
4181
4182 /* Only even shifts are allowed in ARM mode so round down to the
4183 nearest even number. */
4184 if (TARGET_ARM)
4185 lowbit &= ~1;
4186
4187 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4188 return TRUE;
4189
4190 if (TARGET_ARM)
4191 {
4192 /* Allow rotated constants in ARM mode. */
4193 if (lowbit <= 4
4194 && ((i & ~0xc000003f) == 0
4195 || (i & ~0xf000000f) == 0
4196 || (i & ~0xfc000003) == 0))
4197 return TRUE;
4198 }
4199 else if (TARGET_THUMB2)
4200 {
4201 HOST_WIDE_INT v;
4202
4203 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4204 v = i & 0xff;
4205 v |= v << 16;
4206 if (i == v || i == (v | (v << 8)))
4207 return TRUE;
4208
4209 /* Allow repeated pattern 0xXY00XY00. */
4210 v = i & 0xff00;
4211 v |= v << 16;
4212 if (i == v)
4213 return TRUE;
4214 }
4215 else if (TARGET_HAVE_MOVT)
4216 {
4217 /* Thumb-1 Targets with MOVT. */
4218 if (i > 0xffff)
4219 return FALSE;
4220 else
4221 return TRUE;
4222 }
4223
4224 return FALSE;
4225 }
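/* Worked examples for const_ok_for_arm (added annotation only):
   0x000003fc is 0xff shifted left by 2, so it passes the 8-bit
   shifted/rotated test above; 0xf000000f is 0xff rotated right by 4 and
   matches the (i & ~0xf000000f) == 0 case in ARM mode; 0x00ff00ff is
   rejected in ARM mode but accepted in Thumb-2 mode as the replicated
   pattern 0x00XY00XY.  */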
4226
4227 /* Return true if I is a valid constant for the operation CODE. */
4228 int
4229 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4230 {
4231 if (const_ok_for_arm (i))
4232 return 1;
4233
4234 switch (code)
4235 {
4236 case SET:
4237 /* See if we can use movw. */
4238 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4239 return 1;
4240 else
4241 /* Otherwise, try mvn. */
4242 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4243
4244 case PLUS:
4245 /* See if we can use addw or subw. */
4246 if (TARGET_THUMB2
4247 && ((i & 0xfffff000) == 0
4248 || ((-i) & 0xfffff000) == 0))
4249 return 1;
4250 /* Fall through. */
4251 case COMPARE:
4252 case EQ:
4253 case NE:
4254 case GT:
4255 case LE:
4256 case LT:
4257 case GE:
4258 case GEU:
4259 case LTU:
4260 case GTU:
4261 case LEU:
4262 case UNORDERED:
4263 case ORDERED:
4264 case UNEQ:
4265 case UNGE:
4266 case UNLT:
4267 case UNGT:
4268 case UNLE:
4269 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4270
4271 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4272 case XOR:
4273 return 0;
4274
4275 case IOR:
4276 if (TARGET_THUMB2)
4277 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4278 return 0;
4279
4280 case AND:
4281 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4282
4283 default:
4284 gcc_unreachable ();
4285 }
4286 }
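/* Illustrative example for const_ok_for_op (added annotation only): an
   addend of -1 for PLUS is not itself a valid ARM immediate, but
   ARM_SIGN_EXTEND (-(-1)) == 1 is, so the addition can be emitted as a
   subtract of 1.  */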
4287
4288 /* Return true if I is a valid di mode constant for the operation CODE. */
4289 int
4290 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4291 {
4292 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4293 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4294 rtx hi = GEN_INT (hi_val);
4295 rtx lo = GEN_INT (lo_val);
4296
4297 if (TARGET_THUMB1)
4298 return 0;
4299
4300 switch (code)
4301 {
4302 case AND:
4303 case IOR:
4304 case XOR:
4305 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4306 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4307 case PLUS:
4308 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4309
4310 default:
4311 return 0;
4312 }
4313 }
4314
4315 /* Emit a sequence of insns to handle a large constant.
4316 CODE is the code of the operation required, it can be any of SET, PLUS,
4317 IOR, AND, XOR, MINUS;
4318 MODE is the mode in which the operation is being performed;
4319 VAL is the integer to operate on;
4320 SOURCE is the other operand (a register, or a null-pointer for SET);
4321 SUBTARGETS means it is safe to create scratch registers if that will
4322 either produce a simpler sequence, or we will want to cse the values.
4323 Return value is the number of insns emitted. */
4324
4325 /* ??? Tweak this for thumb2. */
4326 int
4327 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4328 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4329 {
4330 rtx cond;
4331
4332 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4333 cond = COND_EXEC_TEST (PATTERN (insn));
4334 else
4335 cond = NULL_RTX;
4336
4337 if (subtargets || code == SET
4338 || (REG_P (target) && REG_P (source)
4339 && REGNO (target) != REGNO (source)))
4340 {
4341 /* After arm_reorg has been called, we can't fix up expensive
4342 constants by pushing them into memory so we must synthesize
4343 them in-line, regardless of the cost. This is only likely to
4344 be more costly on chips that have load delay slots and we are
4345 compiling without running the scheduler (so no splitting
4346 occurred before the final instruction emission).
4347
4348 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4349 */
4350 if (!cfun->machine->after_arm_reorg
4351 && !cond
4352 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4353 1, 0)
4354 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4355 + (code != SET))))
4356 {
4357 if (code == SET)
4358 {
4359 /* Currently SET is the only monadic value for CODE; all
4360 the rest are dyadic. */
4361 if (TARGET_USE_MOVT)
4362 arm_emit_movpair (target, GEN_INT (val));
4363 else
4364 emit_set_insn (target, GEN_INT (val));
4365
4366 return 1;
4367 }
4368 else
4369 {
4370 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4371
4372 if (TARGET_USE_MOVT)
4373 arm_emit_movpair (temp, GEN_INT (val));
4374 else
4375 emit_set_insn (temp, GEN_INT (val));
4376
4377 /* For MINUS, the constant is the minuend (we compute VAL - source), since
4378 subtraction of a constant is always rewritten as addition of its negation. */
4379 if (code == MINUS)
4380 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4381 else
4382 emit_set_insn (target,
4383 gen_rtx_fmt_ee (code, mode, source, temp));
4384 return 2;
4385 }
4386 }
4387 }
4388
4389 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4390 1);
4391 }
4392
4393 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4394 ARM/Thumb-2 immediates and add up to VAL.
4395 The function's return value gives the number of insns required. */
4396 static int
4397 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4398 struct four_ints *return_sequence)
4399 {
4400 int best_consecutive_zeros = 0;
4401 int i;
4402 int best_start = 0;
4403 int insns1, insns2;
4404 struct four_ints tmp_sequence;
4405
4406 /* If we aren't targeting ARM, the best place to start is always at
4407 the bottom, otherwise look more closely. */
4408 if (TARGET_ARM)
4409 {
4410 for (i = 0; i < 32; i += 2)
4411 {
4412 int consecutive_zeros = 0;
4413
4414 if (!(val & (3 << i)))
4415 {
4416 while ((i < 32) && !(val & (3 << i)))
4417 {
4418 consecutive_zeros += 2;
4419 i += 2;
4420 }
4421 if (consecutive_zeros > best_consecutive_zeros)
4422 {
4423 best_consecutive_zeros = consecutive_zeros;
4424 best_start = i - consecutive_zeros;
4425 }
4426 i -= 2;
4427 }
4428 }
4429 }
4430
4431 /* So long as it won't require any more insns to do so, it's
4432 desirable to emit a small constant (in bits 0...9) in the last
4433 insn. This way there is more chance that it can be combined with
4434 a later addressing insn to form a pre-indexed load or store
4435 operation. Consider:
4436
4437 *((volatile int *)0xe0000100) = 1;
4438 *((volatile int *)0xe0000110) = 2;
4439
4440 We want this to wind up as:
4441
4442 mov rA, #0xe0000000
4443 mov rB, #1
4444 str rB, [rA, #0x100]
4445 mov rB, #2
4446 str rB, [rA, #0x110]
4447
4448 rather than having to synthesize both large constants from scratch.
4449
4450 Therefore, we calculate how many insns would be required to emit
4451 the constant starting from `best_start', and also starting from
4452 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4453 yield a shorter sequence, we may as well use zero. */
4454 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4455 if (best_start != 0
4456 && ((HOST_WIDE_INT_1U << best_start) < val))
4457 {
4458 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4459 if (insns2 <= insns1)
4460 {
4461 *return_sequence = tmp_sequence;
4462 insns1 = insns2;
4463 }
4464 }
4465
4466 return insns1;
4467 }
4468
4469 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4470 static int
4471 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4472 struct four_ints *return_sequence, int i)
4473 {
4474 int remainder = val & 0xffffffff;
4475 int insns = 0;
4476
4477 /* Try and find a way of doing the job in either two or three
4478 instructions.
4479
4480 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4481 location. We start at position I. This may be the MSB, or
4482 optimal_immediate_sequence may have positioned it at the largest block
4483 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4484 wrapping around to the top of the word when we drop off the bottom.
4485 In the worst case this code should produce no more than four insns.
4486
4487 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4488 constants, shifted to any arbitrary location. We should always start
4489 at the MSB. */
4490 do
4491 {
4492 int end;
4493 unsigned int b1, b2, b3, b4;
4494 unsigned HOST_WIDE_INT result;
4495 int loc;
4496
4497 gcc_assert (insns < 4);
4498
4499 if (i <= 0)
4500 i += 32;
4501
4502 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4503 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4504 {
4505 loc = i;
4506 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4507 /* We can use addw/subw for the last 12 bits. */
4508 result = remainder;
4509 else
4510 {
4511 /* Use an 8-bit shifted/rotated immediate. */
4512 end = i - 8;
4513 if (end < 0)
4514 end += 32;
4515 result = remainder & ((0x0ff << end)
4516 | ((i < end) ? (0xff >> (32 - end))
4517 : 0));
4518 i -= 8;
4519 }
4520 }
4521 else
4522 {
4523 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4524 arbitrary shifts. */
4525 i -= TARGET_ARM ? 2 : 1;
4526 continue;
4527 }
4528
4529 /* Next, see if we can do a better job with a thumb2 replicated
4530 constant.
4531
4532 We do it this way around to catch the cases like 0x01F001E0 where
4533 two 8-bit immediates would work, but a replicated constant would
4534 make it worse.
4535
4536 TODO: 16-bit constants that don't clear all the bits, but still win.
4537 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4538 if (TARGET_THUMB2)
4539 {
4540 b1 = (remainder & 0xff000000) >> 24;
4541 b2 = (remainder & 0x00ff0000) >> 16;
4542 b3 = (remainder & 0x0000ff00) >> 8;
4543 b4 = remainder & 0xff;
4544
4545 if (loc > 24)
4546 {
4547 /* The 8-bit immediate already found clears b1 (and maybe b2),
4548 but must leave b3 and b4 alone. */
4549
4550 /* First try to find a 32-bit replicated constant that clears
4551 almost everything. We can assume that we can't do it in one,
4552 or else we wouldn't be here. */
4553 unsigned int tmp = b1 & b2 & b3 & b4;
4554 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4555 + (tmp << 24);
4556 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4557 + (tmp == b3) + (tmp == b4);
4558 if (tmp
4559 && (matching_bytes >= 3
4560 || (matching_bytes == 2
4561 && const_ok_for_op (remainder & ~tmp2, code))))
4562 {
4563 /* At least 3 of the bytes match, and the fourth has at
4564 least as many bits set, or two of the bytes match
4565 and it will only require one more insn to finish. */
4566 result = tmp2;
4567 i = tmp != b1 ? 32
4568 : tmp != b2 ? 24
4569 : tmp != b3 ? 16
4570 : 8;
4571 }
4572
4573 /* Second, try to find a 16-bit replicated constant that can
4574 leave three of the bytes clear. If b2 or b4 is already
4575 zero, then we can. If the 8-bit from above would not
4576 clear b2 anyway, then we still win. */
4577 else if (b1 == b3 && (!b2 || !b4
4578 || (remainder & 0x00ff0000 & ~result)))
4579 {
4580 result = remainder & 0xff00ff00;
4581 i = 24;
4582 }
4583 }
4584 else if (loc > 16)
4585 {
4586 /* The 8-bit immediate already found clears b2 (and maybe b3)
4587 and we don't get here unless b1 is already clear, but it will
4588 leave b4 unchanged. */
4589
4590 /* If we can clear b2 and b4 at once, then we win, since the
4591 8-bit immediate couldn't possibly reach that far. */
4592 if (b2 == b4)
4593 {
4594 result = remainder & 0x00ff00ff;
4595 i = 16;
4596 }
4597 }
4598 }
4599
4600 return_sequence->i[insns++] = result;
4601 remainder &= ~result;
4602
4603 if (code == SET || code == MINUS)
4604 code = PLUS;
4605 }
4606 while (remainder);
4607
4608 return insns;
4609 }
4610
4611 /* Emit an instruction with the indicated PATTERN. If COND is
4612 non-NULL, conditionalize the execution of the instruction on COND
4613 being true. */
4614
4615 static void
4616 emit_constant_insn (rtx cond, rtx pattern)
4617 {
4618 if (cond)
4619 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4620 emit_insn (pattern);
4621 }
4622
4623 /* As above, but extra parameter GENERATE which, if clear, suppresses
4624 RTL generation. */
4625
4626 static int
4627 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4628 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4629 int subtargets, int generate)
4630 {
4631 int can_invert = 0;
4632 int can_negate = 0;
4633 int final_invert = 0;
4634 int i;
4635 int set_sign_bit_copies = 0;
4636 int clear_sign_bit_copies = 0;
4637 int clear_zero_bit_copies = 0;
4638 int set_zero_bit_copies = 0;
4639 int insns = 0, neg_insns, inv_insns;
4640 unsigned HOST_WIDE_INT temp1, temp2;
4641 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4642 struct four_ints *immediates;
4643 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4644
4645 /* Find out which operations are safe for a given CODE. Also do a quick
4646 check for degenerate cases; these can occur when DImode operations
4647 are split. */
4648 switch (code)
4649 {
4650 case SET:
4651 can_invert = 1;
4652 break;
4653
4654 case PLUS:
4655 can_negate = 1;
4656 break;
4657
4658 case IOR:
4659 if (remainder == 0xffffffff)
4660 {
4661 if (generate)
4662 emit_constant_insn (cond,
4663 gen_rtx_SET (target,
4664 GEN_INT (ARM_SIGN_EXTEND (val))));
4665 return 1;
4666 }
4667
4668 if (remainder == 0)
4669 {
4670 if (reload_completed && rtx_equal_p (target, source))
4671 return 0;
4672
4673 if (generate)
4674 emit_constant_insn (cond, gen_rtx_SET (target, source));
4675 return 1;
4676 }
4677 break;
4678
4679 case AND:
4680 if (remainder == 0)
4681 {
4682 if (generate)
4683 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4684 return 1;
4685 }
4686 if (remainder == 0xffffffff)
4687 {
4688 if (reload_completed && rtx_equal_p (target, source))
4689 return 0;
4690 if (generate)
4691 emit_constant_insn (cond, gen_rtx_SET (target, source));
4692 return 1;
4693 }
4694 can_invert = 1;
4695 break;
4696
4697 case XOR:
4698 if (remainder == 0)
4699 {
4700 if (reload_completed && rtx_equal_p (target, source))
4701 return 0;
4702 if (generate)
4703 emit_constant_insn (cond, gen_rtx_SET (target, source));
4704 return 1;
4705 }
4706
4707 if (remainder == 0xffffffff)
4708 {
4709 if (generate)
4710 emit_constant_insn (cond,
4711 gen_rtx_SET (target,
4712 gen_rtx_NOT (mode, source)));
4713 return 1;
4714 }
4715 final_invert = 1;
4716 break;
4717
4718 case MINUS:
4719 /* We treat MINUS as (val - source), since (source - val) is always
4720 passed as (source + (-val)). */
4721 if (remainder == 0)
4722 {
4723 if (generate)
4724 emit_constant_insn (cond,
4725 gen_rtx_SET (target,
4726 gen_rtx_NEG (mode, source)));
4727 return 1;
4728 }
4729 if (const_ok_for_arm (val))
4730 {
4731 if (generate)
4732 emit_constant_insn (cond,
4733 gen_rtx_SET (target,
4734 gen_rtx_MINUS (mode, GEN_INT (val),
4735 source)));
4736 return 1;
4737 }
4738
4739 break;
4740
4741 default:
4742 gcc_unreachable ();
4743 }
4744
4745 /* If we can do it in one insn get out quickly. */
4746 if (const_ok_for_op (val, code))
4747 {
4748 if (generate)
4749 emit_constant_insn (cond,
4750 gen_rtx_SET (target,
4751 (source
4752 ? gen_rtx_fmt_ee (code, mode, source,
4753 GEN_INT (val))
4754 : GEN_INT (val))));
4755 return 1;
4756 }
4757
4758 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4759 insn. */
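  /* Illustrative examples (added annotation only): AND with 0xffff
     becomes a single UXTH, and AND with 0x1fffff becomes
     UBFX target, source, #0, #21.  */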
4760 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4761 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4762 {
4763 if (generate)
4764 {
4765 if (mode == SImode && i == 16)
4766 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4767 smaller insn. */
4768 emit_constant_insn (cond,
4769 gen_zero_extendhisi2
4770 (target, gen_lowpart (HImode, source)));
4771 else
4772 /* Extz only supports SImode, but we can coerce the operands
4773 into that mode. */
4774 emit_constant_insn (cond,
4775 gen_extzv_t2 (gen_lowpart (SImode, target),
4776 gen_lowpart (SImode, source),
4777 GEN_INT (i), const0_rtx));
4778 }
4779
4780 return 1;
4781 }
4782
4783 /* Calculate a few attributes that may be useful for specific
4784 optimizations. */
4785 /* Count number of leading zeros. */
4786 for (i = 31; i >= 0; i--)
4787 {
4788 if ((remainder & (1 << i)) == 0)
4789 clear_sign_bit_copies++;
4790 else
4791 break;
4792 }
4793
4794 /* Count number of leading 1's. */
4795 for (i = 31; i >= 0; i--)
4796 {
4797 if ((remainder & (1 << i)) != 0)
4798 set_sign_bit_copies++;
4799 else
4800 break;
4801 }
4802
4803 /* Count number of trailing zeros. */
4804 for (i = 0; i <= 31; i++)
4805 {
4806 if ((remainder & (1 << i)) == 0)
4807 clear_zero_bit_copies++;
4808 else
4809 break;
4810 }
4811
4812 /* Count number of trailing 1's. */
4813 for (i = 0; i <= 31; i++)
4814 {
4815 if ((remainder & (1 << i)) != 0)
4816 set_zero_bit_copies++;
4817 else
4818 break;
4819 }
4820
4821 switch (code)
4822 {
4823 case SET:
4824 /* See if we can do this by sign-extending a constant that is known
4825 to be negative. This is a good way of doing it, since the shift
4826 may well merge into a subsequent insn. */
4827 if (set_sign_bit_copies > 1)
4828 {
4829 if (const_ok_for_arm
4830 (temp1 = ARM_SIGN_EXTEND (remainder
4831 << (set_sign_bit_copies - 1))))
4832 {
4833 if (generate)
4834 {
4835 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4836 emit_constant_insn (cond,
4837 gen_rtx_SET (new_src, GEN_INT (temp1)));
4838 emit_constant_insn (cond,
4839 gen_ashrsi3 (target, new_src,
4840 GEN_INT (set_sign_bit_copies - 1)));
4841 }
4842 return 2;
4843 }
4844 /* For an inverted constant, we will need to set the low bits;
4845 these will be shifted out of harm's way. */
4846 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4847 if (const_ok_for_arm (~temp1))
4848 {
4849 if (generate)
4850 {
4851 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4852 emit_constant_insn (cond,
4853 gen_rtx_SET (new_src, GEN_INT (temp1)));
4854 emit_constant_insn (cond,
4855 gen_ashrsi3 (target, new_src,
4856 GEN_INT (set_sign_bit_copies - 1)));
4857 }
4858 return 2;
4859 }
4860 }
4861
4862 /* See if we can calculate the value as the difference between two
4863 valid immediates. */
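      /* Illustrative example (added annotation only): 0x000fffff is not a
	 valid immediate, but it equals 0x00100000 - 1 and both of those
	 are, so it can be synthesized as a MOV of 0x00100000 followed by
	 a SUB of 1, i.e. two insns.  */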
4864 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4865 {
4866 int topshift = clear_sign_bit_copies & ~1;
4867
4868 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4869 & (0xff000000 >> topshift));
4870
4871 /* If temp1 is zero, then that means the 9 most significant
4872 bits of remainder were 1 and we've caused it to overflow.
4873 When topshift is 0 we don't need to do anything since we
4874 can borrow from 'bit 32'. */
4875 if (temp1 == 0 && topshift != 0)
4876 temp1 = 0x80000000 >> (topshift - 1);
4877
4878 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4879
4880 if (const_ok_for_arm (temp2))
4881 {
4882 if (generate)
4883 {
4884 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4885 emit_constant_insn (cond,
4886 gen_rtx_SET (new_src, GEN_INT (temp1)));
4887 emit_constant_insn (cond,
4888 gen_addsi3 (target, new_src,
4889 GEN_INT (-temp2)));
4890 }
4891
4892 return 2;
4893 }
4894 }
4895
4896 /* See if we can generate this by setting the bottom (or the top)
4897 16 bits, and then shifting these into the other half of the
4898 word. We only look for the simplest cases, to do more would cost
4899 too much. Be careful, however, not to generate this when the
4900 alternative would take fewer insns. */
4901 if (val & 0xffff0000)
4902 {
4903 temp1 = remainder & 0xffff0000;
4904 temp2 = remainder & 0x0000ffff;
4905
4906 /* Overlaps outside this range are best done using other methods. */
4907 for (i = 9; i < 24; i++)
4908 {
4909 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4910 && !const_ok_for_arm (temp2))
4911 {
4912 rtx new_src = (subtargets
4913 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4914 : target);
4915 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4916 source, subtargets, generate);
4917 source = new_src;
4918 if (generate)
4919 emit_constant_insn
4920 (cond,
4921 gen_rtx_SET
4922 (target,
4923 gen_rtx_IOR (mode,
4924 gen_rtx_ASHIFT (mode, source,
4925 GEN_INT (i)),
4926 source)));
4927 return insns + 1;
4928 }
4929 }
4930
4931 /* Don't duplicate cases already considered. */
4932 for (i = 17; i < 24; i++)
4933 {
4934 if (((temp1 | (temp1 >> i)) == remainder)
4935 && !const_ok_for_arm (temp1))
4936 {
4937 rtx new_src = (subtargets
4938 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4939 : target);
4940 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4941 source, subtargets, generate);
4942 source = new_src;
4943 if (generate)
4944 emit_constant_insn
4945 (cond,
4946 gen_rtx_SET (target,
4947 gen_rtx_IOR
4948 (mode,
4949 gen_rtx_LSHIFTRT (mode, source,
4950 GEN_INT (i)),
4951 source)));
4952 return insns + 1;
4953 }
4954 }
4955 }
4956 break;
4957
4958 case IOR:
4959 case XOR:
4960 /* If we have IOR or XOR, and the constant can be loaded in a
4961 single instruction, and we can find a temporary to put it in,
4962 then this can be done in two instructions instead of 3-4. */
4963 if (subtargets
4964 /* TARGET can't be NULL if SUBTARGETS is 0 */
4965 || (reload_completed && !reg_mentioned_p (target, source)))
4966 {
4967 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4968 {
4969 if (generate)
4970 {
4971 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4972
4973 emit_constant_insn (cond,
4974 gen_rtx_SET (sub, GEN_INT (val)));
4975 emit_constant_insn (cond,
4976 gen_rtx_SET (target,
4977 gen_rtx_fmt_ee (code, mode,
4978 source, sub)));
4979 }
4980 return 2;
4981 }
4982 }
4983
4984 if (code == XOR)
4985 break;
4986
4987 /* Convert
4988 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4989 followed by 0s, e.g. 0xfff00000) to
4990 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4991
4992 This can be done in 2 instructions by using shifts with mov or mvn.
4993 e.g. for
4994 x = x | 0xfff00000;
4995 we generate:
4996 mvn r0, r0, asl #12
4997 mvn r0, r0, lsr #12 */
4998 if (set_sign_bit_copies > 8
4999 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5000 {
5001 if (generate)
5002 {
5003 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5004 rtx shift = GEN_INT (set_sign_bit_copies);
5005
5006 emit_constant_insn
5007 (cond,
5008 gen_rtx_SET (sub,
5009 gen_rtx_NOT (mode,
5010 gen_rtx_ASHIFT (mode,
5011 source,
5012 shift))));
5013 emit_constant_insn
5014 (cond,
5015 gen_rtx_SET (target,
5016 gen_rtx_NOT (mode,
5017 gen_rtx_LSHIFTRT (mode, sub,
5018 shift))));
5019 }
5020 return 2;
5021 }
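/* Illustrative sketch (not part of the original source, compiled out
   with #if 0): a host-side check that the mvn-with-asl / mvn-with-lsr
   pair described above really computes y | <top-N-ones constant>.
   Plain C shifts stand in for the generated asl/lsr and the function
   name is hypothetical.  */
#if 0
#include <assert.h>

static void
example_ior_leading_ones (void)
{
  unsigned int y = 0x12345678;
  unsigned int n = 12;				/* set_sign_bit_copies */
  unsigned int mask = 0xffffffffu << (32 - n);	/* 0xfff00000 */

  /* mvn rX, rY, asl #12 ; mvn rX, rX, lsr #12.  */
  unsigned int x = ~(~(y << n) >> n);

  assert (x == (y | mask));			/* 0xfff45678 */
}
#endif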
5022
5023 /* Convert
5024 x = y | constant (which has set_zero_bit_copies trailing ones)
5025 to
5026 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5027
5028 E.g. for r0 = r0 | 0xfff we generate:
5029 mvn r0, r0, lsr #12
5030 mvn r0, r0, asl #12
5031
5032 */
5033 if (set_zero_bit_copies > 8
5034 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5035 {
5036 if (generate)
5037 {
5038 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5039 rtx shift = GEN_INT (set_zero_bit_copies);
5040
5041 emit_constant_insn
5042 (cond,
5043 gen_rtx_SET (sub,
5044 gen_rtx_NOT (mode,
5045 gen_rtx_LSHIFTRT (mode,
5046 source,
5047 shift))));
5048 emit_constant_insn
5049 (cond,
5050 gen_rtx_SET (target,
5051 gen_rtx_NOT (mode,
5052 gen_rtx_ASHIFT (mode, sub,
5053 shift))));
5054 }
5055 return 2;
5056 }
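/* Illustrative sketch (not part of the original source, compiled out
   with #if 0): the analogous check for the trailing-ones case above,
   where mvn-with-lsr followed by mvn-with-asl computes
   y | <low-N-ones constant>.  Hypothetical helper, plain C shifts.  */
#if 0
#include <assert.h>

static void
example_ior_trailing_ones (void)
{
  unsigned int y = 0x12345678;
  unsigned int n = 12;			/* set_zero_bit_copies */
  unsigned int mask = (1u << n) - 1;	/* 0x00000fff */

  /* mvn rX, rY, lsr #12 ; mvn rX, rX, asl #12.  */
  unsigned int x = ~((~y >> n) << n);

  assert (x == (y | mask));		/* 0x12345fff */
}
#endif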
5057
5058 /* This will never be reached for Thumb-2 because orn is a valid
5059 instruction. This is for Thumb-1 and the 32-bit ARM cases.
5060
5061 x = y | constant (such that ~constant is a valid constant)
5062 Transform this to
5063 x = ~(~y & ~constant).
5064 */
5065 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5066 {
5067 if (generate)
5068 {
5069 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5070 emit_constant_insn (cond,
5071 gen_rtx_SET (sub,
5072 gen_rtx_NOT (mode, source)));
5073 source = sub;
5074 if (subtargets)
5075 sub = gen_reg_rtx (mode);
5076 emit_constant_insn (cond,
5077 gen_rtx_SET (sub,
5078 gen_rtx_AND (mode, source,
5079 GEN_INT (temp1))));
5080 emit_constant_insn (cond,
5081 gen_rtx_SET (target,
5082 gen_rtx_NOT (mode, sub)));
5083 }
5084 return 3;
5085 }
5086 break;
5087
5088 case AND:
5089 /* See if two shifts will do 2 or more insn's worth of work. */
5090 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5091 {
5092 HOST_WIDE_INT shift_mask = ((0xffffffff
5093 << (32 - clear_sign_bit_copies))
5094 & 0xffffffff);
5095
5096 if ((remainder | shift_mask) != 0xffffffff)
5097 {
5098 HOST_WIDE_INT new_val
5099 = ARM_SIGN_EXTEND (remainder | shift_mask);
5100
5101 if (generate)
5102 {
5103 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5104 insns = arm_gen_constant (AND, SImode, cond, new_val,
5105 new_src, source, subtargets, 1);
5106 source = new_src;
5107 }
5108 else
5109 {
5110 rtx targ = subtargets ? NULL_RTX : target;
5111 insns = arm_gen_constant (AND, mode, cond, new_val,
5112 targ, source, subtargets, 0);
5113 }
5114 }
5115
5116 if (generate)
5117 {
5118 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5119 rtx shift = GEN_INT (clear_sign_bit_copies);
5120
5121 emit_insn (gen_ashlsi3 (new_src, source, shift));
5122 emit_insn (gen_lshrsi3 (target, new_src, shift));
5123 }
5124
5125 return insns + 2;
5126 }
5127
5128 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5129 {
5130 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5131
5132 if ((remainder | shift_mask) != 0xffffffff)
5133 {
5134 HOST_WIDE_INT new_val
5135 = ARM_SIGN_EXTEND (remainder | shift_mask);
5136 if (generate)
5137 {
5138 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5139
5140 insns = arm_gen_constant (AND, mode, cond, new_val,
5141 new_src, source, subtargets, 1);
5142 source = new_src;
5143 }
5144 else
5145 {
5146 rtx targ = subtargets ? NULL_RTX : target;
5147
5148 insns = arm_gen_constant (AND, mode, cond, new_val,
5149 targ, source, subtargets, 0);
5150 }
5151 }
5152
5153 if (generate)
5154 {
5155 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5156 rtx shift = GEN_INT (clear_zero_bit_copies);
5157
5158 emit_insn (gen_lshrsi3 (new_src, source, shift));
5159 emit_insn (gen_ashlsi3 (target, new_src, shift));
5160 }
5161
5162 return insns + 2;
5163 }
5164
5165 break;
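/* Illustrative sketch (not part of the original source, compiled out
   with #if 0): the two-shift AND idioms used just above.  Shifting
   left and then logically right by N clears the top N bits; shifting
   right and then left clears the bottom N bits.  Hypothetical helper,
   plain C shifts.  */
#if 0
#include <assert.h>

static void
example_and_via_shifts (void)
{
  unsigned int x = 0xdeadbeef;
  unsigned int n = 16;

  /* lsl #16 ; lsr #16  ==  x & 0x0000ffff.  */
  assert (((x << n) >> n) == (x & (0xffffffffu >> n)));

  /* lsr #16 ; lsl #16  ==  x & 0xffff0000.  */
  assert (((x >> n) << n) == (x & (0xffffffffu << n)));
}
#endif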
5166
5167 default:
5168 break;
5169 }
5170
5171 /* Calculate what the instruction sequences would be if we generated it
5172 normally, negated, or inverted. */
5173 if (code == AND)
5174 /* AND cannot be split into multiple insns, so invert and use BIC. */
5175 insns = 99;
5176 else
5177 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5178
5179 if (can_negate)
5180 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5181 &neg_immediates);
5182 else
5183 neg_insns = 99;
5184
5185 if (can_invert || final_invert)
5186 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5187 &inv_immediates);
5188 else
5189 inv_insns = 99;
5190
5191 immediates = &pos_immediates;
5192
5193 /* Is the negated immediate sequence more efficient? */
5194 if (neg_insns < insns && neg_insns <= inv_insns)
5195 {
5196 insns = neg_insns;
5197 immediates = &neg_immediates;
5198 }
5199 else
5200 can_negate = 0;
5201
5202 /* Is the inverted immediate sequence more efficient?
5203 We must allow for an extra NOT instruction for XOR operations, although
5204 there is some chance that the final 'mvn' will get optimized later. */
5205 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5206 {
5207 insns = inv_insns;
5208 immediates = &inv_immediates;
5209 }
5210 else
5211 {
5212 can_invert = 0;
5213 final_invert = 0;
5214 }
5215
5216 /* Now output the chosen sequence as instructions. */
5217 if (generate)
5218 {
5219 for (i = 0; i < insns; i++)
5220 {
5221 rtx new_src, temp1_rtx;
5222
5223 temp1 = immediates->i[i];
5224
5225 if (code == SET || code == MINUS)
5226 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5227 else if ((final_invert || i < (insns - 1)) && subtargets)
5228 new_src = gen_reg_rtx (mode);
5229 else
5230 new_src = target;
5231
5232 if (can_invert)
5233 temp1 = ~temp1;
5234 else if (can_negate)
5235 temp1 = -temp1;
5236
5237 temp1 = trunc_int_for_mode (temp1, mode);
5238 temp1_rtx = GEN_INT (temp1);
5239
5240 if (code == SET)
5241 ;
5242 else if (code == MINUS)
5243 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5244 else
5245 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5246
5247 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5248 source = new_src;
5249
5250 if (code == SET)
5251 {
5252 can_negate = can_invert;
5253 can_invert = 0;
5254 code = PLUS;
5255 }
5256 else if (code == MINUS)
5257 code = PLUS;
5258 }
5259 }
5260
5261 if (final_invert)
5262 {
5263 if (generate)
5264 emit_constant_insn (cond, gen_rtx_SET (target,
5265 gen_rtx_NOT (mode, source)));
5266 insns++;
5267 }
5268
5269 return insns;
5270 }
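/* Illustrative sketch (not part of the original source, compiled out
   with #if 0): the three-way choice above between the positive,
   negated and inverted forms of the constant.  The nonzero-byte count
   is only a crude stand-in for the chunk count that
   optimal_immediate_sequence computes (the real code works on rotated
   8-bit fields, not bytes), but it shows why 0xfffffffe is emitted via
   a single mvn of its inverse.  */
#if 0
#include <assert.h>

static int
example_byte_chunks (unsigned int x)
{
  int n = 0;
  for (int i = 0; i < 4; i++)
    if ((x >> (8 * i)) & 0xff)
      n++;
  return n;
}

static void
example_choose_form (void)
{
  unsigned int val = 0xfffffffe;

  int pos = example_byte_chunks (val);	/* 4 chunks */
  int inv = example_byte_chunks (~val);	/* 1 chunk: mvn #1 */
  int neg = example_byte_chunks (-val);	/* 1 chunk, but only usable when
					   the operation can absorb the
					   negation */

  assert (inv < pos && inv <= neg);
}
#endif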
5271
5272 /* Canonicalize a comparison so that we are more likely to recognize it.
5273 This can be done for a few constant compares, where we can make the
5274 immediate value easier to load. */
5275
5276 static void
5277 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5278 bool op0_preserve_value)
5279 {
5280 machine_mode mode;
5281 unsigned HOST_WIDE_INT i, maxval;
5282
5283 mode = GET_MODE (*op0);
5284 if (mode == VOIDmode)
5285 mode = GET_MODE (*op1);
5286
5287 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5288
5289 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5290 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5291 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5292 for GTU/LEU in Thumb mode. */
5293 if (mode == DImode)
5294 {
5295
5296 if (*code == GT || *code == LE
5297 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5298 {
5299 /* Missing comparison. First try to use an available
5300 comparison. */
5301 if (CONST_INT_P (*op1))
5302 {
5303 i = INTVAL (*op1);
5304 switch (*code)
5305 {
5306 case GT:
5307 case LE:
5308 if (i != maxval
5309 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5310 {
5311 *op1 = GEN_INT (i + 1);
5312 *code = *code == GT ? GE : LT;
5313 return;
5314 }
5315 break;
5316 case GTU:
5317 case LEU:
5318 if (i != ~((unsigned HOST_WIDE_INT) 0)
5319 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5320 {
5321 *op1 = GEN_INT (i + 1);
5322 *code = *code == GTU ? GEU : LTU;
5323 return;
5324 }
5325 break;
5326 default:
5327 gcc_unreachable ();
5328 }
5329 }
5330
5331 /* If that did not work, reverse the condition. */
5332 if (!op0_preserve_value)
5333 {
5334 std::swap (*op0, *op1);
5335 *code = (int)swap_condition ((enum rtx_code)*code);
5336 }
5337 }
5338 return;
5339 }
5340
5341 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5342 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5343 to facilitate possible combining with a cmp into 'ands'. */
5344 if (mode == SImode
5345 && GET_CODE (*op0) == ZERO_EXTEND
5346 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5347 && GET_MODE (XEXP (*op0, 0)) == QImode
5348 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5349 && subreg_lowpart_p (XEXP (*op0, 0))
5350 && *op1 == const0_rtx)
5351 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5352 GEN_INT (255));
5353
5354 /* Comparisons smaller than DImode. Only adjust comparisons against
5355 an out-of-range constant. */
5356 if (!CONST_INT_P (*op1)
5357 || const_ok_for_arm (INTVAL (*op1))
5358 || const_ok_for_arm (- INTVAL (*op1)))
5359 return;
5360
5361 i = INTVAL (*op1);
5362
5363 switch (*code)
5364 {
5365 case EQ:
5366 case NE:
5367 return;
5368
5369 case GT:
5370 case LE:
5371 if (i != maxval
5372 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5373 {
5374 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5375 *code = *code == GT ? GE : LT;
5376 return;
5377 }
5378 break;
5379
5380 case GE:
5381 case LT:
5382 if (i != ~maxval
5383 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5384 {
5385 *op1 = GEN_INT (i - 1);
5386 *code = *code == GE ? GT : LE;
5387 return;
5388 }
5389 break;
5390
5391 case GTU:
5392 case LEU:
5393 if (i != ~((unsigned HOST_WIDE_INT) 0)
5394 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5395 {
5396 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5397 *code = *code == GTU ? GEU : LTU;
5398 return;
5399 }
5400 break;
5401
5402 case GEU:
5403 case LTU:
5404 if (i != 0
5405 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5406 {
5407 *op1 = GEN_INT (i - 1);
5408 *code = *code == GEU ? GTU : LEU;
5409 return;
5410 }
5411 break;
5412
5413 default:
5414 gcc_unreachable ();
5415 }
5416 }
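/* Illustrative sketch (not part of the original source, compiled out
   with #if 0): the constant adjustments performed above, checked with
   plain C comparisons.  GT i is rewritten as GE (i + 1), GEU i as
   GTU (i - 1), and so on, which lets an awkward constant be replaced
   by one that is cheaper to load.  */
#if 0
#include <assert.h>

static void
example_canonicalize_comparison (void)
{
  for (int x = -1000; x < 1000; x++)
    {
      /* GT i  <->  GE (i + 1).  */
      assert ((x > 10) == (x >= 11));
      /* LE i  <->  LT (i + 1).  */
      assert ((x <= 10) == (x < 11));
      /* GE i  <->  GT (i - 1).  */
      assert ((x >= 10) == (x > 9));
    }

  for (unsigned int x = 0; x < 0x200; x++)
    {
      /* GTU i  <->  GEU (i + 1).  */
      assert ((x > 0xfeu) == (x >= 0xffu));
      /* GEU i  <->  GTU (i - 1).  */
      assert ((x >= 0x100u) == (x > 0xffu));
    }
}
#endif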
5417
5418
5419 /* Define how to find the value returned by a function. */
5420
5421 static rtx
5422 arm_function_value(const_tree type, const_tree func,
5423 bool outgoing ATTRIBUTE_UNUSED)
5424 {
5425 machine_mode mode;
5426 int unsignedp ATTRIBUTE_UNUSED;
5427 rtx r ATTRIBUTE_UNUSED;
5428
5429 mode = TYPE_MODE (type);
5430
5431 if (TARGET_AAPCS_BASED)
5432 return aapcs_allocate_return_reg (mode, type, func);
5433
5434 /* Promote integer types. */
5435 if (INTEGRAL_TYPE_P (type))
5436 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5437
5438 /* Promote small structs returned in a register to full-word size
5439 for big-endian AAPCS. */
5440 if (arm_return_in_msb (type))
5441 {
5442 HOST_WIDE_INT size = int_size_in_bytes (type);
5443 if (size % UNITS_PER_WORD != 0)
5444 {
5445 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5446 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5447 }
5448 }
5449
5450 return arm_libcall_value_1 (mode);
5451 }
5452
5453 /* libcall hashtable helpers. */
5454
5455 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5456 {
5457 static inline hashval_t hash (const rtx_def *);
5458 static inline bool equal (const rtx_def *, const rtx_def *);
5459 static inline void remove (rtx_def *);
5460 };
5461
5462 inline bool
5463 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5464 {
5465 return rtx_equal_p (p1, p2);
5466 }
5467
5468 inline hashval_t
5469 libcall_hasher::hash (const rtx_def *p1)
5470 {
5471 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5472 }
5473
5474 typedef hash_table<libcall_hasher> libcall_table_type;
5475
5476 static void
5477 add_libcall (libcall_table_type *htab, rtx libcall)
5478 {
5479 *htab->find_slot (libcall, INSERT) = libcall;
5480 }
5481
5482 static bool
5483 arm_libcall_uses_aapcs_base (const_rtx libcall)
5484 {
5485 static bool init_done = false;
5486 static libcall_table_type *libcall_htab = NULL;
5487
5488 if (!init_done)
5489 {
5490 init_done = true;
5491
5492 libcall_htab = new libcall_table_type (31);
5493 add_libcall (libcall_htab,
5494 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5495 add_libcall (libcall_htab,
5496 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5497 add_libcall (libcall_htab,
5498 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5501
5502 add_libcall (libcall_htab,
5503 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5504 add_libcall (libcall_htab,
5505 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5506 add_libcall (libcall_htab,
5507 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5508 add_libcall (libcall_htab,
5509 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5510
5511 add_libcall (libcall_htab,
5512 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5513 add_libcall (libcall_htab,
5514 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5515 add_libcall (libcall_htab,
5516 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5517 add_libcall (libcall_htab,
5518 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5519 add_libcall (libcall_htab,
5520 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5521 add_libcall (libcall_htab,
5522 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5523 add_libcall (libcall_htab,
5524 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5525 add_libcall (libcall_htab,
5526 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5527
5528 /* Values from double-precision helper functions are returned in core
5529 registers if the selected core only supports single-precision
5530 arithmetic, even if we are using the hard-float ABI. The same is
5531 true for single-precision helpers, but we will never be using the
5532 hard-float ABI on a CPU which doesn't support single-precision
5533 operations in hardware. */
5534 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5535 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5536 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5537 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5538 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5539 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5540 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5541 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5542 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5543 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5544 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5545 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5546 SFmode));
5547 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5548 DFmode));
5549 add_libcall (libcall_htab,
5550 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5551 }
5552
5553 return libcall && libcall_htab->find (libcall) != NULL;
5554 }
5555
5556 static rtx
5557 arm_libcall_value_1 (machine_mode mode)
5558 {
5559 if (TARGET_AAPCS_BASED)
5560 return aapcs_libcall_value (mode);
5561 else if (TARGET_IWMMXT_ABI
5562 && arm_vector_mode_supported_p (mode))
5563 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5564 else
5565 return gen_rtx_REG (mode, ARG_REGISTER (1));
5566 }
5567
5568 /* Define how to find the value returned by a library function
5569 assuming the value has mode MODE. */
5570
5571 static rtx
5572 arm_libcall_value (machine_mode mode, const_rtx libcall)
5573 {
5574 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5575 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5576 {
5577 /* The following libcalls return their result in integer registers,
5578 even though they return a floating point value. */
5579 if (arm_libcall_uses_aapcs_base (libcall))
5580 return gen_rtx_REG (mode, ARG_REGISTER (1));
5581
5582 }
5583
5584 return arm_libcall_value_1 (mode);
5585 }
5586
5587 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5588
5589 static bool
5590 arm_function_value_regno_p (const unsigned int regno)
5591 {
5592 if (regno == ARG_REGISTER (1)
5593 || (TARGET_32BIT
5594 && TARGET_AAPCS_BASED
5595 && TARGET_HARD_FLOAT
5596 && regno == FIRST_VFP_REGNUM)
5597 || (TARGET_IWMMXT_ABI
5598 && regno == FIRST_IWMMXT_REGNUM))
5599 return true;
5600
5601 return false;
5602 }
5603
5604 /* Determine the amount of memory needed to store the possible return
5605 registers of an untyped call. */
5606 int
5607 arm_apply_result_size (void)
5608 {
5609 int size = 16;
5610
5611 if (TARGET_32BIT)
5612 {
5613 if (TARGET_HARD_FLOAT_ABI)
5614 size += 32;
5615 if (TARGET_IWMMXT_ABI)
5616 size += 8;
5617 }
5618
5619 return size;
5620 }
5621
5622 /* Decide whether TYPE should be returned in memory (true)
5623 or in a register (false). FNTYPE is the type of the function making
5624 the call. */
5625 static bool
5626 arm_return_in_memory (const_tree type, const_tree fntype)
5627 {
5628 HOST_WIDE_INT size;
5629
5630 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5631
5632 if (TARGET_AAPCS_BASED)
5633 {
5634 /* Simple, non-aggregate types (i.e. not including vectors and
5635 complex) are always returned in a register (or registers).
5636 We don't care about which register here, so we can short-cut
5637 some of the detail. */
5638 if (!AGGREGATE_TYPE_P (type)
5639 && TREE_CODE (type) != VECTOR_TYPE
5640 && TREE_CODE (type) != COMPLEX_TYPE)
5641 return false;
5642
5643 /* Any return value that is no larger than one word can be
5644 returned in r0. */
5645 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5646 return false;
5647
5648 /* Check any available co-processors to see if they accept the
5649 type as a register candidate (VFP, for example, can return
5650 some aggregates in consecutive registers). These aren't
5651 available if the call is variadic. */
5652 if (aapcs_select_return_coproc (type, fntype) >= 0)
5653 return false;
5654
5655 /* Vector values should be returned using ARM registers, not
5656 memory (unless they're over 16 bytes, which will break since
5657 we only have four call-clobbered registers to play with). */
5658 if (TREE_CODE (type) == VECTOR_TYPE)
5659 return (size < 0 || size > (4 * UNITS_PER_WORD));
5660
5661 /* The rest go in memory. */
5662 return true;
5663 }
5664
5665 if (TREE_CODE (type) == VECTOR_TYPE)
5666 return (size < 0 || size > (4 * UNITS_PER_WORD));
5667
5668 if (!AGGREGATE_TYPE_P (type)
5669 && (TREE_CODE (type) != VECTOR_TYPE))
5670 /* All simple types are returned in registers. */
5671 return false;
5672
5673 if (arm_abi != ARM_ABI_APCS)
5674 {
5675 /* ATPCS and later return aggregate types in memory only if they are
5676 larger than a word (or are variable size). */
5677 return (size < 0 || size > UNITS_PER_WORD);
5678 }
5679
5680 /* For the arm-wince targets we choose to be compatible with Microsoft's
5681 ARM and Thumb compilers, which always return aggregates in memory. */
5682 #ifndef ARM_WINCE
5683 /* All structures/unions bigger than one word are returned in memory.
5684 Also catch the case where int_size_in_bytes returns -1. In this case
5685 the aggregate is either huge or of variable size, and in either case
5686 we will want to return it via memory and not in a register. */
5687 if (size < 0 || size > UNITS_PER_WORD)
5688 return true;
5689
5690 if (TREE_CODE (type) == RECORD_TYPE)
5691 {
5692 tree field;
5693
5694 /* For a struct the APCS says that we only return in a register
5695 if the type is 'integer like' and every addressable element
5696 has an offset of zero. For practical purposes this means
5697 that the structure can have at most one non bit-field element
5698 and that this element must be the first one in the structure. */
5699
5700 /* Find the first field, ignoring non FIELD_DECL things which will
5701 have been created by C++. */
5702 for (field = TYPE_FIELDS (type);
5703 field && TREE_CODE (field) != FIELD_DECL;
5704 field = DECL_CHAIN (field))
5705 continue;
5706
5707 if (field == NULL)
5708 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5709
5710 /* Check that the first field is valid for returning in a register. */
5711
5712 /* ... Floats are not allowed */
5713 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5714 return true;
5715
5716 /* ... Aggregates that are not themselves valid for returning in
5717 a register are not allowed. */
5718 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5719 return true;
5720
5721 /* Now check the remaining fields, if any. Only bitfields are allowed,
5722 since they are not addressable. */
5723 for (field = DECL_CHAIN (field);
5724 field;
5725 field = DECL_CHAIN (field))
5726 {
5727 if (TREE_CODE (field) != FIELD_DECL)
5728 continue;
5729
5730 if (!DECL_BIT_FIELD_TYPE (field))
5731 return true;
5732 }
5733
5734 return false;
5735 }
5736
5737 if (TREE_CODE (type) == UNION_TYPE)
5738 {
5739 tree field;
5740
5741 /* Unions can be returned in registers if every element is
5742 integral, or can be returned in an integer register. */
5743 for (field = TYPE_FIELDS (type);
5744 field;
5745 field = DECL_CHAIN (field))
5746 {
5747 if (TREE_CODE (field) != FIELD_DECL)
5748 continue;
5749
5750 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5751 return true;
5752
5753 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5754 return true;
5755 }
5756
5757 return false;
5758 }
5759 #endif /* not ARM_WINCE */
5760
5761 /* Return all other types in memory. */
5762 return true;
5763 }
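/* Illustrative sketch (not part of the original source, compiled out
   with #if 0): example return types annotated with how the rules
   above classify them, assuming a 32-bit word.  The annotations just
   restate the comments in arm_return_in_memory.  */
#if 0
/* A single word-sized member: returned in r0 under AAPCS, and
   acceptable to the old APCS integer-like rule as well.  */
struct ex_word { int a; };

/* Two words of plain integers: returned in memory both under the
   AAPCS base rules and under the older ABIs.  */
struct ex_two_words { int a, b; };

/* APCS "integer like": one addressable non-float member followed only
   by bit-fields, so the APCS rules return it in a register.  */
struct ex_integer_like { short a; unsigned b : 16; };

/* The first member is a float, so the APCS rules use memory even
   though the struct fits in a word.  */
struct ex_float_first { float f; };
#endif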
5764
5765 const struct pcs_attribute_arg
5766 {
5767 const char *arg;
5768 enum arm_pcs value;
5769 } pcs_attribute_args[] =
5770 {
5771 {"aapcs", ARM_PCS_AAPCS},
5772 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5773 #if 0
5774 /* We could recognize these, but changes would be needed elsewhere
5775 * to implement them. */
5776 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5777 {"atpcs", ARM_PCS_ATPCS},
5778 {"apcs", ARM_PCS_APCS},
5779 #endif
5780 {NULL, ARM_PCS_UNKNOWN}
5781 };
5782
5783 static enum arm_pcs
5784 arm_pcs_from_attribute (tree attr)
5785 {
5786 const struct pcs_attribute_arg *ptr;
5787 const char *arg;
5788
5789 /* Get the value of the argument. */
5790 if (TREE_VALUE (attr) == NULL_TREE
5791 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5792 return ARM_PCS_UNKNOWN;
5793
5794 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5795
5796 /* Check it against the list of known arguments. */
5797 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5798 if (streq (arg, ptr->arg))
5799 return ptr->value;
5800
5801 /* An unrecognized PCS variant. */
5802 return ARM_PCS_UNKNOWN;
5803 }
5804
5805 /* Get the PCS variant to use for this call. TYPE is the function's type
5806 specification, DECL is the specific declaration. DECL may be null if
5807 the call could be indirect or if this is a library call. */
5808 static enum arm_pcs
5809 arm_get_pcs_model (const_tree type, const_tree decl)
5810 {
5811 bool user_convention = false;
5812 enum arm_pcs user_pcs = arm_pcs_default;
5813 tree attr;
5814
5815 gcc_assert (type);
5816
5817 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5818 if (attr)
5819 {
5820 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5821 user_convention = true;
5822 }
5823
5824 if (TARGET_AAPCS_BASED)
5825 {
5826 /* Detect varargs functions. These always use the base rules
5827 (no argument is ever a candidate for a co-processor
5828 register). */
5829 bool base_rules = stdarg_p (type);
5830
5831 if (user_convention)
5832 {
5833 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5834 sorry ("non-AAPCS derived PCS variant");
5835 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5836 error ("variadic functions must use the base AAPCS variant");
5837 }
5838
5839 if (base_rules)
5840 return ARM_PCS_AAPCS;
5841 else if (user_convention)
5842 return user_pcs;
5843 else if (decl && flag_unit_at_a_time)
5844 {
5845 /* Local functions never leak outside this compilation unit,
5846 so we are free to use whatever conventions are
5847 appropriate. */
5848 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5849 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5850 if (i && i->local)
5851 return ARM_PCS_AAPCS_LOCAL;
5852 }
5853 }
5854 else if (user_convention && user_pcs != arm_pcs_default)
5855 sorry ("PCS variant");
5856
5857 /* For everything else we use the target's default. */
5858 return arm_pcs_default;
5859 }
5860
5861
5862 static void
5863 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5864 const_tree fntype ATTRIBUTE_UNUSED,
5865 rtx libcall ATTRIBUTE_UNUSED,
5866 const_tree fndecl ATTRIBUTE_UNUSED)
5867 {
5868 /* Record the unallocated VFP registers. */
5869 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5870 pcum->aapcs_vfp_reg_alloc = 0;
5871 }
5872
5873 /* Walk down the type tree of TYPE counting consecutive base elements.
5874 If *MODEP is VOIDmode, then set it to the first valid floating point
5875 type. If a non-floating point type is found, or if a floating point
5876 type that doesn't match a non-VOIDmode *MODEP is found, then return -1;
5877 otherwise return the count in the sub-tree. */
5878 static int
5879 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5880 {
5881 machine_mode mode;
5882 HOST_WIDE_INT size;
5883
5884 switch (TREE_CODE (type))
5885 {
5886 case REAL_TYPE:
5887 mode = TYPE_MODE (type);
5888 if (mode != DFmode && mode != SFmode && mode != HFmode)
5889 return -1;
5890
5891 if (*modep == VOIDmode)
5892 *modep = mode;
5893
5894 if (*modep == mode)
5895 return 1;
5896
5897 break;
5898
5899 case COMPLEX_TYPE:
5900 mode = TYPE_MODE (TREE_TYPE (type));
5901 if (mode != DFmode && mode != SFmode)
5902 return -1;
5903
5904 if (*modep == VOIDmode)
5905 *modep = mode;
5906
5907 if (*modep == mode)
5908 return 2;
5909
5910 break;
5911
5912 case VECTOR_TYPE:
5913 /* Use V2SImode and V4SImode as representatives of all 64-bit
5914 and 128-bit vector types, whether or not those modes are
5915 supported with the present options. */
5916 size = int_size_in_bytes (type);
5917 switch (size)
5918 {
5919 case 8:
5920 mode = V2SImode;
5921 break;
5922 case 16:
5923 mode = V4SImode;
5924 break;
5925 default:
5926 return -1;
5927 }
5928
5929 if (*modep == VOIDmode)
5930 *modep = mode;
5931
5932 /* Vector modes are considered to be opaque: two vectors are
5933 equivalent for the purposes of being homogeneous aggregates
5934 if they are the same size. */
5935 if (*modep == mode)
5936 return 1;
5937
5938 break;
5939
5940 case ARRAY_TYPE:
5941 {
5942 int count;
5943 tree index = TYPE_DOMAIN (type);
5944
5945 /* Can't handle incomplete types nor sizes that are not
5946 fixed. */
5947 if (!COMPLETE_TYPE_P (type)
5948 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5949 return -1;
5950
5951 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5952 if (count == -1
5953 || !index
5954 || !TYPE_MAX_VALUE (index)
5955 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5956 || !TYPE_MIN_VALUE (index)
5957 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5958 || count < 0)
5959 return -1;
5960
5961 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5962 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5963
5964 /* There must be no padding. */
5965 if (wi::to_wide (TYPE_SIZE (type))
5966 != count * GET_MODE_BITSIZE (*modep))
5967 return -1;
5968
5969 return count;
5970 }
5971
5972 case RECORD_TYPE:
5973 {
5974 int count = 0;
5975 int sub_count;
5976 tree field;
5977
5978 /* Can't handle incomplete types nor sizes that are not
5979 fixed. */
5980 if (!COMPLETE_TYPE_P (type)
5981 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5982 return -1;
5983
5984 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5985 {
5986 if (TREE_CODE (field) != FIELD_DECL)
5987 continue;
5988
5989 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5990 if (sub_count < 0)
5991 return -1;
5992 count += sub_count;
5993 }
5994
5995 /* There must be no padding. */
5996 if (wi::to_wide (TYPE_SIZE (type))
5997 != count * GET_MODE_BITSIZE (*modep))
5998 return -1;
5999
6000 return count;
6001 }
6002
6003 case UNION_TYPE:
6004 case QUAL_UNION_TYPE:
6005 {
6006 /* These aren't very interesting except in a degenerate case. */
6007 int count = 0;
6008 int sub_count;
6009 tree field;
6010
6011 /* Can't handle incomplete types nor sizes that are not
6012 fixed. */
6013 if (!COMPLETE_TYPE_P (type)
6014 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6015 return -1;
6016
6017 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6018 {
6019 if (TREE_CODE (field) != FIELD_DECL)
6020 continue;
6021
6022 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6023 if (sub_count < 0)
6024 return -1;
6025 count = count > sub_count ? count : sub_count;
6026 }
6027
6028 /* There must be no padding. */
6029 if (wi::to_wide (TYPE_SIZE (type))
6030 != count * GET_MODE_BITSIZE (*modep))
6031 return -1;
6032
6033 return count;
6034 }
6035
6036 default:
6037 break;
6038 }
6039
6040 return -1;
6041 }
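/* Illustrative sketch (not part of the original source, compiled out
   with #if 0): types annotated with what aapcs_vfp_sub_candidate
   reports for them under the rules above (a single floating-point or
   vector base mode, no padding; the caller additionally limits the
   element count to 4).  */
#if 0
/* Homogeneous aggregate of 3 SFmode elements: a VFP candidate.  */
struct ex_hfa3 { float x, y, z; };

/* A complex double counts as 2 DFmode elements, giving 4 DFmode
   elements in total: still a candidate.  */
struct ex_hfa4 { _Complex double a; double b, c; };

/* Mixes SFmode and DFmode base types: rejected (-1).  */
struct ex_mixed { float f; double d; };

/* An array of 4 floats is 4 SFmode elements and a candidate; an array
   of 5 would pass this walk but fail the caller's 4-element limit.  */
typedef float ex_vec4[4];
#endif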
6042
6043 /* Return true if PCS_VARIANT should use VFP registers. */
6044 static bool
6045 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6046 {
6047 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6048 {
6049 static bool seen_thumb1_vfp = false;
6050
6051 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6052 {
6053 sorry ("Thumb-1 hard-float VFP ABI");
6054 /* sorry() is not immediately fatal, so only display this once. */
6055 seen_thumb1_vfp = true;
6056 }
6057
6058 return true;
6059 }
6060
6061 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6062 return false;
6063
6064 return (TARGET_32BIT && TARGET_HARD_FLOAT
6065 && (TARGET_VFP_DOUBLE || !is_double));
6066 }
6067
6068 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6069 suitable for passing or returning in VFP registers for the PCS
6070 variant selected. If it is, then *BASE_MODE is updated to contain
6071 a machine mode describing each element of the argument's type and
6072 *COUNT to hold the number of such elements. */
6073 static bool
6074 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6075 machine_mode mode, const_tree type,
6076 machine_mode *base_mode, int *count)
6077 {
6078 machine_mode new_mode = VOIDmode;
6079
6080 /* If we have the type information, prefer that to working things
6081 out from the mode. */
6082 if (type)
6083 {
6084 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6085
6086 if (ag_count > 0 && ag_count <= 4)
6087 *count = ag_count;
6088 else
6089 return false;
6090 }
6091 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6092 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6093 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6094 {
6095 *count = 1;
6096 new_mode = mode;
6097 }
6098 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6099 {
6100 *count = 2;
6101 new_mode = (mode == DCmode ? DFmode : SFmode);
6102 }
6103 else
6104 return false;
6105
6106
6107 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6108 return false;
6109
6110 *base_mode = new_mode;
6111 return true;
6112 }
6113
6114 static bool
6115 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6116 machine_mode mode, const_tree type)
6117 {
6118 int count ATTRIBUTE_UNUSED;
6119 machine_mode ag_mode ATTRIBUTE_UNUSED;
6120
6121 if (!use_vfp_abi (pcs_variant, false))
6122 return false;
6123 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6124 &ag_mode, &count);
6125 }
6126
6127 static bool
6128 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6129 const_tree type)
6130 {
6131 if (!use_vfp_abi (pcum->pcs_variant, false))
6132 return false;
6133
6134 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6135 &pcum->aapcs_vfp_rmode,
6136 &pcum->aapcs_vfp_rcount);
6137 }
6138
6139 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6140 for the behaviour of this function. */
6141
6142 static bool
6143 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6144 const_tree type ATTRIBUTE_UNUSED)
6145 {
6146 int rmode_size
6147 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6148 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6149 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6150 int regno;
6151
6152 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6153 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6154 {
6155 pcum->aapcs_vfp_reg_alloc = mask << regno;
6156 if (mode == BLKmode
6157 || (mode == TImode && ! TARGET_NEON)
6158 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6159 {
6160 int i;
6161 int rcount = pcum->aapcs_vfp_rcount;
6162 int rshift = shift;
6163 machine_mode rmode = pcum->aapcs_vfp_rmode;
6164 rtx par;
6165 if (!TARGET_NEON)
6166 {
6167 /* Avoid using unsupported vector modes. */
6168 if (rmode == V2SImode)
6169 rmode = DImode;
6170 else if (rmode == V4SImode)
6171 {
6172 rmode = DImode;
6173 rcount *= 2;
6174 rshift /= 2;
6175 }
6176 }
6177 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6178 for (i = 0; i < rcount; i++)
6179 {
6180 rtx tmp = gen_rtx_REG (rmode,
6181 FIRST_VFP_REGNUM + regno + i * rshift);
6182 tmp = gen_rtx_EXPR_LIST
6183 (VOIDmode, tmp,
6184 GEN_INT (i * GET_MODE_SIZE (rmode)));
6185 XVECEXP (par, 0, i) = tmp;
6186 }
6187
6188 pcum->aapcs_reg = par;
6189 }
6190 else
6191 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6192 return true;
6193 }
6194 return false;
6195 }
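/* Illustrative sketch (not part of the original source, compiled out
   with #if 0): the bitmask scan used above to find a contiguous block
   of free VFP argument registers.  FREE_REGS is a bitmask of free S
   registers (bit 0 is s0), SHIFT the number of S registers per element
   and COUNT the element count; the result is the first suitable S
   register number, or -1.  Hypothetical helper mirroring the loop
   above.  */
#if 0
static int
example_vfp_allocate (unsigned int free_regs, int shift, int count,
		      int num_vfp_arg_regs)
{
  unsigned int mask = (1u << (shift * count)) - 1;

  for (int regno = 0; regno < num_vfp_arg_regs; regno += shift)
    if (((free_regs >> regno) & mask) == mask)
      return regno;

  return -1;
}

/* E.g. with s0 already taken (free_regs == 0xfffe), a homogeneous
   aggregate of two doubles (shift == 2, count == 2) lands on s2..s5,
   i.e. d1 and d2: example_vfp_allocate (0xfffe, 2, 2, 16) == 2.  */
#endif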
6196
6197 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6198 comment there for the behaviour of this function. */
6199
6200 static rtx
6201 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6202 machine_mode mode,
6203 const_tree type ATTRIBUTE_UNUSED)
6204 {
6205 if (!use_vfp_abi (pcs_variant, false))
6206 return NULL;
6207
6208 if (mode == BLKmode
6209 || (GET_MODE_CLASS (mode) == MODE_INT
6210 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6211 && !TARGET_NEON))
6212 {
6213 int count;
6214 machine_mode ag_mode;
6215 int i;
6216 rtx par;
6217 int shift;
6218
6219 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6220 &ag_mode, &count);
6221
6222 if (!TARGET_NEON)
6223 {
6224 if (ag_mode == V2SImode)
6225 ag_mode = DImode;
6226 else if (ag_mode == V4SImode)
6227 {
6228 ag_mode = DImode;
6229 count *= 2;
6230 }
6231 }
6232 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6233 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6234 for (i = 0; i < count; i++)
6235 {
6236 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6237 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6238 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6239 XVECEXP (par, 0, i) = tmp;
6240 }
6241
6242 return par;
6243 }
6244
6245 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6246 }
6247
6248 static void
6249 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6250 machine_mode mode ATTRIBUTE_UNUSED,
6251 const_tree type ATTRIBUTE_UNUSED)
6252 {
6253 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6254 pcum->aapcs_vfp_reg_alloc = 0;
6255 return;
6256 }
6257
6258 #define AAPCS_CP(X) \
6259 { \
6260 aapcs_ ## X ## _cum_init, \
6261 aapcs_ ## X ## _is_call_candidate, \
6262 aapcs_ ## X ## _allocate, \
6263 aapcs_ ## X ## _is_return_candidate, \
6264 aapcs_ ## X ## _allocate_return_reg, \
6265 aapcs_ ## X ## _advance \
6266 }
6267
6268 /* Table of co-processors that can be used to pass arguments in
6269 registers. Ideally no argument should be a candidate for more than
6270 one co-processor table entry, but the table is processed in order
6271 and stops after the first match. If that entry then fails to put
6272 the argument into a co-processor register, the argument will go on
6273 the stack. */
6274 static struct
6275 {
6276 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6277 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6278
6279 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6280 BLKmode) is a candidate for this co-processor's registers; this
6281 function should ignore any position-dependent state in
6282 CUMULATIVE_ARGS and only use call-type dependent information. */
6283 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6284
6285 /* Return true if the argument does get a co-processor register; it
6286 should set aapcs_reg to an RTX of the register allocated as is
6287 required for a return from FUNCTION_ARG. */
6288 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6289
6290 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6291 be returned in this co-processor's registers. */
6292 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6293
6294 /* Allocate and return an RTX element to hold the return type of a call. This
6295 routine must not fail and will only be called if is_return_candidate
6296 returned true with the same parameters. */
6297 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6298
6299 /* Finish processing this argument and prepare to start processing
6300 the next one. */
6301 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6302 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6303 {
6304 AAPCS_CP(vfp)
6305 };
6306
6307 #undef AAPCS_CP
6308
6309 static int
6310 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6311 const_tree type)
6312 {
6313 int i;
6314
6315 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6316 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6317 return i;
6318
6319 return -1;
6320 }
6321
6322 static int
6323 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6324 {
6325 /* We aren't passed a decl, so we can't check that a call is local.
6326 However, it isn't clear that that would be a win anyway, since it
6327 might limit some tail-calling opportunities. */
6328 enum arm_pcs pcs_variant;
6329
6330 if (fntype)
6331 {
6332 const_tree fndecl = NULL_TREE;
6333
6334 if (TREE_CODE (fntype) == FUNCTION_DECL)
6335 {
6336 fndecl = fntype;
6337 fntype = TREE_TYPE (fntype);
6338 }
6339
6340 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6341 }
6342 else
6343 pcs_variant = arm_pcs_default;
6344
6345 if (pcs_variant != ARM_PCS_AAPCS)
6346 {
6347 int i;
6348
6349 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6350 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6351 TYPE_MODE (type),
6352 type))
6353 return i;
6354 }
6355 return -1;
6356 }
6357
6358 static rtx
6359 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6360 const_tree fntype)
6361 {
6362 /* We aren't passed a decl, so we can't check that a call is local.
6363 However, it isn't clear that that would be a win anyway, since it
6364 might limit some tail-calling opportunities. */
6365 enum arm_pcs pcs_variant;
6366 int unsignedp ATTRIBUTE_UNUSED;
6367
6368 if (fntype)
6369 {
6370 const_tree fndecl = NULL_TREE;
6371
6372 if (TREE_CODE (fntype) == FUNCTION_DECL)
6373 {
6374 fndecl = fntype;
6375 fntype = TREE_TYPE (fntype);
6376 }
6377
6378 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6379 }
6380 else
6381 pcs_variant = arm_pcs_default;
6382
6383 /* Promote integer types. */
6384 if (type && INTEGRAL_TYPE_P (type))
6385 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6386
6387 if (pcs_variant != ARM_PCS_AAPCS)
6388 {
6389 int i;
6390
6391 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6392 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6393 type))
6394 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6395 mode, type);
6396 }
6397
6398 /* Promote small structs returned in a register to full-word size
6399 for big-endian AAPCS. */
6400 if (type && arm_return_in_msb (type))
6401 {
6402 HOST_WIDE_INT size = int_size_in_bytes (type);
6403 if (size % UNITS_PER_WORD != 0)
6404 {
6405 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6406 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6407 }
6408 }
6409
6410 return gen_rtx_REG (mode, R0_REGNUM);
6411 }
6412
6413 static rtx
6414 aapcs_libcall_value (machine_mode mode)
6415 {
6416 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6417 && GET_MODE_SIZE (mode) <= 4)
6418 mode = SImode;
6419
6420 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6421 }
6422
6423 /* Lay out a function argument using the AAPCS rules. The rule
6424 numbers referred to here are those in the AAPCS. */
6425 static void
6426 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6427 const_tree type, bool named)
6428 {
6429 int nregs, nregs2;
6430 int ncrn;
6431
6432 /* We only need to do this once per argument. */
6433 if (pcum->aapcs_arg_processed)
6434 return;
6435
6436 pcum->aapcs_arg_processed = true;
6437
6438 /* Special case: if named is false then we are handling an incoming
6439 anonymous argument which is on the stack. */
6440 if (!named)
6441 return;
6442
6443 /* Is this a potential co-processor register candidate? */
6444 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6445 {
6446 int slot = aapcs_select_call_coproc (pcum, mode, type);
6447 pcum->aapcs_cprc_slot = slot;
6448
6449 /* We don't have to apply any of the rules from part B of the
6450 preparation phase; these are handled elsewhere in the
6451 compiler. */
6452
6453 if (slot >= 0)
6454 {
6455 /* A co-processor register candidate goes either in its own
6456 class of registers or on the stack. */
6457 if (!pcum->aapcs_cprc_failed[slot])
6458 {
6459 /* C1.cp - Try to allocate the argument to co-processor
6460 registers. */
6461 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6462 return;
6463
6464 /* C2.cp - Put the argument on the stack and note that we
6465 can't assign any more candidates in this slot. We also
6466 need to note that we have allocated stack space, so that
6467 we won't later try to split a non-cprc candidate between
6468 core registers and the stack. */
6469 pcum->aapcs_cprc_failed[slot] = true;
6470 pcum->can_split = false;
6471 }
6472
6473 /* We didn't get a register, so this argument goes on the
6474 stack. */
6475 gcc_assert (pcum->can_split == false);
6476 return;
6477 }
6478 }
6479
6480 /* C3 - For double-word aligned arguments, round the NCRN up to the
6481 next even number. */
6482 ncrn = pcum->aapcs_ncrn;
6483 if (ncrn & 1)
6484 {
6485 int res = arm_needs_doubleword_align (mode, type);
6486 /* Only warn during RTL expansion of call stmts, otherwise we would
6487 warn e.g. during gimplification even on functions that will be
6488 always inlined, and we'd warn multiple times. Don't warn when
6489 called in expand_function_start either, as we warn instead in
6490 arm_function_arg_boundary in that case. */
6491 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6492 inform (input_location, "parameter passing for argument of type "
6493 "%qT changed in GCC 7.1", type);
6494 else if (res > 0)
6495 ncrn++;
6496 }
6497
6498 nregs = ARM_NUM_REGS2 (mode, type);
6499
6500 /* Sigh, this test should really assert that nregs > 0, but a GCC
6501 extension allows empty structs and then gives them zero size; it
6502 then allows such a structure to be passed by value. For some of
6503 the code below we have to pretend that such an argument has
6504 non-zero size so that we 'locate' it correctly either in
6505 registers or on the stack. */
6506 gcc_assert (nregs >= 0);
6507
6508 nregs2 = nregs ? nregs : 1;
6509
6510 /* C4 - Argument fits entirely in core registers. */
6511 if (ncrn + nregs2 <= NUM_ARG_REGS)
6512 {
6513 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6514 pcum->aapcs_next_ncrn = ncrn + nregs;
6515 return;
6516 }
6517
6518 /* C5 - Some core registers left and there are no arguments already
6519 on the stack: split this argument between the remaining core
6520 registers and the stack. */
6521 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6522 {
6523 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6524 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6525 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6526 return;
6527 }
6528
6529 /* C6 - NCRN is set to 4. */
6530 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6531
6532 /* C7, C8 - argument goes on the stack. We have nothing to do here. */
6533 return;
6534 }
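/* Illustrative sketch (not part of the original source, compiled out
   with #if 0): rules C3-C6 above applied to a single argument on the
   host.  NCRN is the next core register number on entry, WORDS the
   argument size in words, DOUBLEWORD_ALIGNED whether C3 applies, and
   STACKED whether earlier arguments already spilled to the stack
   (which forbids splitting under C5).  Hypothetical helper; it returns
   how many words go in registers, the rest goes on the stack.  */
#if 0
static int
example_core_reg_words (int *ncrn, int words, int doubleword_aligned,
			int stacked)
{
  const int num_arg_regs = 4;		/* r0-r3 */

  /* C3: round the NCRN up to an even number for 8-byte alignment.  */
  if (doubleword_aligned)
    *ncrn = (*ncrn + 1) & ~1;

  /* C4: the argument fits entirely in core registers.  */
  if (*ncrn + words <= num_arg_regs)
    {
      *ncrn += words;
      return words;
    }

  /* C5: split between the remaining registers and the stack, but only
     if nothing has gone on the stack yet.  */
  if (*ncrn < num_arg_regs && !stacked)
    {
      int in_regs = num_arg_regs - *ncrn;
      *ncrn = num_arg_regs;
      return in_regs;
    }

  /* C6-C8: the whole argument goes on the stack.  */
  *ncrn = num_arg_regs;
  return 0;
}

/* E.g. an int followed by a double: the int takes r0, C3 bumps the
   NCRN to 2 for the double, which then occupies r2 and r3.  */
#endif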
6535
6536 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6537 for a call to a function whose data type is FNTYPE.
6538 For a library call, FNTYPE is NULL. */
6539 void
6540 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6541 rtx libname,
6542 tree fndecl ATTRIBUTE_UNUSED)
6543 {
6544 /* Long call handling. */
6545 if (fntype)
6546 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6547 else
6548 pcum->pcs_variant = arm_pcs_default;
6549
6550 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6551 {
6552 if (arm_libcall_uses_aapcs_base (libname))
6553 pcum->pcs_variant = ARM_PCS_AAPCS;
6554
6555 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6556 pcum->aapcs_reg = NULL_RTX;
6557 pcum->aapcs_partial = 0;
6558 pcum->aapcs_arg_processed = false;
6559 pcum->aapcs_cprc_slot = -1;
6560 pcum->can_split = true;
6561
6562 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6563 {
6564 int i;
6565
6566 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6567 {
6568 pcum->aapcs_cprc_failed[i] = false;
6569 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6570 }
6571 }
6572 return;
6573 }
6574
6575 /* Legacy ABIs */
6576
6577 /* On the ARM, the offset starts at 0. */
6578 pcum->nregs = 0;
6579 pcum->iwmmxt_nregs = 0;
6580 pcum->can_split = true;
6581
6582 /* Varargs vectors are treated the same as long long.
6583 named_count avoids having to change the way arm handles 'named'. */
6584 pcum->named_count = 0;
6585 pcum->nargs = 0;
6586
6587 if (TARGET_REALLY_IWMMXT && fntype)
6588 {
6589 tree fn_arg;
6590
6591 for (fn_arg = TYPE_ARG_TYPES (fntype);
6592 fn_arg;
6593 fn_arg = TREE_CHAIN (fn_arg))
6594 pcum->named_count += 1;
6595
6596 if (! pcum->named_count)
6597 pcum->named_count = INT_MAX;
6598 }
6599 }
6600
6601 /* Return 2 if double word alignment is required for argument passing,
6602 but wasn't required before the fix for PR88469.
6603 Return 1 if double word alignment is required for argument passing.
6604 Return -1 if double word alignment used to be required for argument
6605 passing before the PR77728 ABI fix, but is not required anymore.
6606 Return 0 if double word alignment is not required and wasn't required
6607 before either. */
6608 static int
6609 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6610 {
6611 if (!type)
6612 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6613
6614 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6615 if (!AGGREGATE_TYPE_P (type))
6616 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6617
6618 /* Array types: Use member alignment of element type. */
6619 if (TREE_CODE (type) == ARRAY_TYPE)
6620 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6621
6622 int ret = 0;
6623 int ret2 = 0;
6624 /* Record/aggregate types: Use greatest member alignment of any member. */
6625 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6626 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6627 {
6628 if (TREE_CODE (field) == FIELD_DECL)
6629 return 1;
6630 else
6631 /* Before PR77728 fix, we were incorrectly considering also
6632 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6633 Make sure we can warn about that with -Wpsabi. */
6634 ret = -1;
6635 }
6636 else if (TREE_CODE (field) == FIELD_DECL
6637 && DECL_BIT_FIELD (field)
6638 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
6639 ret2 = 1;
6640
6641 if (ret2)
6642 return 2;
6643
6644 return ret;
6645 }
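/* Illustrative sketch (not part of the original source, compiled out
   with #if 0): example types annotated with what
   arm_needs_doubleword_align returns for them, following the comment
   above.  */
#if 0
#include <stdint.h>

/* Contains a 64-bit FIELD_DECL: returns 1.  */
struct ex_align1 { uint64_t a; };

/* Only a bit-field declared with a 64-bit type: the field itself may
   not be over-aligned, but its declared type is, so this now returns 2
   (the PR88469 case; callers emit the -Wpsabi note about GCC 9.1 for
   it).  */
struct ex_align2 { uint64_t a : 24; };

/* Nothing aligned beyond PARM_BOUNDARY: returns 0.  */
struct ex_align0 { int a, b; };
#endif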
6646
6647
6648 /* Determine where to put an argument to a function.
6649 Value is zero to push the argument on the stack,
6650 or a hard register in which to store the argument.
6651
6652 MODE is the argument's machine mode.
6653 TYPE is the data type of the argument (as a tree).
6654 This is null for libcalls where that information may
6655 not be available.
6656 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6657 the preceding args and about the function being called.
6658 NAMED is nonzero if this argument is a named parameter
6659 (otherwise it is an extra parameter matching an ellipsis).
6660
6661 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6662 other arguments are passed on the stack. If (NAMED == 0) (which happens
6663 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6664 defined), say it is passed on the stack (function_prologue will
6665 indeed make it pass on the stack if necessary). */
6666
6667 static rtx
6668 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6669 const_tree type, bool named)
6670 {
6671 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6672 int nregs;
6673
6674 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6675 a call insn (op3 of a call_value insn). */
6676 if (mode == VOIDmode)
6677 return const0_rtx;
6678
6679 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6680 {
6681 aapcs_layout_arg (pcum, mode, type, named);
6682 return pcum->aapcs_reg;
6683 }
6684
6685 /* Varargs vectors are treated the same as long long.
6686 named_count avoids having to change the way arm handles 'named'. */
6687 if (TARGET_IWMMXT_ABI
6688 && arm_vector_mode_supported_p (mode)
6689 && pcum->named_count > pcum->nargs + 1)
6690 {
6691 if (pcum->iwmmxt_nregs <= 9)
6692 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6693 else
6694 {
6695 pcum->can_split = false;
6696 return NULL_RTX;
6697 }
6698 }
6699
6700 /* Put doubleword aligned quantities in even register pairs. */
6701 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6702 {
6703 int res = arm_needs_doubleword_align (mode, type);
6704 if (res < 0 && warn_psabi)
6705 inform (input_location, "parameter passing for argument of type "
6706 "%qT changed in GCC 7.1", type);
6707 else if (res > 0)
6708 {
6709 pcum->nregs++;
6710 if (res > 1 && warn_psabi)
6711 inform (input_location, "parameter passing for argument of type "
6712 "%qT changed in GCC 9.1", type);
6713 }
6714 }
6715
6716 /* Only allow splitting an arg between regs and memory if all preceding
6717 args were allocated to regs. For args passed by reference we only count
6718 the reference pointer. */
6719 if (pcum->can_split)
6720 nregs = 1;
6721 else
6722 nregs = ARM_NUM_REGS2 (mode, type);
6723
6724 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6725 return NULL_RTX;
6726
6727 return gen_rtx_REG (mode, pcum->nregs);
6728 }
6729
6730 static unsigned int
6731 arm_function_arg_boundary (machine_mode mode, const_tree type)
6732 {
6733 if (!ARM_DOUBLEWORD_ALIGN)
6734 return PARM_BOUNDARY;
6735
6736 int res = arm_needs_doubleword_align (mode, type);
6737 if (res < 0 && warn_psabi)
6738 inform (input_location, "parameter passing for argument of type %qT "
6739 "changed in GCC 7.1", type);
6740 if (res > 1 && warn_psabi)
6741 inform (input_location, "parameter passing for argument of type "
6742 "%qT changed in GCC 9.1", type);
6743
6744 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6745 }
6746
6747 static int
6748 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6749 tree type, bool named)
6750 {
6751 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6752 int nregs = pcum->nregs;
6753
6754 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6755 {
6756 aapcs_layout_arg (pcum, mode, type, named);
6757 return pcum->aapcs_partial;
6758 }
6759
6760 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6761 return 0;
6762
6763 if (NUM_ARG_REGS > nregs
6764 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6765 && pcum->can_split)
6766 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6767
6768 return 0;
6769 }
6770
6771 /* Update the data in PCUM to advance over an argument
6772 of mode MODE and data type TYPE.
6773 (TYPE is null for libcalls where that information may not be available.) */
6774
6775 static void
6776 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6777 const_tree type, bool named)
6778 {
6779 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6780
6781 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6782 {
6783 aapcs_layout_arg (pcum, mode, type, named);
6784
6785 if (pcum->aapcs_cprc_slot >= 0)
6786 {
6787 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6788 type);
6789 pcum->aapcs_cprc_slot = -1;
6790 }
6791
6792 /* Generic stuff. */
6793 pcum->aapcs_arg_processed = false;
6794 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6795 pcum->aapcs_reg = NULL_RTX;
6796 pcum->aapcs_partial = 0;
6797 }
6798 else
6799 {
6800 pcum->nargs += 1;
6801 if (arm_vector_mode_supported_p (mode)
6802 && pcum->named_count > pcum->nargs
6803 && TARGET_IWMMXT_ABI)
6804 pcum->iwmmxt_nregs += 1;
6805 else
6806 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6807 }
6808 }
6809
6810 /* Variable sized types are passed by reference. This is a GCC
6811 extension to the ARM ABI. */
6812
6813 static bool
6814 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6815 machine_mode mode ATTRIBUTE_UNUSED,
6816 const_tree type, bool named ATTRIBUTE_UNUSED)
6817 {
6818 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6819 }
6820 \f
6821 /* Encode the current state of the #pragma [no_]long_calls. */
6822 typedef enum
6823 {
6824 OFF, /* No #pragma [no_]long_calls is in effect. */
6825 LONG, /* #pragma long_calls is in effect. */
6826 SHORT /* #pragma no_long_calls is in effect. */
6827 } arm_pragma_enum;
6828
6829 static arm_pragma_enum arm_pragma_long_calls = OFF;
6830
6831 void
6832 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6833 {
6834 arm_pragma_long_calls = LONG;
6835 }
6836
6837 void
6838 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6839 {
6840 arm_pragma_long_calls = SHORT;
6841 }
6842
6843 void
6844 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6845 {
6846 arm_pragma_long_calls = OFF;
6847 }
6848 \f
6849 /* Handle an attribute requiring a FUNCTION_DECL;
6850 arguments as in struct attribute_spec.handler. */
6851 static tree
6852 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6853 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6854 {
6855 if (TREE_CODE (*node) != FUNCTION_DECL)
6856 {
6857 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6858 name);
6859 *no_add_attrs = true;
6860 }
6861
6862 return NULL_TREE;
6863 }
6864
6865 /* Handle an "interrupt" or "isr" attribute;
6866 arguments as in struct attribute_spec.handler. */
6867 static tree
6868 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6869 bool *no_add_attrs)
6870 {
6871 if (DECL_P (*node))
6872 {
6873 if (TREE_CODE (*node) != FUNCTION_DECL)
6874 {
6875 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6876 name);
6877 *no_add_attrs = true;
6878 }
6879 /* FIXME: the argument, if any, is checked for type attributes;
6880 should it be checked for decl ones? */
6881 }
6882 else
6883 {
6884 if (TREE_CODE (*node) == FUNCTION_TYPE
6885 || TREE_CODE (*node) == METHOD_TYPE)
6886 {
6887 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6888 {
6889 warning (OPT_Wattributes, "%qE attribute ignored",
6890 name);
6891 *no_add_attrs = true;
6892 }
6893 }
6894 else if (TREE_CODE (*node) == POINTER_TYPE
6895 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6896 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6897 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6898 {
6899 *node = build_variant_type_copy (*node);
6900 TREE_TYPE (*node) = build_type_attribute_variant
6901 (TREE_TYPE (*node),
6902 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6903 *no_add_attrs = true;
6904 }
6905 else
6906 {
6907 /* Possibly pass this attribute on from the type to a decl. */
6908 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6909 | (int) ATTR_FLAG_FUNCTION_NEXT
6910 | (int) ATTR_FLAG_ARRAY_NEXT))
6911 {
6912 *no_add_attrs = true;
6913 return tree_cons (name, args, NULL_TREE);
6914 }
6915 else
6916 {
6917 warning (OPT_Wattributes, "%qE attribute ignored",
6918 name);
6919 }
6920 }
6921 }
6922
6923 return NULL_TREE;
6924 }
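
/* For illustration, the attribute is normally written on a handler
   declaration such as (handler names invented):

       void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
       void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   The optional string argument selects the exception kind; it is
   decoded by arm_isr_value, and an unrecognised value makes the
   attribute be ignored with the warning above.  */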
6925
6926 /* Handle a "pcs" attribute; arguments as in struct
6927 attribute_spec.handler. */
6928 static tree
6929 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6930 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6931 {
6932 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6933 {
6934 warning (OPT_Wattributes, "%qE attribute ignored", name);
6935 *no_add_attrs = true;
6936 }
6937 return NULL_TREE;
6938 }
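
/* For illustration, the "pcs" attribute selects the procedure call
   standard for an individual function type, e.g. (names invented):

       double soft_args (double) __attribute__ ((pcs ("aapcs")));
       double vfp_args (double) __attribute__ ((pcs ("aapcs-vfp")));

   A value that arm_pcs_from_attribute does not recognise is rejected
   with the warning above.  */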
6939
6940 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6941 /* Handle the "notshared" attribute. This attribute is another way of
6942 requesting hidden visibility. ARM's compiler supports
6943 "__declspec(notshared)"; we support the same thing via an
6944 attribute. */
6945
6946 static tree
6947 arm_handle_notshared_attribute (tree *node,
6948 tree name ATTRIBUTE_UNUSED,
6949 tree args ATTRIBUTE_UNUSED,
6950 int flags ATTRIBUTE_UNUSED,
6951 bool *no_add_attrs)
6952 {
6953 tree decl = TYPE_NAME (*node);
6954
6955 if (decl)
6956 {
6957 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6958 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6959 *no_add_attrs = false;
6960 }
6961 return NULL_TREE;
6962 }
6963 #endif
6964
6965 /* This function returns true if a function with declaration FNDECL and type
6966 FNTYPE uses the stack to pass arguments or to return its value, and false
6967 otherwise. This is used for functions with the attributes
6968 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6969 diagnostic messages if the stack is used. NAME is the name of the attribute
6970 used. */
6971
6972 static bool
6973 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6974 {
6975 function_args_iterator args_iter;
6976 CUMULATIVE_ARGS args_so_far_v;
6977 cumulative_args_t args_so_far;
6978 bool first_param = true;
6979 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6980
6981 /* Error out if any argument is passed on the stack. */
6982 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6983 args_so_far = pack_cumulative_args (&args_so_far_v);
6984 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6985 {
6986 rtx arg_rtx;
6987 machine_mode arg_mode = TYPE_MODE (arg_type);
6988
6989 prev_arg_type = arg_type;
6990 if (VOID_TYPE_P (arg_type))
6991 continue;
6992
6993 if (!first_param)
6994 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6995 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6996 if (!arg_rtx
6997 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6998 {
6999 error ("%qE attribute not available to functions with arguments "
7000 "passed on the stack", name);
7001 return true;
7002 }
7003 first_param = false;
7004 }
7005
7006 /* Error out for variadic functions since we cannot control how many
7007 arguments will be passed and thus the stack could be used. stdarg_p () is
7008 not used for this check, to avoid walking the argument list twice. */
7009 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7010 {
7011 error ("%qE attribute not available to functions with variable number "
7012 "of arguments", name);
7013 return true;
7014 }
7015
7016 /* Error out if return value is passed on the stack. */
7017 ret_type = TREE_TYPE (fntype);
7018 if (arm_return_in_memory (ret_type, fntype))
7019 {
7020 error ("%qE attribute not available to functions that return value on "
7021 "the stack", name);
7022 return true;
7023 }
7024 return false;
7025 }
7026
7027 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7028 function will check whether the attribute is allowed here and will add the
7029 attribute to the function declaration tree or otherwise issue a warning. */
7030
7031 static tree
7032 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7033 tree /* args */,
7034 int /* flags */,
7035 bool *no_add_attrs)
7036 {
7037 tree fndecl;
7038
7039 if (!use_cmse)
7040 {
7041 *no_add_attrs = true;
7042 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7043 name);
7044 return NULL_TREE;
7045 }
7046
7047 /* Ignore attribute for function types. */
7048 if (TREE_CODE (*node) != FUNCTION_DECL)
7049 {
7050 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7051 name);
7052 *no_add_attrs = true;
7053 return NULL_TREE;
7054 }
7055
7056 fndecl = *node;
7057
7058 /* Warn for static linkage functions. */
7059 if (!TREE_PUBLIC (fndecl))
7060 {
7061 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7062 "with static linkage", name);
7063 *no_add_attrs = true;
7064 return NULL_TREE;
7065 }
7066
7067 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7068 TREE_TYPE (fndecl));
7069 return NULL_TREE;
7070 }
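
/* For illustration, with -mcmse a secure-world entry point is declared
   as (function name invented):

       int __attribute__ ((cmse_nonsecure_entry)) get_key (int slot);

   The checks above reject the attribute whenever an argument or the
   return value would have to be passed on the stack, because the
   non-secure caller must not need access to the secure stack.  */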
7071
7072
7073 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7074 function will check whether the attribute is allowed here and will add the
7075 attribute to the function type tree or otherwise issue a diagnostic. The
7076 reason we check this at declaration time is to only allow the use of the
7077 attribute with declarations of function pointers and not function
7078 declarations. This function checks NODE is of the expected type and issues
7079 diagnostics otherwise using NAME. If it is not of the expected type
7080 *NO_ADD_ATTRS will be set to true. */
7081
7082 static tree
7083 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7084 tree /* args */,
7085 int /* flags */,
7086 bool *no_add_attrs)
7087 {
7088 tree decl = NULL_TREE, fntype = NULL_TREE;
7089 tree type;
7090
7091 if (!use_cmse)
7092 {
7093 *no_add_attrs = true;
7094 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7095 name);
7096 return NULL_TREE;
7097 }
7098
7099 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7100 {
7101 decl = *node;
7102 fntype = TREE_TYPE (decl);
7103 }
7104
7105 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7106 fntype = TREE_TYPE (fntype);
7107
7108 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7109 {
7110 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7111 "function pointer", name);
7112 *no_add_attrs = true;
7113 return NULL_TREE;
7114 }
7115
7116 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7117
7118 if (*no_add_attrs)
7119 return NULL_TREE;
7120
7121 /* Prevent trees from being shared among function types with and
7122 without the cmse_nonsecure_call attribute. */
7123 type = TREE_TYPE (decl);
7124
7125 type = build_distinct_type_copy (type);
7126 TREE_TYPE (decl) = type;
7127 fntype = type;
7128
7129 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7130 {
7131 type = fntype;
7132 fntype = TREE_TYPE (fntype);
7133 fntype = build_distinct_type_copy (fntype);
7134 TREE_TYPE (type) = fntype;
7135 }
7136
7137 /* Construct a type attribute and add it to the function type. */
7138 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7139 TYPE_ATTRIBUTES (fntype));
7140 TYPE_ATTRIBUTES (fntype) = attrs;
7141 return NULL_TREE;
7142 }
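
/* For illustration, the attribute is applied to the base type of a
   function pointer through which secure code calls non-secure code,
   e.g. (typedef and variable names invented):

       typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb_t (int);
       ns_cb_t *callback;
       callback (42);

   The handler above copies the pointer and function types before
   tagging them so that ordinary function pointers with the same
   signature are left untouched.  */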
7143
7144 /* Return 0 if the attributes for two types are incompatible, 1 if they
7145 are compatible, and 2 if they are nearly compatible (which causes a
7146 warning to be generated). */
7147 static int
7148 arm_comp_type_attributes (const_tree type1, const_tree type2)
7149 {
7150 int l1, l2, s1, s2;
7151
7152 /* Check for mismatch of non-default calling convention. */
7153 if (TREE_CODE (type1) != FUNCTION_TYPE)
7154 return 1;
7155
7156 /* Check for mismatched call attributes. */
7157 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7158 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7159 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7160 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7161
7162 /* Only bother to check if an attribute is defined. */
7163 if (l1 | l2 | s1 | s2)
7164 {
7165 /* If one type has an attribute, the other must have the same attribute. */
7166 if ((l1 != l2) || (s1 != s2))
7167 return 0;
7168
7169 /* Disallow mixed attributes. */
7170 if ((l1 & s2) || (l2 & s1))
7171 return 0;
7172 }
7173
7174 /* Check for mismatched ISR attribute. */
7175 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7176 if (! l1)
7177 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7178 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7179 if (! l2)
7180 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7181 if (l1 != l2)
7182 return 0;
7183
7184 l1 = lookup_attribute ("cmse_nonsecure_call",
7185 TYPE_ATTRIBUTES (type1)) != NULL;
7186 l2 = lookup_attribute ("cmse_nonsecure_call",
7187 TYPE_ATTRIBUTES (type2)) != NULL;
7188
7189 if (l1 != l2)
7190 return 0;
7191
7192 return 1;
7193 }
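
/* For illustration, this hook is what lets the front end diagnose,
   e.g. (names invented):

       extern void f (void) __attribute__ ((long_call));
       typedef void sc_fn (void) __attribute__ ((short_call));
       sc_fn *p = f;

   as an initialization from an incompatible pointer type: the
   mismatched long_call/short_call attributes make this function
   return 0.  */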
7194
7195 /* Assigns default attributes to newly defined type. This is used to
7196 set short_call/long_call attributes for function types of
7197 functions defined inside corresponding #pragma scopes. */
7198 static void
7199 arm_set_default_type_attributes (tree type)
7200 {
7201 /* Add __attribute__ ((long_call)) to all functions when
7202 inside #pragma long_calls, or __attribute__ ((short_call))
7203 when inside #pragma no_long_calls. */
7204 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7205 {
7206 tree type_attr_list, attr_name;
7207 type_attr_list = TYPE_ATTRIBUTES (type);
7208
7209 if (arm_pragma_long_calls == LONG)
7210 attr_name = get_identifier ("long_call");
7211 else if (arm_pragma_long_calls == SHORT)
7212 attr_name = get_identifier ("short_call");
7213 else
7214 return;
7215
7216 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7217 TYPE_ATTRIBUTES (type) = type_attr_list;
7218 }
7219 }
7220 \f
7221 /* Return true if DECL is known to be linked into section SECTION. */
7222
7223 static bool
7224 arm_function_in_section_p (tree decl, section *section)
7225 {
7226 /* We can only be certain about the prevailing symbol definition. */
7227 if (!decl_binds_to_current_def_p (decl))
7228 return false;
7229
7230 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7231 if (!DECL_SECTION_NAME (decl))
7232 {
7233 /* Make sure that we will not create a unique section for DECL. */
7234 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7235 return false;
7236 }
7237
7238 return function_section (decl) == section;
7239 }
7240
7241 /* Return nonzero if a 32-bit "long_call" should be generated for
7242 a call from the current function to DECL. We generate a long_call
7243 if the function:
7244
7245 a. has an __attribute__ ((long_call))
7246 or b. is within the scope of a #pragma long_calls
7247 or c. the -mlong-calls command line switch has been specified
7248
7249 However we do not generate a long call if the function:
7250
7251 d. has an __attribute__ ((short_call))
7252 or e. is inside the scope of a #pragma no_long_calls
7253 or f. is defined in the same section as the current function. */
7254
7255 bool
7256 arm_is_long_call_p (tree decl)
7257 {
7258 tree attrs;
7259
7260 if (!decl)
7261 return TARGET_LONG_CALLS;
7262
7263 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7264 if (lookup_attribute ("short_call", attrs))
7265 return false;
7266
7267 /* For "f", be conservative, and only cater for cases in which the
7268 whole of the current function is placed in the same section. */
7269 if (!flag_reorder_blocks_and_partition
7270 && TREE_CODE (decl) == FUNCTION_DECL
7271 && arm_function_in_section_p (decl, current_function_section ()))
7272 return false;
7273
7274 if (lookup_attribute ("long_call", attrs))
7275 return true;
7276
7277 return TARGET_LONG_CALLS;
7278 }
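
/* For illustration (function names invented), the per-declaration
   attributes override the global -mlong-calls / #pragma state:

       void far_func (void) __attribute__ ((long_call));
       void near_func (void) __attribute__ ((short_call));

   A call to far_func is emitted through a register so that it can
   reach any address, even without -mlong-calls; a call to near_func
   uses a plain BL even when -mlong-calls is given, which is only safe
   if the callee really is within branch range.  */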
7279
7280 /* Return nonzero if it is ok to make a tail-call to DECL. */
7281 static bool
7282 arm_function_ok_for_sibcall (tree decl, tree exp)
7283 {
7284 unsigned long func_type;
7285
7286 if (cfun->machine->sibcall_blocked)
7287 return false;
7288
7289 /* Never tailcall something if we are generating code for Thumb-1. */
7290 if (TARGET_THUMB1)
7291 return false;
7292
7293 /* The PIC register is live on entry to VxWorks PLT entries, so we
7294 must make the call before restoring the PIC register. */
7295 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7296 return false;
7297
7298 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7299 may be used both as the target of the call and as the base register for
7300 restoring the VFP registers. */
7301 if (TARGET_APCS_FRAME && TARGET_ARM
7302 && TARGET_HARD_FLOAT
7303 && decl && arm_is_long_call_p (decl))
7304 return false;
7305
7306 /* If we are interworking and the function is not declared static
7307 then we can't tail-call it unless we know that it exists in this
7308 compilation unit (since it might be a Thumb routine). */
7309 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7310 && !TREE_ASM_WRITTEN (decl))
7311 return false;
7312
7313 func_type = arm_current_func_type ();
7314 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7315 if (IS_INTERRUPT (func_type))
7316 return false;
7317
7318 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7319 generated for entry functions themselves. */
7320 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7321 return false;
7322
7323 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7324 this would complicate matters for later code generation. */
7325 if (TREE_CODE (exp) == CALL_EXPR)
7326 {
7327 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7328 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7329 return false;
7330 }
7331
7332 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7333 {
7334 /* Check that the return value locations are the same. For
7335 example that we aren't returning a value from the sibling in
7336 a VFP register but then need to transfer it to a core
7337 register. */
7338 rtx a, b;
7339 tree decl_or_type = decl;
7340
7341 /* If it is an indirect function pointer, get the function type. */
7342 if (!decl)
7343 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7344
7345 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7346 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7347 cfun->decl, false);
7348 if (!rtx_equal_p (a, b))
7349 return false;
7350 }
7351
7352 /* Never tailcall if function may be called with a misaligned SP. */
7353 if (IS_STACKALIGN (func_type))
7354 return false;
7355
7356 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7357 references should become a NOP. Don't convert such calls into
7358 sibling calls. */
7359 if (TARGET_AAPCS_BASED
7360 && arm_abi == ARM_ABI_AAPCS
7361 && decl
7362 && DECL_WEAK (decl))
7363 return false;
7364
7365 /* We cannot do a tailcall for an indirect call by descriptor if all the
7366 argument registers are used because the only register left to load the
7367 address is IP and it will already contain the static chain. */
7368 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7369 {
7370 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7371 CUMULATIVE_ARGS cum;
7372 cumulative_args_t cum_v;
7373
7374 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7375 cum_v = pack_cumulative_args (&cum);
7376
7377 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7378 {
7379 tree type = TREE_VALUE (t);
7380 if (!VOID_TYPE_P (type))
7381 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7382 }
7383
7384 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7385 return false;
7386 }
7387
7388 /* Everything else is ok. */
7389 return true;
7390 }
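
/* For illustration (function names invented), the kind of call this
   hook is asked about:

       extern int helper (int);
       int wrapper (int x) { return helper (x + 1); }

   With optimisation enabled the call to helper can normally become a
   sibling call, i.e. a branch rather than BL plus a return, provided
   none of the conditions above applies: not Thumb-1, not an interrupt
   or cmse_nonsecure_entry function, compatible return-value locations,
   and so on.  */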
7391
7392 \f
7393 /* Addressing mode support functions. */
7394
7395 /* Return nonzero if X is a legitimate immediate operand when compiling
7396 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7397 int
7398 legitimate_pic_operand_p (rtx x)
7399 {
7400 if (GET_CODE (x) == SYMBOL_REF
7401 || (GET_CODE (x) == CONST
7402 && GET_CODE (XEXP (x, 0)) == PLUS
7403 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7404 return 0;
7405
7406 return 1;
7407 }
7408
7409 /* Record that the current function needs a PIC register. If PIC_REG is null,
7410 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7411 both cases cfun->machine->pic_reg is initialized if we have not already done
7412 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7413 the PIC register is reloaded at the current position in the instruction
7414 stream regardless of whether it was loaded before. Otherwise, it is only loaded
7415 if not already done so (crtl->uses_pic_offset_table is null). Note that
7416 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7417 is only supported iff COMPUTE_NOW is false. */
7418
7419 static void
7420 require_pic_register (rtx pic_reg, bool compute_now)
7421 {
7422 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7423
7424 /* A lot of the logic here is made obscure by the fact that this
7425 routine gets called as part of the rtx cost estimation process.
7426 We don't want those calls to affect any assumptions about the real
7427 function; and further, we can't call entry_of_function() until we
7428 start the real expansion process. */
7429 if (!crtl->uses_pic_offset_table || compute_now)
7430 {
7431 gcc_assert (can_create_pseudo_p ()
7432 || (pic_reg != NULL_RTX
7433 && REG_P (pic_reg)
7434 && GET_MODE (pic_reg) == Pmode));
7435 if (arm_pic_register != INVALID_REGNUM
7436 && !compute_now
7437 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7438 {
7439 if (!cfun->machine->pic_reg)
7440 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7441
7442 /* Play games to avoid marking the function as needing pic
7443 if we are being called as part of the cost-estimation
7444 process. */
7445 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7446 crtl->uses_pic_offset_table = 1;
7447 }
7448 else
7449 {
7450 rtx_insn *seq, *insn;
7451
7452 if (pic_reg == NULL_RTX)
7453 pic_reg = gen_reg_rtx (Pmode);
7454 if (!cfun->machine->pic_reg)
7455 cfun->machine->pic_reg = pic_reg;
7456
7457 /* Play games to avoid marking the function as needing pic
7458 if we are being called as part of the cost-estimation
7459 process. */
7460 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7461 {
7462 crtl->uses_pic_offset_table = 1;
7463 start_sequence ();
7464
7465 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7466 && arm_pic_register > LAST_LO_REGNUM
7467 && !compute_now)
7468 emit_move_insn (cfun->machine->pic_reg,
7469 gen_rtx_REG (Pmode, arm_pic_register));
7470 else
7471 arm_load_pic_register (0UL, pic_reg);
7472
7473 seq = get_insns ();
7474 end_sequence ();
7475
7476 for (insn = seq; insn; insn = NEXT_INSN (insn))
7477 if (INSN_P (insn))
7478 INSN_LOCATION (insn) = prologue_location;
7479
7480 /* We can be called during expansion of PHI nodes, where
7481 we can't yet emit instructions directly in the final
7482 insn stream. Queue the insns on the entry edge, they will
7483 be committed after everything else is expanded. */
7484 if (currently_expanding_to_rtl)
7485 insert_insn_on_edge (seq,
7486 single_succ_edge
7487 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7488 else
7489 emit_insn (seq);
7490 }
7491 }
7492 }
7493 }
7494
7495 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
7496 created to hold the result of the load. If not NULL, PIC_REG indicates
7497 which register to use as PIC register, otherwise it is decided by register
7498 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
7499 location in the instruction stream, regardless of whether it was loaded
7500 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7501 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7502
7503 Returns the register REG into which the PIC load is performed. */
7504
7505 rtx
7506 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7507 bool compute_now)
7508 {
7509 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7510
7511 if (GET_CODE (orig) == SYMBOL_REF
7512 || GET_CODE (orig) == LABEL_REF)
7513 {
7514 if (reg == 0)
7515 {
7516 gcc_assert (can_create_pseudo_p ());
7517 reg = gen_reg_rtx (Pmode);
7518 }
7519
7520 /* VxWorks does not impose a fixed gap between segments; the run-time
7521 gap can be different from the object-file gap. We therefore can't
7522 use GOTOFF unless we are absolutely sure that the symbol is in the
7523 same segment as the GOT. Unfortunately, the flexibility of linker
7524 scripts means that we can't be sure of that in general, so assume
7525 that GOTOFF is never valid on VxWorks. */
7526 /* References to weak symbols cannot be resolved locally: they
7527 may be overridden by a non-weak definition at link time. */
7528 rtx_insn *insn;
7529 if ((GET_CODE (orig) == LABEL_REF
7530 || (GET_CODE (orig) == SYMBOL_REF
7531 && SYMBOL_REF_LOCAL_P (orig)
7532 && (SYMBOL_REF_DECL (orig)
7533 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7534 && NEED_GOT_RELOC
7535 && arm_pic_data_is_text_relative)
7536 insn = arm_pic_static_addr (orig, reg);
7537 else
7538 {
7539 rtx pat;
7540 rtx mem;
7541
7542 /* If this function doesn't have a pic register, create one now. */
7543 require_pic_register (pic_reg, compute_now);
7544
7545 if (pic_reg == NULL_RTX)
7546 pic_reg = cfun->machine->pic_reg;
7547
7548 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7549
7550 /* Make the MEM as close to a constant as possible. */
7551 mem = SET_SRC (pat);
7552 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7553 MEM_READONLY_P (mem) = 1;
7554 MEM_NOTRAP_P (mem) = 1;
7555
7556 insn = emit_insn (pat);
7557 }
7558
7559 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7560 by loop. */
7561 set_unique_reg_note (insn, REG_EQUAL, orig);
7562
7563 return reg;
7564 }
7565 else if (GET_CODE (orig) == CONST)
7566 {
7567 rtx base, offset;
7568
7569 if (GET_CODE (XEXP (orig, 0)) == PLUS
7570 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7571 return orig;
7572
7573 /* Handle the case where we have: const (UNSPEC_TLS). */
7574 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7575 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7576 return orig;
7577
7578 /* Handle the case where we have:
7579 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7580 CONST_INT. */
7581 if (GET_CODE (XEXP (orig, 0)) == PLUS
7582 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7583 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7584 {
7585 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7586 return orig;
7587 }
7588
7589 if (reg == 0)
7590 {
7591 gcc_assert (can_create_pseudo_p ());
7592 reg = gen_reg_rtx (Pmode);
7593 }
7594
7595 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7596
7597 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
7598 pic_reg, compute_now);
7599 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7600 base == reg ? 0 : reg, pic_reg,
7601 compute_now);
7602
7603 if (CONST_INT_P (offset))
7604 {
7605 /* The base register doesn't really matter, we only want to
7606 test the index for the appropriate mode. */
7607 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7608 {
7609 gcc_assert (can_create_pseudo_p ());
7610 offset = force_reg (Pmode, offset);
7611 }
7612
7613 if (CONST_INT_P (offset))
7614 return plus_constant (Pmode, base, INTVAL (offset));
7615 }
7616
7617 if (GET_MODE_SIZE (mode) > 4
7618 && (GET_MODE_CLASS (mode) == MODE_INT
7619 || TARGET_SOFT_FLOAT))
7620 {
7621 emit_insn (gen_addsi3 (reg, base, offset));
7622 return reg;
7623 }
7624
7625 return gen_rtx_PLUS (Pmode, base, offset);
7626 }
7627
7628 return orig;
7629 }
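
/* A rough sketch of what the above typically produces for a global
   symbol under -fpic on ELF targets (register numbers and label names
   are illustrative only):

       ldr     r3, .LCGOT              -- .word sym(GOT)
       ldr     r0, [rPIC, r3]          -- fetch the address of sym from the GOT

   whereas a local symbol, when NEED_GOT_RELOC and
   arm_pic_data_is_text_relative allow it, takes the cheaper
   pc-relative path through arm_pic_static_addr.  */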
7630
7631
7632 /* Find a spare register to use during the prolog of a function. */
7633
7634 static int
7635 thumb_find_work_register (unsigned long pushed_regs_mask)
7636 {
7637 int reg;
7638
7639 /* Check the argument registers first as these are call-used. The
7640 register allocation order means that sometimes r3 might be used
7641 but earlier argument registers might not, so check them all. */
7642 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7643 if (!df_regs_ever_live_p (reg))
7644 return reg;
7645
7646 /* Before going on to check the call-saved registers we can try a couple
7647 more ways of deducing that r3 is available. The first is when we are
7648 pushing anonymous arguments onto the stack and we have less than 4
7649 registers worth of fixed arguments(*). In this case r3 will be part of
7650 the variable argument list and so we can be sure that it will be
7651 pushed right at the start of the function. Hence it will be available
7652 for the rest of the prologue.
7653 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7654 if (cfun->machine->uses_anonymous_args
7655 && crtl->args.pretend_args_size > 0)
7656 return LAST_ARG_REGNUM;
7657
7658 /* The other case is when we have fixed arguments but less than 4 registers
7659 worth. In this case r3 might be used in the body of the function, but
7660 it is not being used to convey an argument into the function. In theory
7661 we could just check crtl->args.size to see how many bytes are
7662 being passed in argument registers, but it seems that it is unreliable.
7663 Sometimes it will have the value 0 when in fact arguments are being
7664 passed. (See testcase execute/20021111-1.c for an example). So we also
7665 check the args_info.nregs field as well. The problem with this field is
7666 that it makes no allowances for arguments that are passed to the
7667 function but which are not used. Hence we could miss an opportunity
7668 when a function has an unused argument in r3. But it is better to be
7669 safe than to be sorry. */
7670 if (! cfun->machine->uses_anonymous_args
7671 && crtl->args.size >= 0
7672 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7673 && (TARGET_AAPCS_BASED
7674 ? crtl->args.info.aapcs_ncrn < 4
7675 : crtl->args.info.nregs < 4))
7676 return LAST_ARG_REGNUM;
7677
7678 /* Otherwise look for a call-saved register that is going to be pushed. */
7679 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7680 if (pushed_regs_mask & (1 << reg))
7681 return reg;
7682
7683 if (TARGET_THUMB2)
7684 {
7685 /* Thumb-2 can use high regs. */
7686 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7687 if (pushed_regs_mask & (1 << reg))
7688 return reg;
7689 }
7690 /* Something went wrong - thumb_compute_save_reg_mask()
7691 should have arranged for a suitable register to be pushed. */
7692 gcc_unreachable ();
7693 }
7694
7695 static GTY(()) int pic_labelno;
7696
7697 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7698 low register. */
7699
7700 void
7701 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
7702 {
7703 rtx l1, labelno, pic_tmp, pic_rtx;
7704
7705 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7706 return;
7707
7708 gcc_assert (flag_pic);
7709
7710 if (pic_reg == NULL_RTX)
7711 pic_reg = cfun->machine->pic_reg;
7712 if (TARGET_VXWORKS_RTP)
7713 {
7714 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7715 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7716 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7717
7718 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7719
7720 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7721 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7722 }
7723 else
7724 {
7725 /* We use an UNSPEC rather than a LABEL_REF because this label
7726 never appears in the code stream. */
7727
7728 labelno = GEN_INT (pic_labelno++);
7729 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7730 l1 = gen_rtx_CONST (VOIDmode, l1);
7731
7732 /* On the ARM the PC register contains 'dot + 8' at the time of the
7733 addition; on the Thumb it is 'dot + 4'. */
7734 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7735 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7736 UNSPEC_GOTSYM_OFF);
7737 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7738
7739 if (TARGET_32BIT)
7740 {
7741 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7742 }
7743 else /* TARGET_THUMB1 */
7744 {
7745 if (arm_pic_register != INVALID_REGNUM
7746 && REGNO (pic_reg) > LAST_LO_REGNUM)
7747 {
7748 /* We will have pushed the pic register, so we should always be
7749 able to find a work register. */
7750 pic_tmp = gen_rtx_REG (SImode,
7751 thumb_find_work_register (saved_regs));
7752 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7753 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7754 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7755 }
7756 else if (arm_pic_register != INVALID_REGNUM
7757 && arm_pic_register > LAST_LO_REGNUM
7758 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7759 {
7760 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7761 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7762 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7763 }
7764 else
7765 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7766 }
7767 }
7768
7769 /* Need to emit this whether or not we obey regdecls,
7770 since setjmp/longjmp can cause life info to screw up. */
7771 emit_use (pic_reg);
7772 }
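
/* A rough sketch of the sequence emitted above for the common ELF case
   in ARM state (label and register names are illustrative only):

       ldr     rPIC, .LCPIC            -- pic_load_addr_unified
   .LPIC0:
       add     rPIC, pc, rPIC          -- pc reads as .LPIC0 + 8 here
       ...
   .LCPIC:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   which leaves the address of the GOT in the PIC register; the '+ 8'
   (or '+ 4' for Thumb) matches the TARGET_ARM ? 8 : 4 adjustment used
   when the UNSPEC_GOTSYM_OFF constant is built.  */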
7773
7774 /* Generate code to load the address of a static var when flag_pic is set. */
7775 static rtx_insn *
7776 arm_pic_static_addr (rtx orig, rtx reg)
7777 {
7778 rtx l1, labelno, offset_rtx;
7779
7780 gcc_assert (flag_pic);
7781
7782 /* We use an UNSPEC rather than a LABEL_REF because this label
7783 never appears in the code stream. */
7784 labelno = GEN_INT (pic_labelno++);
7785 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7786 l1 = gen_rtx_CONST (VOIDmode, l1);
7787
7788 /* On the ARM the PC register contains 'dot + 8' at the time of the
7789 addition; on the Thumb it is 'dot + 4'. */
7790 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7791 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7792 UNSPEC_SYMBOL_OFFSET);
7793 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7794
7795 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7796 }
7797
7798 /* Return nonzero if X is valid as an ARM state addressing register. */
7799 static int
7800 arm_address_register_rtx_p (rtx x, int strict_p)
7801 {
7802 int regno;
7803
7804 if (!REG_P (x))
7805 return 0;
7806
7807 regno = REGNO (x);
7808
7809 if (strict_p)
7810 return ARM_REGNO_OK_FOR_BASE_P (regno);
7811
7812 return (regno <= LAST_ARM_REGNUM
7813 || regno >= FIRST_PSEUDO_REGISTER
7814 || regno == FRAME_POINTER_REGNUM
7815 || regno == ARG_POINTER_REGNUM);
7816 }
7817
7818 /* Return TRUE if this rtx is the difference of a symbol and a label,
7819 and will reduce to a PC-relative relocation in the object file.
7820 Expressions like this can be left alone when generating PIC, rather
7821 than forced through the GOT. */
7822 static int
7823 pcrel_constant_p (rtx x)
7824 {
7825 if (GET_CODE (x) == MINUS)
7826 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7827
7828 return FALSE;
7829 }
7830
7831 /* Return true if X will surely end up in an index register after next
7832 splitting pass. */
7833 static bool
7834 will_be_in_index_register (const_rtx x)
7835 {
7836 /* arm.md: calculate_pic_address will split this into a register. */
7837 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7838 }
7839
7840 /* Return nonzero if X is a valid ARM state address operand. */
7841 int
7842 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7843 int strict_p)
7844 {
7845 bool use_ldrd;
7846 enum rtx_code code = GET_CODE (x);
7847
7848 if (arm_address_register_rtx_p (x, strict_p))
7849 return 1;
7850
7851 use_ldrd = (TARGET_LDRD
7852 && (mode == DImode || mode == DFmode));
7853
7854 if (code == POST_INC || code == PRE_DEC
7855 || ((code == PRE_INC || code == POST_DEC)
7856 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7857 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7858
7859 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7860 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7861 && GET_CODE (XEXP (x, 1)) == PLUS
7862 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7863 {
7864 rtx addend = XEXP (XEXP (x, 1), 1);
7865
7866 /* Don't allow ldrd post increment by register because it's hard
7867 to fixup invalid register choices. */
7868 if (use_ldrd
7869 && GET_CODE (x) == POST_MODIFY
7870 && REG_P (addend))
7871 return 0;
7872
7873 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7874 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7875 }
7876
7877 /* After reload constants split into minipools will have addresses
7878 from a LABEL_REF. */
7879 else if (reload_completed
7880 && (code == LABEL_REF
7881 || (code == CONST
7882 && GET_CODE (XEXP (x, 0)) == PLUS
7883 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7884 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7885 return 1;
7886
7887 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7888 return 0;
7889
7890 else if (code == PLUS)
7891 {
7892 rtx xop0 = XEXP (x, 0);
7893 rtx xop1 = XEXP (x, 1);
7894
7895 return ((arm_address_register_rtx_p (xop0, strict_p)
7896 && ((CONST_INT_P (xop1)
7897 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7898 || (!strict_p && will_be_in_index_register (xop1))))
7899 || (arm_address_register_rtx_p (xop1, strict_p)
7900 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7901 }
7902
7903 #if 0
7904 /* Reload currently can't handle MINUS, so disable this for now */
7905 else if (GET_CODE (x) == MINUS)
7906 {
7907 rtx xop0 = XEXP (x, 0);
7908 rtx xop1 = XEXP (x, 1);
7909
7910 return (arm_address_register_rtx_p (xop0, strict_p)
7911 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7912 }
7913 #endif
7914
7915 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7916 && code == SYMBOL_REF
7917 && CONSTANT_POOL_ADDRESS_P (x)
7918 && ! (flag_pic
7919 && symbol_mentioned_p (get_pool_constant (x))
7920 && ! pcrel_constant_p (get_pool_constant (x))))
7921 return 1;
7922
7923 return 0;
7924 }
7925
7926 /* Return true if we can avoid creating a constant pool entry for x. */
7927 static bool
7928 can_avoid_literal_pool_for_label_p (rtx x)
7929 {
7930 /* Normally we can assign constant values to target registers without
7931 the help of the constant pool. But there are cases where we have to
7932 use the constant pool, such as:
7933 1) assigning a label to a register.
7934 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7935
7936 Constant pool access in format:
7937 (set (reg r0) (mem (symbol_ref (".LC0"))))
7938 will cause the use of literal pool (later in function arm_reorg).
7939 So here we mark such format as an invalid format, then the compiler
7940 will adjust it into:
7941 (set (reg r0) (symbol_ref (".LC0")))
7942 (set (reg r0) (mem (reg r0))).
7943 No extra register is required, and (mem (reg r0)) won't cause the use
7944 of literal pools. */
7945 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7946 && CONSTANT_POOL_ADDRESS_P (x))
7947 return 1;
7948 return 0;
7949 }
7950
7951
7952 /* Return nonzero if X is a valid Thumb-2 address operand. */
7953 static int
7954 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7955 {
7956 bool use_ldrd;
7957 enum rtx_code code = GET_CODE (x);
7958
7959 if (arm_address_register_rtx_p (x, strict_p))
7960 return 1;
7961
7962 use_ldrd = (TARGET_LDRD
7963 && (mode == DImode || mode == DFmode));
7964
7965 if (code == POST_INC || code == PRE_DEC
7966 || ((code == PRE_INC || code == POST_DEC)
7967 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7968 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7969
7970 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7971 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7972 && GET_CODE (XEXP (x, 1)) == PLUS
7973 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7974 {
7975 /* Thumb-2 only has autoincrement by constant. */
7976 rtx addend = XEXP (XEXP (x, 1), 1);
7977 HOST_WIDE_INT offset;
7978
7979 if (!CONST_INT_P (addend))
7980 return 0;
7981
7982 offset = INTVAL(addend);
7983 if (GET_MODE_SIZE (mode) <= 4)
7984 return (offset > -256 && offset < 256);
7985
7986 return (use_ldrd && offset > -1024 && offset < 1024
7987 && (offset & 3) == 0);
7988 }
7989
7990 /* After reload constants split into minipools will have addresses
7991 from a LABEL_REF. */
7992 else if (reload_completed
7993 && (code == LABEL_REF
7994 || (code == CONST
7995 && GET_CODE (XEXP (x, 0)) == PLUS
7996 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7997 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7998 return 1;
7999
8000 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8001 return 0;
8002
8003 else if (code == PLUS)
8004 {
8005 rtx xop0 = XEXP (x, 0);
8006 rtx xop1 = XEXP (x, 1);
8007
8008 return ((arm_address_register_rtx_p (xop0, strict_p)
8009 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8010 || (!strict_p && will_be_in_index_register (xop1))))
8011 || (arm_address_register_rtx_p (xop1, strict_p)
8012 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8013 }
8014
8015 else if (can_avoid_literal_pool_for_label_p (x))
8016 return 0;
8017
8018 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8019 && code == SYMBOL_REF
8020 && CONSTANT_POOL_ADDRESS_P (x)
8021 && ! (flag_pic
8022 && symbol_mentioned_p (get_pool_constant (x))
8023 && ! pcrel_constant_p (get_pool_constant (x))))
8024 return 1;
8025
8026 return 0;
8027 }
8028
8029 /* Return nonzero if INDEX is valid for an address index operand in
8030 ARM state. */
8031 static int
8032 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8033 int strict_p)
8034 {
8035 HOST_WIDE_INT range;
8036 enum rtx_code code = GET_CODE (index);
8037
8038 /* Standard coprocessor addressing modes. */
8039 if (TARGET_HARD_FLOAT
8040 && (mode == SFmode || mode == DFmode))
8041 return (code == CONST_INT && INTVAL (index) < 1024
8042 && INTVAL (index) > -1024
8043 && (INTVAL (index) & 3) == 0);
8044
8045 /* For quad modes, we restrict the constant offset to be slightly less
8046 than what the instruction format permits. We do this because for
8047 quad mode moves, we will actually decompose them into two separate
8048 double-mode reads or writes. INDEX must therefore be a valid
8049 (double-mode) offset and so should INDEX+8. */
8050 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8051 return (code == CONST_INT
8052 && INTVAL (index) < 1016
8053 && INTVAL (index) > -1024
8054 && (INTVAL (index) & 3) == 0);
8055
8056 /* We have no such constraint on double mode offsets, so we permit the
8057 full range of the instruction format. */
8058 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8059 return (code == CONST_INT
8060 && INTVAL (index) < 1024
8061 && INTVAL (index) > -1024
8062 && (INTVAL (index) & 3) == 0);
8063
8064 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8065 return (code == CONST_INT
8066 && INTVAL (index) < 1024
8067 && INTVAL (index) > -1024
8068 && (INTVAL (index) & 3) == 0);
8069
8070 if (arm_address_register_rtx_p (index, strict_p)
8071 && (GET_MODE_SIZE (mode) <= 4))
8072 return 1;
8073
8074 if (mode == DImode || mode == DFmode)
8075 {
8076 if (code == CONST_INT)
8077 {
8078 HOST_WIDE_INT val = INTVAL (index);
8079
8080 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8081 If vldr is selected it uses arm_coproc_mem_operand. */
8082 if (TARGET_LDRD)
8083 return val > -256 && val < 256;
8084 else
8085 return val > -4096 && val < 4092;
8086 }
8087
8088 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8089 }
8090
8091 if (GET_MODE_SIZE (mode) <= 4
8092 && ! (arm_arch4
8093 && (mode == HImode
8094 || mode == HFmode
8095 || (mode == QImode && outer == SIGN_EXTEND))))
8096 {
8097 if (code == MULT)
8098 {
8099 rtx xiop0 = XEXP (index, 0);
8100 rtx xiop1 = XEXP (index, 1);
8101
8102 return ((arm_address_register_rtx_p (xiop0, strict_p)
8103 && power_of_two_operand (xiop1, SImode))
8104 || (arm_address_register_rtx_p (xiop1, strict_p)
8105 && power_of_two_operand (xiop0, SImode)));
8106 }
8107 else if (code == LSHIFTRT || code == ASHIFTRT
8108 || code == ASHIFT || code == ROTATERT)
8109 {
8110 rtx op = XEXP (index, 1);
8111
8112 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8113 && CONST_INT_P (op)
8114 && INTVAL (op) > 0
8115 && INTVAL (op) <= 31);
8116 }
8117 }
8118
8119 /* For ARM v4 we may be doing a sign-extend operation during the
8120 load. */
8121 if (arm_arch4)
8122 {
8123 if (mode == HImode
8124 || mode == HFmode
8125 || (outer == SIGN_EXTEND && mode == QImode))
8126 range = 256;
8127 else
8128 range = 4096;
8129 }
8130 else
8131 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8132
8133 return (code == CONST_INT
8134 && INTVAL (index) < range
8135 && INTVAL (index) > -range);
8136 }
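
/* Illustrative examples of index forms the function above accepts for
   a word-sized (SImode) access in ARM state (register numbers are
   arbitrary):

       ldr  r0, [r1, #4095]            -- immediate within +/-4095
       ldr  r0, [r1, r2]               -- register index
       ldr  r0, [r1, r2, lsl #2]       -- index scaled by a power of two
       ldrh r0, [r1, #255]             -- halfword, +/-255 with arm_arch4
       ldrd r0, r1, [r2, #248]         -- DImode with TARGET_LDRD, +/-255

   The ranges mirror the 'range' and TARGET_LDRD computations above.  */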
8137
8138 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8139 index operand. i.e. 1, 2, 4 or 8. */
8140 static bool
8141 thumb2_index_mul_operand (rtx op)
8142 {
8143 HOST_WIDE_INT val;
8144
8145 if (!CONST_INT_P (op))
8146 return false;
8147
8148 val = INTVAL(op);
8149 return (val == 1 || val == 2 || val == 4 || val == 8);
8150 }
8151
8152 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8153 static int
8154 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8155 {
8156 enum rtx_code code = GET_CODE (index);
8157
8158 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8159 /* Standard coprocessor addressing modes. */
8160 if (TARGET_HARD_FLOAT
8161 && (mode == SFmode || mode == DFmode))
8162 return (code == CONST_INT && INTVAL (index) < 1024
8163 /* Thumb-2 allows only > -256 index range for its core register
8164 load/stores. Since we allow SF/DF in core registers, we have
8165 to use the intersection between -256~4096 (core) and -1024~1024
8166 (coprocessor). */
8167 && INTVAL (index) > -256
8168 && (INTVAL (index) & 3) == 0);
8169
8170 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8171 {
8172 /* For DImode assume values will usually live in core regs
8173 and only allow LDRD addressing modes. */
8174 if (!TARGET_LDRD || mode != DImode)
8175 return (code == CONST_INT
8176 && INTVAL (index) < 1024
8177 && INTVAL (index) > -1024
8178 && (INTVAL (index) & 3) == 0);
8179 }
8180
8181 /* For quad modes, we restrict the constant offset to be slightly less
8182 than what the instruction format permits. We do this because for
8183 quad mode moves, we will actually decompose them into two separate
8184 double-mode reads or writes. INDEX must therefore be a valid
8185 (double-mode) offset and so should INDEX+8. */
8186 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8187 return (code == CONST_INT
8188 && INTVAL (index) < 1016
8189 && INTVAL (index) > -1024
8190 && (INTVAL (index) & 3) == 0);
8191
8192 /* We have no such constraint on double mode offsets, so we permit the
8193 full range of the instruction format. */
8194 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8195 return (code == CONST_INT
8196 && INTVAL (index) < 1024
8197 && INTVAL (index) > -1024
8198 && (INTVAL (index) & 3) == 0);
8199
8200 if (arm_address_register_rtx_p (index, strict_p)
8201 && (GET_MODE_SIZE (mode) <= 4))
8202 return 1;
8203
8204 if (mode == DImode || mode == DFmode)
8205 {
8206 if (code == CONST_INT)
8207 {
8208 HOST_WIDE_INT val = INTVAL (index);
8209 /* Thumb-2 ldrd only has reg+const addressing modes.
8210 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8211 If vldr is selected it uses arm_coproc_mem_operand. */
8212 if (TARGET_LDRD)
8213 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8214 else
8215 return IN_RANGE (val, -255, 4095 - 4);
8216 }
8217 else
8218 return 0;
8219 }
8220
8221 if (code == MULT)
8222 {
8223 rtx xiop0 = XEXP (index, 0);
8224 rtx xiop1 = XEXP (index, 1);
8225
8226 return ((arm_address_register_rtx_p (xiop0, strict_p)
8227 && thumb2_index_mul_operand (xiop1))
8228 || (arm_address_register_rtx_p (xiop1, strict_p)
8229 && thumb2_index_mul_operand (xiop0)));
8230 }
8231 else if (code == ASHIFT)
8232 {
8233 rtx op = XEXP (index, 1);
8234
8235 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8236 && CONST_INT_P (op)
8237 && INTVAL (op) > 0
8238 && INTVAL (op) <= 3);
8239 }
8240
8241 return (code == CONST_INT
8242 && INTVAL (index) < 4096
8243 && INTVAL (index) > -256);
8244 }
8245
8246 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8247 static int
8248 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8249 {
8250 int regno;
8251
8252 if (!REG_P (x))
8253 return 0;
8254
8255 regno = REGNO (x);
8256
8257 if (strict_p)
8258 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8259
8260 return (regno <= LAST_LO_REGNUM
8261 || regno > LAST_VIRTUAL_REGISTER
8262 || regno == FRAME_POINTER_REGNUM
8263 || (GET_MODE_SIZE (mode) >= 4
8264 && (regno == STACK_POINTER_REGNUM
8265 || regno >= FIRST_PSEUDO_REGISTER
8266 || x == hard_frame_pointer_rtx
8267 || x == arg_pointer_rtx)));
8268 }
8269
8270 /* Return nonzero if x is a legitimate index register. This is the case
8271 for any base register that can access a QImode object. */
8272 inline static int
8273 thumb1_index_register_rtx_p (rtx x, int strict_p)
8274 {
8275 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8276 }
8277
8278 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8279
8280 The AP may be eliminated to either the SP or the FP, so we use the
8281 least common denominator, e.g. SImode, and offsets from 0 to 64.
8282
8283 ??? Verify whether the above is the right approach.
8284
8285 ??? Also, the FP may be eliminated to the SP, so perhaps that
8286 needs special handling also.
8287
8288 ??? Look at how the mips16 port solves this problem. It probably uses
8289 better ways to solve some of these problems.
8290
8291 Although it is not incorrect, we don't accept QImode and HImode
8292 addresses based on the frame pointer or arg pointer until the
8293 reload pass starts. This is so that eliminating such addresses
8294 into stack based ones won't produce impossible code. */
8295 int
8296 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8297 {
8298 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8299 return 0;
8300
8301 /* ??? Not clear if this is right. Experiment. */
8302 if (GET_MODE_SIZE (mode) < 4
8303 && !(reload_in_progress || reload_completed)
8304 && (reg_mentioned_p (frame_pointer_rtx, x)
8305 || reg_mentioned_p (arg_pointer_rtx, x)
8306 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8307 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8308 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8309 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8310 return 0;
8311
8312 /* Accept any base register. SP only in SImode or larger. */
8313 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8314 return 1;
8315
8316 /* This is PC relative data before arm_reorg runs. */
8317 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8318 && GET_CODE (x) == SYMBOL_REF
8319 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8320 return 1;
8321
8322 /* This is PC relative data after arm_reorg runs. */
8323 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8324 && reload_completed
8325 && (GET_CODE (x) == LABEL_REF
8326 || (GET_CODE (x) == CONST
8327 && GET_CODE (XEXP (x, 0)) == PLUS
8328 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8329 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8330 return 1;
8331
8332 /* Post-inc indexing only supported for SImode and larger. */
8333 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8334 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8335 return 1;
8336
8337 else if (GET_CODE (x) == PLUS)
8338 {
8339 /* REG+REG address can be any two index registers. */
8340 /* We disallow FRAME+REG addressing since we know that FRAME
8341 will be replaced with STACK, and SP relative addressing only
8342 permits SP+OFFSET. */
8343 if (GET_MODE_SIZE (mode) <= 4
8344 && XEXP (x, 0) != frame_pointer_rtx
8345 && XEXP (x, 1) != frame_pointer_rtx
8346 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8347 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8348 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8349 return 1;
8350
8351 /* REG+const has 5-7 bit offset for non-SP registers. */
8352 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8353 || XEXP (x, 0) == arg_pointer_rtx)
8354 && CONST_INT_P (XEXP (x, 1))
8355 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8356 return 1;
8357
8358 /* REG+const has 10-bit offset for SP, but only SImode and
8359 larger is supported. */
8360 /* ??? Should probably check for DI/DFmode overflow here
8361 just like GO_IF_LEGITIMATE_OFFSET does. */
8362 else if (REG_P (XEXP (x, 0))
8363 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8364 && GET_MODE_SIZE (mode) >= 4
8365 && CONST_INT_P (XEXP (x, 1))
8366 && INTVAL (XEXP (x, 1)) >= 0
8367 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8368 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8369 return 1;
8370
8371 else if (REG_P (XEXP (x, 0))
8372 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8373 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8374 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8375 && REGNO (XEXP (x, 0))
8376 <= LAST_VIRTUAL_POINTER_REGISTER))
8377 && GET_MODE_SIZE (mode) >= 4
8378 && CONST_INT_P (XEXP (x, 1))
8379 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8380 return 1;
8381 }
8382
8383 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8384 && GET_MODE_SIZE (mode) == 4
8385 && GET_CODE (x) == SYMBOL_REF
8386 && CONSTANT_POOL_ADDRESS_P (x)
8387 && ! (flag_pic
8388 && symbol_mentioned_p (get_pool_constant (x))
8389 && ! pcrel_constant_p (get_pool_constant (x))))
8390 return 1;
8391
8392 return 0;
8393 }
8394
8395 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8396 instruction of mode MODE. */
8397 int
8398 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8399 {
8400 switch (GET_MODE_SIZE (mode))
8401 {
8402 case 1:
8403 return val >= 0 && val < 32;
8404
8405 case 2:
8406 return val >= 0 && val < 64 && (val & 1) == 0;
8407
8408 default:
8409 return (val >= 0
8410 && (val + GET_MODE_SIZE (mode)) <= 128
8411 && (val & 3) == 0);
8412 }
8413 }
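
/* Worked illustration: the ranges above correspond to the 16-bit Thumb
   load/store encodings, whose 5-bit immediate is scaled by the access
   size (register numbers are arbitrary):

       ldrb r0, [r1, #31]      -- QImode: 0..31
       ldrh r0, [r1, #62]      -- HImode: 0..62, even
       ldr  r0, [r1, #124]     -- SImode: 0..124, multiple of 4

   so for a four-byte access the test (val + GET_MODE_SIZE (mode)) <= 128
   accepts the offsets 0, 4, ..., 124.  */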
8414
8415 bool
8416 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8417 {
8418 if (TARGET_ARM)
8419 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8420 else if (TARGET_THUMB2)
8421 return thumb2_legitimate_address_p (mode, x, strict_p);
8422 else /* if (TARGET_THUMB1) */
8423 return thumb1_legitimate_address_p (mode, x, strict_p);
8424 }
8425
8426 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8427
8428 Given an rtx X being reloaded into a reg required to be
8429 in class CLASS, return the class of reg to actually use.
8430 In general this is just CLASS, but for the Thumb core registers and
8431 immediate constants we prefer a LO_REGS class or a subset. */
8432
8433 static reg_class_t
8434 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8435 {
8436 if (TARGET_32BIT)
8437 return rclass;
8438 else
8439 {
8440 if (rclass == GENERAL_REGS)
8441 return LO_REGS;
8442 else
8443 return rclass;
8444 }
8445 }
8446
8447 /* Build the SYMBOL_REF for __tls_get_addr. */
8448
8449 static GTY(()) rtx tls_get_addr_libfunc;
8450
8451 static rtx
8452 get_tls_get_addr (void)
8453 {
8454 if (!tls_get_addr_libfunc)
8455 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8456 return tls_get_addr_libfunc;
8457 }
8458
8459 rtx
8460 arm_load_tp (rtx target)
8461 {
8462 if (!target)
8463 target = gen_reg_rtx (SImode);
8464
8465 if (TARGET_HARD_TP)
8466 {
8467 /* Can return in any reg. */
8468 emit_insn (gen_load_tp_hard (target));
8469 }
8470 else
8471 {
8472 /* Always returned in r0. Immediately copy the result into a pseudo,
8473 otherwise other uses of r0 (e.g. setting up function arguments) may
8474 clobber the value. */
8475
8476 rtx tmp;
8477
8478 emit_insn (gen_load_tp_soft ());
8479
8480 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8481 emit_move_insn (target, tmp);
8482 }
8483 return target;
8484 }
8485
8486 static rtx
8487 load_tls_operand (rtx x, rtx reg)
8488 {
8489 rtx tmp;
8490
8491 if (reg == NULL_RTX)
8492 reg = gen_reg_rtx (SImode);
8493
8494 tmp = gen_rtx_CONST (SImode, x);
8495
8496 emit_move_insn (reg, tmp);
8497
8498 return reg;
8499 }
8500
8501 static rtx_insn *
8502 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8503 {
8504 rtx label, labelno, sum;
8505
8506 gcc_assert (reloc != TLS_DESCSEQ);
8507 start_sequence ();
8508
8509 labelno = GEN_INT (pic_labelno++);
8510 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8511 label = gen_rtx_CONST (VOIDmode, label);
8512
8513 sum = gen_rtx_UNSPEC (Pmode,
8514 gen_rtvec (4, x, GEN_INT (reloc), label,
8515 GEN_INT (TARGET_ARM ? 8 : 4)),
8516 UNSPEC_TLS);
8517 reg = load_tls_operand (sum, reg);
8518
8519 if (TARGET_ARM)
8520 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8521 else
8522 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8523
8524 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8525 LCT_PURE, /* LCT_CONST? */
8526 Pmode, reg, Pmode);
8527
8528 rtx_insn *insns = get_insns ();
8529 end_sequence ();
8530
8531 return insns;
8532 }
8533
8534 static rtx
8535 arm_tls_descseq_addr (rtx x, rtx reg)
8536 {
8537 rtx labelno = GEN_INT (pic_labelno++);
8538 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8539 rtx sum = gen_rtx_UNSPEC (Pmode,
8540 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8541 gen_rtx_CONST (VOIDmode, label),
8542 GEN_INT (!TARGET_ARM)),
8543 UNSPEC_TLS);
8544 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8545
8546 emit_insn (gen_tlscall (x, labelno));
8547 if (!reg)
8548 reg = gen_reg_rtx (SImode);
8549 else
8550 gcc_assert (REGNO (reg) != R0_REGNUM);
8551
8552 emit_move_insn (reg, reg0);
8553
8554 return reg;
8555 }
8556
8557 rtx
8558 legitimize_tls_address (rtx x, rtx reg)
8559 {
8560 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8561 rtx_insn *insns;
8562 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8563
8564 switch (model)
8565 {
8566 case TLS_MODEL_GLOBAL_DYNAMIC:
8567 if (TARGET_GNU2_TLS)
8568 {
8569 reg = arm_tls_descseq_addr (x, reg);
8570
8571 tp = arm_load_tp (NULL_RTX);
8572
8573 dest = gen_rtx_PLUS (Pmode, tp, reg);
8574 }
8575 else
8576 {
8577 /* Original scheme */
8578 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8579 dest = gen_reg_rtx (Pmode);
8580 emit_libcall_block (insns, dest, ret, x);
8581 }
8582 return dest;
8583
8584 case TLS_MODEL_LOCAL_DYNAMIC:
8585 if (TARGET_GNU2_TLS)
8586 {
8587 reg = arm_tls_descseq_addr (x, reg);
8588
8589 tp = arm_load_tp (NULL_RTX);
8590
8591 dest = gen_rtx_PLUS (Pmode, tp, reg);
8592 }
8593 else
8594 {
8595 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8596
8597 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8598 share the LDM result with other LD model accesses. */
8599 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8600 UNSPEC_TLS);
8601 dest = gen_reg_rtx (Pmode);
8602 emit_libcall_block (insns, dest, ret, eqv);
8603
8604 /* Load the addend. */
8605 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8606 GEN_INT (TLS_LDO32)),
8607 UNSPEC_TLS);
8608 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8609 dest = gen_rtx_PLUS (Pmode, dest, addend);
8610 }
8611 return dest;
8612
8613 case TLS_MODEL_INITIAL_EXEC:
8614 labelno = GEN_INT (pic_labelno++);
8615 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8616 label = gen_rtx_CONST (VOIDmode, label);
8617 sum = gen_rtx_UNSPEC (Pmode,
8618 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8619 GEN_INT (TARGET_ARM ? 8 : 4)),
8620 UNSPEC_TLS);
8621 reg = load_tls_operand (sum, reg);
8622
8623 if (TARGET_ARM)
8624 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8625 else if (TARGET_THUMB2)
8626 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8627 else
8628 {
8629 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8630 emit_move_insn (reg, gen_const_mem (SImode, reg));
8631 }
8632
8633 tp = arm_load_tp (NULL_RTX);
8634
8635 return gen_rtx_PLUS (Pmode, tp, reg);
8636
8637 case TLS_MODEL_LOCAL_EXEC:
8638 tp = arm_load_tp (NULL_RTX);
8639
8640 reg = gen_rtx_UNSPEC (Pmode,
8641 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8642 UNSPEC_TLS);
8643 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8644
8645 return gen_rtx_PLUS (Pmode, tp, reg);
8646
8647 default:
8648 abort ();
8649 }
8650 }
8651
8652 /* Try machine-dependent ways of modifying an illegitimate address
8653 to be legitimate. If we find one, return the new, valid address. */
8654 rtx
8655 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8656 {
8657 if (arm_tls_referenced_p (x))
8658 {
8659 rtx addend = NULL;
8660
8661 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8662 {
8663 addend = XEXP (XEXP (x, 0), 1);
8664 x = XEXP (XEXP (x, 0), 0);
8665 }
8666
8667 if (GET_CODE (x) != SYMBOL_REF)
8668 return x;
8669
8670 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8671
8672 x = legitimize_tls_address (x, NULL_RTX);
8673
8674 if (addend)
8675 {
8676 x = gen_rtx_PLUS (SImode, x, addend);
8677 orig_x = x;
8678 }
8679 else
8680 return x;
8681 }
8682
8683 if (!TARGET_ARM)
8684 {
8685 /* TODO: legitimize_address for Thumb2. */
8686 if (TARGET_THUMB2)
8687 return x;
8688 return thumb_legitimize_address (x, orig_x, mode);
8689 }
8690
8691 if (GET_CODE (x) == PLUS)
8692 {
8693 rtx xop0 = XEXP (x, 0);
8694 rtx xop1 = XEXP (x, 1);
8695
8696 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8697 xop0 = force_reg (SImode, xop0);
8698
8699 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8700 && !symbol_mentioned_p (xop1))
8701 xop1 = force_reg (SImode, xop1);
8702
8703 if (ARM_BASE_REGISTER_RTX_P (xop0)
8704 && CONST_INT_P (xop1))
8705 {
8706 HOST_WIDE_INT n, low_n;
8707 rtx base_reg, val;
8708 n = INTVAL (xop1);
8709
8710 /* VFP addressing modes actually allow greater offsets, but for
8711 now we just stick with the lowest common denominator. */
8712 if (mode == DImode || mode == DFmode)
8713 {
8714 low_n = n & 0x0f;
8715 n &= ~0x0f;
8716 if (low_n > 4)
8717 {
8718 n += 16;
8719 low_n -= 16;
8720 }
8721 }
8722 else
8723 {
8724 low_n = ((mode) == TImode ? 0
8725 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8726 n -= low_n;
8727 }
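/* For example, a DImode access at base + 0x107 becomes (base + 0x110) - 9,
and an SImode access at base + 0x1234 becomes (base + 0x1000) + 0x234.  */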
8728
8729 base_reg = gen_reg_rtx (SImode);
8730 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8731 emit_move_insn (base_reg, val);
8732 x = plus_constant (Pmode, base_reg, low_n);
8733 }
8734 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8735 x = gen_rtx_PLUS (SImode, xop0, xop1);
8736 }
8737
8738 /* XXX We don't allow MINUS any more -- see comment in
8739 arm_legitimate_address_outer_p (). */
8740 else if (GET_CODE (x) == MINUS)
8741 {
8742 rtx xop0 = XEXP (x, 0);
8743 rtx xop1 = XEXP (x, 1);
8744
8745 if (CONSTANT_P (xop0))
8746 xop0 = force_reg (SImode, xop0);
8747
8748 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8749 xop1 = force_reg (SImode, xop1);
8750
8751 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8752 x = gen_rtx_MINUS (SImode, xop0, xop1);
8753 }
8754
8755 /* Make sure to take full advantage of the pre-indexed addressing mode
8756 with absolute addresses, which often allows the base register to be
8757 factorized across multiple adjacent memory references and might even
8758 allow the minipool to be avoided entirely. */
8759 else if (CONST_INT_P (x) && optimize > 0)
8760 {
8761 unsigned int bits;
8762 HOST_WIDE_INT mask, base, index;
8763 rtx base_reg;
8764
8765 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8766 use an 8-bit index. So let's use a 12-bit index for SImode only and
8767 hope that arm_gen_constant will enable ldrb to use more bits. */
8768 bits = (mode == SImode) ? 12 : 8;
8769 mask = (1 << bits) - 1;
8770 base = INTVAL (x) & ~mask;
8771 index = INTVAL (x) & mask;
8772 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8773 {
8774 /* It'll most probably be more efficient to generate the base
8775 with more bits set and use a negative index instead. */
8776 base |= mask;
8777 index -= mask;
8778 }
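/* For example, an SImode load from absolute address 0x1fff is split into
base 0x1000 plus index 0xfff; a base with many bits set is instead biased
upwards so that a small negative index can be used.  */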
8779 base_reg = force_reg (SImode, GEN_INT (base));
8780 x = plus_constant (Pmode, base_reg, index);
8781 }
8782
8783 if (flag_pic)
8784 {
8785 /* We need to find and carefully transform any SYMBOL and LABEL
8786 references, so go back to the original address expression. */
8787 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
8788 false /*compute_now*/);
8789
8790 if (new_x != orig_x)
8791 x = new_x;
8792 }
8793
8794 return x;
8795 }
8796
8797
8798 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8799 to be legitimate. If we find one, return the new, valid address. */
8800 rtx
8801 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8802 {
8803 if (GET_CODE (x) == PLUS
8804 && CONST_INT_P (XEXP (x, 1))
8805 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8806 || INTVAL (XEXP (x, 1)) < 0))
8807 {
8808 rtx xop0 = XEXP (x, 0);
8809 rtx xop1 = XEXP (x, 1);
8810 HOST_WIDE_INT offset = INTVAL (xop1);
8811
8812 /* Try and fold the offset into a biasing of the base register and
8813 then offsetting that. Don't do this when optimizing for space
8814 since it can cause too many CSEs. */
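/* For instance, on the rebiasing path below a QImode access at base + 280
becomes (base + 255) + 25, so that the remaining offset fits the small
immediate-offset range of the Thumb-1 load/store instructions.  */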
8815 if (optimize_size && offset >= 0
8816 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8817 {
8818 HOST_WIDE_INT delta;
8819
8820 if (offset >= 256)
8821 delta = offset - (256 - GET_MODE_SIZE (mode));
8822 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8823 delta = 31 * GET_MODE_SIZE (mode);
8824 else
8825 delta = offset & (~31 * GET_MODE_SIZE (mode));
8826
8827 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8828 NULL_RTX);
8829 x = plus_constant (Pmode, xop0, delta);
8830 }
8831 else if (offset < 0 && offset > -256)
8832 /* Small negative offsets are best done with a subtract before the
8833 dereference; forcing these into a register normally takes two
8834 instructions. */
8835 x = force_operand (x, NULL_RTX);
8836 else
8837 {
8838 /* For the remaining cases, force the constant into a register. */
8839 xop1 = force_reg (SImode, xop1);
8840 x = gen_rtx_PLUS (SImode, xop0, xop1);
8841 }
8842 }
8843 else if (GET_CODE (x) == PLUS
8844 && s_register_operand (XEXP (x, 1), SImode)
8845 && !s_register_operand (XEXP (x, 0), SImode))
8846 {
8847 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8848
8849 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8850 }
8851
8852 if (flag_pic)
8853 {
8854 /* We need to find and carefully transform any SYMBOL and LABEL
8855 references, so go back to the original address expression. */
8856 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
8857 false /*compute_now*/);
8858
8859 if (new_x != orig_x)
8860 x = new_x;
8861 }
8862
8863 return x;
8864 }
8865
8866 /* Return TRUE if X contains any TLS symbol references. */
8867
8868 bool
8869 arm_tls_referenced_p (rtx x)
8870 {
8871 if (! TARGET_HAVE_TLS)
8872 return false;
8873
8874 subrtx_iterator::array_type array;
8875 FOR_EACH_SUBRTX (iter, array, x, ALL)
8876 {
8877 const_rtx x = *iter;
8878 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8879 {
8880 /* ARM currently does not provide relocations to encode TLS variables
8881 into AArch32 instructions, only data, so there is no way to
8882 implement these if a literal pool is disabled. */
8883 if (arm_disable_literal_pool)
8884 sorry ("accessing thread-local storage is not currently supported "
8885 "with -mpure-code or -mslow-flash-data");
8886
8887 return true;
8888 }
8889
8890 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8891 TLS offsets, not real symbol references. */
8892 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8893 iter.skip_subrtxes ();
8894 }
8895 return false;
8896 }
8897
8898 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8899
8900 On the ARM, allow any integer (invalid ones are removed later by insn
8901 patterns), nice doubles and symbol_refs which refer to the function's
8902 constant pool XXX.
8903
8904 When generating pic allow anything. */
8905
8906 static bool
8907 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8908 {
8909 return flag_pic || !label_mentioned_p (x);
8910 }
8911
8912 static bool
8913 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8914 {
8915 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8916 RTXs. These must therefore be allowed for Thumb-1 so that, when run
8917 for ARMv8-M Baseline or later, the result is valid. */
8918 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8919 x = XEXP (x, 0);
8920
8921 return (CONST_INT_P (x)
8922 || CONST_DOUBLE_P (x)
8923 || CONSTANT_ADDRESS_P (x)
8924 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8925 || flag_pic);
8926 }
8927
8928 static bool
8929 arm_legitimate_constant_p (machine_mode mode, rtx x)
8930 {
8931 return (!arm_cannot_force_const_mem (mode, x)
8932 && (TARGET_32BIT
8933 ? arm_legitimate_constant_p_1 (mode, x)
8934 : thumb_legitimate_constant_p (mode, x)));
8935 }
8936
8937 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8938
8939 static bool
8940 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8941 {
8942 rtx base, offset;
8943
8944 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8945 {
8946 split_const (x, &base, &offset);
8947 if (GET_CODE (base) == SYMBOL_REF
8948 && !offset_within_block_p (base, INTVAL (offset)))
8949 return true;
8950 }
8951 return arm_tls_referenced_p (x);
8952 }
8953 \f
8954 #define REG_OR_SUBREG_REG(X) \
8955 (REG_P (X) \
8956 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8957
8958 #define REG_OR_SUBREG_RTX(X) \
8959 (REG_P (X) ? (X) : SUBREG_REG (X))
8960
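/* Estimate the execution cost of expression X for Thumb-1, where CODE is
GET_CODE (X) and OUTER is the code of the containing expression.  Used when
costing for speed; see thumb1_size_rtx_costs for the size variant.  */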
8961 static inline int
8962 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8963 {
8964 machine_mode mode = GET_MODE (x);
8965 int total, words;
8966
8967 switch (code)
8968 {
8969 case ASHIFT:
8970 case ASHIFTRT:
8971 case LSHIFTRT:
8972 case ROTATERT:
8973 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8974
8975 case PLUS:
8976 case MINUS:
8977 case COMPARE:
8978 case NEG:
8979 case NOT:
8980 return COSTS_N_INSNS (1);
8981
8982 case MULT:
8983 if (arm_arch6m && arm_m_profile_small_mul)
8984 return COSTS_N_INSNS (32);
8985
8986 if (CONST_INT_P (XEXP (x, 1)))
8987 {
8988 int cycles = 0;
8989 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8990
8991 while (i)
8992 {
8993 i >>= 2;
8994 cycles++;
8995 }
8996 return COSTS_N_INSNS (2) + cycles;
8997 }
8998 return COSTS_N_INSNS (1) + 16;
8999
9000 case SET:
9001 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9002 the mode. */
9003 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9004 return (COSTS_N_INSNS (words)
9005 + 4 * ((MEM_P (SET_SRC (x)))
9006 + MEM_P (SET_DEST (x))));
9007
9008 case CONST_INT:
9009 if (outer == SET)
9010 {
9011 if (UINTVAL (x) < 256
9012 /* 16-bit constant. */
9013 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9014 return 0;
9015 if (thumb_shiftable_const (INTVAL (x)))
9016 return COSTS_N_INSNS (2);
9017 return COSTS_N_INSNS (3);
9018 }
9019 else if ((outer == PLUS || outer == COMPARE)
9020 && INTVAL (x) < 256 && INTVAL (x) > -256)
9021 return 0;
9022 else if ((outer == IOR || outer == XOR || outer == AND)
9023 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9024 return COSTS_N_INSNS (1);
9025 else if (outer == AND)
9026 {
9027 int i;
9028 /* This duplicates the tests in the andsi3 expander. */
9029 for (i = 9; i <= 31; i++)
9030 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9031 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9032 return COSTS_N_INSNS (2);
9033 }
9034 else if (outer == ASHIFT || outer == ASHIFTRT
9035 || outer == LSHIFTRT)
9036 return 0;
9037 return COSTS_N_INSNS (2);
9038
9039 case CONST:
9040 case CONST_DOUBLE:
9041 case LABEL_REF:
9042 case SYMBOL_REF:
9043 return COSTS_N_INSNS (3);
9044
9045 case UDIV:
9046 case UMOD:
9047 case DIV:
9048 case MOD:
9049 return 100;
9050
9051 case TRUNCATE:
9052 return 99;
9053
9054 case AND:
9055 case XOR:
9056 case IOR:
9057 /* XXX guess. */
9058 return 8;
9059
9060 case MEM:
9061 /* XXX another guess. */
9062 /* Memory costs quite a lot for the first word, but subsequent words
9063 load at the equivalent of a single insn each. */
9064 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9065 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9066 ? 4 : 0));
9067
9068 case IF_THEN_ELSE:
9069 /* XXX a guess. */
9070 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9071 return 14;
9072 return 2;
9073
9074 case SIGN_EXTEND:
9075 case ZERO_EXTEND:
9076 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9077 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9078
9079 if (mode == SImode)
9080 return total;
9081
9082 if (arm_arch6)
9083 return total + COSTS_N_INSNS (1);
9084
9085 /* Assume a two-shift sequence. Increase the cost slightly so
9086 we prefer actual shifts over an extend operation. */
9087 return total + 1 + COSTS_N_INSNS (2);
9088
9089 default:
9090 return 99;
9091 }
9092 }
9093
9094 /* Estimate the size cost of Thumb-1 instructions.
9095 For now most of the code is copied from thumb1_rtx_costs. We need more
9096 fine-grained tuning when we have more related test cases. */
9097 static inline int
9098 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9099 {
9100 machine_mode mode = GET_MODE (x);
9101 int words, cost;
9102
9103 switch (code)
9104 {
9105 case ASHIFT:
9106 case ASHIFTRT:
9107 case LSHIFTRT:
9108 case ROTATERT:
9109 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9110
9111 case PLUS:
9112 case MINUS:
9113 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
9114 shiftsub1 patterns generated by RTL expansion, especially when
9115 expanding multiplication. */
9116 if ((GET_CODE (XEXP (x, 0)) == MULT
9117 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9118 || (GET_CODE (XEXP (x, 1)) == MULT
9119 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9120 return COSTS_N_INSNS (2);
9121 /* Fall through. */
9122 case COMPARE:
9123 case NEG:
9124 case NOT:
9125 return COSTS_N_INSNS (1);
9126
9127 case MULT:
9128 if (CONST_INT_P (XEXP (x, 1)))
9129 {
9130 /* The Thumb-1 mul instruction can't operate on a constant; we must
9131 load it into a register first. */
9132 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9133 /* For targets that have a very small, high-latency multiply unit, we
9134 prefer to synthesize the mult with up to 5 instructions, giving a
9135 good balance between size and performance. */
9136 if (arm_arch6m && arm_m_profile_small_mul)
9137 return COSTS_N_INSNS (5);
9138 else
9139 return COSTS_N_INSNS (1) + const_size;
9140 }
9141 return COSTS_N_INSNS (1);
9142
9143 case SET:
9144 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9145 the mode. */
9146 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9147 cost = COSTS_N_INSNS (words);
9148 if (satisfies_constraint_J (SET_SRC (x))
9149 || satisfies_constraint_K (SET_SRC (x))
9150 /* Too big an immediate for a 2-byte mov, using MOVT. */
9151 || (CONST_INT_P (SET_SRC (x))
9152 && UINTVAL (SET_SRC (x)) >= 256
9153 && TARGET_HAVE_MOVT
9154 && satisfies_constraint_j (SET_SRC (x)))
9155 /* thumb1_movdi_insn. */
9156 || ((words > 1) && MEM_P (SET_SRC (x))))
9157 cost += COSTS_N_INSNS (1);
9158 return cost;
9159
9160 case CONST_INT:
9161 if (outer == SET)
9162 {
9163 if (UINTVAL (x) < 256)
9164 return COSTS_N_INSNS (1);
9165 /* movw is 4 bytes long. */
9166 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9167 return COSTS_N_INSNS (2);
9168 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9169 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9170 return COSTS_N_INSNS (2);
9171 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9172 if (thumb_shiftable_const (INTVAL (x)))
9173 return COSTS_N_INSNS (2);
9174 return COSTS_N_INSNS (3);
9175 }
9176 else if ((outer == PLUS || outer == COMPARE)
9177 && INTVAL (x) < 256 && INTVAL (x) > -256)
9178 return 0;
9179 else if ((outer == IOR || outer == XOR || outer == AND)
9180 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9181 return COSTS_N_INSNS (1);
9182 else if (outer == AND)
9183 {
9184 int i;
9185 /* This duplicates the tests in the andsi3 expander. */
9186 for (i = 9; i <= 31; i++)
9187 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9188 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9189 return COSTS_N_INSNS (2);
9190 }
9191 else if (outer == ASHIFT || outer == ASHIFTRT
9192 || outer == LSHIFTRT)
9193 return 0;
9194 return COSTS_N_INSNS (2);
9195
9196 case CONST:
9197 case CONST_DOUBLE:
9198 case LABEL_REF:
9199 case SYMBOL_REF:
9200 return COSTS_N_INSNS (3);
9201
9202 case UDIV:
9203 case UMOD:
9204 case DIV:
9205 case MOD:
9206 return 100;
9207
9208 case TRUNCATE:
9209 return 99;
9210
9211 case AND:
9212 case XOR:
9213 case IOR:
9214 return COSTS_N_INSNS (1);
9215
9216 case MEM:
9217 return (COSTS_N_INSNS (1)
9218 + COSTS_N_INSNS (1)
9219 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9220 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9221 ? COSTS_N_INSNS (1) : 0));
9222
9223 case IF_THEN_ELSE:
9224 /* XXX a guess. */
9225 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9226 return 14;
9227 return 2;
9228
9229 case ZERO_EXTEND:
9230 /* XXX still guessing. */
9231 switch (GET_MODE (XEXP (x, 0)))
9232 {
9233 case E_QImode:
9234 return (1 + (mode == DImode ? 4 : 0)
9235 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9236
9237 case E_HImode:
9238 return (4 + (mode == DImode ? 4 : 0)
9239 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9240
9241 case E_SImode:
9242 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9243
9244 default:
9245 return 99;
9246 }
9247
9248 default:
9249 return 99;
9250 }
9251 }
9252
9253 /* Helper function for arm_rtx_costs. If OP is a valid shifter operand,
9254 then return the operand that is being shifted. If the shift is not
9255 by a constant, then set *SHIFT_REG to the register holding the shift
9256 amount. Return NULL if OP is not a shifter operand. */
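/* For example, (mult (reg) (const_int 4)) is treated as (reg) shifted left
by two, and (ashift (reg A) (reg B)) returns A with *SHIFT_REG set to B.  */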
9257 static rtx
9258 shifter_op_p (rtx op, rtx *shift_reg)
9259 {
9260 enum rtx_code code = GET_CODE (op);
9261
9262 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9263 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9264 return XEXP (op, 0);
9265 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9266 return XEXP (op, 0);
9267 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9268 || code == ASHIFTRT)
9269 {
9270 if (!CONST_INT_P (XEXP (op, 1)))
9271 *shift_reg = XEXP (op, 1);
9272 return XEXP (op, 0);
9273 }
9274
9275 return NULL;
9276 }
9277
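/* Cost helper for UNSPEC and UNSPEC_VOLATILE expressions.  Sets *COST for
the cases modelled here (unaligned loads and stores and the VRINT family)
and gives any other unspec a default cost of two insns.  Always returns
true.  */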
9278 static bool
9279 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9280 {
9281 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9282 rtx_code code = GET_CODE (x);
9283 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9284
9285 switch (XINT (x, 1))
9286 {
9287 case UNSPEC_UNALIGNED_LOAD:
9288 /* We can only do unaligned loads into the integer unit, and we can't
9289 use LDM or LDRD. */
9290 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9291 if (speed_p)
9292 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9293 + extra_cost->ldst.load_unaligned);
9294
9295 #ifdef NOT_YET
9296 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9297 ADDR_SPACE_GENERIC, speed_p);
9298 #endif
9299 return true;
9300
9301 case UNSPEC_UNALIGNED_STORE:
9302 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9303 if (speed_p)
9304 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9305 + extra_cost->ldst.store_unaligned);
9306
9307 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9308 #ifdef NOT_YET
9309 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9310 ADDR_SPACE_GENERIC, speed_p);
9311 #endif
9312 return true;
9313
9314 case UNSPEC_VRINTZ:
9315 case UNSPEC_VRINTP:
9316 case UNSPEC_VRINTM:
9317 case UNSPEC_VRINTR:
9318 case UNSPEC_VRINTX:
9319 case UNSPEC_VRINTA:
9320 if (speed_p)
9321 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9322
9323 return true;
9324 default:
9325 *cost = COSTS_N_INSNS (2);
9326 break;
9327 }
9328 return true;
9329 }
9330
9331 /* Cost of a libcall. We assume one insn per argument, an amount for the
9332 call (one insn for -Os) and then one for processing the result. */
9333 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
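/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing for
speed and COSTS_N_INSNS (4) when optimizing for size.  */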
9334
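/* Helper for the PLUS and MINUS cases below: if operand IDX of X is a
left-shift (or an equivalent multiply by a power of two), add the cost of
an arithmetic operation with a shifted operand, plus the costs of the
shifted value, any shift-amount register and the other operand, then
return true from the containing function.  */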
9335 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9336 do \
9337 { \
9338 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9339 if (shift_op != NULL \
9340 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9341 { \
9342 if (shift_reg) \
9343 { \
9344 if (speed_p) \
9345 *cost += extra_cost->alu.arith_shift_reg; \
9346 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9347 ASHIFT, 1, speed_p); \
9348 } \
9349 else if (speed_p) \
9350 *cost += extra_cost->alu.arith_shift; \
9351 \
9352 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9353 ASHIFT, 0, speed_p) \
9354 + rtx_cost (XEXP (x, 1 - IDX), \
9355 GET_MODE (shift_op), \
9356 OP, 1, speed_p)); \
9357 return true; \
9358 } \
9359 } \
9360 while (0)
9361
9362 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9363 considering the costs of the addressing mode and memory access
9364 separately. */
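/* For example, when costing for speed a DFmode load through a POST_MODIFY
address is COSTS_N_INSNS (1) plus the fp[AMO_WB] addressing-mode cost plus
extra_cost->ldst.loadd.  */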
9365 static bool
9366 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9367 int *cost, bool speed_p)
9368 {
9369 machine_mode mode = GET_MODE (x);
9370
9371 *cost = COSTS_N_INSNS (1);
9372
9373 if (flag_pic
9374 && GET_CODE (XEXP (x, 0)) == PLUS
9375 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9376 /* This will be split into two instructions. Add the cost of the
9377 additional instruction here. The cost of the memory access is computed
9378 below. See arm.md:calculate_pic_address. */
9379 *cost += COSTS_N_INSNS (1);
9380
9381 /* Calculate cost of the addressing mode. */
9382 if (speed_p)
9383 {
9384 arm_addr_mode_op op_type;
9385 switch (GET_CODE (XEXP (x, 0)))
9386 {
9387 default:
9388 case REG:
9389 op_type = AMO_DEFAULT;
9390 break;
9391 case MINUS:
9392 /* MINUS does not appear in RTL, but the architecture supports it,
9393 so handle this case defensively. */
9394 /* fall through */
9395 case PLUS:
9396 op_type = AMO_NO_WB;
9397 break;
9398 case PRE_INC:
9399 case PRE_DEC:
9400 case POST_INC:
9401 case POST_DEC:
9402 case PRE_MODIFY:
9403 case POST_MODIFY:
9404 op_type = AMO_WB;
9405 break;
9406 }
9407
9408 if (VECTOR_MODE_P (mode))
9409 *cost += current_tune->addr_mode_costs->vector[op_type];
9410 else if (FLOAT_MODE_P (mode))
9411 *cost += current_tune->addr_mode_costs->fp[op_type];
9412 else
9413 *cost += current_tune->addr_mode_costs->integer[op_type];
9414 }
9415
9416 /* Calculate cost of memory access. */
9417 if (speed_p)
9418 {
9419 if (FLOAT_MODE_P (mode))
9420 {
9421 if (GET_MODE_SIZE (mode) == 8)
9422 *cost += extra_cost->ldst.loadd;
9423 else
9424 *cost += extra_cost->ldst.loadf;
9425 }
9426 else if (VECTOR_MODE_P (mode))
9427 *cost += extra_cost->ldst.loadv;
9428 else
9429 {
9430 /* Integer modes */
9431 if (GET_MODE_SIZE (mode) == 8)
9432 *cost += extra_cost->ldst.ldrd;
9433 else
9434 *cost += extra_cost->ldst.load;
9435 }
9436 }
9437
9438 return true;
9439 }
9440
9441 /* RTX costs. Make an estimate of the cost of executing the operation
9442 X, which is contained within an operation with code OUTER_CODE.
9443 SPEED_P indicates whether the cost desired is the performance cost,
9444 or the size cost. The estimate is stored in COST and the return
9445 value is TRUE if the cost calculation is final, or FALSE if the
9446 caller should recurse through the operands of X to add additional
9447 costs.
9448
9449 We currently make no attempt to model the size savings of Thumb-2
9450 16-bit instructions. At the normal points in compilation where
9451 this code is called we have no measure of whether the condition
9452 flags are live or not, and thus no realistic way to determine what
9453 the size will eventually be. */
9454 static bool
9455 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9456 const struct cpu_cost_table *extra_cost,
9457 int *cost, bool speed_p)
9458 {
9459 machine_mode mode = GET_MODE (x);
9460
9461 *cost = COSTS_N_INSNS (1);
9462
9463 if (TARGET_THUMB1)
9464 {
9465 if (speed_p)
9466 *cost = thumb1_rtx_costs (x, code, outer_code);
9467 else
9468 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9469 return true;
9470 }
9471
9472 switch (code)
9473 {
9474 case SET:
9475 *cost = 0;
9476 /* SET RTXs don't have a mode so we get it from the destination. */
9477 mode = GET_MODE (SET_DEST (x));
9478
9479 if (REG_P (SET_SRC (x))
9480 && REG_P (SET_DEST (x)))
9481 {
9482 /* Assume that most copies can be done with a single insn,
9483 unless we don't have HW FP, in which case everything
9484 larger than word mode will require two insns. */
9485 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9486 && GET_MODE_SIZE (mode) > 4)
9487 || mode == DImode)
9488 ? 2 : 1);
9489 /* Conditional register moves can be encoded
9490 in 16 bits in Thumb mode. */
9491 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9492 *cost >>= 1;
9493
9494 return true;
9495 }
9496
9497 if (CONST_INT_P (SET_SRC (x)))
9498 {
9499 /* Handle CONST_INT here, since the value doesn't have a mode
9500 and we would otherwise be unable to work out the true cost. */
9501 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9502 0, speed_p);
9503 outer_code = SET;
9504 /* Slightly lower the cost of setting a core reg to a constant.
9505 This helps break up chains and allows for better scheduling. */
9506 if (REG_P (SET_DEST (x))
9507 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9508 *cost -= 1;
9509 x = SET_SRC (x);
9510 /* Moves of an immediate in the range [0, 255] can be encoded in
9511 16 bits in Thumb mode. */
9512 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9513 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9514 *cost >>= 1;
9515 goto const_int_cost;
9516 }
9517
9518 return false;
9519
9520 case MEM:
9521 return arm_mem_costs (x, extra_cost, cost, speed_p);
9522
9523 case PARALLEL:
9524 {
9525 /* Calculations of LDM costs are complex. We assume an initial cost
9526 (ldm_1st) which will load the number of registers mentioned in
9527 ldm_regs_per_insn_1st registers; then each additional
9528 ldm_regs_per_insn_subsequent registers cost one more insn. The
9529 formula for N regs is thus:
9530
9531 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9532 + ldm_regs_per_insn_subsequent - 1)
9533 / ldm_regs_per_insn_subsequent).
9534
9535 Additional costs may also be added for addressing. A similar
9536 formula is used for STM. */
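/* For example, with ldm_regs_per_insn_1st == 2 and
ldm_regs_per_insn_subsequent == 2, a five-register LDM adds
ldm_regs_per_insn_1st plus COSTS_N_INSNS (2) to the base cost.  */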
9537
9538 bool is_ldm = load_multiple_operation (x, SImode);
9539 bool is_stm = store_multiple_operation (x, SImode);
9540
9541 if (is_ldm || is_stm)
9542 {
9543 if (speed_p)
9544 {
9545 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9546 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9547 ? extra_cost->ldst.ldm_regs_per_insn_1st
9548 : extra_cost->ldst.stm_regs_per_insn_1st;
9549 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9550 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9551 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9552
9553 *cost += regs_per_insn_1st
9554 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9555 + regs_per_insn_sub - 1)
9556 / regs_per_insn_sub);
9557 return true;
9558 }
9559
9560 }
9561 return false;
9562 }
9563 case DIV:
9564 case UDIV:
9565 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9566 && (mode == SFmode || !TARGET_VFP_SINGLE))
9567 *cost += COSTS_N_INSNS (speed_p
9568 ? extra_cost->fp[mode != SFmode].div : 0);
9569 else if (mode == SImode && TARGET_IDIV)
9570 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9571 else
9572 *cost = LIBCALL_COST (2);
9573
9574 /* Make the cost of sdiv more expensive so that udiv is preferred when
9575 both sdiv and udiv are possible. */
9576 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9577 return false; /* All arguments must be in registers. */
9578
9579 case MOD:
9580 /* MOD by a power of 2 can be expanded as:
9581 rsbs r1, r0, #0
9582 and r0, r0, #(n - 1)
9583 and r1, r1, #(n - 1)
9584 rsbpl r0, r1, #0. */
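/* For example, with n == 8 this sequence leaves r0 == -5 for an input of
-5 and r0 == 5 for an input of 5, matching C's truncated-division
semantics.  */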
9585 if (CONST_INT_P (XEXP (x, 1))
9586 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9587 && mode == SImode)
9588 {
9589 *cost += COSTS_N_INSNS (3);
9590
9591 if (speed_p)
9592 *cost += 2 * extra_cost->alu.logical
9593 + extra_cost->alu.arith;
9594 return true;
9595 }
9596
9597 /* Fall-through. */
9598 case UMOD:
9599 /* Make the cost of sdiv more expensive so that udiv is preferred when
9600 both sdiv and udiv are possible. */
9601 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9602 return false; /* All arguments must be in registers. */
9603
9604 case ROTATE:
9605 if (mode == SImode && REG_P (XEXP (x, 1)))
9606 {
9607 *cost += (COSTS_N_INSNS (1)
9608 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9609 if (speed_p)
9610 *cost += extra_cost->alu.shift_reg;
9611 return true;
9612 }
9613 /* Fall through */
9614 case ROTATERT:
9615 case ASHIFT:
9616 case LSHIFTRT:
9617 case ASHIFTRT:
9618 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9619 {
9620 *cost += (COSTS_N_INSNS (2)
9621 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9622 if (speed_p)
9623 *cost += 2 * extra_cost->alu.shift;
9624 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9625 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9626 *cost += 1;
9627 return true;
9628 }
9629 else if (mode == SImode)
9630 {
9631 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9632 /* Slightly disparage register shifts at -Os, but not by much. */
9633 if (!CONST_INT_P (XEXP (x, 1)))
9634 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9635 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9636 return true;
9637 }
9638 else if (GET_MODE_CLASS (mode) == MODE_INT
9639 && GET_MODE_SIZE (mode) < 4)
9640 {
9641 if (code == ASHIFT)
9642 {
9643 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9644 /* Slightly disparage register shifts at -Os, but not by
9645 much. */
9646 if (!CONST_INT_P (XEXP (x, 1)))
9647 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9648 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9649 }
9650 else if (code == LSHIFTRT || code == ASHIFTRT)
9651 {
9652 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9653 {
9654 /* Can use SBFX/UBFX. */
9655 if (speed_p)
9656 *cost += extra_cost->alu.bfx;
9657 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9658 }
9659 else
9660 {
9661 *cost += COSTS_N_INSNS (1);
9662 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9663 if (speed_p)
9664 {
9665 if (CONST_INT_P (XEXP (x, 1)))
9666 *cost += 2 * extra_cost->alu.shift;
9667 else
9668 *cost += (extra_cost->alu.shift
9669 + extra_cost->alu.shift_reg);
9670 }
9671 else
9672 /* Slightly disparage register shifts. */
9673 *cost += !CONST_INT_P (XEXP (x, 1));
9674 }
9675 }
9676 else /* Rotates. */
9677 {
9678 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9679 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9680 if (speed_p)
9681 {
9682 if (CONST_INT_P (XEXP (x, 1)))
9683 *cost += (2 * extra_cost->alu.shift
9684 + extra_cost->alu.log_shift);
9685 else
9686 *cost += (extra_cost->alu.shift
9687 + extra_cost->alu.shift_reg
9688 + extra_cost->alu.log_shift_reg);
9689 }
9690 }
9691 return true;
9692 }
9693
9694 *cost = LIBCALL_COST (2);
9695 return false;
9696
9697 case BSWAP:
9698 if (arm_arch6)
9699 {
9700 if (mode == SImode)
9701 {
9702 if (speed_p)
9703 *cost += extra_cost->alu.rev;
9704
9705 return false;
9706 }
9707 }
9708 else
9709 {
9710 /* No rev instruction available. Look at arm_legacy_rev
9711 and thumb_legacy_rev for the form of RTL used then. */
9712 if (TARGET_THUMB)
9713 {
9714 *cost += COSTS_N_INSNS (9);
9715
9716 if (speed_p)
9717 {
9718 *cost += 6 * extra_cost->alu.shift;
9719 *cost += 3 * extra_cost->alu.logical;
9720 }
9721 }
9722 else
9723 {
9724 *cost += COSTS_N_INSNS (4);
9725
9726 if (speed_p)
9727 {
9728 *cost += 2 * extra_cost->alu.shift;
9729 *cost += extra_cost->alu.arith_shift;
9730 *cost += 2 * extra_cost->alu.logical;
9731 }
9732 }
9733 return true;
9734 }
9735 return false;
9736
9737 case MINUS:
9738 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9739 && (mode == SFmode || !TARGET_VFP_SINGLE))
9740 {
9741 if (GET_CODE (XEXP (x, 0)) == MULT
9742 || GET_CODE (XEXP (x, 1)) == MULT)
9743 {
9744 rtx mul_op0, mul_op1, sub_op;
9745
9746 if (speed_p)
9747 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9748
9749 if (GET_CODE (XEXP (x, 0)) == MULT)
9750 {
9751 mul_op0 = XEXP (XEXP (x, 0), 0);
9752 mul_op1 = XEXP (XEXP (x, 0), 1);
9753 sub_op = XEXP (x, 1);
9754 }
9755 else
9756 {
9757 mul_op0 = XEXP (XEXP (x, 1), 0);
9758 mul_op1 = XEXP (XEXP (x, 1), 1);
9759 sub_op = XEXP (x, 0);
9760 }
9761
9762 /* The first operand of the multiply may optionally be
9763 negated. */
9764 if (GET_CODE (mul_op0) == NEG)
9765 mul_op0 = XEXP (mul_op0, 0);
9766
9767 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9768 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9769 + rtx_cost (sub_op, mode, code, 0, speed_p));
9770
9771 return true;
9772 }
9773
9774 if (speed_p)
9775 *cost += extra_cost->fp[mode != SFmode].addsub;
9776 return false;
9777 }
9778
9779 if (mode == SImode)
9780 {
9781 rtx shift_by_reg = NULL;
9782 rtx shift_op;
9783 rtx non_shift_op;
9784
9785 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9786 if (shift_op == NULL)
9787 {
9788 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9789 non_shift_op = XEXP (x, 0);
9790 }
9791 else
9792 non_shift_op = XEXP (x, 1);
9793
9794 if (shift_op != NULL)
9795 {
9796 if (shift_by_reg != NULL)
9797 {
9798 if (speed_p)
9799 *cost += extra_cost->alu.arith_shift_reg;
9800 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9801 }
9802 else if (speed_p)
9803 *cost += extra_cost->alu.arith_shift;
9804
9805 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9806 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9807 return true;
9808 }
9809
9810 if (arm_arch_thumb2
9811 && GET_CODE (XEXP (x, 1)) == MULT)
9812 {
9813 /* MLS. */
9814 if (speed_p)
9815 *cost += extra_cost->mult[0].add;
9816 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9817 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9818 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9819 return true;
9820 }
9821
9822 if (CONST_INT_P (XEXP (x, 0)))
9823 {
9824 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9825 INTVAL (XEXP (x, 0)), NULL_RTX,
9826 NULL_RTX, 1, 0);
9827 *cost = COSTS_N_INSNS (insns);
9828 if (speed_p)
9829 *cost += insns * extra_cost->alu.arith;
9830 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9831 return true;
9832 }
9833 else if (speed_p)
9834 *cost += extra_cost->alu.arith;
9835
9836 return false;
9837 }
9838
9839 if (GET_MODE_CLASS (mode) == MODE_INT
9840 && GET_MODE_SIZE (mode) < 4)
9841 {
9842 rtx shift_op, shift_reg;
9843 shift_reg = NULL;
9844
9845 /* We check both sides of the MINUS for shifter operands since,
9846 unlike PLUS, it's not commutative. */
9847
9848 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9849 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9850
9851 /* Slightly disparage, as we might need to widen the result. */
9852 *cost += 1;
9853 if (speed_p)
9854 *cost += extra_cost->alu.arith;
9855
9856 if (CONST_INT_P (XEXP (x, 0)))
9857 {
9858 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9859 return true;
9860 }
9861
9862 return false;
9863 }
9864
9865 if (mode == DImode)
9866 {
9867 *cost += COSTS_N_INSNS (1);
9868
9869 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9870 {
9871 rtx op1 = XEXP (x, 1);
9872
9873 if (speed_p)
9874 *cost += 2 * extra_cost->alu.arith;
9875
9876 if (GET_CODE (op1) == ZERO_EXTEND)
9877 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9878 0, speed_p);
9879 else
9880 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9881 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9882 0, speed_p);
9883 return true;
9884 }
9885 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9886 {
9887 if (speed_p)
9888 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9889 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9890 0, speed_p)
9891 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9892 return true;
9893 }
9894 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9895 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9896 {
9897 if (speed_p)
9898 *cost += (extra_cost->alu.arith
9899 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9900 ? extra_cost->alu.arith
9901 : extra_cost->alu.arith_shift));
9902 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9903 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9904 GET_CODE (XEXP (x, 1)), 0, speed_p));
9905 return true;
9906 }
9907
9908 if (speed_p)
9909 *cost += 2 * extra_cost->alu.arith;
9910 return false;
9911 }
9912
9913 /* Vector mode? */
9914
9915 *cost = LIBCALL_COST (2);
9916 return false;
9917
9918 case PLUS:
9919 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9920 && (mode == SFmode || !TARGET_VFP_SINGLE))
9921 {
9922 if (GET_CODE (XEXP (x, 0)) == MULT)
9923 {
9924 rtx mul_op0, mul_op1, add_op;
9925
9926 if (speed_p)
9927 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9928
9929 mul_op0 = XEXP (XEXP (x, 0), 0);
9930 mul_op1 = XEXP (XEXP (x, 0), 1);
9931 add_op = XEXP (x, 1);
9932
9933 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9934 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9935 + rtx_cost (add_op, mode, code, 0, speed_p));
9936
9937 return true;
9938 }
9939
9940 if (speed_p)
9941 *cost += extra_cost->fp[mode != SFmode].addsub;
9942 return false;
9943 }
9944 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9945 {
9946 *cost = LIBCALL_COST (2);
9947 return false;
9948 }
9949
9950 /* Narrow modes can be synthesized in SImode, but the range
9951 of useful sub-operations is limited. Check for shift operations
9952 on one of the operands. Only left shifts can be used in the
9953 narrow modes. */
9954 if (GET_MODE_CLASS (mode) == MODE_INT
9955 && GET_MODE_SIZE (mode) < 4)
9956 {
9957 rtx shift_op, shift_reg;
9958 shift_reg = NULL;
9959
9960 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9961
9962 if (CONST_INT_P (XEXP (x, 1)))
9963 {
9964 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9965 INTVAL (XEXP (x, 1)), NULL_RTX,
9966 NULL_RTX, 1, 0);
9967 *cost = COSTS_N_INSNS (insns);
9968 if (speed_p)
9969 *cost += insns * extra_cost->alu.arith;
9970 /* Slightly penalize a narrow operation as the result may
9971 need widening. */
9972 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9973 return true;
9974 }
9975
9976 /* Slightly penalize a narrow operation as the result may
9977 need widening. */
9978 *cost += 1;
9979 if (speed_p)
9980 *cost += extra_cost->alu.arith;
9981
9982 return false;
9983 }
9984
9985 if (mode == SImode)
9986 {
9987 rtx shift_op, shift_reg;
9988
9989 if (TARGET_INT_SIMD
9990 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9991 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9992 {
9993 /* UXTA[BH] or SXTA[BH]. */
9994 if (speed_p)
9995 *cost += extra_cost->alu.extend_arith;
9996 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9997 0, speed_p)
9998 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9999 return true;
10000 }
10001
10002 shift_reg = NULL;
10003 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10004 if (shift_op != NULL)
10005 {
10006 if (shift_reg)
10007 {
10008 if (speed_p)
10009 *cost += extra_cost->alu.arith_shift_reg;
10010 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10011 }
10012 else if (speed_p)
10013 *cost += extra_cost->alu.arith_shift;
10014
10015 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10016 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10017 return true;
10018 }
10019 if (GET_CODE (XEXP (x, 0)) == MULT)
10020 {
10021 rtx mul_op = XEXP (x, 0);
10022
10023 if (TARGET_DSP_MULTIPLY
10024 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10025 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10026 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10027 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10028 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10029 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10030 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10031 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10032 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10033 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10034 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10035 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10036 == 16))))))
10037 {
10038 /* SMLA[BT][BT]. */
10039 if (speed_p)
10040 *cost += extra_cost->mult[0].extend_add;
10041 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10042 SIGN_EXTEND, 0, speed_p)
10043 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10044 SIGN_EXTEND, 0, speed_p)
10045 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10046 return true;
10047 }
10048
10049 if (speed_p)
10050 *cost += extra_cost->mult[0].add;
10051 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10052 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10053 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10054 return true;
10055 }
10056 if (CONST_INT_P (XEXP (x, 1)))
10057 {
10058 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10059 INTVAL (XEXP (x, 1)), NULL_RTX,
10060 NULL_RTX, 1, 0);
10061 *cost = COSTS_N_INSNS (insns);
10062 if (speed_p)
10063 *cost += insns * extra_cost->alu.arith;
10064 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10065 return true;
10066 }
10067 else if (speed_p)
10068 *cost += extra_cost->alu.arith;
10069
10070 return false;
10071 }
10072
10073 if (mode == DImode)
10074 {
10075 if (GET_CODE (XEXP (x, 0)) == MULT
10076 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10077 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10078 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10079 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10080 {
10081 if (speed_p)
10082 *cost += extra_cost->mult[1].extend_add;
10083 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10084 ZERO_EXTEND, 0, speed_p)
10085 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10086 ZERO_EXTEND, 0, speed_p)
10087 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10088 return true;
10089 }
10090
10091 *cost += COSTS_N_INSNS (1);
10092
10093 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10094 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10095 {
10096 if (speed_p)
10097 *cost += (extra_cost->alu.arith
10098 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10099 ? extra_cost->alu.arith
10100 : extra_cost->alu.arith_shift));
10101
10102 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10103 0, speed_p)
10104 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10105 return true;
10106 }
10107
10108 if (speed_p)
10109 *cost += 2 * extra_cost->alu.arith;
10110 return false;
10111 }
10112
10113 /* Vector mode? */
10114 *cost = LIBCALL_COST (2);
10115 return false;
10116 case IOR:
10117 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10118 {
10119 if (speed_p)
10120 *cost += extra_cost->alu.rev;
10121
10122 return true;
10123 }
10124 /* Fall through. */
10125 case AND: case XOR:
10126 if (mode == SImode)
10127 {
10128 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10129 rtx op0 = XEXP (x, 0);
10130 rtx shift_op, shift_reg;
10131
10132 if (subcode == NOT
10133 && (code == AND
10134 || (code == IOR && TARGET_THUMB2)))
10135 op0 = XEXP (op0, 0);
10136
10137 shift_reg = NULL;
10138 shift_op = shifter_op_p (op0, &shift_reg);
10139 if (shift_op != NULL)
10140 {
10141 if (shift_reg)
10142 {
10143 if (speed_p)
10144 *cost += extra_cost->alu.log_shift_reg;
10145 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10146 }
10147 else if (speed_p)
10148 *cost += extra_cost->alu.log_shift;
10149
10150 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10151 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10152 return true;
10153 }
10154
10155 if (CONST_INT_P (XEXP (x, 1)))
10156 {
10157 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10158 INTVAL (XEXP (x, 1)), NULL_RTX,
10159 NULL_RTX, 1, 0);
10160
10161 *cost = COSTS_N_INSNS (insns);
10162 if (speed_p)
10163 *cost += insns * extra_cost->alu.logical;
10164 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10165 return true;
10166 }
10167
10168 if (speed_p)
10169 *cost += extra_cost->alu.logical;
10170 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10171 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10172 return true;
10173 }
10174
10175 if (mode == DImode)
10176 {
10177 rtx op0 = XEXP (x, 0);
10178 enum rtx_code subcode = GET_CODE (op0);
10179
10180 *cost += COSTS_N_INSNS (1);
10181
10182 if (subcode == NOT
10183 && (code == AND
10184 || (code == IOR && TARGET_THUMB2)))
10185 op0 = XEXP (op0, 0);
10186
10187 if (GET_CODE (op0) == ZERO_EXTEND)
10188 {
10189 if (speed_p)
10190 *cost += 2 * extra_cost->alu.logical;
10191
10192 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10193 0, speed_p)
10194 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10195 return true;
10196 }
10197 else if (GET_CODE (op0) == SIGN_EXTEND)
10198 {
10199 if (speed_p)
10200 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10201
10202 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10203 0, speed_p)
10204 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10205 return true;
10206 }
10207
10208 if (speed_p)
10209 *cost += 2 * extra_cost->alu.logical;
10210
10211 return true;
10212 }
10213 /* Vector mode? */
10214
10215 *cost = LIBCALL_COST (2);
10216 return false;
10217
10218 case MULT:
10219 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10220 && (mode == SFmode || !TARGET_VFP_SINGLE))
10221 {
10222 rtx op0 = XEXP (x, 0);
10223
10224 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10225 op0 = XEXP (op0, 0);
10226
10227 if (speed_p)
10228 *cost += extra_cost->fp[mode != SFmode].mult;
10229
10230 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10231 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10232 return true;
10233 }
10234 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10235 {
10236 *cost = LIBCALL_COST (2);
10237 return false;
10238 }
10239
10240 if (mode == SImode)
10241 {
10242 if (TARGET_DSP_MULTIPLY
10243 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10244 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10245 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10246 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10247 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10248 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10249 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10250 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10251 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10252 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10253 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10254 && (INTVAL (XEXP (XEXP (x, 1), 1))
10255 == 16))))))
10256 {
10257 /* SMUL[TB][TB]. */
10258 if (speed_p)
10259 *cost += extra_cost->mult[0].extend;
10260 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10261 SIGN_EXTEND, 0, speed_p);
10262 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10263 SIGN_EXTEND, 1, speed_p);
10264 return true;
10265 }
10266 if (speed_p)
10267 *cost += extra_cost->mult[0].simple;
10268 return false;
10269 }
10270
10271 if (mode == DImode)
10272 {
10273 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10274 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10275 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10276 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10277 {
10278 if (speed_p)
10279 *cost += extra_cost->mult[1].extend;
10280 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10281 ZERO_EXTEND, 0, speed_p)
10282 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10283 ZERO_EXTEND, 0, speed_p));
10284 return true;
10285 }
10286
10287 *cost = LIBCALL_COST (2);
10288 return false;
10289 }
10290
10291 /* Vector mode? */
10292 *cost = LIBCALL_COST (2);
10293 return false;
10294
10295 case NEG:
10296 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10297 && (mode == SFmode || !TARGET_VFP_SINGLE))
10298 {
10299 if (GET_CODE (XEXP (x, 0)) == MULT)
10300 {
10301 /* VNMUL. */
10302 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10303 return true;
10304 }
10305
10306 if (speed_p)
10307 *cost += extra_cost->fp[mode != SFmode].neg;
10308
10309 return false;
10310 }
10311 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10312 {
10313 *cost = LIBCALL_COST (1);
10314 return false;
10315 }
10316
10317 if (mode == SImode)
10318 {
10319 if (GET_CODE (XEXP (x, 0)) == ABS)
10320 {
10321 *cost += COSTS_N_INSNS (1);
10322 /* Assume the non-flag-changing variant. */
10323 if (speed_p)
10324 *cost += (extra_cost->alu.log_shift
10325 + extra_cost->alu.arith_shift);
10326 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10327 return true;
10328 }
10329
10330 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10331 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10332 {
10333 *cost += COSTS_N_INSNS (1);
10334 /* No extra cost for MOV imm and MVN imm. */
10335 /* If the comparison op is using the flags, there's no further
10336 cost; otherwise we need to add the cost of the comparison. */
10337 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10338 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10339 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10340 {
10341 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10342 *cost += (COSTS_N_INSNS (1)
10343 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10344 0, speed_p)
10345 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10346 1, speed_p));
10347 if (speed_p)
10348 *cost += extra_cost->alu.arith;
10349 }
10350 return true;
10351 }
10352
10353 if (speed_p)
10354 *cost += extra_cost->alu.arith;
10355 return false;
10356 }
10357
10358 if (GET_MODE_CLASS (mode) == MODE_INT
10359 && GET_MODE_SIZE (mode) < 4)
10360 {
10361 /* Slightly disparage, as we might need an extend operation. */
10362 *cost += 1;
10363 if (speed_p)
10364 *cost += extra_cost->alu.arith;
10365 return false;
10366 }
10367
10368 if (mode == DImode)
10369 {
10370 *cost += COSTS_N_INSNS (1);
10371 if (speed_p)
10372 *cost += 2 * extra_cost->alu.arith;
10373 return false;
10374 }
10375
10376 /* Vector mode? */
10377 *cost = LIBCALL_COST (1);
10378 return false;
10379
10380 case NOT:
10381 if (mode == SImode)
10382 {
10383 rtx shift_op;
10384 rtx shift_reg = NULL;
10385
10386 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10387
10388 if (shift_op)
10389 {
10390 if (shift_reg != NULL)
10391 {
10392 if (speed_p)
10393 *cost += extra_cost->alu.log_shift_reg;
10394 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10395 }
10396 else if (speed_p)
10397 *cost += extra_cost->alu.log_shift;
10398 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10399 return true;
10400 }
10401
10402 if (speed_p)
10403 *cost += extra_cost->alu.logical;
10404 return false;
10405 }
10406 if (mode == DImode)
10407 {
10408 *cost += COSTS_N_INSNS (1);
10409 return false;
10410 }
10411
10412 /* Vector mode? */
10413
10414 *cost += LIBCALL_COST (1);
10415 return false;
10416
10417 case IF_THEN_ELSE:
10418 {
10419 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10420 {
10421 *cost += COSTS_N_INSNS (3);
10422 return true;
10423 }
10424 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10425 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10426
10427 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10428 /* Assume that if one arm of the if_then_else is a register,
10429 it will be tied with the result, eliminating the
10430 conditional insn. */
10431 if (REG_P (XEXP (x, 1)))
10432 *cost += op2cost;
10433 else if (REG_P (XEXP (x, 2)))
10434 *cost += op1cost;
10435 else
10436 {
10437 if (speed_p)
10438 {
10439 if (extra_cost->alu.non_exec_costs_exec)
10440 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10441 else
10442 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10443 }
10444 else
10445 *cost += op1cost + op2cost;
10446 }
10447 }
10448 return true;
10449
10450 case COMPARE:
10451 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10452 *cost = 0;
10453 else
10454 {
10455 machine_mode op0mode;
10456 /* We'll mostly assume that the cost of a compare is the cost of the
10457 LHS. However, there are some notable exceptions. */
10458
10459 /* Floating point compares are never done as side-effects. */
10460 op0mode = GET_MODE (XEXP (x, 0));
10461 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10462 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10463 {
10464 if (speed_p)
10465 *cost += extra_cost->fp[op0mode != SFmode].compare;
10466
10467 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10468 {
10469 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10470 return true;
10471 }
10472
10473 return false;
10474 }
10475 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10476 {
10477 *cost = LIBCALL_COST (2);
10478 return false;
10479 }
10480
10481 /* DImode compares normally take two insns. */
10482 if (op0mode == DImode)
10483 {
10484 *cost += COSTS_N_INSNS (1);
10485 if (speed_p)
10486 *cost += 2 * extra_cost->alu.arith;
10487 return false;
10488 }
10489
10490 if (op0mode == SImode)
10491 {
10492 rtx shift_op;
10493 rtx shift_reg;
10494
10495 if (XEXP (x, 1) == const0_rtx
10496 && !(REG_P (XEXP (x, 0))
10497 || (GET_CODE (XEXP (x, 0)) == SUBREG
10498 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10499 {
10500 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10501
10502 /* Multiply operations that set the flags are often
10503 significantly more expensive. */
10504 if (speed_p
10505 && GET_CODE (XEXP (x, 0)) == MULT
10506 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10507 *cost += extra_cost->mult[0].flag_setting;
10508
10509 if (speed_p
10510 && GET_CODE (XEXP (x, 0)) == PLUS
10511 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10512 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10513 0), 1), mode))
10514 *cost += extra_cost->mult[0].flag_setting;
10515 return true;
10516 }
10517
10518 shift_reg = NULL;
10519 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10520 if (shift_op != NULL)
10521 {
10522 if (shift_reg != NULL)
10523 {
10524 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10525 1, speed_p);
10526 if (speed_p)
10527 *cost += extra_cost->alu.arith_shift_reg;
10528 }
10529 else if (speed_p)
10530 *cost += extra_cost->alu.arith_shift;
10531 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10532 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10533 return true;
10534 }
10535
10536 if (speed_p)
10537 *cost += extra_cost->alu.arith;
10538 if (CONST_INT_P (XEXP (x, 1))
10539 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10540 {
10541 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10542 return true;
10543 }
10544 return false;
10545 }
10546
10547 /* Vector mode? */
10548
10549 *cost = LIBCALL_COST (2);
10550 return false;
10551 }
10552 return true;
10553
10554 case EQ:
10555 case NE:
10556 case LT:
10557 case LE:
10558 case GT:
10559 case GE:
10560 case LTU:
10561 case LEU:
10562 case GEU:
10563 case GTU:
10564 case ORDERED:
10565 case UNORDERED:
10566 case UNEQ:
10567 case UNLE:
10568 case UNLT:
10569 case UNGE:
10570 case UNGT:
10571 case LTGT:
10572 if (outer_code == SET)
10573 {
10574 /* Is it a store-flag operation? */
10575 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10576 && XEXP (x, 1) == const0_rtx)
10577 {
10578 /* Thumb also needs an IT insn. */
10579 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10580 return true;
10581 }
10582 if (XEXP (x, 1) == const0_rtx)
10583 {
10584 switch (code)
10585 {
10586 case LT:
10587 /* LSR Rd, Rn, #31. */
10588 if (speed_p)
10589 *cost += extra_cost->alu.shift;
10590 break;
10591
10592 case EQ:
10593 /* RSBS T1, Rn, #0
10594 ADC Rd, Rn, T1. */
10595
10596 case NE:
10597 /* SUBS T1, Rn, #1
10598 SBC Rd, Rn, T1. */
10599 *cost += COSTS_N_INSNS (1);
10600 break;
10601
10602 case LE:
10603 /* RSBS T1, Rn, Rn, LSR #31
10604 ADC Rd, Rn, T1. */
10605 *cost += COSTS_N_INSNS (1);
10606 if (speed_p)
10607 *cost += extra_cost->alu.arith_shift;
10608 break;
10609
10610 case GT:
10611 /* RSB Rd, Rn, Rn, ASR #1
10612 LSR Rd, Rd, #31. */
10613 *cost += COSTS_N_INSNS (1);
10614 if (speed_p)
10615 *cost += (extra_cost->alu.arith_shift
10616 + extra_cost->alu.shift);
10617 break;
10618
10619 case GE:
10620 /* ASR Rd, Rn, #31
10621 ADD Rd, Rn, #1. */
10622 *cost += COSTS_N_INSNS (1);
10623 if (speed_p)
10624 *cost += extra_cost->alu.shift;
10625 break;
10626
10627 default:
10628 /* Remaining cases are either meaningless or would take
10629 three insns anyway. */
10630 *cost = COSTS_N_INSNS (3);
10631 break;
10632 }
10633 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10634 return true;
10635 }
10636 else
10637 {
10638 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10639 if (CONST_INT_P (XEXP (x, 1))
10640 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10641 {
10642 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10643 return true;
10644 }
10645
10646 return false;
10647 }
10648 }
10649 /* Not directly inside a set. If it involves the condition code
10650 register it must be the condition for a branch, cond_exec or
10651 I_T_E operation. Since the comparison is performed elsewhere
10652 this is just the control part which has no additional
10653 cost. */
10654 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10655 && XEXP (x, 1) == const0_rtx)
10656 {
10657 *cost = 0;
10658 return true;
10659 }
10660 return false;
10661
10662 case ABS:
10663 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10664 && (mode == SFmode || !TARGET_VFP_SINGLE))
10665 {
10666 if (speed_p)
10667 *cost += extra_cost->fp[mode != SFmode].neg;
10668
10669 return false;
10670 }
10671 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10672 {
10673 *cost = LIBCALL_COST (1);
10674 return false;
10675 }
10676
10677 if (mode == SImode)
10678 {
10679 if (speed_p)
10680 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10681 return false;
10682 }
10683 /* Vector mode? */
10684 *cost = LIBCALL_COST (1);
10685 return false;
10686
10687 case SIGN_EXTEND:
10688 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10689 && MEM_P (XEXP (x, 0)))
10690 {
10691 if (mode == DImode)
10692 *cost += COSTS_N_INSNS (1);
10693
10694 if (!speed_p)
10695 return true;
10696
10697 if (GET_MODE (XEXP (x, 0)) == SImode)
10698 *cost += extra_cost->ldst.load;
10699 else
10700 *cost += extra_cost->ldst.load_sign_extend;
10701
10702 if (mode == DImode)
10703 *cost += extra_cost->alu.shift;
10704
10705 return true;
10706 }
10707
10708       /* Widening from less than 32 bits requires an extend operation. */
10709 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10710 {
10711 /* We have SXTB/SXTH. */
10712 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10713 if (speed_p)
10714 *cost += extra_cost->alu.extend;
10715 }
10716 else if (GET_MODE (XEXP (x, 0)) != SImode)
10717 {
10718 /* Needs two shifts. */
10719 *cost += COSTS_N_INSNS (1);
10720 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10721 if (speed_p)
10722 *cost += 2 * extra_cost->alu.shift;
10723 }
10724
10725       /* Widening beyond 32 bits requires one more insn. */
10726 if (mode == DImode)
10727 {
10728 *cost += COSTS_N_INSNS (1);
10729 if (speed_p)
10730 *cost += extra_cost->alu.shift;
10731 }
10732
10733 return true;
10734
10735 case ZERO_EXTEND:
10736 if ((arm_arch4
10737 || GET_MODE (XEXP (x, 0)) == SImode
10738 || GET_MODE (XEXP (x, 0)) == QImode)
10739 && MEM_P (XEXP (x, 0)))
10740 {
10741 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10742
10743 if (mode == DImode)
10744 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10745
10746 return true;
10747 }
10748
10749       /* Widening from less than 32 bits requires an extend operation. */
10750 if (GET_MODE (XEXP (x, 0)) == QImode)
10751 {
10752 /* UXTB can be a shorter instruction in Thumb2, but it might
10753 be slower than the AND Rd, Rn, #255 alternative. When
10754 optimizing for speed it should never be slower to use
10755 AND, and we don't really model 16-bit vs 32-bit insns
10756 here. */
10757 if (speed_p)
10758 *cost += extra_cost->alu.logical;
10759 }
10760 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10761 {
10762 /* We have UXTB/UXTH. */
10763 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10764 if (speed_p)
10765 *cost += extra_cost->alu.extend;
10766 }
10767 else if (GET_MODE (XEXP (x, 0)) != SImode)
10768 {
10769 /* Needs two shifts. It's marginally preferable to use
10770 shifts rather than two BIC instructions as the second
10771 shift may merge with a subsequent insn as a shifter
10772 op. */
10773 *cost = COSTS_N_INSNS (2);
10774 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10775 if (speed_p)
10776 *cost += 2 * extra_cost->alu.shift;
10777 }
10778
10779       /* Widening beyond 32 bits requires one more insn. */
10780 if (mode == DImode)
10781 {
10782 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10783 }
10784
10785 return true;
10786
10787 case CONST_INT:
10788 *cost = 0;
10789 /* CONST_INT has no mode, so we cannot tell for sure how many
10790 insns are really going to be needed. The best we can do is
10791 look at the value passed. If it fits in SImode, then assume
10792 that's the mode it will be used for. Otherwise assume it
10793 will be used in DImode. */
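      /* For example (illustrative): 0x7fffffff survives truncation to SImode
	 and is costed as an SImode constant, whereas a value whose low 32 bits
	 do not sign-extend back to it is costed as a DImode constant, i.e. as
	 its two 32-bit halves below. */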
10794 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10795 mode = SImode;
10796 else
10797 mode = DImode;
10798
10799 /* Avoid blowing up in arm_gen_constant (). */
10800 if (!(outer_code == PLUS
10801 || outer_code == AND
10802 || outer_code == IOR
10803 || outer_code == XOR
10804 || outer_code == MINUS))
10805 outer_code = SET;
10806
10807 const_int_cost:
10808 if (mode == SImode)
10809 {
10810 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10811 INTVAL (x), NULL, NULL,
10812 0, 0));
10813 /* Extra costs? */
10814 }
10815 else
10816 {
10817 *cost += COSTS_N_INSNS (arm_gen_constant
10818 (outer_code, SImode, NULL,
10819 trunc_int_for_mode (INTVAL (x), SImode),
10820 NULL, NULL, 0, 0)
10821 + arm_gen_constant (outer_code, SImode, NULL,
10822 INTVAL (x) >> 32, NULL,
10823 NULL, 0, 0));
10824 /* Extra costs? */
10825 }
10826
10827 return true;
10828
10829 case CONST:
10830 case LABEL_REF:
10831 case SYMBOL_REF:
10832 if (speed_p)
10833 {
10834 if (arm_arch_thumb2 && !flag_pic)
10835 *cost += COSTS_N_INSNS (1);
10836 else
10837 *cost += extra_cost->ldst.load;
10838 }
10839 else
10840 *cost += COSTS_N_INSNS (1);
10841
10842 if (flag_pic)
10843 {
10844 *cost += COSTS_N_INSNS (1);
10845 if (speed_p)
10846 *cost += extra_cost->alu.arith;
10847 }
10848
10849 return true;
10850
10851 case CONST_FIXED:
10852 *cost = COSTS_N_INSNS (4);
10853 /* Fixme. */
10854 return true;
10855
10856 case CONST_DOUBLE:
10857 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10858 && (mode == SFmode || !TARGET_VFP_SINGLE))
10859 {
10860 if (vfp3_const_double_rtx (x))
10861 {
10862 if (speed_p)
10863 *cost += extra_cost->fp[mode == DFmode].fpconst;
10864 return true;
10865 }
10866
10867 if (speed_p)
10868 {
10869 if (mode == DFmode)
10870 *cost += extra_cost->ldst.loadd;
10871 else
10872 *cost += extra_cost->ldst.loadf;
10873 }
10874 else
10875 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10876
10877 return true;
10878 }
10879 *cost = COSTS_N_INSNS (4);
10880 return true;
10881
10882 case CONST_VECTOR:
10883 /* Fixme. */
10884 if (TARGET_NEON
10885 && TARGET_HARD_FLOAT
10886 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10887 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10888 *cost = COSTS_N_INSNS (1);
10889 else
10890 *cost = COSTS_N_INSNS (4);
10891 return true;
10892
10893 case HIGH:
10894 case LO_SUM:
10895 /* When optimizing for size, we prefer constant pool entries to
10896 MOVW/MOVT pairs, so bump the cost of these slightly. */
10897 if (!speed_p)
10898 *cost += 1;
10899 return true;
10900
10901 case CLZ:
10902 if (speed_p)
10903 *cost += extra_cost->alu.clz;
10904 return false;
10905
10906 case SMIN:
10907 if (XEXP (x, 1) == const0_rtx)
10908 {
10909 if (speed_p)
10910 *cost += extra_cost->alu.log_shift;
10911 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10912 return true;
10913 }
10914 /* Fall through. */
10915 case SMAX:
10916 case UMIN:
10917 case UMAX:
10918 *cost += COSTS_N_INSNS (1);
10919 return false;
10920
10921 case TRUNCATE:
10922 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10923 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10924 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10925 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10926 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10927 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10928 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10929 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10930 == ZERO_EXTEND))))
10931 {
10932 if (speed_p)
10933 *cost += extra_cost->mult[1].extend;
10934 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10935 ZERO_EXTEND, 0, speed_p)
10936 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10937 ZERO_EXTEND, 0, speed_p));
10938 return true;
10939 }
10940 *cost = LIBCALL_COST (1);
10941 return false;
10942
10943 case UNSPEC_VOLATILE:
10944 case UNSPEC:
10945 return arm_unspec_cost (x, outer_code, speed_p, cost);
10946
10947 case PC:
10948 /* Reading the PC is like reading any other register. Writing it
10949 is more expensive, but we take that into account elsewhere. */
10950 *cost = 0;
10951 return true;
10952
10953 case ZERO_EXTRACT:
10954 /* TODO: Simple zero_extract of bottom bits using AND. */
10955 /* Fall through. */
10956 case SIGN_EXTRACT:
10957 if (arm_arch6
10958 && mode == SImode
10959 && CONST_INT_P (XEXP (x, 1))
10960 && CONST_INT_P (XEXP (x, 2)))
10961 {
10962 if (speed_p)
10963 *cost += extra_cost->alu.bfx;
10964 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10965 return true;
10966 }
10967 /* Without UBFX/SBFX, need to resort to shift operations. */
10968 *cost += COSTS_N_INSNS (1);
10969 if (speed_p)
10970 *cost += 2 * extra_cost->alu.shift;
10971 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10972 return true;
10973
10974 case FLOAT_EXTEND:
10975 if (TARGET_HARD_FLOAT)
10976 {
10977 if (speed_p)
10978 *cost += extra_cost->fp[mode == DFmode].widen;
10979 if (!TARGET_VFP5
10980 && GET_MODE (XEXP (x, 0)) == HFmode)
10981 {
10982 /* Pre v8, widening HF->DF is a two-step process, first
10983 widening to SFmode. */
10984 *cost += COSTS_N_INSNS (1);
10985 if (speed_p)
10986 *cost += extra_cost->fp[0].widen;
10987 }
10988 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10989 return true;
10990 }
10991
10992 *cost = LIBCALL_COST (1);
10993 return false;
10994
10995 case FLOAT_TRUNCATE:
10996 if (TARGET_HARD_FLOAT)
10997 {
10998 if (speed_p)
10999 *cost += extra_cost->fp[mode == DFmode].narrow;
11000 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11001 return true;
11002 /* Vector modes? */
11003 }
11004 *cost = LIBCALL_COST (1);
11005 return false;
11006
11007 case FMA:
11008 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11009 {
11010 rtx op0 = XEXP (x, 0);
11011 rtx op1 = XEXP (x, 1);
11012 rtx op2 = XEXP (x, 2);
11013
11014
11015 /* vfms or vfnma. */
11016 if (GET_CODE (op0) == NEG)
11017 op0 = XEXP (op0, 0);
11018
11019 /* vfnms or vfnma. */
11020 if (GET_CODE (op2) == NEG)
11021 op2 = XEXP (op2, 0);
11022
11023 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11024 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11025 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11026
11027 if (speed_p)
11028           *cost += extra_cost->fp[mode == DFmode].fma;
11029
11030 return true;
11031 }
11032
11033 *cost = LIBCALL_COST (3);
11034 return false;
11035
11036 case FIX:
11037 case UNSIGNED_FIX:
11038 if (TARGET_HARD_FLOAT)
11039 {
11040 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11041 a vcvt fixed-point conversion. */
11042 if (code == FIX && mode == SImode
11043 && GET_CODE (XEXP (x, 0)) == FIX
11044 && GET_MODE (XEXP (x, 0)) == SFmode
11045 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11046 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11047 > 0)
11048 {
11049 if (speed_p)
11050 *cost += extra_cost->fp[0].toint;
11051
11052 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11053 code, 0, speed_p);
11054 return true;
11055 }
11056
11057 if (GET_MODE_CLASS (mode) == MODE_INT)
11058 {
11059 mode = GET_MODE (XEXP (x, 0));
11060 if (speed_p)
11061 *cost += extra_cost->fp[mode == DFmode].toint;
11062           /* Strip off the 'cost' of rounding towards zero. */
11063 if (GET_CODE (XEXP (x, 0)) == FIX)
11064 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11065 0, speed_p);
11066 else
11067 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11068 /* ??? Increase the cost to deal with transferring from
11069 FP -> CORE registers? */
11070 return true;
11071 }
11072 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11073 && TARGET_VFP5)
11074 {
11075 if (speed_p)
11076 *cost += extra_cost->fp[mode == DFmode].roundint;
11077 return false;
11078 }
11079 /* Vector costs? */
11080 }
11081 *cost = LIBCALL_COST (1);
11082 return false;
11083
11084 case FLOAT:
11085 case UNSIGNED_FLOAT:
11086 if (TARGET_HARD_FLOAT)
11087 {
11088 /* ??? Increase the cost to deal with transferring from CORE
11089 -> FP registers? */
11090 if (speed_p)
11091 *cost += extra_cost->fp[mode == DFmode].fromint;
11092 return false;
11093 }
11094 *cost = LIBCALL_COST (1);
11095 return false;
11096
11097 case CALL:
11098 return true;
11099
11100 case ASM_OPERANDS:
11101 {
11102 /* Just a guess. Guess number of instructions in the asm
11103 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11104 though (see PR60663). */
11105 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11106 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11107
11108 *cost = COSTS_N_INSNS (asm_length + num_operands);
11109 return true;
11110 }
11111 default:
11112 if (mode != VOIDmode)
11113 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11114 else
11115 *cost = COSTS_N_INSNS (4); /* Who knows? */
11116 return false;
11117 }
11118 }
11119
11120 #undef HANDLE_NARROW_SHIFT_ARITH
11121
11122 /* RTX costs entry point. */
11123
11124 static bool
11125 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11126 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11127 {
11128 bool result;
11129 int code = GET_CODE (x);
11130 gcc_assert (current_tune->insn_extra_cost);
11131
11132 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11133 (enum rtx_code) outer_code,
11134 current_tune->insn_extra_cost,
11135 total, speed);
11136
11137 if (dump_file && arm_verbose_cost)
11138 {
11139 print_rtl_single (dump_file, x);
11140 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11141 *total, result ? "final" : "partial");
11142 }
11143 return result;
11144 }
11145
11146 /* All address computations that can be done are free, but rtx cost returns
11147 the same for practically all of them. So we weight the different types
11148 of address here in the order (most pref first):
11149 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11150 static inline int
11151 arm_arm_address_cost (rtx x)
11152 {
11153 enum rtx_code c = GET_CODE (x);
11154
11155 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11156 return 0;
11157 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11158 return 10;
11159
11160 if (c == PLUS)
11161 {
11162 if (CONST_INT_P (XEXP (x, 1)))
11163 return 2;
11164
11165 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11166 return 3;
11167
11168 return 4;
11169 }
11170
11171 return 6;
11172 }
11173
11174 static inline int
11175 arm_thumb_address_cost (rtx x)
11176 {
11177 enum rtx_code c = GET_CODE (x);
11178
11179 if (c == REG)
11180 return 1;
11181 if (c == PLUS
11182 && REG_P (XEXP (x, 0))
11183 && CONST_INT_P (XEXP (x, 1)))
11184 return 1;
11185
11186 return 2;
11187 }
11188
11189 static int
11190 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11191 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11192 {
11193 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11194 }
11195
11196 /* Adjust cost hook for XScale. */
11197 static bool
11198 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11199 int * cost)
11200 {
11201 /* Some true dependencies can have a higher cost depending
11202 on precisely how certain input operands are used. */
11203 if (dep_type == 0
11204 && recog_memoized (insn) >= 0
11205 && recog_memoized (dep) >= 0)
11206 {
11207 int shift_opnum = get_attr_shift (insn);
11208 enum attr_type attr_type = get_attr_type (dep);
11209
11210 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11211 operand for INSN. If we have a shifted input operand and the
11212 instruction we depend on is another ALU instruction, then we may
11213 have to account for an additional stall. */
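      /* For instance (illustrative): if INSN is something like
	   add r0, r3, r2, lsl #2
	 and DEP is the ALU instruction that computes r2, the code below raises
	 the cost of that dependency to 2 to account for the extra stall. */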
11214 if (shift_opnum != 0
11215 && (attr_type == TYPE_ALU_SHIFT_IMM
11216 || attr_type == TYPE_ALUS_SHIFT_IMM
11217 || attr_type == TYPE_LOGIC_SHIFT_IMM
11218 || attr_type == TYPE_LOGICS_SHIFT_IMM
11219 || attr_type == TYPE_ALU_SHIFT_REG
11220 || attr_type == TYPE_ALUS_SHIFT_REG
11221 || attr_type == TYPE_LOGIC_SHIFT_REG
11222 || attr_type == TYPE_LOGICS_SHIFT_REG
11223 || attr_type == TYPE_MOV_SHIFT
11224 || attr_type == TYPE_MVN_SHIFT
11225 || attr_type == TYPE_MOV_SHIFT_REG
11226 || attr_type == TYPE_MVN_SHIFT_REG))
11227 {
11228 rtx shifted_operand;
11229 int opno;
11230
11231 /* Get the shifted operand. */
11232 extract_insn (insn);
11233 shifted_operand = recog_data.operand[shift_opnum];
11234
11235 /* Iterate over all the operands in DEP. If we write an operand
11236            that overlaps with SHIFTED_OPERAND, then we have to increase the
11237 cost of this dependency. */
11238 extract_insn (dep);
11239 preprocess_constraints (dep);
11240 for (opno = 0; opno < recog_data.n_operands; opno++)
11241 {
11242 /* We can ignore strict inputs. */
11243 if (recog_data.operand_type[opno] == OP_IN)
11244 continue;
11245
11246 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11247 shifted_operand))
11248 {
11249 *cost = 2;
11250 return false;
11251 }
11252 }
11253 }
11254 }
11255 return true;
11256 }
11257
11258 /* Adjust cost hook for Cortex A9. */
11259 static bool
11260 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11261 int * cost)
11262 {
11263 switch (dep_type)
11264 {
11265 case REG_DEP_ANTI:
11266 *cost = 0;
11267 return false;
11268
11269 case REG_DEP_TRUE:
11270 case REG_DEP_OUTPUT:
11271 if (recog_memoized (insn) >= 0
11272 && recog_memoized (dep) >= 0)
11273 {
11274 if (GET_CODE (PATTERN (insn)) == SET)
11275 {
11276 if (GET_MODE_CLASS
11277 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11278 || GET_MODE_CLASS
11279 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11280 {
11281 enum attr_type attr_type_insn = get_attr_type (insn);
11282 enum attr_type attr_type_dep = get_attr_type (dep);
11283
11284 /* By default all dependencies of the form
11285 s0 = s0 <op> s1
11286 s0 = s0 <op> s2
11287 have an extra latency of 1 cycle because
11288 of the input and output dependency in this
11289                  case. However, this gets modeled as a true
11290 dependency and hence all these checks. */
11291 if (REG_P (SET_DEST (PATTERN (insn)))
11292 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11293 {
11294 /* FMACS is a special case where the dependent
11295 instruction can be issued 3 cycles before
11296 the normal latency in case of an output
11297 dependency. */
11298 if ((attr_type_insn == TYPE_FMACS
11299 || attr_type_insn == TYPE_FMACD)
11300 && (attr_type_dep == TYPE_FMACS
11301 || attr_type_dep == TYPE_FMACD))
11302 {
11303 if (dep_type == REG_DEP_OUTPUT)
11304 *cost = insn_default_latency (dep) - 3;
11305 else
11306 *cost = insn_default_latency (dep);
11307 return false;
11308 }
11309 else
11310 {
11311 if (dep_type == REG_DEP_OUTPUT)
11312 *cost = insn_default_latency (dep) + 1;
11313 else
11314 *cost = insn_default_latency (dep);
11315 }
11316 return false;
11317 }
11318 }
11319 }
11320 }
11321 break;
11322
11323 default:
11324 gcc_unreachable ();
11325 }
11326
11327 return true;
11328 }
11329
11330 /* Adjust cost hook for FA726TE. */
11331 static bool
11332 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11333 int * cost)
11334 {
11335 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11336    has a penalty of 3. */
11337 if (dep_type == REG_DEP_TRUE
11338 && recog_memoized (insn) >= 0
11339 && recog_memoized (dep) >= 0
11340 && get_attr_conds (dep) == CONDS_SET)
11341 {
11342 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11343 if (get_attr_conds (insn) == CONDS_USE
11344 && get_attr_type (insn) != TYPE_BRANCH)
11345 {
11346 *cost = 3;
11347 return false;
11348 }
11349
11350 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11351 || get_attr_conds (insn) == CONDS_USE)
11352 {
11353 *cost = 0;
11354 return false;
11355 }
11356 }
11357
11358 return true;
11359 }
11360
11361 /* Implement TARGET_REGISTER_MOVE_COST.
11362
11363 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11364    such a move is typically more expensive than a single memory access. We set
11365 the cost to less than two memory accesses so that floating
11366 point to integer conversion does not go through memory. */
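/* For illustration: with the values below, a VFP<->core transfer is costed at
   15, while arm_memory_move_cost (further down) charges 10 per memory access
   on 32-bit targets, so a spill/reload round trip through memory (2 * 10)
   looks more expensive to the register allocator than the direct move. */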
11367
11368 int
11369 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11370 reg_class_t from, reg_class_t to)
11371 {
11372 if (TARGET_32BIT)
11373 {
11374 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11375 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11376 return 15;
11377 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11378 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11379 return 4;
11380 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11381 return 20;
11382 else
11383 return 2;
11384 }
11385 else
11386 {
11387 if (from == HI_REGS || to == HI_REGS)
11388 return 4;
11389 else
11390 return 2;
11391 }
11392 }
11393
11394 /* Implement TARGET_MEMORY_MOVE_COST. */
11395
11396 int
11397 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11398 bool in ATTRIBUTE_UNUSED)
11399 {
11400 if (TARGET_32BIT)
11401 return 10;
11402 else
11403 {
11404 if (GET_MODE_SIZE (mode) < 4)
11405 return 8;
11406 else
11407 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11408 }
11409 }
11410
11411 /* Vectorizer cost model implementation. */
11412
11413 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11414 static int
11415 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11416 tree vectype,
11417 int misalign ATTRIBUTE_UNUSED)
11418 {
11419 unsigned elements;
11420
11421 switch (type_of_cost)
11422 {
11423 case scalar_stmt:
11424 return current_tune->vec_costs->scalar_stmt_cost;
11425
11426 case scalar_load:
11427 return current_tune->vec_costs->scalar_load_cost;
11428
11429 case scalar_store:
11430 return current_tune->vec_costs->scalar_store_cost;
11431
11432 case vector_stmt:
11433 return current_tune->vec_costs->vec_stmt_cost;
11434
11435 case vector_load:
11436 return current_tune->vec_costs->vec_align_load_cost;
11437
11438 case vector_store:
11439 return current_tune->vec_costs->vec_store_cost;
11440
11441 case vec_to_scalar:
11442 return current_tune->vec_costs->vec_to_scalar_cost;
11443
11444 case scalar_to_vec:
11445 return current_tune->vec_costs->scalar_to_vec_cost;
11446
11447 case unaligned_load:
11448 case vector_gather_load:
11449 return current_tune->vec_costs->vec_unalign_load_cost;
11450
11451 case unaligned_store:
11452 case vector_scatter_store:
11453 return current_tune->vec_costs->vec_unalign_store_cost;
11454
11455 case cond_branch_taken:
11456 return current_tune->vec_costs->cond_taken_branch_cost;
11457
11458 case cond_branch_not_taken:
11459 return current_tune->vec_costs->cond_not_taken_branch_cost;
11460
11461 case vec_perm:
11462 case vec_promote_demote:
11463 return current_tune->vec_costs->vec_stmt_cost;
11464
11465 case vec_construct:
11466 elements = TYPE_VECTOR_SUBPARTS (vectype);
11467 return elements / 2 + 1;
11468
11469 default:
11470 gcc_unreachable ();
11471 }
11472 }
11473
11474 /* Implement targetm.vectorize.add_stmt_cost. */
11475
11476 static unsigned
11477 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11478 struct _stmt_vec_info *stmt_info, int misalign,
11479 enum vect_cost_model_location where)
11480 {
11481 unsigned *cost = (unsigned *) data;
11482 unsigned retval = 0;
11483
11484 if (flag_vect_cost_model)
11485 {
11486 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11487 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11488
11489 /* Statements in an inner loop relative to the loop being
11490 vectorized are weighted more heavily. The value here is
11491 arbitrary and could potentially be improved with analysis. */
11492 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11493 count *= 50; /* FIXME. */
11494
11495 retval = (unsigned) (count * stmt_cost);
11496 cost[where] += retval;
11497 }
11498
11499 return retval;
11500 }
11501
11502 /* Return true if and only if this insn can dual-issue only as older. */
11503 static bool
11504 cortexa7_older_only (rtx_insn *insn)
11505 {
11506 if (recog_memoized (insn) < 0)
11507 return false;
11508
11509 switch (get_attr_type (insn))
11510 {
11511 case TYPE_ALU_DSP_REG:
11512 case TYPE_ALU_SREG:
11513 case TYPE_ALUS_SREG:
11514 case TYPE_LOGIC_REG:
11515 case TYPE_LOGICS_REG:
11516 case TYPE_ADC_REG:
11517 case TYPE_ADCS_REG:
11518 case TYPE_ADR:
11519 case TYPE_BFM:
11520 case TYPE_REV:
11521 case TYPE_MVN_REG:
11522 case TYPE_SHIFT_IMM:
11523 case TYPE_SHIFT_REG:
11524 case TYPE_LOAD_BYTE:
11525 case TYPE_LOAD_4:
11526 case TYPE_STORE_4:
11527 case TYPE_FFARITHS:
11528 case TYPE_FADDS:
11529 case TYPE_FFARITHD:
11530 case TYPE_FADDD:
11531 case TYPE_FMOV:
11532 case TYPE_F_CVT:
11533 case TYPE_FCMPS:
11534 case TYPE_FCMPD:
11535 case TYPE_FCONSTS:
11536 case TYPE_FCONSTD:
11537 case TYPE_FMULS:
11538 case TYPE_FMACS:
11539 case TYPE_FMULD:
11540 case TYPE_FMACD:
11541 case TYPE_FDIVS:
11542 case TYPE_FDIVD:
11543 case TYPE_F_MRC:
11544 case TYPE_F_MRRC:
11545 case TYPE_F_FLAG:
11546 case TYPE_F_LOADS:
11547 case TYPE_F_STORES:
11548 return true;
11549 default:
11550 return false;
11551 }
11552 }
11553
11554 /* Return true if and only if this insn can dual-issue as younger. */
11555 static bool
11556 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11557 {
11558 if (recog_memoized (insn) < 0)
11559 {
11560 if (verbose > 5)
11561 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11562 return false;
11563 }
11564
11565 switch (get_attr_type (insn))
11566 {
11567 case TYPE_ALU_IMM:
11568 case TYPE_ALUS_IMM:
11569 case TYPE_LOGIC_IMM:
11570 case TYPE_LOGICS_IMM:
11571 case TYPE_EXTEND:
11572 case TYPE_MVN_IMM:
11573 case TYPE_MOV_IMM:
11574 case TYPE_MOV_REG:
11575 case TYPE_MOV_SHIFT:
11576 case TYPE_MOV_SHIFT_REG:
11577 case TYPE_BRANCH:
11578 case TYPE_CALL:
11579 return true;
11580 default:
11581 return false;
11582 }
11583 }
11584
11585
11586 /* Look for an instruction that can dual issue only as an older
11587 instruction, and move it in front of any instructions that can
11588 dual-issue as younger, while preserving the relative order of all
11589    other instructions in the ready list. This is a heuristic to help
11590 dual-issue in later cycles, by postponing issue of more flexible
11591 instructions. This heuristic may affect dual issue opportunities
11592 in the current cycle. */
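/* For example (illustrative): if the head of the ready list is a plain move,
   which can issue as the younger insn of a dual-issue pair, and a load, which
   can only issue as older, appears further down, the load is hoisted in front
   of the move; the move then remains available to pair as the younger insn of
   a later cycle. */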
11593 static void
11594 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11595 int *n_readyp, int clock)
11596 {
11597 int i;
11598 int first_older_only = -1, first_younger = -1;
11599
11600 if (verbose > 5)
11601 fprintf (file,
11602 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11603 clock,
11604 *n_readyp);
11605
11606 /* Traverse the ready list from the head (the instruction to issue
11607      first), looking for the first instruction that can issue as
11608 younger and the first instruction that can dual-issue only as
11609 older. */
11610 for (i = *n_readyp - 1; i >= 0; i--)
11611 {
11612 rtx_insn *insn = ready[i];
11613 if (cortexa7_older_only (insn))
11614 {
11615 first_older_only = i;
11616 if (verbose > 5)
11617 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11618 break;
11619 }
11620 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11621 first_younger = i;
11622 }
11623
11624   /* Nothing to reorder because either no younger insn was found, or the
11625      insn that can dual-issue only as older appears before any insn that
11626 can dual-issue as younger. */
11627 if (first_younger == -1)
11628 {
11629 if (verbose > 5)
11630 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11631 return;
11632 }
11633
11634 /* Nothing to reorder because no older-only insn in the ready list. */
11635 if (first_older_only == -1)
11636 {
11637 if (verbose > 5)
11638 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11639 return;
11640 }
11641
11642 /* Move first_older_only insn before first_younger. */
11643 if (verbose > 5)
11644 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11645 INSN_UID(ready [first_older_only]),
11646 INSN_UID(ready [first_younger]));
11647 rtx_insn *first_older_only_insn = ready [first_older_only];
11648 for (i = first_older_only; i < first_younger; i++)
11649 {
11650 ready[i] = ready[i+1];
11651 }
11652
11653 ready[i] = first_older_only_insn;
11654 return;
11655 }
11656
11657 /* Implement TARGET_SCHED_REORDER. */
11658 static int
11659 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11660 int clock)
11661 {
11662 switch (arm_tune)
11663 {
11664 case TARGET_CPU_cortexa7:
11665 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11666 break;
11667 default:
11668 /* Do nothing for other cores. */
11669 break;
11670 }
11671
11672 return arm_issue_rate ();
11673 }
11674
11675 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11676 It corrects the value of COST based on the relationship between
11677    INSN and DEP and the dependence type DEP_TYPE. It returns the new
11678 value. There is a per-core adjust_cost hook to adjust scheduler costs
11679 and the per-core hook can choose to completely override the generic
11680 adjust_cost function. Only put bits of code into arm_adjust_cost that
11681 are common across all cores. */
11682 static int
11683 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11684 unsigned int)
11685 {
11686 rtx i_pat, d_pat;
11687
11688 /* When generating Thumb-1 code, we want to place flag-setting operations
11689 close to a conditional branch which depends on them, so that we can
11690 omit the comparison. */
11691 if (TARGET_THUMB1
11692 && dep_type == 0
11693 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11694 && recog_memoized (dep) >= 0
11695 && get_attr_conds (dep) == CONDS_SET)
11696 return 0;
11697
11698 if (current_tune->sched_adjust_cost != NULL)
11699 {
11700 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11701 return cost;
11702 }
11703
11704 /* XXX Is this strictly true? */
11705 if (dep_type == REG_DEP_ANTI
11706 || dep_type == REG_DEP_OUTPUT)
11707 return 0;
11708
11709 /* Call insns don't incur a stall, even if they follow a load. */
11710 if (dep_type == 0
11711 && CALL_P (insn))
11712 return 1;
11713
11714 if ((i_pat = single_set (insn)) != NULL
11715 && MEM_P (SET_SRC (i_pat))
11716 && (d_pat = single_set (dep)) != NULL
11717 && MEM_P (SET_DEST (d_pat)))
11718 {
11719 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11720       /* This is a load after a store; there is no conflict if the load reads
11721          from a cached area. Assume that loads from the stack and from the
11722 constant pool are cached, and that others will miss. This is a
11723 hack. */
11724
11725 if ((GET_CODE (src_mem) == SYMBOL_REF
11726 && CONSTANT_POOL_ADDRESS_P (src_mem))
11727 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11728 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11729 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11730 return 1;
11731 }
11732
11733 return cost;
11734 }
11735
11736 int
11737 arm_max_conditional_execute (void)
11738 {
11739 return max_insns_skipped;
11740 }
11741
11742 static int
11743 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11744 {
11745 if (TARGET_32BIT)
11746 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11747 else
11748 return (optimize > 0) ? 2 : 0;
11749 }
11750
11751 static int
11752 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11753 {
11754 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11755 }
11756
11757 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11758 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11759 sequences of non-executed instructions in IT blocks probably take the same
11760 amount of time as executed instructions (and the IT instruction itself takes
11761 space in icache). This function was experimentally determined to give good
11762 results on a popular embedded benchmark. */
11763
11764 static int
11765 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11766 {
11767 return (TARGET_32BIT && speed_p) ? 1
11768 : arm_default_branch_cost (speed_p, predictable_p);
11769 }
11770
11771 static int
11772 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11773 {
11774 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11775 }
11776
11777 static bool fp_consts_inited = false;
11778
11779 static REAL_VALUE_TYPE value_fp0;
11780
11781 static void
11782 init_fp_table (void)
11783 {
11784 REAL_VALUE_TYPE r;
11785
11786 r = REAL_VALUE_ATOF ("0", DFmode);
11787 value_fp0 = r;
11788 fp_consts_inited = true;
11789 }
11790
11791 /* Return TRUE if rtx X is a valid immediate FP constant. */
11792 int
11793 arm_const_double_rtx (rtx x)
11794 {
11795 const REAL_VALUE_TYPE *r;
11796
11797 if (!fp_consts_inited)
11798 init_fp_table ();
11799
11800 r = CONST_DOUBLE_REAL_VALUE (x);
11801 if (REAL_VALUE_MINUS_ZERO (*r))
11802 return 0;
11803
11804 if (real_equal (r, &value_fp0))
11805 return 1;
11806
11807 return 0;
11808 }
11809
11810 /* VFPv3 has a fairly wide range of representable immediates, formed from
11811 "quarter-precision" floating-point values. These can be evaluated using this
11812 formula (with ^ for exponentiation):
11813
11814 -1^s * n * 2^-r
11815
11816 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11817 16 <= n <= 31 and 0 <= r <= 7.
11818
11819 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11820
11821 - A (most-significant) is the sign bit.
11822 - BCD are the exponent (encoded as r XOR 3).
11823 - EFGH are the mantissa (encoded as n - 16).
11824 */
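/* Worked example of the mapping above: 1.0 = +16 * 2^-4, so s = 0, n = 16 and
   r = 4; this encodes as A = 0, BCD = 4 XOR 3 = 7, EFGH = 16 - 16 = 0, i.e.
   ABCDEFGH = 0111 0000 = 0x70.  Likewise 0.5 = 16 * 2^-5 encodes as 0x60.
   Given 16 <= n <= 31 and 0 <= r <= 7, the representable magnitudes range
   from 16 * 2^-7 = 0.125 up to 31 * 2^0 = 31. */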
11825
11826 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11827 fconst[sd] instruction, or -1 if X isn't suitable. */
11828 static int
11829 vfp3_const_double_index (rtx x)
11830 {
11831 REAL_VALUE_TYPE r, m;
11832 int sign, exponent;
11833 unsigned HOST_WIDE_INT mantissa, mant_hi;
11834 unsigned HOST_WIDE_INT mask;
11835 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11836 bool fail;
11837
11838 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11839 return -1;
11840
11841 r = *CONST_DOUBLE_REAL_VALUE (x);
11842
11843 /* We can't represent these things, so detect them first. */
11844 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11845 return -1;
11846
11847 /* Extract sign, exponent and mantissa. */
11848 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11849 r = real_value_abs (&r);
11850 exponent = REAL_EXP (&r);
11851 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11852 highest (sign) bit, with a fixed binary point at bit point_pos.
11853 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11854 bits for the mantissa, this may fail (low bits would be lost). */
11855 real_ldexp (&m, &r, point_pos - exponent);
11856 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11857 mantissa = w.elt (0);
11858 mant_hi = w.elt (1);
11859
11860 /* If there are bits set in the low part of the mantissa, we can't
11861 represent this value. */
11862 if (mantissa != 0)
11863 return -1;
11864
11865 /* Now make it so that mantissa contains the most-significant bits, and move
11866 the point_pos to indicate that the least-significant bits have been
11867 discarded. */
11868 point_pos -= HOST_BITS_PER_WIDE_INT;
11869 mantissa = mant_hi;
11870
11871 /* We can permit four significant bits of mantissa only, plus a high bit
11872 which is always 1. */
11873 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11874 if ((mantissa & mask) != 0)
11875 return -1;
11876
11877 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11878 mantissa >>= point_pos - 5;
11879
11880 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11881 floating-point immediate zero with Neon using an integer-zero load, but
11882 that case is handled elsewhere.) */
11883 if (mantissa == 0)
11884 return -1;
11885
11886 gcc_assert (mantissa >= 16 && mantissa <= 31);
11887
11888 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11889 normalized significands are in the range [1, 2). (Our mantissa is shifted
11890 left 4 places at this point relative to normalized IEEE754 values). GCC
11891 internally uses [0.5, 1) (see real.c), so the exponent returned from
11892 REAL_EXP must be altered. */
11893 exponent = 5 - exponent;
11894
11895 if (exponent < 0 || exponent > 7)
11896 return -1;
11897
11898 /* Sign, mantissa and exponent are now in the correct form to plug into the
11899 formula described in the comment above. */
11900 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11901 }
11902
11903 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11904 int
11905 vfp3_const_double_rtx (rtx x)
11906 {
11907 if (!TARGET_VFP3)
11908 return 0;
11909
11910 return vfp3_const_double_index (x) != -1;
11911 }
11912
11913 /* Recognize immediates which can be used in various Neon instructions. Legal
11914 immediates are described by the following table (for VMVN variants, the
11915 bitwise inverse of the constant shown is recognized. In either case, VMOV
11916 is output and the correct instruction to use for a given constant is chosen
11917 by the assembler). The constant shown is replicated across all elements of
11918 the destination vector.
11919
11920 insn elems variant constant (binary)
11921 ---- ----- ------- -----------------
11922 vmov i32 0 00000000 00000000 00000000 abcdefgh
11923 vmov i32 1 00000000 00000000 abcdefgh 00000000
11924 vmov i32 2 00000000 abcdefgh 00000000 00000000
11925 vmov i32 3 abcdefgh 00000000 00000000 00000000
11926 vmov i16 4 00000000 abcdefgh
11927 vmov i16 5 abcdefgh 00000000
11928 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11929 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11930 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11931 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11932 vmvn i16 10 00000000 abcdefgh
11933 vmvn i16 11 abcdefgh 00000000
11934 vmov i32 12 00000000 00000000 abcdefgh 11111111
11935 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11936 vmov i32 14 00000000 abcdefgh 11111111 11111111
11937 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11938 vmov i8 16 abcdefgh
11939 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11940 eeeeeeee ffffffff gggggggg hhhhhhhh
11941 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11942 vmov f32 19 00000000 00000000 00000000 00000000
11943
11944 For case 18, B = !b. Representable values are exactly those accepted by
11945 vfp3_const_double_index, but are output as floating-point numbers rather
11946 than indices.
11947
11948 For case 19, we will change it to vmov.i32 when assembling.
11949
11950 Variants 0-5 (inclusive) may also be used as immediates for the second
11951 operand of VORR/VBIC instructions.
11952
11953 The INVERSE argument causes the bitwise inverse of the given operand to be
11954 recognized instead (used for recognizing legal immediates for the VAND/VORN
11955 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11956 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11957 output, rather than the real insns vbic/vorr).
11958
11959 INVERSE makes no difference to the recognition of float vectors.
11960
11961 The return value is the variant of immediate as shown in the above table, or
11962 -1 if the given value doesn't match any of the listed patterns.
11963 */
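/* A few illustrative matches (little-endian): a V4SImode vector whose elements
   are all 0x000000AB matches variant 0 with element width 32; one whose
   elements are all 0xFFFFFF54 matches variant 6, since the per-element bitwise
   inverse is 0x000000AB; and one whose bytes are all identical (e.g. elements
   of 0x2A2A2A2A) matches variant 16 with element width 8. */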
11964 static int
11965 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11966 rtx *modconst, int *elementwidth)
11967 {
11968 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11969 matches = 1; \
11970 for (i = 0; i < idx; i += (STRIDE)) \
11971 if (!(TEST)) \
11972 matches = 0; \
11973 if (matches) \
11974 { \
11975 immtype = (CLASS); \
11976 elsize = (ELSIZE); \
11977 break; \
11978 }
11979
11980 unsigned int i, elsize = 0, idx = 0, n_elts;
11981 unsigned int innersize;
11982 unsigned char bytes[16];
11983 int immtype = -1, matches;
11984 unsigned int invmask = inverse ? 0xff : 0;
11985 bool vector = GET_CODE (op) == CONST_VECTOR;
11986
11987 if (vector)
11988 n_elts = CONST_VECTOR_NUNITS (op);
11989 else
11990 {
11991 n_elts = 1;
11992 if (mode == VOIDmode)
11993 mode = DImode;
11994 }
11995
11996 innersize = GET_MODE_UNIT_SIZE (mode);
11997
11998 /* Vectors of float constants. */
11999 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12000 {
12001 rtx el0 = CONST_VECTOR_ELT (op, 0);
12002
12003 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12004 return -1;
12005
12006 /* FP16 vectors cannot be represented. */
12007 if (GET_MODE_INNER (mode) == HFmode)
12008 return -1;
12009
12010 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12011 are distinct in this context. */
12012 if (!const_vec_duplicate_p (op))
12013 return -1;
12014
12015 if (modconst)
12016 *modconst = CONST_VECTOR_ELT (op, 0);
12017
12018 if (elementwidth)
12019 *elementwidth = 0;
12020
12021 if (el0 == CONST0_RTX (GET_MODE (el0)))
12022 return 19;
12023 else
12024 return 18;
12025 }
12026
12027 /* The tricks done in the code below apply for little-endian vector layout.
12028 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12029 FIXME: Implement logic for big-endian vectors. */
12030 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12031 return -1;
12032
12033 /* Splat vector constant out into a byte vector. */
12034 for (i = 0; i < n_elts; i++)
12035 {
12036 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12037 unsigned HOST_WIDE_INT elpart;
12038
12039 gcc_assert (CONST_INT_P (el));
12040 elpart = INTVAL (el);
12041
12042 for (unsigned int byte = 0; byte < innersize; byte++)
12043 {
12044 bytes[idx++] = (elpart & 0xff) ^ invmask;
12045 elpart >>= BITS_PER_UNIT;
12046 }
12047 }
12048
12049 /* Sanity check. */
12050 gcc_assert (idx == GET_MODE_SIZE (mode));
12051
12052 do
12053 {
12054 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12055 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12056
12057 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12058 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12059
12060 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12061 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12062
12063 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12064 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12065
12066 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12067
12068 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12069
12070 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12071 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12072
12073 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12074 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12075
12076 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12077 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12078
12079 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12080 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12081
12082 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12083
12084 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12085
12086 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12087 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12088
12089 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12090 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12091
12092 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12093 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12094
12095 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12096 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12097
12098 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12099
12100 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12101 && bytes[i] == bytes[(i + 8) % idx]);
12102 }
12103 while (0);
12104
12105 if (immtype == -1)
12106 return -1;
12107
12108 if (elementwidth)
12109 *elementwidth = elsize;
12110
12111 if (modconst)
12112 {
12113 unsigned HOST_WIDE_INT imm = 0;
12114
12115 /* Un-invert bytes of recognized vector, if necessary. */
12116 if (invmask != 0)
12117 for (i = 0; i < idx; i++)
12118 bytes[i] ^= invmask;
12119
12120 if (immtype == 17)
12121 {
12122 /* FIXME: Broken on 32-bit H_W_I hosts. */
12123 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12124
12125 for (i = 0; i < 8; i++)
12126 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12127 << (i * BITS_PER_UNIT);
12128
12129 *modconst = GEN_INT (imm);
12130 }
12131 else
12132 {
12133 unsigned HOST_WIDE_INT imm = 0;
12134
12135 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12136 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12137
12138 *modconst = GEN_INT (imm);
12139 }
12140 }
12141
12142 return immtype;
12143 #undef CHECK
12144 }
12145
12146 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12147 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12148 float elements), and a modified constant (whatever should be output for a
12149 VMOV) in *MODCONST. */
12150
12151 int
12152 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12153 rtx *modconst, int *elementwidth)
12154 {
12155 rtx tmpconst;
12156 int tmpwidth;
12157 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12158
12159 if (retval == -1)
12160 return 0;
12161
12162 if (modconst)
12163 *modconst = tmpconst;
12164
12165 if (elementwidth)
12166 *elementwidth = tmpwidth;
12167
12168 return 1;
12169 }
12170
12171 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12172 the immediate is valid, write a constant suitable for using as an operand
12173 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12174 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12175
12176 int
12177 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12178 rtx *modconst, int *elementwidth)
12179 {
12180 rtx tmpconst;
12181 int tmpwidth;
12182 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12183
12184 if (retval < 0 || retval > 5)
12185 return 0;
12186
12187 if (modconst)
12188 *modconst = tmpconst;
12189
12190 if (elementwidth)
12191 *elementwidth = tmpwidth;
12192
12193 return 1;
12194 }
12195
12196 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12197 the immediate is valid, write a constant suitable for using as an operand
12198 to VSHR/VSHL to *MODCONST and the corresponding element width to
12199    *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12200    which have different limitations. */
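/* For example (illustrative): with V8QImode (8-bit elements) the checks below
   accept left-shift counts of 0..7 and right-shift counts of 1..8, and all
   elements of the shift-count vector must be equal. */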
12201
12202 int
12203 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12204 rtx *modconst, int *elementwidth,
12205 bool isleftshift)
12206 {
12207 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12208 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12209 unsigned HOST_WIDE_INT last_elt = 0;
12210 unsigned HOST_WIDE_INT maxshift;
12211
12212   /* All elements of the shift-count vector must be the same; extract it. */
12213 for (i = 0; i < n_elts; i++)
12214 {
12215 rtx el = CONST_VECTOR_ELT (op, i);
12216 unsigned HOST_WIDE_INT elpart;
12217
12218 if (CONST_INT_P (el))
12219 elpart = INTVAL (el);
12220 else if (CONST_DOUBLE_P (el))
12221 return 0;
12222 else
12223 gcc_unreachable ();
12224
12225 if (i != 0 && elpart != last_elt)
12226 return 0;
12227
12228 last_elt = elpart;
12229 }
12230
12231 /* Shift less than element size. */
12232 maxshift = innersize * 8;
12233
12234 if (isleftshift)
12235 {
12236 /* Left shift immediate value can be from 0 to <size>-1. */
12237 if (last_elt >= maxshift)
12238 return 0;
12239 }
12240 else
12241 {
12242 /* Right shift immediate value can be from 1 to <size>. */
12243 if (last_elt == 0 || last_elt > maxshift)
12244 return 0;
12245 }
12246
12247 if (elementwidth)
12248 *elementwidth = innersize * 8;
12249
12250 if (modconst)
12251 *modconst = CONST_VECTOR_ELT (op, 0);
12252
12253 return 1;
12254 }
12255
12256 /* Return a string suitable for output of Neon immediate logic operation
12257 MNEM. */
12258
12259 char *
12260 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12261 int inverse, int quad)
12262 {
12263 int width, is_valid;
12264 static char templ[40];
12265
12266 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12267
12268 gcc_assert (is_valid != 0);
12269
12270 if (quad)
12271 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12272 else
12273 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12274
12275 return templ;
12276 }
12277
12278 /* Return a string suitable for output of Neon immediate shift operation
12279 (VSHR or VSHL) MNEM. */
12280
12281 char *
12282 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12283 machine_mode mode, int quad,
12284 bool isleftshift)
12285 {
12286 int width, is_valid;
12287 static char templ[40];
12288
12289 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12290 gcc_assert (is_valid != 0);
12291
12292 if (quad)
12293 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12294 else
12295 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12296
12297 return templ;
12298 }
12299
12300 /* Output a sequence of pairwise operations to implement a reduction.
12301 NOTE: We do "too much work" here, because pairwise operations work on two
12302    registers-worth of operands in one go. Unfortunately, I don't think we can
12303    exploit those extra calculations to do the full operation in fewer steps.
12304 Although all vector elements of the result but the first are ignored, we
12305 actually calculate the same result in each of the elements. An alternative
12306 such as initially loading a vector with zero to use as each of the second
12307 operands would use up an additional register and take an extra instruction,
12308 for no particular gain. */
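/* For instance (illustrative), assuming REDUC is a Neon pairwise add such as
   vpadd: reducing a 4-element vector {a, b, c, d} takes two steps,
   {a, b, c, d} -> {a+b, c+d, a+b, c+d} -> {a+b+c+d, ...}, after which only
   the first element of OP0 is of interest, as noted above. */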
12309
12310 void
12311 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12312 rtx (*reduc) (rtx, rtx, rtx))
12313 {
12314 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12315 rtx tmpsum = op1;
12316
12317 for (i = parts / 2; i >= 1; i /= 2)
12318 {
12319 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12320 emit_insn (reduc (dest, tmpsum, tmpsum));
12321 tmpsum = dest;
12322 }
12323 }
12324
12325 /* If VALS is a vector constant that can be loaded into a register
12326 using VDUP, generate instructions to do so and return an RTX to
12327 assign to the register. Otherwise return NULL_RTX. */
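/* For example (illustrative): a V4SImode constant whose elements are all 42
   can be materialized as a core-register move of 42 followed by vdup.32,
   which is expected to be cheaper than loading the vector from the constant
   pool. */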
12328
12329 static rtx
12330 neon_vdup_constant (rtx vals)
12331 {
12332 machine_mode mode = GET_MODE (vals);
12333 machine_mode inner_mode = GET_MODE_INNER (mode);
12334 rtx x;
12335
12336 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12337 return NULL_RTX;
12338
12339 if (!const_vec_duplicate_p (vals, &x))
12340 /* The elements are not all the same. We could handle repeating
12341 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12342 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12343 vdup.i16). */
12344 return NULL_RTX;
12345
12346 /* We can load this constant by using VDUP and a constant in a
12347 single ARM register. This will be cheaper than a vector
12348 load. */
12349
12350 x = copy_to_mode_reg (inner_mode, x);
12351 return gen_vec_duplicate (mode, x);
12352 }
12353
12354 /* Generate code to load VALS, which is a PARALLEL containing only
12355 constants (for vec_init) or CONST_VECTOR, efficiently into a
12356 register. Returns an RTX to copy into the register, or NULL_RTX
12357 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12358
12359 rtx
12360 neon_make_constant (rtx vals)
12361 {
12362 machine_mode mode = GET_MODE (vals);
12363 rtx target;
12364 rtx const_vec = NULL_RTX;
12365 int n_elts = GET_MODE_NUNITS (mode);
12366 int n_const = 0;
12367 int i;
12368
12369 if (GET_CODE (vals) == CONST_VECTOR)
12370 const_vec = vals;
12371 else if (GET_CODE (vals) == PARALLEL)
12372 {
12373 /* A CONST_VECTOR must contain only CONST_INTs and
12374 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12375 Only store valid constants in a CONST_VECTOR. */
12376 for (i = 0; i < n_elts; ++i)
12377 {
12378 rtx x = XVECEXP (vals, 0, i);
12379 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12380 n_const++;
12381 }
12382 if (n_const == n_elts)
12383 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12384 }
12385 else
12386 gcc_unreachable ();
12387
12388 if (const_vec != NULL
12389 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12390 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12391 return const_vec;
12392 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12393 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12394 pipeline cycle; creating the constant takes one or two ARM
12395 pipeline cycles. */
12396 return target;
12397 else if (const_vec != NULL_RTX)
12398 /* Load from constant pool. On Cortex-A8 this takes two cycles
12399 (for either double or quad vectors). We cannot take advantage
12400 of single-cycle VLD1 because we need a PC-relative addressing
12401 mode. */
12402 return const_vec;
12403 else
12404 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12405 We cannot construct an initializer. */
12406 return NULL_RTX;
12407 }
12408
12409 /* Initialize vector TARGET to VALS. */
12410
12411 void
12412 neon_expand_vector_init (rtx target, rtx vals)
12413 {
12414 machine_mode mode = GET_MODE (target);
12415 machine_mode inner_mode = GET_MODE_INNER (mode);
12416 int n_elts = GET_MODE_NUNITS (mode);
12417 int n_var = 0, one_var = -1;
12418 bool all_same = true;
12419 rtx x, mem;
12420 int i;
12421
12422 for (i = 0; i < n_elts; ++i)
12423 {
12424 x = XVECEXP (vals, 0, i);
12425 if (!CONSTANT_P (x))
12426 ++n_var, one_var = i;
12427
12428 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12429 all_same = false;
12430 }
12431
12432 if (n_var == 0)
12433 {
12434 rtx constant = neon_make_constant (vals);
12435 if (constant != NULL_RTX)
12436 {
12437 emit_move_insn (target, constant);
12438 return;
12439 }
12440 }
12441
12442 /* Splat a single non-constant element if we can. */
12443 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12444 {
12445 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12446 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12447 return;
12448 }
12449
12450 /* One field is non-constant. Load constant then overwrite varying
12451 field. This is more efficient than using the stack. */
12452 if (n_var == 1)
12453 {
12454 rtx copy = copy_rtx (vals);
12455 rtx index = GEN_INT (one_var);
12456
12457 /* Load constant part of vector, substitute neighboring value for
12458 varying element. */
12459 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12460 neon_expand_vector_init (target, copy);
12461
12462 /* Insert variable. */
12463 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12464 switch (mode)
12465 {
12466 case E_V8QImode:
12467 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12468 break;
12469 case E_V16QImode:
12470 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12471 break;
12472 case E_V4HImode:
12473 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12474 break;
12475 case E_V8HImode:
12476 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12477 break;
12478 case E_V2SImode:
12479 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12480 break;
12481 case E_V4SImode:
12482 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12483 break;
12484 case E_V2SFmode:
12485 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12486 break;
12487 case E_V4SFmode:
12488 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12489 break;
12490 case E_V2DImode:
12491 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12492 break;
12493 default:
12494 gcc_unreachable ();
12495 }
12496 return;
12497 }
12498
12499 /* Construct the vector in memory one field at a time
12500 and load the whole vector. */
12501 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12502 for (i = 0; i < n_elts; i++)
12503 emit_move_insn (adjust_address_nv (mem, inner_mode,
12504 i * GET_MODE_SIZE (inner_mode)),
12505 XVECEXP (vals, 0, i));
12506 emit_move_insn (target, mem);
12507 }
12508
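/* Illustrative sketch, not part of GCC: the same three strategies as
   neon_expand_vector_init above, expressed at the source level with NEON
   intrinsics.  Assumes <arm_neon.h> and a NEON-capable target; the
   example_* function names are invented for this sketch.  */

#include <arm_neon.h>

/* Every element constant: a single constant load (VMOV immediate or a
   literal-pool VLDR, as chosen by neon_make_constant).  */
static int32x4_t
example_init_all_constant (void)
{
  static const int32_t cst[4] = { 1, 2, 3, 4 };
  return vld1q_s32 (cst);
}

/* Every element the same non-constant value: a single VDUP.  */
static int32x4_t
example_init_splat (int32_t x)
{
  return vdupq_n_s32 (x);
}

/* Exactly one varying element: load the constant part, then overwrite the
   varying lane (the n_var == 1 path above).  */
static int32x4_t
example_init_one_variable (int32_t x)
{
  static const int32_t cst[4] = { 1, 2, 3, 3 };  /* lane 3 is a placeholder */
  return vsetq_lane_s32 (x, vld1q_s32 (cst), 3);
}
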
12509 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12510 an error naming DESC if it doesn't. EXP indicates the source location,
12511 which includes the inlining history for intrinsics. */
12512
12513 static void
12514 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12515 const_tree exp, const char *desc)
12516 {
12517 HOST_WIDE_INT lane;
12518
12519 gcc_assert (CONST_INT_P (operand));
12520
12521 lane = INTVAL (operand);
12522
12523 if (lane < low || lane >= high)
12524 {
12525 if (exp)
12526 error ("%K%s %wd out of range %wd - %wd",
12527 exp, desc, lane, low, high - 1);
12528 else
12529 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12530 }
12531 }
12532
12533 /* Bounds-check lanes. */
12534
12535 void
12536 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12537 const_tree exp)
12538 {
12539 bounds_check (operand, low, high, exp, "lane");
12540 }
12541
12542 /* Bounds-check constants. */
12543
12544 void
12545 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12546 {
12547 bounds_check (operand, low, high, NULL_TREE, "constant");
12548 }
12549
12550 HOST_WIDE_INT
12551 neon_element_bits (machine_mode mode)
12552 {
12553 return GET_MODE_UNIT_BITSIZE (mode);
12554 }
12555
12556 \f
12557 /* Predicates for `match_operand' and `match_operator'. */
12558
12559 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12560 WB is true if full writeback address modes are allowed and is false
12561 if limited writeback address modes (POST_INC and PRE_DEC) are
12562 allowed. */
12563
12564 int
12565 arm_coproc_mem_operand (rtx op, bool wb)
12566 {
12567 rtx ind;
12568
12569 /* Reject eliminable registers. */
12570 if (! (reload_in_progress || reload_completed || lra_in_progress)
12571 && ( reg_mentioned_p (frame_pointer_rtx, op)
12572 || reg_mentioned_p (arg_pointer_rtx, op)
12573 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12574 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12575 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12576 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12577 return FALSE;
12578
12579 /* Constants are converted into offsets from labels. */
12580 if (!MEM_P (op))
12581 return FALSE;
12582
12583 ind = XEXP (op, 0);
12584
12585 if (reload_completed
12586 && (GET_CODE (ind) == LABEL_REF
12587 || (GET_CODE (ind) == CONST
12588 && GET_CODE (XEXP (ind, 0)) == PLUS
12589 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12590 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12591 return TRUE;
12592
12593 /* Match: (mem (reg)). */
12594 if (REG_P (ind))
12595 return arm_address_register_rtx_p (ind, 0);
12596
12597 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12598 acceptable in any case (subject to verification by
12599 arm_address_register_rtx_p). We need WB to be true to accept
12600 PRE_INC and POST_DEC. */
12601 if (GET_CODE (ind) == POST_INC
12602 || GET_CODE (ind) == PRE_DEC
12603 || (wb
12604 && (GET_CODE (ind) == PRE_INC
12605 || GET_CODE (ind) == POST_DEC)))
12606 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12607
12608 if (wb
12609 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12610 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12611 && GET_CODE (XEXP (ind, 1)) == PLUS
12612 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12613 ind = XEXP (ind, 1);
12614
12615 /* Match:
12616 (plus (reg)
12617 (const)). */
12618 if (GET_CODE (ind) == PLUS
12619 && REG_P (XEXP (ind, 0))
12620 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12621 && CONST_INT_P (XEXP (ind, 1))
12622 && INTVAL (XEXP (ind, 1)) > -1024
12623 && INTVAL (XEXP (ind, 1)) < 1024
12624 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12625 return TRUE;
12626
12627 return FALSE;
12628 }
12629
12630 /* Return TRUE if OP is a memory operand which we can load or store a vector
12631 to/from. TYPE is one of the following values:
12632 0 - Vector load/store (vldr)
12633 1 - Core registers (ldm)
12634 2 - Element/structure loads (vld1)
12635 */
12636 int
12637 neon_vector_mem_operand (rtx op, int type, bool strict)
12638 {
12639 rtx ind;
12640
12641 /* Reject eliminable registers. */
12642 if (strict && ! (reload_in_progress || reload_completed)
12643 && (reg_mentioned_p (frame_pointer_rtx, op)
12644 || reg_mentioned_p (arg_pointer_rtx, op)
12645 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12646 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12647 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12648 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12649 return FALSE;
12650
12651 /* Constants are converted into offsets from labels. */
12652 if (!MEM_P (op))
12653 return FALSE;
12654
12655 ind = XEXP (op, 0);
12656
12657 if (reload_completed
12658 && (GET_CODE (ind) == LABEL_REF
12659 || (GET_CODE (ind) == CONST
12660 && GET_CODE (XEXP (ind, 0)) == PLUS
12661 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12662 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12663 return TRUE;
12664
12665 /* Match: (mem (reg)). */
12666 if (REG_P (ind))
12667 return arm_address_register_rtx_p (ind, 0);
12668
12669 /* Allow post-increment with Neon registers. */
12670 if ((type != 1 && GET_CODE (ind) == POST_INC)
12671 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12672 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12673
12674 /* Allow post-increment by register for VLDn. */
12675 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12676 && GET_CODE (XEXP (ind, 1)) == PLUS
12677 && REG_P (XEXP (XEXP (ind, 1), 1)))
12678 return true;
12679
12680 /* Match:
12681 (plus (reg)
12682 (const)). */
12683 if (type == 0
12684 && GET_CODE (ind) == PLUS
12685 && REG_P (XEXP (ind, 0))
12686 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12687 && CONST_INT_P (XEXP (ind, 1))
12688 && INTVAL (XEXP (ind, 1)) > -1024
12689 /* For quad modes, we restrict the constant offset to be slightly less
12690 than what the instruction format permits. We have no such constraint
12691 on double mode offsets. (This must match arm_legitimate_index_p.) */
12692 && (INTVAL (XEXP (ind, 1))
12693 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12694 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12695 return TRUE;
12696
12697 return FALSE;
12698 }
12699
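/* Illustrative sketch, not part of GCC: the immediate-offset rule applied
   above to (plus (reg) (const_int)) addresses, written as a plain C
   predicate.  It follows the limits quoted in the code (offsets above
   -1024, word aligned, with quad-vector offsets capped at 1016); the
   function name and the use of <stdbool.h> are choices made for this
   example only.  */

#include <stdbool.h>

static bool
example_vldr_offset_ok (long offset, bool quad_vector)
{
  long upper = quad_vector ? 1016 : 1024;  /* see arm_legitimate_index_p */
  return offset > -1024 && offset < upper && (offset & 3) == 0;
}
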
12700 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12701 type. */
12702 int
12703 neon_struct_mem_operand (rtx op)
12704 {
12705 rtx ind;
12706
12707 /* Reject eliminable registers. */
12708 if (! (reload_in_progress || reload_completed)
12709 && ( reg_mentioned_p (frame_pointer_rtx, op)
12710 || reg_mentioned_p (arg_pointer_rtx, op)
12711 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12712 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12713 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12714 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12715 return FALSE;
12716
12717 /* Constants are converted into offsets from labels. */
12718 if (!MEM_P (op))
12719 return FALSE;
12720
12721 ind = XEXP (op, 0);
12722
12723 if (reload_completed
12724 && (GET_CODE (ind) == LABEL_REF
12725 || (GET_CODE (ind) == CONST
12726 && GET_CODE (XEXP (ind, 0)) == PLUS
12727 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12728 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12729 return TRUE;
12730
12731 /* Match: (mem (reg)). */
12732 if (REG_P (ind))
12733 return arm_address_register_rtx_p (ind, 0);
12734
12735 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12736 if (GET_CODE (ind) == POST_INC
12737 || GET_CODE (ind) == PRE_DEC)
12738 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12739
12740 return FALSE;
12741 }
12742
12743 /* Prepare the operands for the VCMLA by lane instruction such that the right
12744 register number is selected. This instruction is special in that it always
12745 requires a D register; however, there is a choice to be made between Dn[0],
12746 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
12747
12748 The VCMLA by lane function always selects two values. For instance, given D0
12749 and a V2SF, the only valid index is 0, as the values in S0 and S1 will be
12750 used by the instruction. However, given V4SF, indices 0 and 1 are both
12751 valid, as either D0[0] or D1[0] may be selected.
12752
12753 This function centralizes that information based on OPERANDS: OPERANDS[3]
12754 will be changed from a REG into a CONST_INT RTX, and OPERANDS[4] will be
12755 updated to contain the right index. */
12756
12757 rtx *
12758 neon_vcmla_lane_prepare_operands (rtx *operands)
12759 {
12760 int lane = INTVAL (operands[4]);
12761 machine_mode constmode = SImode;
12762 machine_mode mode = GET_MODE (operands[3]);
12763 int regno = REGNO (operands[3]);
12764 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
12765 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
12766 {
12767 operands[3] = gen_int_mode (regno + 1, constmode);
12768 operands[4]
12769 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
12770 }
12771 else
12772 {
12773 operands[3] = gen_int_mode (regno, constmode);
12774 operands[4] = gen_int_mode (lane, constmode);
12775 }
12776 return operands;
12777 }
12778
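/* Illustrative sketch, not part of GCC: how a by-lane index on a Q
   register splits into a D-register selector plus a lane within that D
   register, mirroring the adjustment above.  The struct and function
   names are invented; lanes_per_d stands for GET_MODE_NUNITS (mode) / 4.  */

struct example_vcmla_sel { int dreg; int lane; };

static struct example_vcmla_sel
example_vcmla_split (int dreg, int lane, int lanes_per_d)
{
  struct example_vcmla_sel sel = { dreg, lane };
  if (lanes_per_d > 0 && lane >= lanes_per_d)
    {
      sel.dreg = dreg + 1;            /* the lane lives in the high D register */
      sel.lane = lane - lanes_per_d;  /* re-base the index within it */
    }
  return sel;
}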
12779
12780 /* Return true if X is a register that will be eliminated later on. */
12781 int
12782 arm_eliminable_register (rtx x)
12783 {
12784 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12785 || REGNO (x) == ARG_POINTER_REGNUM
12786 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12787 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12788 }
12789
12790 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12791 coprocessor registers; otherwise return NO_REGS. */
12792
12793 enum reg_class
12794 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12795 {
12796 if (mode == HFmode)
12797 {
12798 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12799 return GENERAL_REGS;
12800 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12801 return NO_REGS;
12802 return GENERAL_REGS;
12803 }
12804
12805 /* The neon move patterns handle all legitimate vector and struct
12806 addresses. */
12807 if (TARGET_NEON
12808 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12809 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12810 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12811 || VALID_NEON_STRUCT_MODE (mode)))
12812 return NO_REGS;
12813
12814 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12815 return NO_REGS;
12816
12817 return GENERAL_REGS;
12818 }
12819
12820 /* Values which must be returned in the most-significant end of the return
12821 register. */
12822
12823 static bool
12824 arm_return_in_msb (const_tree valtype)
12825 {
12826 return (TARGET_AAPCS_BASED
12827 && BYTES_BIG_ENDIAN
12828 && (AGGREGATE_TYPE_P (valtype)
12829 || TREE_CODE (valtype) == COMPLEX_TYPE
12830 || FIXED_POINT_TYPE_P (valtype)));
12831 }
12832
12833 /* Return TRUE if X references a SYMBOL_REF. */
12834 int
12835 symbol_mentioned_p (rtx x)
12836 {
12837 const char * fmt;
12838 int i;
12839
12840 if (GET_CODE (x) == SYMBOL_REF)
12841 return 1;
12842
12843 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12844 are constant offsets, not symbols. */
12845 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12846 return 0;
12847
12848 fmt = GET_RTX_FORMAT (GET_CODE (x));
12849
12850 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12851 {
12852 if (fmt[i] == 'E')
12853 {
12854 int j;
12855
12856 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12857 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12858 return 1;
12859 }
12860 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12861 return 1;
12862 }
12863
12864 return 0;
12865 }
12866
12867 /* Return TRUE if X references a LABEL_REF. */
12868 int
12869 label_mentioned_p (rtx x)
12870 {
12871 const char * fmt;
12872 int i;
12873
12874 if (GET_CODE (x) == LABEL_REF)
12875 return 1;
12876
12877 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12878 instruction, but they are constant offsets, not symbols. */
12879 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12880 return 0;
12881
12882 fmt = GET_RTX_FORMAT (GET_CODE (x));
12883 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12884 {
12885 if (fmt[i] == 'E')
12886 {
12887 int j;
12888
12889 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12890 if (label_mentioned_p (XVECEXP (x, i, j)))
12891 return 1;
12892 }
12893 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12894 return 1;
12895 }
12896
12897 return 0;
12898 }
12899
12900 int
12901 tls_mentioned_p (rtx x)
12902 {
12903 switch (GET_CODE (x))
12904 {
12905 case CONST:
12906 return tls_mentioned_p (XEXP (x, 0));
12907
12908 case UNSPEC:
12909 if (XINT (x, 1) == UNSPEC_TLS)
12910 return 1;
12911
12912 /* Fall through. */
12913 default:
12914 return 0;
12915 }
12916 }
12917
12918 /* Must not copy any rtx that uses a pc-relative address.
12919 Also, disallow copying of load-exclusive instructions that
12920 may appear after splitting of compare-and-swap-style operations
12921 so as to prevent those loops from being transformed away from their
12922 canonical forms (see PR 69904). */
12923
12924 static bool
12925 arm_cannot_copy_insn_p (rtx_insn *insn)
12926 {
12927 /* The tls call insn cannot be copied, as it is paired with a data
12928 word. */
12929 if (recog_memoized (insn) == CODE_FOR_tlscall)
12930 return true;
12931
12932 subrtx_iterator::array_type array;
12933 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12934 {
12935 const_rtx x = *iter;
12936 if (GET_CODE (x) == UNSPEC
12937 && (XINT (x, 1) == UNSPEC_PIC_BASE
12938 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12939 return true;
12940 }
12941
12942 rtx set = single_set (insn);
12943 if (set)
12944 {
12945 rtx src = SET_SRC (set);
12946 if (GET_CODE (src) == ZERO_EXTEND)
12947 src = XEXP (src, 0);
12948
12949 /* Catch the load-exclusive and load-acquire operations. */
12950 if (GET_CODE (src) == UNSPEC_VOLATILE
12951 && (XINT (src, 1) == VUNSPEC_LL
12952 || XINT (src, 1) == VUNSPEC_LAX))
12953 return true;
12954 }
12955 return false;
12956 }
12957
12958 enum rtx_code
12959 minmax_code (rtx x)
12960 {
12961 enum rtx_code code = GET_CODE (x);
12962
12963 switch (code)
12964 {
12965 case SMAX:
12966 return GE;
12967 case SMIN:
12968 return LE;
12969 case UMIN:
12970 return LEU;
12971 case UMAX:
12972 return GEU;
12973 default:
12974 gcc_unreachable ();
12975 }
12976 }
12977
12978 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12979
12980 bool
12981 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12982 int *mask, bool *signed_sat)
12983 {
12984 /* The high bound must be a power of two minus one. */
12985 int log = exact_log2 (INTVAL (hi_bound) + 1);
12986 if (log == -1)
12987 return false;
12988
12989 /* The low bound is either zero (for usat) or one less than the
12990 negation of the high bound (for ssat). */
12991 if (INTVAL (lo_bound) == 0)
12992 {
12993 if (mask)
12994 *mask = log;
12995 if (signed_sat)
12996 *signed_sat = false;
12997
12998 return true;
12999 }
13000
13001 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13002 {
13003 if (mask)
13004 *mask = log + 1;
13005 if (signed_sat)
13006 *signed_sat = true;
13007
13008 return true;
13009 }
13010
13011 return false;
13012 }
13013
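/* Illustrative sketch, not part of GCC: the bound patterns accepted above,
   checked on plain integers.  For usat #N the range is [0, 2^N - 1]; for
   ssat #N it is [-2^(N-1), 2^(N-1) - 1].  The function name and the use of
   __builtin_ctzll (a GCC built-in) are choices made for this example.  */

#include <stdbool.h>

static bool
example_sat_bounds (long long lo, long long hi, int *bits, bool *is_signed)
{
  unsigned long long h = (unsigned long long) hi + 1;

  /* hi + 1 must be a (non-zero) power of two.  */
  if (h == 0 || (h & (h - 1)) != 0)
    return false;
  int log = __builtin_ctzll (h);

  if (lo == 0)
    {
      *bits = log;                    /* usat #log */
      *is_signed = false;
      return true;
    }
  if (lo == -hi - 1)
    {
      *bits = log + 1;                /* ssat #(log + 1) */
      *is_signed = true;
      return true;
    }
  return false;
}
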
13014 /* Return 1 if memory locations are adjacent. */
13015 int
13016 adjacent_mem_locations (rtx a, rtx b)
13017 {
13018 /* We don't guarantee to preserve the order of these memory refs. */
13019 if (volatile_refs_p (a) || volatile_refs_p (b))
13020 return 0;
13021
13022 if ((REG_P (XEXP (a, 0))
13023 || (GET_CODE (XEXP (a, 0)) == PLUS
13024 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13025 && (REG_P (XEXP (b, 0))
13026 || (GET_CODE (XEXP (b, 0)) == PLUS
13027 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13028 {
13029 HOST_WIDE_INT val0 = 0, val1 = 0;
13030 rtx reg0, reg1;
13031 int val_diff;
13032
13033 if (GET_CODE (XEXP (a, 0)) == PLUS)
13034 {
13035 reg0 = XEXP (XEXP (a, 0), 0);
13036 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13037 }
13038 else
13039 reg0 = XEXP (a, 0);
13040
13041 if (GET_CODE (XEXP (b, 0)) == PLUS)
13042 {
13043 reg1 = XEXP (XEXP (b, 0), 0);
13044 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13045 }
13046 else
13047 reg1 = XEXP (b, 0);
13048
13049 /* Don't accept any offset that will require multiple
13050 instructions to handle, since this would cause the
13051 arith_adjacentmem pattern to output an overlong sequence. */
13052 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13053 return 0;
13054
13055 /* Don't allow an eliminable register: register elimination can make
13056 the offset too large. */
13057 if (arm_eliminable_register (reg0))
13058 return 0;
13059
13060 val_diff = val1 - val0;
13061
13062 if (arm_ld_sched)
13063 {
13064 /* If the target has load delay slots, then there's no benefit
13065 to using an ldm instruction unless the offset is zero and
13066 we are optimizing for size. */
13067 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13068 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13069 && (val_diff == 4 || val_diff == -4));
13070 }
13071
13072 return ((REGNO (reg0) == REGNO (reg1))
13073 && (val_diff == 4 || val_diff == -4));
13074 }
13075
13076 return 0;
13077 }
13078
13079 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13080 for load operations, false for store operations. CONSECUTIVE is true
13081 if the register numbers in the operation must be consecutive in the register
13082 bank. RETURN_PC is true if the value is to be loaded into the PC.
13083 The pattern we are trying to match for load is:
13084 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13085 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13086 :
13087 :
13088 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13089 ]
13090 where
13091 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13092 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13093 3. If consecutive is TRUE, then for kth register being loaded,
13094 REGNO (R_dk) = REGNO (R_d0) + k.
13095 The pattern for store is similar. */
13096 bool
13097 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13098 bool consecutive, bool return_pc)
13099 {
13100 HOST_WIDE_INT count = XVECLEN (op, 0);
13101 rtx reg, mem, addr;
13102 unsigned regno;
13103 unsigned first_regno;
13104 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13105 rtx elt;
13106 bool addr_reg_in_reglist = false;
13107 bool update = false;
13108 int reg_increment;
13109 int offset_adj;
13110 int regs_per_val;
13111
13112 /* If not in SImode, then registers must be consecutive
13113 (e.g., VLDM instructions for DFmode). */
13114 gcc_assert ((mode == SImode) || consecutive);
13115 /* Setting return_pc for stores is illegal. */
13116 gcc_assert (!return_pc || load);
13117
13118 /* Set up the increments and the regs per val based on the mode. */
13119 reg_increment = GET_MODE_SIZE (mode);
13120 regs_per_val = reg_increment / 4;
13121 offset_adj = return_pc ? 1 : 0;
13122
13123 if (count <= 1
13124 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13125 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13126 return false;
13127
13128 /* Check if this is a write-back. */
13129 elt = XVECEXP (op, 0, offset_adj);
13130 if (GET_CODE (SET_SRC (elt)) == PLUS)
13131 {
13132 i++;
13133 base = 1;
13134 update = true;
13135
13136 /* The offset adjustment must be the number of registers being
13137 popped times the size of a single register. */
13138 if (!REG_P (SET_DEST (elt))
13139 || !REG_P (XEXP (SET_SRC (elt), 0))
13140 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13141 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13142 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13143 ((count - 1 - offset_adj) * reg_increment))
13144 return false;
13145 }
13146
13147 i = i + offset_adj;
13148 base = base + offset_adj;
13149 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13150 success depends on the type: VLDM can do just one reg,
13151 LDM must do at least two. */
13152 if ((count <= i) && (mode == SImode))
13153 return false;
13154
13155 elt = XVECEXP (op, 0, i - 1);
13156 if (GET_CODE (elt) != SET)
13157 return false;
13158
13159 if (load)
13160 {
13161 reg = SET_DEST (elt);
13162 mem = SET_SRC (elt);
13163 }
13164 else
13165 {
13166 reg = SET_SRC (elt);
13167 mem = SET_DEST (elt);
13168 }
13169
13170 if (!REG_P (reg) || !MEM_P (mem))
13171 return false;
13172
13173 regno = REGNO (reg);
13174 first_regno = regno;
13175 addr = XEXP (mem, 0);
13176 if (GET_CODE (addr) == PLUS)
13177 {
13178 if (!CONST_INT_P (XEXP (addr, 1)))
13179 return false;
13180
13181 offset = INTVAL (XEXP (addr, 1));
13182 addr = XEXP (addr, 0);
13183 }
13184
13185 if (!REG_P (addr))
13186 return false;
13187
13188 /* Don't allow SP to be loaded unless it is also the base register. It
13189 guarantees that SP is reset correctly when an LDM instruction
13190 is interrupted. Otherwise, we might end up with a corrupt stack. */
13191 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13192 return false;
13193
13194 for (; i < count; i++)
13195 {
13196 elt = XVECEXP (op, 0, i);
13197 if (GET_CODE (elt) != SET)
13198 return false;
13199
13200 if (load)
13201 {
13202 reg = SET_DEST (elt);
13203 mem = SET_SRC (elt);
13204 }
13205 else
13206 {
13207 reg = SET_SRC (elt);
13208 mem = SET_DEST (elt);
13209 }
13210
13211 if (!REG_P (reg)
13212 || GET_MODE (reg) != mode
13213 || REGNO (reg) <= regno
13214 || (consecutive
13215 && (REGNO (reg) !=
13216 (unsigned int) (first_regno + regs_per_val * (i - base))))
13217 /* Don't allow SP to be loaded unless it is also the base register. It
13218 guarantees that SP is reset correctly when an LDM instruction
13219 is interrupted. Otherwise, we might end up with a corrupt stack. */
13220 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13221 || !MEM_P (mem)
13222 || GET_MODE (mem) != mode
13223 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13224 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13225 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13226 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13227 offset + (i - base) * reg_increment))
13228 && (!REG_P (XEXP (mem, 0))
13229 || offset + (i - base) * reg_increment != 0)))
13230 return false;
13231
13232 regno = REGNO (reg);
13233 if (regno == REGNO (addr))
13234 addr_reg_in_reglist = true;
13235 }
13236
13237 if (load)
13238 {
13239 if (update && addr_reg_in_reglist)
13240 return false;
13241
13242 /* For Thumb-1, the address register is always modified - either by write-back
13243 or by an explicit load. If the pattern does not describe an update,
13244 then the address register must be in the list of loaded registers. */
13245 if (TARGET_THUMB1)
13246 return update || addr_reg_in_reglist;
13247 }
13248
13249 return true;
13250 }
13251
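/* Illustrative sketch, not part of GCC: the per-element register and
   offset constraints enforced by the loop above, applied to a plain array
   of (register number, offset) pairs.  Struct and function names are
   invented; reg_increment is 4 for SImode and 8 for DFmode, as above.  */

#include <stdbool.h>
#include <stddef.h>

struct example_ldm_elt { int regno; long offset; };

static bool
example_ldm_list_ok (const struct example_ldm_elt *elts, size_t n,
                     long reg_increment)
{
  for (size_t i = 1; i < n; i++)
    {
      /* Register numbers must be strictly ascending...  */
      if (elts[i].regno <= elts[i - 1].regno)
        return false;
      /* ...and each offset must step by exactly one register's worth.  */
      if (elts[i].offset != elts[i - 1].offset + reg_increment)
        return false;
    }
  return true;
}
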
13252 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13253 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13254 instruction. ADD_OFFSET is nonzero if the base address register needs
13255 to be modified with an add instruction before we can use it. */
13256
13257 static bool
13258 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13259 int nops, HOST_WIDE_INT add_offset)
13260 {
13261 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13262 if the offset isn't small enough. The reason 2 ldrs are faster
13263 is because these ARMs are able to do more than one cache access
13264 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13265 whilst the ARM8 has a double bandwidth cache. This means that
13266 these cores can do both an instruction fetch and a data fetch in
13267 a single cycle, so the trick of calculating the address into a
13268 scratch register (one of the result regs) and then doing a load
13269 multiple actually becomes slower (and no smaller in code size).
13270 That is the transformation
13271
13272 ldr rd1, [rbase + offset]
13273 ldr rd2, [rbase + offset + 4]
13274
13275 to
13276
13277 add rd1, rbase, offset
13278 ldmia rd1, {rd1, rd2}
13279
13280 produces worse code -- '3 cycles + any stalls on rd2' instead of
13281 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13282 access per cycle, the first sequence could never complete in less
13283 than 6 cycles, whereas the ldm sequence would only take 5 and
13284 would make better use of sequential accesses if not hitting the
13285 cache.
13286
13287 We cheat here and test 'arm_ld_sched' which we currently know to
13288 only be true for the ARM8, ARM9 and StrongARM. If this ever
13289 changes, then the test below needs to be reworked. */
13290 if (nops == 2 && arm_ld_sched && add_offset != 0)
13291 return false;
13292
13293 /* XScale has load-store double instructions, but they have stricter
13294 alignment requirements than load-store multiple, so we cannot
13295 use them.
13296
13297 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13298 the pipeline until completion.
13299
13300 NREGS CYCLES
13301 1 3
13302 2 4
13303 3 5
13304 4 6
13305
13306 An ldr instruction takes 1-3 cycles, but does not block the
13307 pipeline.
13308
13309 NREGS CYCLES
13310 1 1-3
13311 2 2-6
13312 3 3-9
13313 4 4-12
13314
13315 Best case ldr will always win. However, the more ldr instructions
13316 we issue, the less likely we are to be able to schedule them well.
13317 Using ldr instructions also increases code size.
13318
13319 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13320 for counts of 3 or 4 regs. */
13321 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13322 return false;
13323 return true;
13324 }
13325
13326 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13327 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13328 an array ORDER which describes the sequence to use when accessing the
13329 offsets that produces an ascending order. In this sequence, each
13330 offset must be larger by exactly 4 than the previous one. ORDER[0]
13331 must have been filled in with the lowest offset by the caller.
13332 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13333 we use to verify that ORDER produces an ascending order of registers.
13334 Return true if it was possible to construct such an order, false if
13335 not. */
13336
13337 static bool
13338 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13339 int *unsorted_regs)
13340 {
13341 int i;
13342 for (i = 1; i < nops; i++)
13343 {
13344 int j;
13345
13346 order[i] = order[i - 1];
13347 for (j = 0; j < nops; j++)
13348 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13349 {
13350 /* We must find exactly one offset that is higher than the
13351 previous one by 4. */
13352 if (order[i] != order[i - 1])
13353 return false;
13354 order[i] = j;
13355 }
13356 if (order[i] == order[i - 1])
13357 return false;
13358 /* The register numbers must be ascending. */
13359 if (unsorted_regs != NULL
13360 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13361 return false;
13362 }
13363 return true;
13364 }
13365
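/* Illustrative sketch, not part of GCC: a concrete instance of what
   compute_offset_order establishes.  With unsorted offsets {8, 0, 4, 12}
   and order[0] preset to 1 (the index of the lowest offset), the computed
   order is {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12.  The checker below,
   whose name is invented, verifies exactly that property.  */

static int
example_order_ascends_by_4 (const long *offsets, const int *order, int n)
{
  for (int i = 1; i < n; i++)
    if (offsets[order[i]] != offsets[order[i - 1]] + 4)
      return 0;
  return 1;
}
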
13366 /* Used to determine in a peephole whether a sequence of load
13367 instructions can be changed into a load-multiple instruction.
13368 NOPS is the number of separate load instructions we are examining. The
13369 first NOPS entries in OPERANDS are the destination registers, the
13370 next NOPS entries are memory operands. If this function is
13371 successful, *BASE is set to the common base register of the memory
13372 accesses; *LOAD_OFFSET is set to the first memory location's offset
13373 from that base register.
13374 REGS is an array filled in with the destination register numbers.
13375 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13376 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13377 the sequence of registers in REGS matches the loads from ascending memory
13378 locations, and the function verifies that the register numbers are
13379 themselves ascending. If CHECK_REGS is false, the register numbers
13380 are stored in the order they are found in the operands. */
13381 static int
13382 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13383 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13384 {
13385 int unsorted_regs[MAX_LDM_STM_OPS];
13386 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13387 int order[MAX_LDM_STM_OPS];
13388 rtx base_reg_rtx = NULL;
13389 int base_reg = -1;
13390 int i, ldm_case;
13391
13392 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13393 easily extended if required. */
13394 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13395
13396 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13397
13398 /* Loop over the operands and check that the memory references are
13399 suitable (i.e. immediate offsets from the same base register). At
13400 the same time, extract the target register, and the memory
13401 offsets. */
13402 for (i = 0; i < nops; i++)
13403 {
13404 rtx reg;
13405 rtx offset;
13406
13407 /* Convert a subreg of a mem into the mem itself. */
13408 if (GET_CODE (operands[nops + i]) == SUBREG)
13409 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13410
13411 gcc_assert (MEM_P (operands[nops + i]));
13412
13413 /* Don't reorder volatile memory references; it doesn't seem worth
13414 looking for the case where the order is ok anyway. */
13415 if (MEM_VOLATILE_P (operands[nops + i]))
13416 return 0;
13417
13418 offset = const0_rtx;
13419
13420 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13421 || (GET_CODE (reg) == SUBREG
13422 && REG_P (reg = SUBREG_REG (reg))))
13423 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13424 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13425 || (GET_CODE (reg) == SUBREG
13426 && REG_P (reg = SUBREG_REG (reg))))
13427 && (CONST_INT_P (offset
13428 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13429 {
13430 if (i == 0)
13431 {
13432 base_reg = REGNO (reg);
13433 base_reg_rtx = reg;
13434 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13435 return 0;
13436 }
13437 else if (base_reg != (int) REGNO (reg))
13438 /* Not addressed from the same base register. */
13439 return 0;
13440
13441 unsorted_regs[i] = (REG_P (operands[i])
13442 ? REGNO (operands[i])
13443 : REGNO (SUBREG_REG (operands[i])));
13444
13445 /* If it isn't an integer register, or if it overwrites the
13446 base register but isn't the last insn in the list, then
13447 we can't do this. */
13448 if (unsorted_regs[i] < 0
13449 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13450 || unsorted_regs[i] > 14
13451 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13452 return 0;
13453
13454 /* Don't allow SP to be loaded unless it is also the base
13455 register. It guarantees that SP is reset correctly when
13456 an LDM instruction is interrupted. Otherwise, we might
13457 end up with a corrupt stack. */
13458 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13459 return 0;
13460
13461 unsorted_offsets[i] = INTVAL (offset);
13462 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13463 order[0] = i;
13464 }
13465 else
13466 /* Not a suitable memory address. */
13467 return 0;
13468 }
13469
13470 /* All the useful information has now been extracted from the
13471 operands into unsorted_regs and unsorted_offsets; additionally,
13472 order[0] has been set to the lowest offset in the list. Sort
13473 the offsets into order, verifying that they are adjacent, and
13474 check that the register numbers are ascending. */
13475 if (!compute_offset_order (nops, unsorted_offsets, order,
13476 check_regs ? unsorted_regs : NULL))
13477 return 0;
13478
13479 if (saved_order)
13480 memcpy (saved_order, order, sizeof order);
13481
13482 if (base)
13483 {
13484 *base = base_reg;
13485
13486 for (i = 0; i < nops; i++)
13487 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13488
13489 *load_offset = unsorted_offsets[order[0]];
13490 }
13491
13492 if (TARGET_THUMB1
13493 && !peep2_reg_dead_p (nops, base_reg_rtx))
13494 return 0;
13495
13496 if (unsorted_offsets[order[0]] == 0)
13497 ldm_case = 1; /* ldmia */
13498 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13499 ldm_case = 2; /* ldmib */
13500 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13501 ldm_case = 3; /* ldmda */
13502 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13503 ldm_case = 4; /* ldmdb */
13504 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13505 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13506 ldm_case = 5;
13507 else
13508 return 0;
13509
13510 if (!multiple_operation_profitable_p (false, nops,
13511 ldm_case == 5
13512 ? unsorted_offsets[order[0]] : 0))
13513 return 0;
13514
13515 return ldm_case;
13516 }
13517
13518 /* Used to determine in a peephole whether a sequence of store instructions can
13519 be changed into a store-multiple instruction.
13520 NOPS is the number of separate store instructions we are examining.
13521 NOPS_TOTAL is the total number of instructions recognized by the peephole
13522 pattern.
13523 The first NOPS entries in OPERANDS are the source registers, the next
13524 NOPS entries are memory operands. If this function is successful, *BASE is
13525 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13526 to the first memory location's offset from that base register. REGS is an
13527 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13528 likewise filled with the corresponding rtx's.
13529 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13530 numbers to an ascending order of stores.
13531 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13532 from ascending memory locations, and the function verifies that the register
13533 numbers are themselves ascending. If CHECK_REGS is false, the register
13534 numbers are stored in the order they are found in the operands. */
13535 static int
13536 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13537 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13538 HOST_WIDE_INT *load_offset, bool check_regs)
13539 {
13540 int unsorted_regs[MAX_LDM_STM_OPS];
13541 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13542 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13543 int order[MAX_LDM_STM_OPS];
13544 int base_reg = -1;
13545 rtx base_reg_rtx = NULL;
13546 int i, stm_case;
13547
13548 /* Write back of base register is currently only supported for Thumb 1. */
13549 int base_writeback = TARGET_THUMB1;
13550
13551 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13552 easily extended if required. */
13553 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13554
13555 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13556
13557 /* Loop over the operands and check that the memory references are
13558 suitable (i.e. immediate offsets from the same base register). At
13559 the same time, extract the target register, and the memory
13560 offsets. */
13561 for (i = 0; i < nops; i++)
13562 {
13563 rtx reg;
13564 rtx offset;
13565
13566 /* Convert a subreg of a mem into the mem itself. */
13567 if (GET_CODE (operands[nops + i]) == SUBREG)
13568 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13569
13570 gcc_assert (MEM_P (operands[nops + i]));
13571
13572 /* Don't reorder volatile memory references; it doesn't seem worth
13573 looking for the case where the order is ok anyway. */
13574 if (MEM_VOLATILE_P (operands[nops + i]))
13575 return 0;
13576
13577 offset = const0_rtx;
13578
13579 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13580 || (GET_CODE (reg) == SUBREG
13581 && REG_P (reg = SUBREG_REG (reg))))
13582 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13583 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13584 || (GET_CODE (reg) == SUBREG
13585 && REG_P (reg = SUBREG_REG (reg))))
13586 && (CONST_INT_P (offset
13587 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13588 {
13589 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13590 ? operands[i] : SUBREG_REG (operands[i]));
13591 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13592
13593 if (i == 0)
13594 {
13595 base_reg = REGNO (reg);
13596 base_reg_rtx = reg;
13597 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13598 return 0;
13599 }
13600 else if (base_reg != (int) REGNO (reg))
13601 /* Not addressed from the same base register. */
13602 return 0;
13603
13604 /* If it isn't an integer register, then we can't do this. */
13605 if (unsorted_regs[i] < 0
13606 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13607 /* The effects are unpredictable if the base register is
13608 both updated and stored. */
13609 || (base_writeback && unsorted_regs[i] == base_reg)
13610 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13611 || unsorted_regs[i] > 14)
13612 return 0;
13613
13614 unsorted_offsets[i] = INTVAL (offset);
13615 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13616 order[0] = i;
13617 }
13618 else
13619 /* Not a suitable memory address. */
13620 return 0;
13621 }
13622
13623 /* All the useful information has now been extracted from the
13624 operands into unsorted_regs and unsorted_offsets; additionally,
13625 order[0] has been set to the lowest offset in the list. Sort
13626 the offsets into order, verifying that they are adjacent, and
13627 check that the register numbers are ascending. */
13628 if (!compute_offset_order (nops, unsorted_offsets, order,
13629 check_regs ? unsorted_regs : NULL))
13630 return 0;
13631
13632 if (saved_order)
13633 memcpy (saved_order, order, sizeof order);
13634
13635 if (base)
13636 {
13637 *base = base_reg;
13638
13639 for (i = 0; i < nops; i++)
13640 {
13641 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13642 if (reg_rtxs)
13643 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13644 }
13645
13646 *load_offset = unsorted_offsets[order[0]];
13647 }
13648
13649 if (TARGET_THUMB1
13650 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13651 return 0;
13652
13653 if (unsorted_offsets[order[0]] == 0)
13654 stm_case = 1; /* stmia */
13655 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13656 stm_case = 2; /* stmib */
13657 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13658 stm_case = 3; /* stmda */
13659 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13660 stm_case = 4; /* stmdb */
13661 else
13662 return 0;
13663
13664 if (!multiple_operation_profitable_p (false, nops, 0))
13665 return 0;
13666
13667 return stm_case;
13668 }
13669 \f
13670 /* Routines for use in generating RTL. */
13671
13672 /* Generate a load-multiple instruction. COUNT is the number of loads in
13673 the instruction; REGS and MEMS are arrays containing the operands.
13674 BASEREG is the base register to be used in addressing the memory operands.
13675 WBACK_OFFSET is nonzero if the instruction should update the base
13676 register. */
13677
13678 static rtx
13679 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13680 HOST_WIDE_INT wback_offset)
13681 {
13682 int i = 0, j;
13683 rtx result;
13684
13685 if (!multiple_operation_profitable_p (false, count, 0))
13686 {
13687 rtx seq;
13688
13689 start_sequence ();
13690
13691 for (i = 0; i < count; i++)
13692 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13693
13694 if (wback_offset != 0)
13695 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13696
13697 seq = get_insns ();
13698 end_sequence ();
13699
13700 return seq;
13701 }
13702
13703 result = gen_rtx_PARALLEL (VOIDmode,
13704 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13705 if (wback_offset != 0)
13706 {
13707 XVECEXP (result, 0, 0)
13708 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13709 i = 1;
13710 count++;
13711 }
13712
13713 for (j = 0; i < count; i++, j++)
13714 XVECEXP (result, 0, i)
13715 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13716
13717 return result;
13718 }
13719
13720 /* Generate a store-multiple instruction. COUNT is the number of stores in
13721 the instruction; REGS and MEMS are arrays containing the operands.
13722 BASEREG is the base register to be used in addressing the memory operands.
13723 WBACK_OFFSET is nonzero if the instruction should update the base
13724 register. */
13725
13726 static rtx
13727 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13728 HOST_WIDE_INT wback_offset)
13729 {
13730 int i = 0, j;
13731 rtx result;
13732
13733 if (GET_CODE (basereg) == PLUS)
13734 basereg = XEXP (basereg, 0);
13735
13736 if (!multiple_operation_profitable_p (false, count, 0))
13737 {
13738 rtx seq;
13739
13740 start_sequence ();
13741
13742 for (i = 0; i < count; i++)
13743 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13744
13745 if (wback_offset != 0)
13746 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13747
13748 seq = get_insns ();
13749 end_sequence ();
13750
13751 return seq;
13752 }
13753
13754 result = gen_rtx_PARALLEL (VOIDmode,
13755 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13756 if (wback_offset != 0)
13757 {
13758 XVECEXP (result, 0, 0)
13759 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13760 i = 1;
13761 count++;
13762 }
13763
13764 for (j = 0; i < count; i++, j++)
13765 XVECEXP (result, 0, i)
13766 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13767
13768 return result;
13769 }
13770
13771 /* Generate either a load-multiple or a store-multiple instruction. This
13772 function can be used in situations where we can start with a single MEM
13773 rtx and adjust its address upwards.
13774 COUNT is the number of operations in the instruction, not counting a
13775 possible update of the base register. REGS is an array containing the
13776 register operands.
13777 BASEREG is the base register to be used in addressing the memory operands,
13778 which are constructed from BASEMEM.
13779 WRITE_BACK specifies whether the generated instruction should include an
13780 update of the base register.
13781 OFFSETP is used to pass an offset to and from this function; this offset
13782 is not used when constructing the address (instead BASEMEM should have an
13783 appropriate offset in its address), it is used only for setting
13784 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13785
13786 static rtx
13787 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13788 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13789 {
13790 rtx mems[MAX_LDM_STM_OPS];
13791 HOST_WIDE_INT offset = *offsetp;
13792 int i;
13793
13794 gcc_assert (count <= MAX_LDM_STM_OPS);
13795
13796 if (GET_CODE (basereg) == PLUS)
13797 basereg = XEXP (basereg, 0);
13798
13799 for (i = 0; i < count; i++)
13800 {
13801 rtx addr = plus_constant (Pmode, basereg, i * 4);
13802 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13803 offset += 4;
13804 }
13805
13806 if (write_back)
13807 *offsetp = offset;
13808
13809 if (is_load)
13810 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13811 write_back ? 4 * count : 0);
13812 else
13813 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13814 write_back ? 4 * count : 0);
13815 }
13816
13817 rtx
13818 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13819 rtx basemem, HOST_WIDE_INT *offsetp)
13820 {
13821 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13822 offsetp);
13823 }
13824
13825 rtx
13826 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13827 rtx basemem, HOST_WIDE_INT *offsetp)
13828 {
13829 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13830 offsetp);
13831 }
13832
13833 /* Called from a peephole2 expander to turn a sequence of loads into an
13834 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13835 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13836 is true if we can reorder the registers because their subsequent uses are
13837 commutative.
13838 Returns true iff we could generate a new instruction. */
13839
13840 bool
13841 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13842 {
13843 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13844 rtx mems[MAX_LDM_STM_OPS];
13845 int i, j, base_reg;
13846 rtx base_reg_rtx;
13847 HOST_WIDE_INT offset;
13848 int write_back = FALSE;
13849 int ldm_case;
13850 rtx addr;
13851
13852 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13853 &base_reg, &offset, !sort_regs);
13854
13855 if (ldm_case == 0)
13856 return false;
13857
13858 if (sort_regs)
13859 for (i = 0; i < nops - 1; i++)
13860 for (j = i + 1; j < nops; j++)
13861 if (regs[i] > regs[j])
13862 {
13863 int t = regs[i];
13864 regs[i] = regs[j];
13865 regs[j] = t;
13866 }
13867 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13868
13869 if (TARGET_THUMB1)
13870 {
13871 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13872 gcc_assert (ldm_case == 1 || ldm_case == 5);
13873 write_back = TRUE;
13874 }
13875
13876 if (ldm_case == 5)
13877 {
13878 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13879 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13880 offset = 0;
13881 if (!TARGET_THUMB1)
13882 base_reg_rtx = newbase;
13883 }
13884
13885 for (i = 0; i < nops; i++)
13886 {
13887 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13888 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13889 SImode, addr, 0);
13890 }
13891 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13892 write_back ? offset + i * 4 : 0));
13893 return true;
13894 }
13895
13896 /* Called from a peephole2 expander to turn a sequence of stores into an
13897 STM instruction. OPERANDS are the operands found by the peephole matcher;
13898 NOPS indicates how many separate stores we are trying to combine.
13899 Returns true iff we could generate a new instruction. */
13900
13901 bool
13902 gen_stm_seq (rtx *operands, int nops)
13903 {
13904 int i;
13905 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13906 rtx mems[MAX_LDM_STM_OPS];
13907 int base_reg;
13908 rtx base_reg_rtx;
13909 HOST_WIDE_INT offset;
13910 int write_back = FALSE;
13911 int stm_case;
13912 rtx addr;
13913 bool base_reg_dies;
13914
13915 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13916 mem_order, &base_reg, &offset, true);
13917
13918 if (stm_case == 0)
13919 return false;
13920
13921 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13922
13923 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13924 if (TARGET_THUMB1)
13925 {
13926 gcc_assert (base_reg_dies);
13927 write_back = TRUE;
13928 }
13929
13930 if (stm_case == 5)
13931 {
13932 gcc_assert (base_reg_dies);
13933 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13934 offset = 0;
13935 }
13936
13937 addr = plus_constant (Pmode, base_reg_rtx, offset);
13938
13939 for (i = 0; i < nops; i++)
13940 {
13941 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13942 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13943 SImode, addr, 0);
13944 }
13945 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13946 write_back ? offset + i * 4 : 0));
13947 return true;
13948 }
13949
13950 /* Called from a peephole2 expander to turn a sequence of stores that are
13951 preceded by constant loads into an STM instruction. OPERANDS are the
13952 operands found by the peephole matcher; NOPS indicates how many
13953 separate stores we are trying to combine; there are 2 * NOPS
13954 instructions in the peephole.
13955 Returns true iff we could generate a new instruction. */
13956
13957 bool
13958 gen_const_stm_seq (rtx *operands, int nops)
13959 {
13960 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13961 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13962 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13963 rtx mems[MAX_LDM_STM_OPS];
13964 int base_reg;
13965 rtx base_reg_rtx;
13966 HOST_WIDE_INT offset;
13967 int write_back = FALSE;
13968 int stm_case;
13969 rtx addr;
13970 bool base_reg_dies;
13971 int i, j;
13972 HARD_REG_SET allocated;
13973
13974 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13975 mem_order, &base_reg, &offset, false);
13976
13977 if (stm_case == 0)
13978 return false;
13979
13980 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13981
13982 /* If the same register is used more than once, try to find a free
13983 register. */
13984 CLEAR_HARD_REG_SET (allocated);
13985 for (i = 0; i < nops; i++)
13986 {
13987 for (j = i + 1; j < nops; j++)
13988 if (regs[i] == regs[j])
13989 {
13990 rtx t = peep2_find_free_register (0, nops * 2,
13991 TARGET_THUMB1 ? "l" : "r",
13992 SImode, &allocated);
13993 if (t == NULL_RTX)
13994 return false;
13995 reg_rtxs[i] = t;
13996 regs[i] = REGNO (t);
13997 }
13998 }
13999
14000 /* Compute an ordering that maps the register numbers to an ascending
14001 sequence. */
14002 reg_order[0] = 0;
14003 for (i = 0; i < nops; i++)
14004 if (regs[i] < regs[reg_order[0]])
14005 reg_order[0] = i;
14006
14007 for (i = 1; i < nops; i++)
14008 {
14009 int this_order = reg_order[i - 1];
14010 for (j = 0; j < nops; j++)
14011 if (regs[j] > regs[reg_order[i - 1]]
14012 && (this_order == reg_order[i - 1]
14013 || regs[j] < regs[this_order]))
14014 this_order = j;
14015 reg_order[i] = this_order;
14016 }
14017
14018 /* Ensure that registers that must be live after the instruction end
14019 up with the correct value. */
14020 for (i = 0; i < nops; i++)
14021 {
14022 int this_order = reg_order[i];
14023 if ((this_order != mem_order[i]
14024 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14025 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14026 return false;
14027 }
14028
14029 /* Load the constants. */
14030 for (i = 0; i < nops; i++)
14031 {
14032 rtx op = operands[2 * nops + mem_order[i]];
14033 sorted_regs[i] = regs[reg_order[i]];
14034 emit_move_insn (reg_rtxs[reg_order[i]], op);
14035 }
14036
14037 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14038
14039 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14040 if (TARGET_THUMB1)
14041 {
14042 gcc_assert (base_reg_dies);
14043 write_back = TRUE;
14044 }
14045
14046 if (stm_case == 5)
14047 {
14048 gcc_assert (base_reg_dies);
14049 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14050 offset = 0;
14051 }
14052
14053 addr = plus_constant (Pmode, base_reg_rtx, offset);
14054
14055 for (i = 0; i < nops; i++)
14056 {
14057 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14058 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14059 SImode, addr, 0);
14060 }
14061 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14062 write_back ? offset + i * 4 : 0));
14063 return true;
14064 }
14065
14066 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14067 unaligned copies on processors which support unaligned semantics for those
14068 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14069 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14070 An interleave factor of 1 (the minimum) will perform no interleaving.
14071 Load/store multiple are used for aligned addresses where possible. */
14072
14073 static void
14074 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14075 HOST_WIDE_INT length,
14076 unsigned int interleave_factor)
14077 {
14078 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14079 int *regnos = XALLOCAVEC (int, interleave_factor);
14080 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14081 HOST_WIDE_INT i, j;
14082 HOST_WIDE_INT remaining = length, words;
14083 rtx halfword_tmp = NULL, byte_tmp = NULL;
14084 rtx dst, src;
14085 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14086 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14087 HOST_WIDE_INT srcoffset, dstoffset;
14088 HOST_WIDE_INT src_autoinc, dst_autoinc;
14089 rtx mem, addr;
14090
14091 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14092
14093 /* Use hard registers if we have aligned source or destination so we can use
14094 load/store multiple with contiguous registers. */
14095 if (dst_aligned || src_aligned)
14096 for (i = 0; i < interleave_factor; i++)
14097 regs[i] = gen_rtx_REG (SImode, i);
14098 else
14099 for (i = 0; i < interleave_factor; i++)
14100 regs[i] = gen_reg_rtx (SImode);
14101
14102 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14103 src = copy_addr_to_reg (XEXP (srcbase, 0));
14104
14105 srcoffset = dstoffset = 0;
14106
14107 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14108 For copying the last bytes we want to subtract this offset again. */
14109 src_autoinc = dst_autoinc = 0;
14110
14111 for (i = 0; i < interleave_factor; i++)
14112 regnos[i] = i;
14113
14114 /* Copy BLOCK_SIZE_BYTES chunks. */
14115
14116 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14117 {
14118 /* Load words. */
14119 if (src_aligned && interleave_factor > 1)
14120 {
14121 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14122 TRUE, srcbase, &srcoffset));
14123 src_autoinc += UNITS_PER_WORD * interleave_factor;
14124 }
14125 else
14126 {
14127 for (j = 0; j < interleave_factor; j++)
14128 {
14129 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14130 - src_autoinc));
14131 mem = adjust_automodify_address (srcbase, SImode, addr,
14132 srcoffset + j * UNITS_PER_WORD);
14133 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14134 }
14135 srcoffset += block_size_bytes;
14136 }
14137
14138 /* Store words. */
14139 if (dst_aligned && interleave_factor > 1)
14140 {
14141 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14142 TRUE, dstbase, &dstoffset));
14143 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14144 }
14145 else
14146 {
14147 for (j = 0; j < interleave_factor; j++)
14148 {
14149 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14150 - dst_autoinc));
14151 mem = adjust_automodify_address (dstbase, SImode, addr,
14152 dstoffset + j * UNITS_PER_WORD);
14153 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14154 }
14155 dstoffset += block_size_bytes;
14156 }
14157
14158 remaining -= block_size_bytes;
14159 }
14160
14161 /* Copy any whole words left (note these aren't interleaved with any
14162 subsequent halfword/byte load/stores in the interests of simplicity). */
14163
14164 words = remaining / UNITS_PER_WORD;
14165
14166 gcc_assert (words < interleave_factor);
14167
14168 if (src_aligned && words > 1)
14169 {
14170 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14171 &srcoffset));
14172 src_autoinc += UNITS_PER_WORD * words;
14173 }
14174 else
14175 {
14176 for (j = 0; j < words; j++)
14177 {
14178 addr = plus_constant (Pmode, src,
14179 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14180 mem = adjust_automodify_address (srcbase, SImode, addr,
14181 srcoffset + j * UNITS_PER_WORD);
14182 if (src_aligned)
14183 emit_move_insn (regs[j], mem);
14184 else
14185 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14186 }
14187 srcoffset += words * UNITS_PER_WORD;
14188 }
14189
14190 if (dst_aligned && words > 1)
14191 {
14192 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14193 &dstoffset));
14194 dst_autoinc += words * UNITS_PER_WORD;
14195 }
14196 else
14197 {
14198 for (j = 0; j < words; j++)
14199 {
14200 addr = plus_constant (Pmode, dst,
14201 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14202 mem = adjust_automodify_address (dstbase, SImode, addr,
14203 dstoffset + j * UNITS_PER_WORD);
14204 if (dst_aligned)
14205 emit_move_insn (mem, regs[j]);
14206 else
14207 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14208 }
14209 dstoffset += words * UNITS_PER_WORD;
14210 }
14211
14212 remaining -= words * UNITS_PER_WORD;
14213
14214 gcc_assert (remaining < 4);
14215
14216 /* Copy a halfword if necessary. */
14217
14218 if (remaining >= 2)
14219 {
14220 halfword_tmp = gen_reg_rtx (SImode);
14221
14222 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14223 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14224 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14225
14226 /* Either write out immediately, or delay until we've loaded the last
14227 byte, depending on interleave factor. */
14228 if (interleave_factor == 1)
14229 {
14230 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14231 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14232 emit_insn (gen_unaligned_storehi (mem,
14233 gen_lowpart (HImode, halfword_tmp)));
14234 halfword_tmp = NULL;
14235 dstoffset += 2;
14236 }
14237
14238 remaining -= 2;
14239 srcoffset += 2;
14240 }
14241
14242 gcc_assert (remaining < 2);
14243
14244 /* Copy last byte. */
14245
14246 if ((remaining & 1) != 0)
14247 {
14248 byte_tmp = gen_reg_rtx (SImode);
14249
14250 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14251 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14252 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14253
14254 if (interleave_factor == 1)
14255 {
14256 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14257 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14258 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14259 byte_tmp = NULL;
14260 dstoffset++;
14261 }
14262
14263 remaining--;
14264 srcoffset++;
14265 }
14266
14267 /* Store last halfword if we haven't done so already. */
14268
14269 if (halfword_tmp)
14270 {
14271 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14272 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14273 emit_insn (gen_unaligned_storehi (mem,
14274 gen_lowpart (HImode, halfword_tmp)));
14275 dstoffset += 2;
14276 }
14277
14278 /* Likewise for last byte. */
14279
14280 if (byte_tmp)
14281 {
14282 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14283 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14284 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14285 dstoffset++;
14286 }
14287
14288 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14289 }
14290
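/* As an illustration only (register numbers below are placeholders, not
   chosen by this function): for LENGTH == 7, INTERLEAVE_FACTOR == 1 and
   both buffers unaligned, the code emitted above amounts to roughly

	ldr	r3, [r1]	@ one unaligned word
	str	r3, [r0]
	ldrh	r3, [r1, #4]	@ trailing halfword
	strh	r3, [r0, #4]
	ldrb	r3, [r1, #6]	@ trailing byte
	strb	r3, [r0, #6]

   with the word and halfword accesses issued through the
   unaligned_loadsi/storesi and unaligned_loadhiu/storehi patterns.  */
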
14291 /* From mips_adjust_block_mem:
14292
14293 Helper function for doing a loop-based block operation on memory
14294 reference MEM. Each iteration of the loop will operate on LENGTH
14295 bytes of MEM.
14296
14297 Create a new base register for use within the loop and point it to
14298 the start of MEM. Create a new memory reference that uses this
14299 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14300
14301 static void
14302 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14303 rtx *loop_mem)
14304 {
14305 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14306
14307 /* Although the new mem does not refer to a known location,
14308 it does keep up to LENGTH bytes of alignment. */
14309 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14310 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14311 }
14312
14313 /* From mips_block_move_loop:
14314
14315 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14316 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14317 the memory regions do not overlap. */
14318
14319 static void
14320 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14321 unsigned int interleave_factor,
14322 HOST_WIDE_INT bytes_per_iter)
14323 {
14324 rtx src_reg, dest_reg, final_src, test;
14325 HOST_WIDE_INT leftover;
14326
14327 leftover = length % bytes_per_iter;
14328 length -= leftover;
14329
14330 /* Create registers and memory references for use within the loop. */
14331 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14332 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14333
14334 /* Calculate the value that SRC_REG should have after the last iteration of
14335 the loop. */
14336 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14337 0, 0, OPTAB_WIDEN);
14338
14339 /* Emit the start of the loop. */
14340 rtx_code_label *label = gen_label_rtx ();
14341 emit_label (label);
14342
14343 /* Emit the loop body. */
14344 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14345 interleave_factor);
14346
14347 /* Move on to the next block. */
14348 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14349 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14350
14351 /* Emit the loop condition. */
14352 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14353 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14354
14355 /* Mop up any left-over bytes. */
14356 if (leftover)
14357 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14358 }
14359
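/* Illustrative pseudo-code only (the real output is RTL, and the loop may
   later be unrolled): for LENGTH == 38, INTERLEAVE_FACTOR == 2 and
   BYTES_PER_ITER == 8, the sequence built above is conceptually

	leftover  = 38 % 8 = 6
	final_src = src_reg + 32
     L:	<copy 8 bytes via arm_block_move_unaligned_straight>
	src_reg  += 8
	dest_reg += 8
	if (src_reg != final_src) goto L
	<copy the remaining 6 bytes the same way>  */
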
14360 /* Emit a block move when either the source or destination is unaligned (not
14361 aligned to a four-byte boundary). This may need further tuning depending on
14362 core type, optimize_size setting, etc. */
14363
14364 static int
14365 arm_movmemqi_unaligned (rtx *operands)
14366 {
14367 HOST_WIDE_INT length = INTVAL (operands[2]);
14368
14369 if (optimize_size)
14370 {
14371 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14372 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14373 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14374 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14375 or dst_aligned though: allow more interleaving in those cases since the
14376 resulting code can be smaller. */
14377 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14378 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14379
14380 if (length > 12)
14381 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14382 interleave_factor, bytes_per_iter);
14383 else
14384 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14385 interleave_factor);
14386 }
14387 else
14388 {
14389 /* Note that the loop created by arm_block_move_unaligned_loop may be
14390 subject to loop unrolling, which makes tuning this condition a little
14391 redundant. */
14392 if (length > 32)
14393 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14394 else
14395 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14396 }
14397
14398 return 1;
14399 }
14400
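/* A rough summary of the dispatch above, for illustration:

     optimize_size, at least one side word-aligned:
	length <= 12  ->  straight copy, two words interleaved
	length  > 12  ->  loop copying 8 bytes per iteration
     optimize_size, both sides unaligned:
	length <= 12  ->  straight copy, one word at a time
	length  > 12  ->  loop copying 4 bytes per iteration
     optimizing for speed:
	length <= 32  ->  straight copy, four words interleaved
	length  > 32  ->  loop copying 16 bytes per iteration  */
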
14401 int
14402 arm_gen_movmemqi (rtx *operands)
14403 {
14404 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14405 HOST_WIDE_INT srcoffset, dstoffset;
14406 rtx src, dst, srcbase, dstbase;
14407 rtx part_bytes_reg = NULL;
14408 rtx mem;
14409
14410 if (!CONST_INT_P (operands[2])
14411 || !CONST_INT_P (operands[3])
14412 || INTVAL (operands[2]) > 64)
14413 return 0;
14414
14415 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14416 return arm_movmemqi_unaligned (operands);
14417
14418 if (INTVAL (operands[3]) & 3)
14419 return 0;
14420
14421 dstbase = operands[0];
14422 srcbase = operands[1];
14423
14424 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14425 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14426
14427 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14428 out_words_to_go = INTVAL (operands[2]) / 4;
14429 last_bytes = INTVAL (operands[2]) & 3;
14430 dstoffset = srcoffset = 0;
14431
14432 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14433 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14434
14435 while (in_words_to_go >= 2)
14436 {
14437 if (in_words_to_go > 4)
14438 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14439 TRUE, srcbase, &srcoffset));
14440 else
14441 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14442 src, FALSE, srcbase,
14443 &srcoffset));
14444
14445 if (out_words_to_go)
14446 {
14447 if (out_words_to_go > 4)
14448 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14449 TRUE, dstbase, &dstoffset));
14450 else if (out_words_to_go != 1)
14451 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14452 out_words_to_go, dst,
14453 (last_bytes == 0
14454 ? FALSE : TRUE),
14455 dstbase, &dstoffset));
14456 else
14457 {
14458 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14459 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14460 if (last_bytes != 0)
14461 {
14462 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14463 dstoffset += 4;
14464 }
14465 }
14466 }
14467
14468 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14469 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14470 }
14471
14472 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14473 if (out_words_to_go)
14474 {
14475 rtx sreg;
14476
14477 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14478 sreg = copy_to_reg (mem);
14479
14480 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14481 emit_move_insn (mem, sreg);
14482 in_words_to_go--;
14483
14484 gcc_assert (!in_words_to_go); /* Sanity check */
14485 }
14486
14487 if (in_words_to_go)
14488 {
14489 gcc_assert (in_words_to_go > 0);
14490
14491 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14492 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14493 }
14494
14495 gcc_assert (!last_bytes || part_bytes_reg);
14496
14497 if (BYTES_BIG_ENDIAN && last_bytes)
14498 {
14499 rtx tmp = gen_reg_rtx (SImode);
14500
14501 /* The bytes we want are in the top end of the word. */
14502 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14503 GEN_INT (8 * (4 - last_bytes))));
14504 part_bytes_reg = tmp;
14505
14506 while (last_bytes)
14507 {
14508 mem = adjust_automodify_address (dstbase, QImode,
14509 plus_constant (Pmode, dst,
14510 last_bytes - 1),
14511 dstoffset + last_bytes - 1);
14512 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14513
14514 if (--last_bytes)
14515 {
14516 tmp = gen_reg_rtx (SImode);
14517 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14518 part_bytes_reg = tmp;
14519 }
14520 }
14521
14522 }
14523 else
14524 {
14525 if (last_bytes > 1)
14526 {
14527 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14528 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14529 last_bytes -= 2;
14530 if (last_bytes)
14531 {
14532 rtx tmp = gen_reg_rtx (SImode);
14533 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14534 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14535 part_bytes_reg = tmp;
14536 dstoffset += 2;
14537 }
14538 }
14539
14540 if (last_bytes)
14541 {
14542 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14543 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14544 }
14545 }
14546
14547 return 1;
14548 }
14549
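/* For example (illustrative only; the address registers below are
   placeholders chosen by the register allocator): a 20-byte copy with
   word-aligned operands becomes approximately

	ldmia	r4!, {r0, r1, r2, r3}	@ first 16 bytes, with write-back
	stmia	r5!, {r0, r1, r2, r3}
	ldr	r0, [r4]		@ final word
	str	r0, [r5]

   where r4 and r5 hold copies of the source and destination addresses.  */
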
14550 /* Helper for gen_movmem_ldrd_strd. Return a copy of the memory rtx MEM
14551 with its address advanced by the size of its mode. */
14552 inline static rtx
14553 next_consecutive_mem (rtx mem)
14554 {
14555 machine_mode mode = GET_MODE (mem);
14556 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14557 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14558
14559 return adjust_automodify_address (mem, mode, addr, offset);
14560 }
14561
14562 /* Copy using LDRD/STRD instructions whenever possible.
14563 Returns true upon success. */
14564 bool
14565 gen_movmem_ldrd_strd (rtx *operands)
14566 {
14567 unsigned HOST_WIDE_INT len;
14568 HOST_WIDE_INT align;
14569 rtx src, dst, base;
14570 rtx reg0;
14571 bool src_aligned, dst_aligned;
14572 bool src_volatile, dst_volatile;
14573
14574 gcc_assert (CONST_INT_P (operands[2]));
14575 gcc_assert (CONST_INT_P (operands[3]));
14576
14577 len = UINTVAL (operands[2]);
14578 if (len > 64)
14579 return false;
14580
14581 /* Maximum alignment we can assume for both src and dst buffers. */
14582 align = INTVAL (operands[3]);
14583
14584 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14585 return false;
14586
14587 /* Place src and dst addresses in registers
14588 and update the corresponding mem rtx. */
14589 dst = operands[0];
14590 dst_volatile = MEM_VOLATILE_P (dst);
14591 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14592 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14593 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14594
14595 src = operands[1];
14596 src_volatile = MEM_VOLATILE_P (src);
14597 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14598 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14599 src = adjust_automodify_address (src, VOIDmode, base, 0);
14600
14601 if (!unaligned_access && !(src_aligned && dst_aligned))
14602 return false;
14603
14604 if (src_volatile || dst_volatile)
14605 return false;
14606
14607 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14608 if (!(dst_aligned || src_aligned))
14609 return arm_gen_movmemqi (operands);
14610
14611 /* If either src or dst is unaligned we'll be accessing it as pairs
14612 of unaligned SImode accesses. Otherwise we can generate DImode
14613 ldrd/strd instructions. */
14614 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14615 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14616
14617 while (len >= 8)
14618 {
14619 len -= 8;
14620 reg0 = gen_reg_rtx (DImode);
14621 rtx low_reg = NULL_RTX;
14622 rtx hi_reg = NULL_RTX;
14623
14624 if (!src_aligned || !dst_aligned)
14625 {
14626 low_reg = gen_lowpart (SImode, reg0);
14627 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14628 }
14629 if (src_aligned)
14630 emit_move_insn (reg0, src);
14631 else
14632 {
14633 emit_insn (gen_unaligned_loadsi (low_reg, src));
14634 src = next_consecutive_mem (src);
14635 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14636 }
14637
14638 if (dst_aligned)
14639 emit_move_insn (dst, reg0);
14640 else
14641 {
14642 emit_insn (gen_unaligned_storesi (dst, low_reg));
14643 dst = next_consecutive_mem (dst);
14644 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14645 }
14646
14647 src = next_consecutive_mem (src);
14648 dst = next_consecutive_mem (dst);
14649 }
14650
14651 gcc_assert (len < 8);
14652 if (len >= 4)
14653 {
14654 /* More than a word but less than a double-word to copy. Copy a word. */
14655 reg0 = gen_reg_rtx (SImode);
14656 src = adjust_address (src, SImode, 0);
14657 dst = adjust_address (dst, SImode, 0);
14658 if (src_aligned)
14659 emit_move_insn (reg0, src);
14660 else
14661 emit_insn (gen_unaligned_loadsi (reg0, src));
14662
14663 if (dst_aligned)
14664 emit_move_insn (dst, reg0);
14665 else
14666 emit_insn (gen_unaligned_storesi (dst, reg0));
14667
14668 src = next_consecutive_mem (src);
14669 dst = next_consecutive_mem (dst);
14670 len -= 4;
14671 }
14672
14673 if (len == 0)
14674 return true;
14675
14676 /* Copy the remaining bytes. */
14677 if (len >= 2)
14678 {
14679 dst = adjust_address (dst, HImode, 0);
14680 src = adjust_address (src, HImode, 0);
14681 reg0 = gen_reg_rtx (SImode);
14682 if (src_aligned)
14683 emit_insn (gen_zero_extendhisi2 (reg0, src));
14684 else
14685 emit_insn (gen_unaligned_loadhiu (reg0, src));
14686
14687 if (dst_aligned)
14688 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14689 else
14690 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14691
14692 src = next_consecutive_mem (src);
14693 dst = next_consecutive_mem (dst);
14694 if (len == 2)
14695 return true;
14696 }
14697
14698 dst = adjust_address (dst, QImode, 0);
14699 src = adjust_address (src, QImode, 0);
14700 reg0 = gen_reg_rtx (QImode);
14701 emit_move_insn (reg0, src);
14702 emit_move_insn (dst, reg0);
14703 return true;
14704 }
14705
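/* For example (an illustrative sketch; register choices are up to the
   allocator): a 12-byte copy with both operands word-aligned becomes
   roughly

	ldrd	r2, r3, [r0]		@ first 8 bytes as one DImode move
	strd	r2, r3, [r1]
	ldr	r2, [r0, #8]		@ trailing word
	str	r2, [r1, #8]

   If one side is unaligned, each DImode move is instead emitted as a
   pair of unaligned SImode loads or stores.  */
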
14706 /* Select a dominance comparison mode if possible for a test of the general
14707 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14708 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14709 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14710 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14711 In all cases OP will be either EQ or NE, but we don't need to know which
14712 here. If we are unable to support a dominance comparison we return
14713 CC mode. This will then fail to match for the RTL expressions that
14714 generate this call. */
14715 machine_mode
14716 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14717 {
14718 enum rtx_code cond1, cond2;
14719 int swapped = 0;
14720
14721 /* Currently we will probably get the wrong result if the individual
14722 comparisons are not simple. This also ensures that it is safe to
14723 reverse a comparison if necessary. */
14724 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14725 != CCmode)
14726 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14727 != CCmode))
14728 return CCmode;
14729
14730 /* The if_then_else variant of this tests the second condition if the
14731 first passes, but is true if the first fails. Reverse the first
14732 condition to get a true "inclusive-or" expression. */
14733 if (cond_or == DOM_CC_NX_OR_Y)
14734 cond1 = reverse_condition (cond1);
14735
14736 /* If the comparisons are not equal, and one doesn't dominate the other,
14737 then we can't do this. */
14738 if (cond1 != cond2
14739 && !comparison_dominates_p (cond1, cond2)
14740 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14741 return CCmode;
14742
14743 if (swapped)
14744 std::swap (cond1, cond2);
14745
14746 switch (cond1)
14747 {
14748 case EQ:
14749 if (cond_or == DOM_CC_X_AND_Y)
14750 return CC_DEQmode;
14751
14752 switch (cond2)
14753 {
14754 case EQ: return CC_DEQmode;
14755 case LE: return CC_DLEmode;
14756 case LEU: return CC_DLEUmode;
14757 case GE: return CC_DGEmode;
14758 case GEU: return CC_DGEUmode;
14759 default: gcc_unreachable ();
14760 }
14761
14762 case LT:
14763 if (cond_or == DOM_CC_X_AND_Y)
14764 return CC_DLTmode;
14765
14766 switch (cond2)
14767 {
14768 case LT:
14769 return CC_DLTmode;
14770 case LE:
14771 return CC_DLEmode;
14772 case NE:
14773 return CC_DNEmode;
14774 default:
14775 gcc_unreachable ();
14776 }
14777
14778 case GT:
14779 if (cond_or == DOM_CC_X_AND_Y)
14780 return CC_DGTmode;
14781
14782 switch (cond2)
14783 {
14784 case GT:
14785 return CC_DGTmode;
14786 case GE:
14787 return CC_DGEmode;
14788 case NE:
14789 return CC_DNEmode;
14790 default:
14791 gcc_unreachable ();
14792 }
14793
14794 case LTU:
14795 if (cond_or == DOM_CC_X_AND_Y)
14796 return CC_DLTUmode;
14797
14798 switch (cond2)
14799 {
14800 case LTU:
14801 return CC_DLTUmode;
14802 case LEU:
14803 return CC_DLEUmode;
14804 case NE:
14805 return CC_DNEmode;
14806 default:
14807 gcc_unreachable ();
14808 }
14809
14810 case GTU:
14811 if (cond_or == DOM_CC_X_AND_Y)
14812 return CC_DGTUmode;
14813
14814 switch (cond2)
14815 {
14816 case GTU:
14817 return CC_DGTUmode;
14818 case GEU:
14819 return CC_DGEUmode;
14820 case NE:
14821 return CC_DNEmode;
14822 default:
14823 gcc_unreachable ();
14824 }
14825
14826 /* The remaining cases only occur when both comparisons are the
14827 same. */
14828 case NE:
14829 gcc_assert (cond1 == cond2);
14830 return CC_DNEmode;
14831
14832 case LE:
14833 gcc_assert (cond1 == cond2);
14834 return CC_DLEmode;
14835
14836 case GE:
14837 gcc_assert (cond1 == cond2);
14838 return CC_DGEmode;
14839
14840 case LEU:
14841 gcc_assert (cond1 == cond2);
14842 return CC_DLEUmode;
14843
14844 case GEU:
14845 gcc_assert (cond1 == cond2);
14846 return CC_DGEUmode;
14847
14848 default:
14849 gcc_unreachable ();
14850 }
14851 }
14852
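/* For instance, for a condition such as (a == 0 && b == 0), combine
   presents (and (eq a 0) (eq b 0)); with COND_OR == DOM_CC_X_AND_Y both
   inner codes are EQ, so CC_DEQmode is returned and the test can later
   be emitted as a conditional-compare sequence along the lines of

	cmp	ra, #0
	cmpeq	rb, #0
	beq	<both zero>

   (an illustrative sketch only; the actual instructions come from the
   corresponding patterns in arm.md).  */
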
14853 machine_mode
14854 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14855 {
14856 /* All floating point compares return CCFP if it is an equality
14857 comparison, and CCFPE otherwise. */
14858 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14859 {
14860 switch (op)
14861 {
14862 case EQ:
14863 case NE:
14864 case UNORDERED:
14865 case ORDERED:
14866 case UNLT:
14867 case UNLE:
14868 case UNGT:
14869 case UNGE:
14870 case UNEQ:
14871 case LTGT:
14872 return CCFPmode;
14873
14874 case LT:
14875 case LE:
14876 case GT:
14877 case GE:
14878 return CCFPEmode;
14879
14880 default:
14881 gcc_unreachable ();
14882 }
14883 }
14884
14885 /* A compare with a shifted operand. Because of canonicalization, the
14886 comparison will have to be swapped when we emit the assembler. */
14887 if (GET_MODE (y) == SImode
14888 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14889 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14890 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14891 || GET_CODE (x) == ROTATERT))
14892 return CC_SWPmode;
14893
14894 /* This operation is performed swapped, but since we only rely on the Z
14895 flag we don't need an additional mode. */
14896 if (GET_MODE (y) == SImode
14897 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14898 && GET_CODE (x) == NEG
14899 && (op == EQ || op == NE))
14900 return CC_Zmode;
14901
14902 /* This is a special case that is used by combine to allow a
14903 comparison of a shifted byte load to be split into a zero-extend
14904 followed by a comparison of the shifted integer (only valid for
14905 equalities and unsigned inequalities). */
14906 if (GET_MODE (x) == SImode
14907 && GET_CODE (x) == ASHIFT
14908 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14909 && GET_CODE (XEXP (x, 0)) == SUBREG
14910 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14911 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14912 && (op == EQ || op == NE
14913 || op == GEU || op == GTU || op == LTU || op == LEU)
14914 && CONST_INT_P (y))
14915 return CC_Zmode;
14916
14917 /* A construct for a conditional compare, if the false arm contains
14918 0, then both conditions must be true, otherwise either condition
14919 must be true. Not all conditions are possible, so CCmode is
14920 returned if it can't be done. */
14921 if (GET_CODE (x) == IF_THEN_ELSE
14922 && (XEXP (x, 2) == const0_rtx
14923 || XEXP (x, 2) == const1_rtx)
14924 && COMPARISON_P (XEXP (x, 0))
14925 && COMPARISON_P (XEXP (x, 1)))
14926 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14927 INTVAL (XEXP (x, 2)));
14928
14929 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14930 if (GET_CODE (x) == AND
14931 && (op == EQ || op == NE)
14932 && COMPARISON_P (XEXP (x, 0))
14933 && COMPARISON_P (XEXP (x, 1)))
14934 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14935 DOM_CC_X_AND_Y);
14936
14937 if (GET_CODE (x) == IOR
14938 && (op == EQ || op == NE)
14939 && COMPARISON_P (XEXP (x, 0))
14940 && COMPARISON_P (XEXP (x, 1)))
14941 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14942 DOM_CC_X_OR_Y);
14943
14944 /* An operation (on Thumb) where we want to test for a single bit.
14945 This is done by shifting that bit up into the top bit of a
14946 scratch register; we can then branch on the sign bit. */
14947 if (TARGET_THUMB1
14948 && GET_MODE (x) == SImode
14949 && (op == EQ || op == NE)
14950 && GET_CODE (x) == ZERO_EXTRACT
14951 && XEXP (x, 1) == const1_rtx)
14952 return CC_Nmode;
14953
14954 /* For an operation that sets the condition codes as a side-effect, the
14955 V flag is not set correctly, so we can only use comparisons where
14956 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14957 instead.) */
14958 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14959 if (GET_MODE (x) == SImode
14960 && y == const0_rtx
14961 && (op == EQ || op == NE || op == LT || op == GE)
14962 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14963 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14964 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14965 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14966 || GET_CODE (x) == LSHIFTRT
14967 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14968 || GET_CODE (x) == ROTATERT
14969 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14970 return CC_NOOVmode;
14971
14972 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14973 return CC_Zmode;
14974
14975 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14976 && GET_CODE (x) == PLUS
14977 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14978 return CC_Cmode;
14979
14980 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14981 {
14982 switch (op)
14983 {
14984 case EQ:
14985 case NE:
14986 /* A DImode comparison against zero can be implemented by
14987 or'ing the two halves together. */
14988 if (y == const0_rtx)
14989 return CC_Zmode;
14990
14991 /* We can do an equality test in three Thumb instructions. */
14992 if (!TARGET_32BIT)
14993 return CC_Zmode;
14994
14995 /* FALLTHROUGH */
14996
14997 case LTU:
14998 case LEU:
14999 case GTU:
15000 case GEU:
15001 /* DImode unsigned comparisons can be implemented by cmp +
15002 cmpeq without a scratch register. Not worth doing in
15003 Thumb-2. */
15004 if (TARGET_32BIT)
15005 return CC_CZmode;
15006
15007 /* FALLTHROUGH */
15008
15009 case LT:
15010 case LE:
15011 case GT:
15012 case GE:
15013 /* DImode signed and unsigned comparisons can be implemented
15014 by cmp + sbcs with a scratch register, but that does not
15015 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15016 gcc_assert (op != EQ && op != NE);
15017 return CC_NCVmode;
15018
15019 default:
15020 gcc_unreachable ();
15021 }
15022 }
15023
15024 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15025 return GET_MODE (x);
15026
15027 return CCmode;
15028 }
15029
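/* As one example of the cases above: for a comparison such as
   (gt (ashift x (const_int 2)) y), canonicalization has placed the
   shifted operand first, so CC_SWPmode is returned and the compare is
   later output with its operands swapped, roughly

	cmp	ry, rx, lsl #2

   with the tested condition swapped as well (GT is tested as LT).
   Illustrative only; register names are placeholders.  */
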
15030 /* X and Y are two things to compare using CODE. Emit the compare insn and
15031 return the rtx for the CC register in the proper mode. SCRATCH may be
15032 used as an SImode scratch register by DImode comparisons that need one. */
15033 rtx
15034 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15035 {
15036 machine_mode mode;
15037 rtx cc_reg;
15038 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15039
15040 /* We might have X as a constant, Y as a register because of the predicates
15041 used for cmpdi. If so, force X to a register here. */
15042 if (dimode_comparison && !REG_P (x))
15043 x = force_reg (DImode, x);
15044
15045 mode = SELECT_CC_MODE (code, x, y);
15046 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15047
15048 if (dimode_comparison
15049 && mode != CC_CZmode)
15050 {
15051 rtx clobber, set;
15052
15053 /* To compare two non-zero values for equality, XOR them and
15054 then compare against zero. Not used for ARM mode; there
15055 CC_CZmode is cheaper. */
15056 if (mode == CC_Zmode && y != const0_rtx)
15057 {
15058 gcc_assert (!reload_completed);
15059 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15060 y = const0_rtx;
15061 }
15062
15063 /* A scratch register is required. */
15064 if (reload_completed)
15065 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15066 else
15067 scratch = gen_rtx_SCRATCH (SImode);
15068
15069 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15070 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15071 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15072 }
15073 else
15074 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15075
15076 return cc_reg;
15077 }
15078
15079 /* Generate a sequence of insns that will generate the correct return
15080 address mask depending on the physical architecture that the program
15081 is running on. */
15082 rtx
15083 arm_gen_return_addr_mask (void)
15084 {
15085 rtx reg = gen_reg_rtx (Pmode);
15086
15087 emit_insn (gen_return_addr_mask (reg));
15088 return reg;
15089 }
15090
15091 void
15092 arm_reload_in_hi (rtx *operands)
15093 {
15094 rtx ref = operands[1];
15095 rtx base, scratch;
15096 HOST_WIDE_INT offset = 0;
15097
15098 if (GET_CODE (ref) == SUBREG)
15099 {
15100 offset = SUBREG_BYTE (ref);
15101 ref = SUBREG_REG (ref);
15102 }
15103
15104 if (REG_P (ref))
15105 {
15106 /* We have a pseudo which has been spilt onto the stack; there
15107 are two cases here: the first where there is a simple
15108 stack-slot replacement and a second where the stack-slot is
15109 out of range, or is used as a subreg. */
15110 if (reg_equiv_mem (REGNO (ref)))
15111 {
15112 ref = reg_equiv_mem (REGNO (ref));
15113 base = find_replacement (&XEXP (ref, 0));
15114 }
15115 else
15116 /* The slot is out of range, or was dressed up in a SUBREG. */
15117 base = reg_equiv_address (REGNO (ref));
15118
15119 /* PR 62554: If there is no equivalent memory location then just move
15120 the value as an SImode register move. This happens when the target
15121 architecture variant does not have an HImode register move. */
15122 if (base == NULL)
15123 {
15124 gcc_assert (REG_P (operands[0]));
15125 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15126 gen_rtx_SUBREG (SImode, ref, 0)));
15127 return;
15128 }
15129 }
15130 else
15131 base = find_replacement (&XEXP (ref, 0));
15132
15133 /* Handle the case where the address is too complex to be offset by 1. */
15134 if (GET_CODE (base) == MINUS
15135 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15136 {
15137 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15138
15139 emit_set_insn (base_plus, base);
15140 base = base_plus;
15141 }
15142 else if (GET_CODE (base) == PLUS)
15143 {
15144 /* The addend must be CONST_INT, or we would have dealt with it above. */
15145 HOST_WIDE_INT hi, lo;
15146
15147 offset += INTVAL (XEXP (base, 1));
15148 base = XEXP (base, 0);
15149
15150 /* Rework the address into a legal sequence of insns. */
15151 /* Valid range for lo is -4095 -> 4095 */
15152 lo = (offset >= 0
15153 ? (offset & 0xfff)
15154 : -((-offset) & 0xfff));
15155
15156 /* Corner case, if lo is the max offset then we would be out of range
15157 once we have added the additional 1 below, so bump the msb into the
15158 pre-loading insn(s). */
15159 if (lo == 4095)
15160 lo &= 0x7ff;
15161
15162 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15163 ^ (HOST_WIDE_INT) 0x80000000)
15164 - (HOST_WIDE_INT) 0x80000000);
15165
15166 gcc_assert (hi + lo == offset);
15167
15168 if (hi != 0)
15169 {
15170 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15171
15172 /* Get the base address; addsi3 knows how to handle constants
15173 that require more than one insn. */
15174 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15175 base = base_plus;
15176 offset = lo;
15177 }
15178 }
15179
15180 /* Operands[2] may overlap operands[0] (though it won't overlap
15181 operands[1]), that's why we asked for a DImode reg -- so we can
15182 use the bit that does not overlap. */
15183 if (REGNO (operands[2]) == REGNO (operands[0]))
15184 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15185 else
15186 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15187
15188 emit_insn (gen_zero_extendqisi2 (scratch,
15189 gen_rtx_MEM (QImode,
15190 plus_constant (Pmode, base,
15191 offset))));
15192 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15193 gen_rtx_MEM (QImode,
15194 plus_constant (Pmode, base,
15195 offset + 1))));
15196 if (!BYTES_BIG_ENDIAN)
15197 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15198 gen_rtx_IOR (SImode,
15199 gen_rtx_ASHIFT
15200 (SImode,
15201 gen_rtx_SUBREG (SImode, operands[0], 0),
15202 GEN_INT (8)),
15203 scratch));
15204 else
15205 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15206 gen_rtx_IOR (SImode,
15207 gen_rtx_ASHIFT (SImode, scratch,
15208 GEN_INT (8)),
15209 gen_rtx_SUBREG (SImode, operands[0], 0)));
15210 }
15211
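/* A worked example of the offset splitting above, for illustration:
   with OFFSET == 4100, lo = 4100 & 0xfff = 4 and hi = 4096, so the code
   first forms base_plus = base + 4096 and then issues the two byte loads
   at offsets 4 and 5.  For the corner case OFFSET == 4095, lo is reduced
   to 4095 & 0x7ff = 2047 and hi becomes 2048, which keeps the second
   byte load (at lo + 1 = 2048) inside the +/-4095 range.  */
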
15212 /* Handle storing a half-word to memory during reload by synthesizing as two
15213 byte stores. Take care not to clobber the input values until after we
15214 have moved them somewhere safe. This code assumes that if the DImode
15215 scratch in operands[2] overlaps either the input value or output address
15216 in some way, then that value must die in this insn (we absolutely need
15217 two scratch registers for some corner cases). */
15218 void
15219 arm_reload_out_hi (rtx *operands)
15220 {
15221 rtx ref = operands[0];
15222 rtx outval = operands[1];
15223 rtx base, scratch;
15224 HOST_WIDE_INT offset = 0;
15225
15226 if (GET_CODE (ref) == SUBREG)
15227 {
15228 offset = SUBREG_BYTE (ref);
15229 ref = SUBREG_REG (ref);
15230 }
15231
15232 if (REG_P (ref))
15233 {
15234 /* We have a pseudo which has been spilt onto the stack; there
15235 are two cases here: the first where there is a simple
15236 stack-slot replacement and a second where the stack-slot is
15237 out of range, or is used as a subreg. */
15238 if (reg_equiv_mem (REGNO (ref)))
15239 {
15240 ref = reg_equiv_mem (REGNO (ref));
15241 base = find_replacement (&XEXP (ref, 0));
15242 }
15243 else
15244 /* The slot is out of range, or was dressed up in a SUBREG. */
15245 base = reg_equiv_address (REGNO (ref));
15246
15247 /* PR 62254: If there is no equivalent memory location then just move
15248 the value as an SImode register move. This happens when the target
15249 architecture variant does not have an HImode register move. */
15250 if (base == NULL)
15251 {
15252 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15253
15254 if (REG_P (outval))
15255 {
15256 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15257 gen_rtx_SUBREG (SImode, outval, 0)));
15258 }
15259 else /* SUBREG_P (outval) */
15260 {
15261 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15262 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15263 SUBREG_REG (outval)));
15264 else
15265 /* FIXME: Handle other cases ? */
15266 gcc_unreachable ();
15267 }
15268 return;
15269 }
15270 }
15271 else
15272 base = find_replacement (&XEXP (ref, 0));
15273
15274 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15275
15276 /* Handle the case where the address is too complex to be offset by 1. */
15277 if (GET_CODE (base) == MINUS
15278 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15279 {
15280 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15281
15282 /* Be careful not to destroy OUTVAL. */
15283 if (reg_overlap_mentioned_p (base_plus, outval))
15284 {
15285 /* Updating base_plus might destroy outval, see if we can
15286 swap the scratch and base_plus. */
15287 if (!reg_overlap_mentioned_p (scratch, outval))
15288 std::swap (scratch, base_plus);
15289 else
15290 {
15291 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15292
15293 /* Be conservative and copy OUTVAL into the scratch now;
15294 this should only be necessary if outval is a subreg
15295 of something larger than a word. */
15296 /* XXX Might this clobber base? I can't see how it can,
15297 since scratch is known to overlap with OUTVAL, and
15298 must be wider than a word. */
15299 emit_insn (gen_movhi (scratch_hi, outval));
15300 outval = scratch_hi;
15301 }
15302 }
15303
15304 emit_set_insn (base_plus, base);
15305 base = base_plus;
15306 }
15307 else if (GET_CODE (base) == PLUS)
15308 {
15309 /* The addend must be CONST_INT, or we would have dealt with it above. */
15310 HOST_WIDE_INT hi, lo;
15311
15312 offset += INTVAL (XEXP (base, 1));
15313 base = XEXP (base, 0);
15314
15315 /* Rework the address into a legal sequence of insns. */
15316 /* Valid range for lo is -4095 -> 4095 */
15317 lo = (offset >= 0
15318 ? (offset & 0xfff)
15319 : -((-offset) & 0xfff));
15320
15321 /* Corner case, if lo is the max offset then we would be out of range
15322 once we have added the additional 1 below, so bump the msb into the
15323 pre-loading insn(s). */
15324 if (lo == 4095)
15325 lo &= 0x7ff;
15326
15327 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15328 ^ (HOST_WIDE_INT) 0x80000000)
15329 - (HOST_WIDE_INT) 0x80000000);
15330
15331 gcc_assert (hi + lo == offset);
15332
15333 if (hi != 0)
15334 {
15335 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15336
15337 /* Be careful not to destroy OUTVAL. */
15338 if (reg_overlap_mentioned_p (base_plus, outval))
15339 {
15340 /* Updating base_plus might destroy outval, see if we
15341 can swap the scratch and base_plus. */
15342 if (!reg_overlap_mentioned_p (scratch, outval))
15343 std::swap (scratch, base_plus);
15344 else
15345 {
15346 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15347
15348 /* Be conservative and copy outval into scratch now;
15349 this should only be necessary if outval is a
15350 subreg of something larger than a word. */
15351 /* XXX Might this clobber base? I can't see how it
15352 can, since scratch is known to overlap with
15353 outval. */
15354 emit_insn (gen_movhi (scratch_hi, outval));
15355 outval = scratch_hi;
15356 }
15357 }
15358
15359 /* Get the base address; addsi3 knows how to handle constants
15360 that require more than one insn. */
15361 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15362 base = base_plus;
15363 offset = lo;
15364 }
15365 }
15366
15367 if (BYTES_BIG_ENDIAN)
15368 {
15369 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15370 plus_constant (Pmode, base,
15371 offset + 1)),
15372 gen_lowpart (QImode, outval)));
15373 emit_insn (gen_lshrsi3 (scratch,
15374 gen_rtx_SUBREG (SImode, outval, 0),
15375 GEN_INT (8)));
15376 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15377 offset)),
15378 gen_lowpart (QImode, scratch)));
15379 }
15380 else
15381 {
15382 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15383 offset)),
15384 gen_lowpart (QImode, outval)));
15385 emit_insn (gen_lshrsi3 (scratch,
15386 gen_rtx_SUBREG (SImode, outval, 0),
15387 GEN_INT (8)));
15388 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15389 plus_constant (Pmode, base,
15390 offset + 1)),
15391 gen_lowpart (QImode, scratch)));
15392 }
15393 }
15394
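/* For illustration, on a little-endian target the sequence emitted above
   is roughly

	strb	rv, [rb, #off]		@ low byte of OUTVAL
	lsr	rs, rv, #8
	strb	rs, [rb, #off+1]	@ next byte

   with the two strb offsets swapped on big-endian targets so that the
   most significant byte lands at the lower address.  (Register names
   are placeholders for the operands and the scratch.)  */
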
15395 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15396 (padded to the size of a word) should be passed in a register. */
15397
15398 static bool
15399 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15400 {
15401 if (TARGET_AAPCS_BASED)
15402 return must_pass_in_stack_var_size (mode, type);
15403 else
15404 return must_pass_in_stack_var_size_or_pad (mode, type);
15405 }
15406
15407
15408 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15409 byte of a stack argument has useful data. For legacy APCS ABIs we use
15410 the default. For AAPCS based ABIs small aggregate types are placed
15411 in the lowest memory address. */
15412
15413 static pad_direction
15414 arm_function_arg_padding (machine_mode mode, const_tree type)
15415 {
15416 if (!TARGET_AAPCS_BASED)
15417 return default_function_arg_padding (mode, type);
15418
15419 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15420 return PAD_DOWNWARD;
15421
15422 return PAD_UPWARD;
15423 }
15424
15425
15426 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15427 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15428 register has useful data, and return the opposite if the most
15429 significant byte does. */
15430
15431 bool
15432 arm_pad_reg_upward (machine_mode mode,
15433 tree type, int first ATTRIBUTE_UNUSED)
15434 {
15435 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15436 {
15437 /* For AAPCS, small aggregates, small fixed-point types,
15438 and small complex types are always padded upwards. */
15439 if (type)
15440 {
15441 if ((AGGREGATE_TYPE_P (type)
15442 || TREE_CODE (type) == COMPLEX_TYPE
15443 || FIXED_POINT_TYPE_P (type))
15444 && int_size_in_bytes (type) <= 4)
15445 return true;
15446 }
15447 else
15448 {
15449 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15450 && GET_MODE_SIZE (mode) <= 4)
15451 return true;
15452 }
15453 }
15454
15455 /* Otherwise, use default padding. */
15456 return !BYTES_BIG_ENDIAN;
15457 }
15458
15459 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15460 assuming that the address in the base register is word aligned. */
15461 bool
15462 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15463 {
15464 HOST_WIDE_INT max_offset;
15465
15466 /* Offset must be a multiple of 4 in Thumb mode. */
15467 if (TARGET_THUMB2 && ((offset & 3) != 0))
15468 return false;
15469
15470 if (TARGET_THUMB2)
15471 max_offset = 1020;
15472 else if (TARGET_ARM)
15473 max_offset = 255;
15474 else
15475 return false;
15476
15477 return ((offset <= max_offset) && (offset >= -max_offset));
15478 }
15479
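/* For example: in Thumb-2 state offsets such as -1020, 0 and 1020 are
   accepted, while 1022 (not a multiple of four) and 1024 (out of range)
   are rejected; in ARM state any offset in the range [-255, 255] is
   accepted, so 256 is rejected.  */
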
15480 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15481 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15482 Assumes that the address in the base register RN is word aligned. Pattern
15483 guarantees that both memory accesses use the same base register,
15484 the offsets are constants within the range, and the gap between the offsets is 4.
15485 If reload is complete then check that the registers are legal. WBACK indicates whether
15486 address is updated. LOAD indicates whether memory access is load or store. */
15487 bool
15488 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15489 bool wback, bool load)
15490 {
15491 unsigned int t, t2, n;
15492
15493 if (!reload_completed)
15494 return true;
15495
15496 if (!offset_ok_for_ldrd_strd (offset))
15497 return false;
15498
15499 t = REGNO (rt);
15500 t2 = REGNO (rt2);
15501 n = REGNO (rn);
15502
15503 if ((TARGET_THUMB2)
15504 && ((wback && (n == t || n == t2))
15505 || (t == SP_REGNUM)
15506 || (t == PC_REGNUM)
15507 || (t2 == SP_REGNUM)
15508 || (t2 == PC_REGNUM)
15509 || (!load && (n == PC_REGNUM))
15510 || (load && (t == t2))
15511 /* Triggers Cortex-M3 LDRD errata. */
15512 || (!wback && load && fix_cm3_ldrd && (n == t))))
15513 return false;
15514
15515 if ((TARGET_ARM)
15516 && ((wback && (n == t || n == t2))
15517 || (t2 == PC_REGNUM)
15518 || (t % 2 != 0) /* First destination register is not even. */
15519 || (t2 != t + 1)
15520 /* PC can be used as base register (for offset addressing only),
15521 but it is deprecated. */
15522 || (n == PC_REGNUM)))
15523 return false;
15524
15525 return true;
15526 }
15527
15528 /* Return true if a 64-bit access with alignment ALIGN and with a
15529 constant offset OFFSET from the base pointer is permitted on this
15530 architecture. */
15531 static bool
15532 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15533 {
15534 return (unaligned_access
15535 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15536 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15537 }
15538
15539 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15540 operand MEM's address contains an immediate offset from the base
15541 register and has no side effects, in which case it sets BASE,
15542 OFFSET and ALIGN accordingly. */
15543 static bool
15544 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15545 {
15546 rtx addr;
15547
15548 gcc_assert (base != NULL && offset != NULL);
15549
15550 /* TODO: Handle more general memory operand patterns, such as
15551 PRE_DEC and PRE_INC. */
15552
15553 if (side_effects_p (mem))
15554 return false;
15555
15556 /* Can't deal with subregs. */
15557 if (GET_CODE (mem) == SUBREG)
15558 return false;
15559
15560 gcc_assert (MEM_P (mem));
15561
15562 *offset = const0_rtx;
15563 *align = MEM_ALIGN (mem);
15564
15565 addr = XEXP (mem, 0);
15566
15567 /* If addr isn't valid for DImode, then we can't handle it. */
15568 if (!arm_legitimate_address_p (DImode, addr,
15569 reload_in_progress || reload_completed))
15570 return false;
15571
15572 if (REG_P (addr))
15573 {
15574 *base = addr;
15575 return true;
15576 }
15577 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15578 {
15579 *base = XEXP (addr, 0);
15580 *offset = XEXP (addr, 1);
15581 return (REG_P (*base) && CONST_INT_P (*offset));
15582 }
15583
15584 return false;
15585 }
15586
15587 /* Called from a peephole2 to replace two word-size accesses with a
15588 single LDRD/STRD instruction. Returns true iff we can generate a
15589 new instruction sequence. That is, both accesses use the same base
15590 register and the gap between constant offsets is 4. This function
15591 may reorder its operands to match ldrd/strd RTL templates.
15592 OPERANDS are the operands found by the peephole matcher;
15593 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15594 corresponding memory operands. LOAD indicates whether the access
15595 is load or store. CONST_STORE indicates a store of constant
15596 integer values held in OPERANDS[4,5] and assumes that the pattern
15597 is four insns long, for the purpose of checking dead registers.
15598 COMMUTE indicates that register operands may be reordered. */
15599 bool
15600 gen_operands_ldrd_strd (rtx *operands, bool load,
15601 bool const_store, bool commute)
15602 {
15603 int nops = 2;
15604 HOST_WIDE_INT offsets[2], offset, align[2];
15605 rtx base = NULL_RTX;
15606 rtx cur_base, cur_offset, tmp;
15607 int i, gap;
15608 HARD_REG_SET regset;
15609
15610 gcc_assert (!const_store || !load);
15611 /* Check that the memory references are immediate offsets from the
15612 same base register. Extract the base register, the destination
15613 registers, and the corresponding memory offsets. */
15614 for (i = 0; i < nops; i++)
15615 {
15616 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15617 &align[i]))
15618 return false;
15619
15620 if (i == 0)
15621 base = cur_base;
15622 else if (REGNO (base) != REGNO (cur_base))
15623 return false;
15624
15625 offsets[i] = INTVAL (cur_offset);
15626 if (GET_CODE (operands[i]) == SUBREG)
15627 {
15628 tmp = SUBREG_REG (operands[i]);
15629 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15630 operands[i] = tmp;
15631 }
15632 }
15633
15634 /* Make sure there is no dependency between the individual loads. */
15635 if (load && REGNO (operands[0]) == REGNO (base))
15636 return false; /* RAW */
15637
15638 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15639 return false; /* WAW */
15640
15641 /* If the same input register is used in both stores
15642 when storing different constants, try to find a free register.
15643 For example, the code
15644 mov r0, 0
15645 str r0, [r2]
15646 mov r0, 1
15647 str r0, [r2, #4]
15648 can be transformed into
15649 mov r1, 0
15650 mov r0, 1
15651 strd r1, r0, [r2]
15652 in Thumb mode assuming that r1 is free.
15653 For ARM mode do the same but only if the starting register
15654 can be made to be even. */
15655 if (const_store
15656 && REGNO (operands[0]) == REGNO (operands[1])
15657 && INTVAL (operands[4]) != INTVAL (operands[5]))
15658 {
15659 if (TARGET_THUMB2)
15660 {
15661 CLEAR_HARD_REG_SET (regset);
15662 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15663 if (tmp == NULL_RTX)
15664 return false;
15665
15666 /* Use the new register in the first load to ensure that
15667 if the original input register is not dead after peephole,
15668 then it will have the correct constant value. */
15669 operands[0] = tmp;
15670 }
15671 else if (TARGET_ARM)
15672 {
15673 int regno = REGNO (operands[0]);
15674 if (!peep2_reg_dead_p (4, operands[0]))
15675 {
15676 /* When the input register is even and is not dead after the
15677 pattern, it has to hold the second constant but we cannot
15678 form a legal STRD in ARM mode with this register as the second
15679 register. */
15680 if (regno % 2 == 0)
15681 return false;
15682
15683 /* Is regno-1 free? */
15684 SET_HARD_REG_SET (regset);
15685 CLEAR_HARD_REG_BIT(regset, regno - 1);
15686 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15687 if (tmp == NULL_RTX)
15688 return false;
15689
15690 operands[0] = tmp;
15691 }
15692 else
15693 {
15694 /* Find a DImode register. */
15695 CLEAR_HARD_REG_SET (regset);
15696 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15697 if (tmp != NULL_RTX)
15698 {
15699 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15700 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15701 }
15702 else
15703 {
15704 /* Can we use the input register to form a DI register? */
15705 SET_HARD_REG_SET (regset);
15706 CLEAR_HARD_REG_BIT(regset,
15707 regno % 2 == 0 ? regno + 1 : regno - 1);
15708 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15709 if (tmp == NULL_RTX)
15710 return false;
15711 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15712 }
15713 }
15714
15715 gcc_assert (operands[0] != NULL_RTX);
15716 gcc_assert (operands[1] != NULL_RTX);
15717 gcc_assert (REGNO (operands[0]) % 2 == 0);
15718 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15719 }
15720 }
15721
15722 /* Make sure the instructions are ordered with lower memory access first. */
15723 if (offsets[0] > offsets[1])
15724 {
15725 gap = offsets[0] - offsets[1];
15726 offset = offsets[1];
15727
15728 /* Swap the instructions such that lower memory is accessed first. */
15729 std::swap (operands[0], operands[1]);
15730 std::swap (operands[2], operands[3]);
15731 std::swap (align[0], align[1]);
15732 if (const_store)
15733 std::swap (operands[4], operands[5]);
15734 }
15735 else
15736 {
15737 gap = offsets[1] - offsets[0];
15738 offset = offsets[0];
15739 }
15740
15741 /* Make sure accesses are to consecutive memory locations. */
15742 if (gap != 4)
15743 return false;
15744
15745 if (!align_ok_ldrd_strd (align[0], offset))
15746 return false;
15747
15748 /* Make sure we generate legal instructions. */
15749 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15750 false, load))
15751 return true;
15752
15753 /* In Thumb state, where registers are almost unconstrained, there
15754 is little hope of fixing it. */
15755 if (TARGET_THUMB2)
15756 return false;
15757
15758 if (load && commute)
15759 {
15760 /* Try reordering registers. */
15761 std::swap (operands[0], operands[1]);
15762 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15763 false, load))
15764 return true;
15765 }
15766
15767 if (const_store)
15768 {
15769 /* If input registers are dead after this pattern, they can be
15770 reordered or replaced by other registers that are free in the
15771 current pattern. */
15772 if (!peep2_reg_dead_p (4, operands[0])
15773 || !peep2_reg_dead_p (4, operands[1]))
15774 return false;
15775
15776 /* Try to reorder the input registers. */
15777 /* For example, the code
15778 mov r0, 0
15779 mov r1, 1
15780 str r1, [r2]
15781 str r0, [r2, #4]
15782 can be transformed into
15783 mov r1, 0
15784 mov r0, 1
15785 strd r0, [r2]
15786 */
15787 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15788 false, false))
15789 {
15790 std::swap (operands[0], operands[1]);
15791 return true;
15792 }
15793
15794 /* Try to find a free DI register. */
15795 CLEAR_HARD_REG_SET (regset);
15796 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15797 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15798 while (true)
15799 {
15800 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15801 if (tmp == NULL_RTX)
15802 return false;
15803
15804 /* DREG must be an even-numbered register in DImode.
15805 Split it into SI registers. */
15806 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15807 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15808 gcc_assert (operands[0] != NULL_RTX);
15809 gcc_assert (operands[1] != NULL_RTX);
15810 gcc_assert (REGNO (operands[0]) % 2 == 0);
15811 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15812
15813 return (operands_ok_ldrd_strd (operands[0], operands[1],
15814 base, offset,
15815 false, load));
15816 }
15817 }
15818
15819 return false;
15820 }
15821
15822
15823
15824 \f
15825 /* Print a symbolic form of X to the debug file, F. */
15826 static void
15827 arm_print_value (FILE *f, rtx x)
15828 {
15829 switch (GET_CODE (x))
15830 {
15831 case CONST_INT:
15832 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15833 return;
15834
15835 case CONST_DOUBLE:
15836 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15837 return;
15838
15839 case CONST_VECTOR:
15840 {
15841 int i;
15842
15843 fprintf (f, "<");
15844 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15845 {
15846 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15847 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15848 fputc (',', f);
15849 }
15850 fprintf (f, ">");
15851 }
15852 return;
15853
15854 case CONST_STRING:
15855 fprintf (f, "\"%s\"", XSTR (x, 0));
15856 return;
15857
15858 case SYMBOL_REF:
15859 fprintf (f, "`%s'", XSTR (x, 0));
15860 return;
15861
15862 case LABEL_REF:
15863 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15864 return;
15865
15866 case CONST:
15867 arm_print_value (f, XEXP (x, 0));
15868 return;
15869
15870 case PLUS:
15871 arm_print_value (f, XEXP (x, 0));
15872 fprintf (f, "+");
15873 arm_print_value (f, XEXP (x, 1));
15874 return;
15875
15876 case PC:
15877 fprintf (f, "pc");
15878 return;
15879
15880 default:
15881 fprintf (f, "????");
15882 return;
15883 }
15884 }
15885 \f
15886 /* Routines for manipulation of the constant pool. */
15887
15888 /* Arm instructions cannot load a large constant directly into a
15889 register; they have to come from a pc relative load. The constant
15890 must therefore be placed in the addressable range of the pc
15891 relative load. Depending on the precise pc relative load
15892 instruction the range is somewhere between 256 bytes and 4k. This
15893 means that we often have to dump a constant inside a function, and
15894 generate code to branch around it.
15895
15896 It is important to minimize this, since the branches will slow
15897 things down and make the code larger.
15898
15899 Normally we can hide the table after an existing unconditional
15900 branch so that there is no interruption of the flow, but in the
15901 worst case the code looks like this:
15902
15903 ldr rn, L1
15904 ...
15905 b L2
15906 align
15907 L1: .long value
15908 L2:
15909 ...
15910
15911 ldr rn, L3
15912 ...
15913 b L4
15914 align
15915 L3: .long value
15916 L4:
15917 ...
15918
15919 We fix this by performing a scan after scheduling, which notices
15920 which instructions need to have their operands fetched from the
15921 constant table and builds the table.
15922
15923 The algorithm starts by building a table of all the constants that
15924 need fixing up and all the natural barriers in the function (places
15925 where a constant table can be dropped without breaking the flow).
15926 For each fixup we note how far the pc-relative replacement will be
15927 able to reach and the offset of the instruction into the function.
15928
15929 Having built the table we then group the fixes together to form
15930 tables that are as large as possible (subject to addressing
15931 constraints) and emit each table of constants after the last
15932 barrier that is within range of all the instructions in the group.
15933 If a group does not contain a barrier, then we forcibly create one
15934 by inserting a jump instruction into the flow. Once the table has
15935 been inserted, the insns are then modified to reference the
15936 relevant entry in the pool.
15937
15938 Possible enhancements to the algorithm (not implemented) are:
15939
15940 1) For some processors and object formats, there may be benefit in
15941 aligning the pools to the start of cache lines; this alignment
15942 would need to be taken into account when calculating addressability
15943 of a pool. */
15944
15945 /* These typedefs are located at the start of this file, so that
15946 they can be used in the prototypes there. This comment is to
15947 remind readers of that fact so that the following structures
15948 can be understood more easily.
15949
15950 typedef struct minipool_node Mnode;
15951 typedef struct minipool_fixup Mfix; */
15952
15953 struct minipool_node
15954 {
15955 /* Doubly linked chain of entries. */
15956 Mnode * next;
15957 Mnode * prev;
15958 /* The maximum offset into the code at which this entry can be placed.
15959 While pushing fixes for forward references, all entries are sorted in
15960 order of increasing max_address. */
15961 HOST_WIDE_INT max_address;
15962 /* Similarly for an entry inserted for a backwards ref. */
15963 HOST_WIDE_INT min_address;
15964 /* The number of fixes referencing this entry. This can become zero
15965 if we "unpush" an entry. In this case we ignore the entry when we
15966 come to emit the code. */
15967 int refcount;
15968 /* The offset from the start of the minipool. */
15969 HOST_WIDE_INT offset;
15970 /* The value in the table. */
15971 rtx value;
15972 /* The mode of value. */
15973 machine_mode mode;
15974 /* The size of the value. With iWMMXt enabled,
15975 sizes > 4 also imply an alignment of 8 bytes. */
15976 int fix_size;
15977 };
15978
15979 struct minipool_fixup
15980 {
15981 Mfix * next;
15982 rtx_insn * insn;
15983 HOST_WIDE_INT address;
15984 rtx * loc;
15985 machine_mode mode;
15986 int fix_size;
15987 rtx value;
15988 Mnode * minipool;
15989 HOST_WIDE_INT forwards;
15990 HOST_WIDE_INT backwards;
15991 };
15992
15993 /* Fixes less than a word need padding out to a word boundary. */
15994 #define MINIPOOL_FIX_SIZE(mode) \
15995 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
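15996
/* Illustrative examples of the rounding above (assuming the usual ARM
   mode sizes; this is documentation only, not generated code):

     MINIPOOL_FIX_SIZE (QImode) == 4    1-byte value padded to a word
     MINIPOOL_FIX_SIZE (HImode) == 4    2-byte value padded to a word
     MINIPOOL_FIX_SIZE (SImode) == 4    already a word
     MINIPOOL_FIX_SIZE (DImode) == 8    8-byte value kept as is  */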
15996
15997 static Mnode * minipool_vector_head;
15998 static Mnode * minipool_vector_tail;
15999 static rtx_code_label *minipool_vector_label;
16000 static int minipool_pad;
16001
16002 /* The linked list of all minipool fixes required for this function. */
16003 Mfix * minipool_fix_head;
16004 Mfix * minipool_fix_tail;
16005 /* The fix entry for the current minipool, once it has been placed. */
16006 Mfix * minipool_barrier;
16007
16008 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16009 #define JUMP_TABLES_IN_TEXT_SECTION 0
16010 #endif
16011
16012 static HOST_WIDE_INT
16013 get_jump_table_size (rtx_jump_table_data *insn)
16014 {
16015 /* ADDR_VECs only take room if read-only data goes into the text
16016 section. */
16017 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16018 {
16019 rtx body = PATTERN (insn);
16020 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16021 HOST_WIDE_INT size;
16022 HOST_WIDE_INT modesize;
16023
16024 modesize = GET_MODE_SIZE (GET_MODE (body));
16025 size = modesize * XVECLEN (body, elt);
16026 switch (modesize)
16027 {
16028 case 1:
16029 /* Round up size of TBB table to a halfword boundary. */
16030 size = (size + 1) & ~HOST_WIDE_INT_1;
16031 break;
16032 case 2:
16033 /* No padding necessary for TBH. */
16034 break;
16035 case 4:
16036 /* Add two bytes for alignment on Thumb. */
16037 if (TARGET_THUMB)
16038 size += 2;
16039 break;
16040 default:
16041 gcc_unreachable ();
16042 }
16043 return size;
16044 }
16045
16046 return 0;
16047 }
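
/* A rough worked example of the size computation above, assuming the
   usual mode sizes: a QImode ADDR_DIFF_VEC (a Thumb-2 TBB table) with
   5 entries occupies 5 bytes and is rounded up to 6; the same table in
   HImode (TBH) with 5 entries is 10 bytes and needs no rounding; an
   SImode ADDR_VEC with 5 entries is 20 bytes, plus 2 bytes of
   alignment padding when TARGET_THUMB.  */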
16048
16049 /* Return the maximum amount of padding that will be inserted before
16050 label LABEL. */
16051
16052 static HOST_WIDE_INT
16053 get_label_padding (rtx label)
16054 {
16055 HOST_WIDE_INT align, min_insn_size;
16056
16057 align = 1 << label_to_alignment (label).levels[0].log;
16058 min_insn_size = TARGET_THUMB ? 2 : 4;
16059 return align > min_insn_size ? align - min_insn_size : 0;
16060 }
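
/* For example (illustrative only): a label aligned to 2**3 == 8 bytes
   on a Thumb target (minimum insn size 2) can be preceded by at most
   8 - 2 == 6 bytes of padding, while on an ARM target (minimum insn
   size 4) the same label gets at most 4 bytes.  */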
16061
16062 /* Move a minipool fix MP from its current location to before MAX_MP.
16063 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16064 constraints may need updating. */
16065 static Mnode *
16066 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16067 HOST_WIDE_INT max_address)
16068 {
16069 /* The code below assumes these are different. */
16070 gcc_assert (mp != max_mp);
16071
16072 if (max_mp == NULL)
16073 {
16074 if (max_address < mp->max_address)
16075 mp->max_address = max_address;
16076 }
16077 else
16078 {
16079 if (max_address > max_mp->max_address - mp->fix_size)
16080 mp->max_address = max_mp->max_address - mp->fix_size;
16081 else
16082 mp->max_address = max_address;
16083
16084 /* Unlink MP from its current position. Since max_mp is non-null,
16085 mp->prev must be non-null. */
16086 mp->prev->next = mp->next;
16087 if (mp->next != NULL)
16088 mp->next->prev = mp->prev;
16089 else
16090 minipool_vector_tail = mp->prev;
16091
16092 /* Re-insert it before MAX_MP. */
16093 mp->next = max_mp;
16094 mp->prev = max_mp->prev;
16095 max_mp->prev = mp;
16096
16097 if (mp->prev != NULL)
16098 mp->prev->next = mp;
16099 else
16100 minipool_vector_head = mp;
16101 }
16102
16103 /* Save the new entry. */
16104 max_mp = mp;
16105
16106 /* Scan over the preceding entries and adjust their addresses as
16107 required. */
16108 while (mp->prev != NULL
16109 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16110 {
16111 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16112 mp = mp->prev;
16113 }
16114
16115 return max_mp;
16116 }
16117
16118 /* Add a constant to the minipool for a forward reference. Returns the
16119 node added or NULL if the constant will not fit in this pool. */
16120 static Mnode *
16121 add_minipool_forward_ref (Mfix *fix)
16122 {
16123 /* If set, max_mp is the first pool_entry that has a lower
16124 constraint than the one we are trying to add. */
16125 Mnode * max_mp = NULL;
16126 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16127 Mnode * mp;
16128
16129 /* If the minipool starts before the end of FIX->INSN then this FIX
16130 cannot be placed into the current pool. Furthermore, adding the
16131 new constant pool entry may cause the pool to start FIX_SIZE bytes
16132 earlier. */
16133 if (minipool_vector_head &&
16134 (fix->address + get_attr_length (fix->insn)
16135 >= minipool_vector_head->max_address - fix->fix_size))
16136 return NULL;
16137
16138 /* Scan the pool to see if a constant with the same value has
16139 already been added. While we are doing this, also note the
16140 location where we must insert the constant if it doesn't already
16141 exist. */
16142 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16143 {
16144 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16145 && fix->mode == mp->mode
16146 && (!LABEL_P (fix->value)
16147 || (CODE_LABEL_NUMBER (fix->value)
16148 == CODE_LABEL_NUMBER (mp->value)))
16149 && rtx_equal_p (fix->value, mp->value))
16150 {
16151 /* More than one fix references this entry. */
16152 mp->refcount++;
16153 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16154 }
16155
16156 /* Note the insertion point if necessary. */
16157 if (max_mp == NULL
16158 && mp->max_address > max_address)
16159 max_mp = mp;
16160
16161 /* If we are inserting an 8-byte aligned quantity and
16162 we have not already found an insertion point, then
16163 make sure that all such 8-byte aligned quantities are
16164 placed at the start of the pool. */
16165 if (ARM_DOUBLEWORD_ALIGN
16166 && max_mp == NULL
16167 && fix->fix_size >= 8
16168 && mp->fix_size < 8)
16169 {
16170 max_mp = mp;
16171 max_address = mp->max_address;
16172 }
16173 }
16174
16175 /* The value is not currently in the minipool, so we need to create
16176 a new entry for it. If MAX_MP is NULL, the entry will be put on
16177 the end of the list since the placement is less constrained than
16178 any existing entry. Otherwise, we insert the new fix before
16179 MAX_MP and, if necessary, adjust the constraints on the other
16180 entries. */
16181 mp = XNEW (Mnode);
16182 mp->fix_size = fix->fix_size;
16183 mp->mode = fix->mode;
16184 mp->value = fix->value;
16185 mp->refcount = 1;
16186 /* Not yet required for a backwards ref. */
16187 mp->min_address = -65536;
16188
16189 if (max_mp == NULL)
16190 {
16191 mp->max_address = max_address;
16192 mp->next = NULL;
16193 mp->prev = minipool_vector_tail;
16194
16195 if (mp->prev == NULL)
16196 {
16197 minipool_vector_head = mp;
16198 minipool_vector_label = gen_label_rtx ();
16199 }
16200 else
16201 mp->prev->next = mp;
16202
16203 minipool_vector_tail = mp;
16204 }
16205 else
16206 {
16207 if (max_address > max_mp->max_address - mp->fix_size)
16208 mp->max_address = max_mp->max_address - mp->fix_size;
16209 else
16210 mp->max_address = max_address;
16211
16212 mp->next = max_mp;
16213 mp->prev = max_mp->prev;
16214 max_mp->prev = mp;
16215 if (mp->prev != NULL)
16216 mp->prev->next = mp;
16217 else
16218 minipool_vector_head = mp;
16219 }
16220
16221 /* Save the new entry. */
16222 max_mp = mp;
16223
16224 /* Scan over the preceding entries and adjust their addresses as
16225 required. */
16226 while (mp->prev != NULL
16227 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16228 {
16229 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16230 mp = mp->prev;
16231 }
16232
16233 return max_mp;
16234 }
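
/* An illustration of the placement rules above, with made-up numbers:
   suppose the pool holds a single 4-byte entry A with max_address 1000.
   Adding a 4-byte entry B whose own limit is 2000 leaves B less
   constrained than A, so B simply goes on the end. If instead B's
   limit were 900, B would be inserted before A with max_address
   min (900, 1000 - 4) == 900, keeping the list sorted by increasing
   max_address; any entries already in front of B would then be capped
   at B's max_address minus their own fix_size by the loop above.  */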
16235
16236 static Mnode *
16237 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16238 HOST_WIDE_INT min_address)
16239 {
16240 HOST_WIDE_INT offset;
16241
16242 /* The code below assumes these are different. */
16243 gcc_assert (mp != min_mp);
16244
16245 if (min_mp == NULL)
16246 {
16247 if (min_address > mp->min_address)
16248 mp->min_address = min_address;
16249 }
16250 else
16251 {
16252 /* We will adjust this below if it is too loose. */
16253 mp->min_address = min_address;
16254
16255 /* Unlink MP from its current position. Since min_mp is non-null,
16256 mp->next must be non-null. */
16257 mp->next->prev = mp->prev;
16258 if (mp->prev != NULL)
16259 mp->prev->next = mp->next;
16260 else
16261 minipool_vector_head = mp->next;
16262
16263 /* Reinsert it after MIN_MP. */
16264 mp->prev = min_mp;
16265 mp->next = min_mp->next;
16266 min_mp->next = mp;
16267 if (mp->next != NULL)
16268 mp->next->prev = mp;
16269 else
16270 minipool_vector_tail = mp;
16271 }
16272
16273 min_mp = mp;
16274
16275 offset = 0;
16276 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16277 {
16278 mp->offset = offset;
16279 if (mp->refcount > 0)
16280 offset += mp->fix_size;
16281
16282 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16283 mp->next->min_address = mp->min_address + mp->fix_size;
16284 }
16285
16286 return min_mp;
16287 }
16288
16289 /* Add a constant to the minipool for a backward reference. Returns the
16290 node added or NULL if the constant will not fit in this pool.
16291
16292 Note that the code for insertion for a backwards reference can be
16293 somewhat confusing because the calculated offsets for each fix do
16294 not take into account the size of the pool (which is still under
16295 construction). */
16296 static Mnode *
16297 add_minipool_backward_ref (Mfix *fix)
16298 {
16299 /* If set, min_mp is the last pool_entry that has a lower constraint
16300 than the one we are trying to add. */
16301 Mnode *min_mp = NULL;
16302 /* This can be negative, since it is only a constraint. */
16303 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16304 Mnode *mp;
16305
16306 /* If we can't reach the current pool from this insn, or if we can't
16307 insert this entry at the end of the pool without pushing other
16308 fixes out of range, then we don't try. This ensures that we
16309 can't fail later on. */
16310 if (min_address >= minipool_barrier->address
16311 || (minipool_vector_tail->min_address + fix->fix_size
16312 >= minipool_barrier->address))
16313 return NULL;
16314
16315 /* Scan the pool to see if a constant with the same value has
16316 already been added. While we are doing this, also note the
16317 location where we must insert the constant if it doesn't already
16318 exist. */
16319 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16320 {
16321 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16322 && fix->mode == mp->mode
16323 && (!LABEL_P (fix->value)
16324 || (CODE_LABEL_NUMBER (fix->value)
16325 == CODE_LABEL_NUMBER (mp->value)))
16326 && rtx_equal_p (fix->value, mp->value)
16327 /* Check that there is enough slack to move this entry to the
16328 end of the table (this is conservative). */
16329 && (mp->max_address
16330 > (minipool_barrier->address
16331 + minipool_vector_tail->offset
16332 + minipool_vector_tail->fix_size)))
16333 {
16334 mp->refcount++;
16335 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16336 }
16337
16338 if (min_mp != NULL)
16339 mp->min_address += fix->fix_size;
16340 else
16341 {
16342 /* Note the insertion point if necessary. */
16343 if (mp->min_address < min_address)
16344 {
16345 /* For now, we do not allow the insertion of nodes requiring
16346 8-byte alignment anywhere but at the start of the pool. */
16347 if (ARM_DOUBLEWORD_ALIGN
16348 && fix->fix_size >= 8 && mp->fix_size < 8)
16349 return NULL;
16350 else
16351 min_mp = mp;
16352 }
16353 else if (mp->max_address
16354 < minipool_barrier->address + mp->offset + fix->fix_size)
16355 {
16356 /* Inserting before this entry would push the fix beyond
16357 its maximum address (which can happen if we have
16358 re-located a forwards fix); force the new fix to come
16359 after it. */
16360 if (ARM_DOUBLEWORD_ALIGN
16361 && fix->fix_size >= 8 && mp->fix_size < 8)
16362 return NULL;
16363 else
16364 {
16365 min_mp = mp;
16366 min_address = mp->min_address + fix->fix_size;
16367 }
16368 }
16369 /* Do not insert a non-8-byte aligned quantity before 8-byte
16370 aligned quantities. */
16371 else if (ARM_DOUBLEWORD_ALIGN
16372 && fix->fix_size < 8
16373 && mp->fix_size >= 8)
16374 {
16375 min_mp = mp;
16376 min_address = mp->min_address + fix->fix_size;
16377 }
16378 }
16379 }
16380
16381 /* We need to create a new entry. */
16382 mp = XNEW (Mnode);
16383 mp->fix_size = fix->fix_size;
16384 mp->mode = fix->mode;
16385 mp->value = fix->value;
16386 mp->refcount = 1;
16387 mp->max_address = minipool_barrier->address + 65536;
16388
16389 mp->min_address = min_address;
16390
16391 if (min_mp == NULL)
16392 {
16393 mp->prev = NULL;
16394 mp->next = minipool_vector_head;
16395
16396 if (mp->next == NULL)
16397 {
16398 minipool_vector_tail = mp;
16399 minipool_vector_label = gen_label_rtx ();
16400 }
16401 else
16402 mp->next->prev = mp;
16403
16404 minipool_vector_head = mp;
16405 }
16406 else
16407 {
16408 mp->next = min_mp->next;
16409 mp->prev = min_mp;
16410 min_mp->next = mp;
16411
16412 if (mp->next != NULL)
16413 mp->next->prev = mp;
16414 else
16415 minipool_vector_tail = mp;
16416 }
16417
16418 /* Save the new entry. */
16419 min_mp = mp;
16420
16421 if (mp->prev)
16422 mp = mp->prev;
16423 else
16424 mp->offset = 0;
16425
16426 /* Scan over the following entries and adjust their offsets. */
16427 while (mp->next != NULL)
16428 {
16429 if (mp->next->min_address < mp->min_address + mp->fix_size)
16430 mp->next->min_address = mp->min_address + mp->fix_size;
16431
16432 if (mp->refcount)
16433 mp->next->offset = mp->offset + mp->fix_size;
16434 else
16435 mp->next->offset = mp->offset;
16436
16437 mp = mp->next;
16438 }
16439
16440 return min_mp;
16441 }
16442
16443 static void
16444 assign_minipool_offsets (Mfix *barrier)
16445 {
16446 HOST_WIDE_INT offset = 0;
16447 Mnode *mp;
16448
16449 minipool_barrier = barrier;
16450
16451 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16452 {
16453 mp->offset = offset;
16454
16455 if (mp->refcount > 0)
16456 offset += mp->fix_size;
16457 }
16458 }
16459
16460 /* Output the literal table. */
16461 static void
16462 dump_minipool (rtx_insn *scan)
16463 {
16464 Mnode * mp;
16465 Mnode * nmp;
16466 int align64 = 0;
16467
16468 if (ARM_DOUBLEWORD_ALIGN)
16469 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16470 if (mp->refcount > 0 && mp->fix_size >= 8)
16471 {
16472 align64 = 1;
16473 break;
16474 }
16475
16476 if (dump_file)
16477 fprintf (dump_file,
16478 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16479 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16480
16481 scan = emit_label_after (gen_label_rtx (), scan);
16482 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16483 scan = emit_label_after (minipool_vector_label, scan);
16484
16485 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16486 {
16487 if (mp->refcount > 0)
16488 {
16489 if (dump_file)
16490 {
16491 fprintf (dump_file,
16492 ";; Offset %u, min %ld, max %ld ",
16493 (unsigned) mp->offset, (unsigned long) mp->min_address,
16494 (unsigned long) mp->max_address);
16495 arm_print_value (dump_file, mp->value);
16496 fputc ('\n', dump_file);
16497 }
16498
16499 rtx val = copy_rtx (mp->value);
16500
16501 switch (GET_MODE_SIZE (mp->mode))
16502 {
16503 #ifdef HAVE_consttable_1
16504 case 1:
16505 scan = emit_insn_after (gen_consttable_1 (val), scan);
16506 break;
16507
16508 #endif
16509 #ifdef HAVE_consttable_2
16510 case 2:
16511 scan = emit_insn_after (gen_consttable_2 (val), scan);
16512 break;
16513
16514 #endif
16515 #ifdef HAVE_consttable_4
16516 case 4:
16517 scan = emit_insn_after (gen_consttable_4 (val), scan);
16518 break;
16519
16520 #endif
16521 #ifdef HAVE_consttable_8
16522 case 8:
16523 scan = emit_insn_after (gen_consttable_8 (val), scan);
16524 break;
16525
16526 #endif
16527 #ifdef HAVE_consttable_16
16528 case 16:
16529 scan = emit_insn_after (gen_consttable_16 (val), scan);
16530 break;
16531
16532 #endif
16533 default:
16534 gcc_unreachable ();
16535 }
16536 }
16537
16538 nmp = mp->next;
16539 free (mp);
16540 }
16541
16542 minipool_vector_head = minipool_vector_tail = NULL;
16543 scan = emit_insn_after (gen_consttable_end (), scan);
16544 scan = emit_barrier_after (scan);
16545 }
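
/* The assembly produced for a pool therefore looks roughly like this
   (illustrative only; the exact directives come from the consttable_*
   and align_* patterns in the machine description):

   .L10:
	.align	2		@ or .align 3 when align64 is set
   .L11:			@ minipool_vector_label
	.word	0x12345678	@ a 4-byte entry
	.word	some_symbol	@ another 4-byte entry  */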
16546
16547 /* Return the cost of forcibly inserting a barrier after INSN. */
16548 static int
16549 arm_barrier_cost (rtx_insn *insn)
16550 {
16551 /* Basing the location of the pool on the loop depth is preferable,
16552 but at the moment, the basic block information seems to be
16553 corrupted by this stage of the compilation. */
16554 int base_cost = 50;
16555 rtx_insn *next = next_nonnote_insn (insn);
16556
16557 if (next != NULL && LABEL_P (next))
16558 base_cost -= 20;
16559
16560 switch (GET_CODE (insn))
16561 {
16562 case CODE_LABEL:
16563 /* It will always be better to place the table before the label, rather
16564 than after it. */
16565 return 50;
16566
16567 case INSN:
16568 case CALL_INSN:
16569 return base_cost;
16570
16571 case JUMP_INSN:
16572 return base_cost - 10;
16573
16574 default:
16575 return base_cost + 10;
16576 }
16577 }
16578
16579 /* Find the best place in the insn stream in the range
16580 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16581 Create the barrier by inserting a jump and add a new fix entry for
16582 it. */
16583 static Mfix *
16584 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16585 {
16586 HOST_WIDE_INT count = 0;
16587 rtx_barrier *barrier;
16588 rtx_insn *from = fix->insn;
16589 /* The instruction after which we will insert the jump. */
16590 rtx_insn *selected = NULL;
16591 int selected_cost;
16592 /* The address at which the jump instruction will be placed. */
16593 HOST_WIDE_INT selected_address;
16594 Mfix * new_fix;
16595 HOST_WIDE_INT max_count = max_address - fix->address;
16596 rtx_code_label *label = gen_label_rtx ();
16597
16598 selected_cost = arm_barrier_cost (from);
16599 selected_address = fix->address;
16600
16601 while (from && count < max_count)
16602 {
16603 rtx_jump_table_data *tmp;
16604 int new_cost;
16605
16606 /* This code shouldn't have been called if there was a natural barrier
16607 within range. */
16608 gcc_assert (!BARRIER_P (from));
16609
16610 /* Count the length of this insn. This must stay in sync with the
16611 code that pushes minipool fixes. */
16612 if (LABEL_P (from))
16613 count += get_label_padding (from);
16614 else
16615 count += get_attr_length (from);
16616
16617 /* If there is a jump table, add its length. */
16618 if (tablejump_p (from, NULL, &tmp))
16619 {
16620 count += get_jump_table_size (tmp);
16621
16622 /* Jump tables aren't in a basic block, so base the cost on
16623 the dispatch insn. If we select this location, we will
16624 still put the pool after the table. */
16625 new_cost = arm_barrier_cost (from);
16626
16627 if (count < max_count
16628 && (!selected || new_cost <= selected_cost))
16629 {
16630 selected = tmp;
16631 selected_cost = new_cost;
16632 selected_address = fix->address + count;
16633 }
16634
16635 /* Continue after the dispatch table. */
16636 from = NEXT_INSN (tmp);
16637 continue;
16638 }
16639
16640 new_cost = arm_barrier_cost (from);
16641
16642 if (count < max_count
16643 && (!selected || new_cost <= selected_cost))
16644 {
16645 selected = from;
16646 selected_cost = new_cost;
16647 selected_address = fix->address + count;
16648 }
16649
16650 from = NEXT_INSN (from);
16651 }
16652
16653 /* Make sure that we found a place to insert the jump. */
16654 gcc_assert (selected);
16655
16656 /* Create a new JUMP_INSN that branches around a barrier. */
16657 from = emit_jump_insn_after (gen_jump (label), selected);
16658 JUMP_LABEL (from) = label;
16659 barrier = emit_barrier_after (from);
16660 emit_label_after (label, barrier);
16661
16662 /* Create a minipool barrier entry for the new barrier. */
16663 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16664 new_fix->insn = barrier;
16665 new_fix->address = selected_address;
16666 new_fix->next = fix->next;
16667 fix->next = new_fix;
16668
16669 return new_fix;
16670 }
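
/* The emitted sequence is therefore, roughly (label name made up):

	b	.Lskip			@ new jump insn
	(barrier)			@ recorded in the new Mfix entry
   .Lskip:

   The barrier is a legal place for a minipool, so the constants that
   forced this can later be dumped between the branch and .Lskip.  */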
16671
16672 /* Record that there is a natural barrier in the insn stream at
16673 ADDRESS. */
16674 static void
16675 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16676 {
16677 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16678
16679 fix->insn = insn;
16680 fix->address = address;
16681
16682 fix->next = NULL;
16683 if (minipool_fix_head != NULL)
16684 minipool_fix_tail->next = fix;
16685 else
16686 minipool_fix_head = fix;
16687
16688 minipool_fix_tail = fix;
16689 }
16690
16691 /* Record INSN, which will need fixing up to load a value from the
16692 minipool. ADDRESS is the offset of the insn since the start of the
16693 function; LOC is a pointer to the part of the insn which requires
16694 fixing; VALUE is the constant that must be loaded, which is of type
16695 MODE. */
16696 static void
16697 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16698 machine_mode mode, rtx value)
16699 {
16700 gcc_assert (!arm_disable_literal_pool);
16701 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16702
16703 fix->insn = insn;
16704 fix->address = address;
16705 fix->loc = loc;
16706 fix->mode = mode;
16707 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16708 fix->value = value;
16709 fix->forwards = get_attr_pool_range (insn);
16710 fix->backwards = get_attr_neg_pool_range (insn);
16711 fix->minipool = NULL;
16712
16713 /* If an insn doesn't have a range defined for it, then it isn't
16714 expecting to be reworked by this code. Better to stop now than
16715 to generate duff assembly code. */
16716 gcc_assert (fix->forwards || fix->backwards);
16717
16718 /* If an entry requires 8-byte alignment then assume all constant pools
16719 require 4 bytes of padding. Trying to do this later on a per-pool
16720 basis is awkward because existing pool entries have to be modified. */
16721 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16722 minipool_pad = 4;
16723
16724 if (dump_file)
16725 {
16726 fprintf (dump_file,
16727 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16728 GET_MODE_NAME (mode),
16729 INSN_UID (insn), (unsigned long) address,
16730 -1 * (long)fix->backwards, (long)fix->forwards);
16731 arm_print_value (dump_file, fix->value);
16732 fprintf (dump_file, "\n");
16733 }
16734
16735 /* Add it to the chain of fixes. */
16736 fix->next = NULL;
16737
16738 if (minipool_fix_head != NULL)
16739 minipool_fix_tail->next = fix;
16740 else
16741 minipool_fix_head = fix;
16742
16743 minipool_fix_tail = fix;
16744 }
16745
16746 /* Return the maximum cost, measured in insns, that we are prepared to
16747 pay to synthesize a 64-bit constant inline (rather than loading it
16748 some other way, e.g. from the literal pool). */
16749 int
16750 arm_max_const_double_inline_cost ()
16751 {
16752 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16753 }
16754
16755 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16756 Returns the number of insns needed, or 99 if we don't know how to
16757 do it. */
16758 int
16759 arm_const_double_inline_cost (rtx val)
16760 {
16761 rtx lowpart, highpart;
16762 machine_mode mode;
16763
16764 mode = GET_MODE (val);
16765
16766 if (mode == VOIDmode)
16767 mode = DImode;
16768
16769 gcc_assert (GET_MODE_SIZE (mode) == 8);
16770
16771 lowpart = gen_lowpart (SImode, val);
16772 highpart = gen_highpart_mode (SImode, mode, val);
16773
16774 gcc_assert (CONST_INT_P (lowpart));
16775 gcc_assert (CONST_INT_P (highpart));
16776
16777 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16778 NULL_RTX, NULL_RTX, 0, 0)
16779 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16780 NULL_RTX, NULL_RTX, 0, 0));
16781 }
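
/* For example (illustrative): for the DImode constant 0x0000000100000001
   both halves are 1, each of which arm_gen_constant can load with a
   single insn, so the cost is 1 + 1 == 2. For 0x123456789abcdef0
   neither half is a simple immediate and the cost rises accordingly,
   typically past arm_max_const_double_inline_cost ().  */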
16782
16783 /* Cost of loading a SImode constant. */
16784 static inline int
16785 arm_const_inline_cost (enum rtx_code code, rtx val)
16786 {
16787 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16788 NULL_RTX, NULL_RTX, 1, 0);
16789 }
16790
16791 /* Return true if it is worthwhile to split a 64-bit constant into two
16792 32-bit operations. This is the case if optimizing for size, or
16793 if we have load delay slots, or if one 32-bit part can be done with
16794 a single data operation. */
16795 bool
16796 arm_const_double_by_parts (rtx val)
16797 {
16798 machine_mode mode = GET_MODE (val);
16799 rtx part;
16800
16801 if (optimize_size || arm_ld_sched)
16802 return true;
16803
16804 if (mode == VOIDmode)
16805 mode = DImode;
16806
16807 part = gen_highpart_mode (SImode, mode, val);
16808
16809 gcc_assert (CONST_INT_P (part));
16810
16811 if (const_ok_for_arm (INTVAL (part))
16812 || const_ok_for_arm (~INTVAL (part)))
16813 return true;
16814
16815 part = gen_lowpart (SImode, val);
16816
16817 gcc_assert (CONST_INT_P (part));
16818
16819 if (const_ok_for_arm (INTVAL (part))
16820 || const_ok_for_arm (~INTVAL (part)))
16821 return true;
16822
16823 return false;
16824 }
16825
16826 /* Return true if it is possible to inline both the high and low parts
16827 of a 64-bit constant into 32-bit data processing instructions. */
16828 bool
16829 arm_const_double_by_immediates (rtx val)
16830 {
16831 machine_mode mode = GET_MODE (val);
16832 rtx part;
16833
16834 if (mode == VOIDmode)
16835 mode = DImode;
16836
16837 part = gen_highpart_mode (SImode, mode, val);
16838
16839 gcc_assert (CONST_INT_P (part));
16840
16841 if (!const_ok_for_arm (INTVAL (part)))
16842 return false;
16843
16844 part = gen_lowpart (SImode, val);
16845
16846 gcc_assert (CONST_INT_P (part));
16847
16848 if (!const_ok_for_arm (INTVAL (part)))
16849 return false;
16850
16851 return true;
16852 }
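
/* For example (illustrative): 0x2a000000000000ff returns true, since
   both 0x2a000000 and 0x000000ff are valid ARM rotated-immediate
   encodings, whereas 0x0000001200345678 returns false because
   0x00345678 cannot be encoded as a single immediate.  */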
16853
16854 /* Scan INSN and note any of its operands that need fixing.
16855 If DO_PUSHES is false we do not actually push any of the fixups
16856 needed. */
16857 static void
16858 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16859 {
16860 int opno;
16861
16862 extract_constrain_insn (insn);
16863
16864 if (recog_data.n_alternatives == 0)
16865 return;
16866
16867 /* Fill in recog_op_alt with information about the constraints of
16868 this insn. */
16869 preprocess_constraints (insn);
16870
16871 const operand_alternative *op_alt = which_op_alt ();
16872 for (opno = 0; opno < recog_data.n_operands; opno++)
16873 {
16874 /* Things we need to fix can only occur in inputs. */
16875 if (recog_data.operand_type[opno] != OP_IN)
16876 continue;
16877
16878 /* If this alternative is a memory reference, then any mention
16879 of constants in this alternative is really to fool reload
16880 into allowing us to accept one there. We need to fix them up
16881 now so that we output the right code. */
16882 if (op_alt[opno].memory_ok)
16883 {
16884 rtx op = recog_data.operand[opno];
16885
16886 if (CONSTANT_P (op))
16887 {
16888 if (do_pushes)
16889 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16890 recog_data.operand_mode[opno], op);
16891 }
16892 else if (MEM_P (op)
16893 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16894 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16895 {
16896 if (do_pushes)
16897 {
16898 rtx cop = avoid_constant_pool_reference (op);
16899
16900 /* Casting the address of something to a mode narrower
16901 than a word can cause avoid_constant_pool_reference()
16902 to return the pool reference itself. That's no good to
16903 us here. Let's just hope that we can use the
16904 constant pool value directly. */
16905 if (op == cop)
16906 cop = get_pool_constant (XEXP (op, 0));
16907
16908 push_minipool_fix (insn, address,
16909 recog_data.operand_loc[opno],
16910 recog_data.operand_mode[opno], cop);
16911 }
16912
16913 }
16914 }
16915 }
16916
16917 return;
16918 }
16919
16920 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16921 and unions in the context of the ARMv8-M Security Extensions. It is a
16922 helper for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions.
16923 The PADDING_BITS_TO_CLEAR pointer can be the base of either one or four
16924 masks, depending on whether it is being computed for a
16925 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16926 respectively. The tree for the type of the argument, or of a field within
16927 an argument, is passed in ARG_TYPE; the register in which this argument or
16928 field starts is kept in the pointer REGNO and updated accordingly; the bit
16929 at which it starts is passed in STARTING_BIT; and the last used bit is
16930 kept in LAST_USED_BIT, which is also updated accordingly. */
16931
16932 static unsigned HOST_WIDE_INT
16933 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16934 uint32_t * padding_bits_to_clear,
16935 unsigned starting_bit, int * last_used_bit)
16936
16937 {
16938 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16939
16940 if (TREE_CODE (arg_type) == RECORD_TYPE)
16941 {
16942 unsigned current_bit = starting_bit;
16943 tree field;
16944 long int offset, size;
16945
16946
16947 field = TYPE_FIELDS (arg_type);
16948 while (field)
16949 {
16950 /* The offset within a structure is always an offset from
16951 the start of that structure. Make sure we take that into
16952 account in the calculation of the register-based offset used here. */
16953 offset = starting_bit;
16954 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16955 offset %= 32;
16956
16957 /* This is the actual size of the field; for bitfields this is the
16958 bitfield width and not the container size. */
16959 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16960
16961 if (*last_used_bit != offset)
16962 {
16963 if (offset < *last_used_bit)
16964 {
16965 /* This field's offset is before the 'last_used_bit', which
16966 means this field goes in the next register. So we need to
16967 pad the rest of the current register and increase the
16968 register number. */
16969 uint32_t mask;
16970 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16971 mask++;
16972
16973 padding_bits_to_clear[*regno] |= mask;
16974 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16975 (*regno)++;
16976 }
16977 else
16978 {
16979 /* Otherwise we pad the bits between the last field's end and
16980 the start of the new field. */
16981 uint32_t mask;
16982
16983 mask = ((uint32_t)-1) >> (32 - offset);
16984 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16985 padding_bits_to_clear[*regno] |= mask;
16986 }
16987 current_bit = offset;
16988 }
16989
16990 /* Calculate further padding bits for inner structs/unions too. */
16991 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16992 {
16993 *last_used_bit = current_bit;
16994 not_to_clear_reg_mask
16995 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16996 padding_bits_to_clear, offset,
16997 last_used_bit);
16998 }
16999 else
17000 {
17001 /* Update 'current_bit' with this field's size. If the
17002 'current_bit' lies in a subsequent register, update 'regno' and
17003 reset 'current_bit' to point to the current bit in that new
17004 register. */
17005 current_bit += size;
17006 while (current_bit >= 32)
17007 {
17008 current_bit -= 32;
17009 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17010 (*regno)++;
17011 }
17012 *last_used_bit = current_bit;
17013 }
17014
17015 field = TREE_CHAIN (field);
17016 }
17017 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17018 }
17019 else if (TREE_CODE (arg_type) == UNION_TYPE)
17020 {
17021 tree field, field_t;
17022 int i, regno_t, field_size;
17023 int max_reg = -1;
17024 int max_bit = -1;
17025 uint32_t mask;
17026 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
17027 = {-1, -1, -1, -1};
17028
17029 /* To compute the padding bits in a union we only consider bits as
17030 padding bits if, for all fields in the union, they are always either a
17031 padding bit or fall outside the field's size. */
17032 field = TYPE_FIELDS (arg_type);
17033 while (field)
17034 {
17035 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
17036 = {0U, 0U, 0U, 0U};
17037 int last_used_bit_t = *last_used_bit;
17038 regno_t = *regno;
17039 field_t = TREE_TYPE (field);
17040
17041 /* If the field's type is either a record or a union, make sure to
17042 compute its padding bits too. */
17043 if (RECORD_OR_UNION_TYPE_P (field_t))
17044 not_to_clear_reg_mask
17045 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
17046 &padding_bits_to_clear_t[0],
17047 starting_bit, &last_used_bit_t);
17048 else
17049 {
17050 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
17051 regno_t = (field_size / 32) + *regno;
17052 last_used_bit_t = (starting_bit + field_size) % 32;
17053 }
17054
17055 for (i = *regno; i < regno_t; i++)
17056 {
17057 /* For all but the last register used by this field only keep the
17058 padding bits that were padding bits in this field. */
17059 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
17060 }
17061
17062 /* For the last register, keep all padding bits that were padding
17063 bits in this field and any padding bits that are still valid
17064 as padding bits but fall outside of this field's size. */
17065 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
17066 padding_bits_to_clear_res[regno_t]
17067 &= padding_bits_to_clear_t[regno_t] | mask;
17068
17069 /* Update the maximum size of the fields in terms of registers used
17070 ('max_reg') and the 'last_used_bit' in said register. */
17071 if (max_reg < regno_t)
17072 {
17073 max_reg = regno_t;
17074 max_bit = last_used_bit_t;
17075 }
17076 else if (max_reg == regno_t && max_bit < last_used_bit_t)
17077 max_bit = last_used_bit_t;
17078
17079 field = TREE_CHAIN (field);
17080 }
17081
17082 /* Update the current padding_bits_to_clear using the intersection of the
17083 padding bits of all the fields. */
17084 for (i = *regno; i < max_reg; i++)
17085 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
17086
17087 /* Do not keep trailing padding bits; we do not know yet whether this
17088 is the end of the argument. */
17089 mask = ((uint32_t) 1 << max_bit) - 1;
17090 padding_bits_to_clear[max_reg]
17091 |= padding_bits_to_clear_res[max_reg] & mask;
17092
17093 *regno = max_reg;
17094 *last_used_bit = max_bit;
17095 }
17096 else
17097 /* This function should only be used for structs and unions. */
17098 gcc_unreachable ();
17099
17100 return not_to_clear_reg_mask;
17101 }
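
/* A worked example (illustrative, assuming the usual AAPCS layout and
   that the argument starts in r0):

     struct { uint8_t a; uint16_t b; }

   Field 'a' covers bits 0-7; field 'b' is aligned to 16 bits and
   covers bits 16-31, so bits 8-15 are padding and 0x0000ff00 is OR-ed
   into padding_bits_to_clear[0]. On return *regno has advanced past
   r0 and *last_used_bit is 0; compute_not_to_clear_mask below then
   drops the unused trailing register from the final mask.  */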
17102
17103 /* In the context of ARMv8-M Security Extensions, this function is used for both
17104 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17105 registers are used when returning or passing arguments, which is then
17106 returned as a mask. It will also compute a mask to indicate padding/unused
17107 bits for each of these registers, and passes this through the
17108 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17109 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17110 the starting register used to pass this argument or return value is passed
17111 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17112 for struct and union types. */
17113
17114 static unsigned HOST_WIDE_INT
17115 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17116 uint32_t * padding_bits_to_clear)
17117
17118 {
17119 int last_used_bit = 0;
17120 unsigned HOST_WIDE_INT not_to_clear_mask;
17121
17122 if (RECORD_OR_UNION_TYPE_P (arg_type))
17123 {
17124 not_to_clear_mask
17125 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17126 padding_bits_to_clear, 0,
17127 &last_used_bit);
17128
17129
17130 /* If the 'last_used_bit' is not zero, that means we are still using a
17131 part of the last 'regno'. In such cases we must clear the trailing
17132 bits. Otherwise we are not using regno at all and should mark it
17133 for clearing. */
17134 if (last_used_bit != 0)
17135 padding_bits_to_clear[regno]
17136 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17137 else
17138 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17139 }
17140 else
17141 {
17142 not_to_clear_mask = 0;
17143 /* We are not dealing with structs or unions, so these arguments may be
17144 passed in floating-point registers too. In some cases BLKmode is
17145 used when returning or passing arguments in multiple VFP registers. */
17146 if (GET_MODE (arg_rtx) == BLKmode)
17147 {
17148 int i, arg_regs;
17149 rtx reg;
17150
17151 /* This should really only occur when dealing with the hard-float
17152 ABI. */
17153 gcc_assert (TARGET_HARD_FLOAT_ABI);
17154
17155 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17156 {
17157 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17158 gcc_assert (REG_P (reg));
17159
17160 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17161
17162 /* If we are dealing with DF mode, make sure we don't
17163 clear either of the registers it addresses. */
17164 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17165 if (arg_regs > 1)
17166 {
17167 unsigned HOST_WIDE_INT mask;
17168 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17169 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17170 not_to_clear_mask |= mask;
17171 }
17172 }
17173 }
17174 else
17175 {
17176 /* Otherwise we can rely on the MODE to determine how many registers
17177 are being used by this argument. */
17178 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17179 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17180 if (arg_regs > 1)
17181 {
17182 unsigned HOST_WIDE_INT
17183 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17184 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17185 not_to_clear_mask |= mask;
17186 }
17187 }
17188 }
17189
17190 return not_to_clear_mask;
17191 }
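
/* For non-aggregate arguments the mask is simply the registers covered
   by the argument's mode. For example (illustrative): a 64-bit integer
   passed in r0/r1 yields a mask with bits 0 and 1 set, so neither
   register is clobbered by the clearing code, while a 32-bit argument
   in r2 sets only bit 2.  */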
17192
17193 /* Clear any secrets from registers before doing a cmse_nonsecure_call or
17194 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
17195 which registers are to be fully cleared, using the value in register
17196 CLEARING_REG if more efficient. The PADDING_BITS_LEN-entry array
17197 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
17198 core registers, with SCRATCH_REG used as a scratch register for that clearing.
17199
17200 NOTE: one of the following three conditions must hold:
17201 - SCRATCH_REG is a low register
17202 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17203 in TO_CLEAR_BITMAP)
17204 - CLEARING_REG is a low register. */
17205
17206 static void
17207 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17208 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17209 {
17210 bool saved_clearing = false;
17211 rtx saved_clearing_reg = NULL_RTX;
17212 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17213
17214 gcc_assert (arm_arch_cmse);
17215
17216 if (!bitmap_empty_p (to_clear_bitmap))
17217 {
17218 minregno = bitmap_first_set_bit (to_clear_bitmap);
17219 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17220 }
17221 clearing_regno = REGNO (clearing_reg);
17222
17223 /* Clear padding bits. */
17224 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17225 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17226 {
17227 uint64_t mask;
17228 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17229
17230 if (padding_bits_to_clear[i] == 0)
17231 continue;
17232
17233 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17234 CLEARING_REG as scratch. */
17235 if (TARGET_THUMB1
17236 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17237 {
17238 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17239 such that we can use clearing_reg to clear the unused bits in the
17240 arguments. */
17241 if ((clearing_regno > maxregno
17242 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17243 && !saved_clearing)
17244 {
17245 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17246 emit_move_insn (scratch_reg, clearing_reg);
17247 saved_clearing = true;
17248 saved_clearing_reg = scratch_reg;
17249 }
17250 scratch_reg = clearing_reg;
17251 }
17252
17253 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17254 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17255 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17256
17257 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17258 mask = (~padding_bits_to_clear[i]) >> 16;
17259 rtx16 = gen_int_mode (16, SImode);
17260 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17261 if (mask)
17262 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17263
17264 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17265 }
17266 if (saved_clearing)
17267 emit_move_insn (clearing_reg, saved_clearing_reg);
17268
17269
17270 /* Clear full registers. */
17271
17272 /* If not marked for clearing, clearing_reg already does not contain
17273 any secret. */
17274 if (clearing_regno <= maxregno
17275 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17276 {
17277 emit_move_insn (clearing_reg, const0_rtx);
17278 emit_use (clearing_reg);
17279 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17280 }
17281
17282 for (regno = minregno; regno <= maxregno; regno++)
17283 {
17284 if (!bitmap_bit_p (to_clear_bitmap, regno))
17285 continue;
17286
17287 if (IS_VFP_REGNUM (regno))
17288 {
17289 /* If regno is an even vfp register and its successor is also to
17290 be cleared, use vmov. */
17291 if (TARGET_VFP_DOUBLE
17292 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17293 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17294 {
17295 emit_move_insn (gen_rtx_REG (DFmode, regno),
17296 CONST1_RTX (DFmode));
17297 emit_use (gen_rtx_REG (DFmode, regno));
17298 regno++;
17299 }
17300 else
17301 {
17302 emit_move_insn (gen_rtx_REG (SFmode, regno),
17303 CONST1_RTX (SFmode));
17304 emit_use (gen_rtx_REG (SFmode, regno));
17305 }
17306 }
17307 else
17308 {
17309 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17310 emit_use (gen_rtx_REG (SImode, regno));
17311 }
17312 }
17313 }
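
/* Roughly, for each argument register with padding bits this emits
   (shown as pseudo assembly; register names made up and the exact
   instructions depend on the target variant):

	mov	scratch, #lo16(~padding_mask)
	movt	scratch, #hi16(~padding_mask)	@ the ZERO_EXTRACT above
	ands	rN, rN, scratch

   while fully-cleared core registers are simply copied from CLEARING_REG
   (itself zeroed first) and VFP registers are overwritten with the
   constant 1.0, which carries no secret.  */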
17314
17315 /* Clear caller-saved registers not used to pass arguments before a
17316 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17317 registers is done in the __gnu_cmse_nonsecure_call libcall.
17318 See libgcc/config/arm/cmse_nonsecure_call.S. */
17319
17320 static void
17321 cmse_nonsecure_call_clear_caller_saved (void)
17322 {
17323 basic_block bb;
17324
17325 FOR_EACH_BB_FN (bb, cfun)
17326 {
17327 rtx_insn *insn;
17328
17329 FOR_BB_INSNS (bb, insn)
17330 {
17331 unsigned address_regnum, regno, maxregno =
17332 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17333 auto_sbitmap to_clear_bitmap (maxregno + 1);
17334 rtx_insn *seq;
17335 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17336 rtx address;
17337 CUMULATIVE_ARGS args_so_far_v;
17338 cumulative_args_t args_so_far;
17339 tree arg_type, fntype;
17340 bool first_param = true;
17341 function_args_iterator args_iter;
17342 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17343
17344 if (!NONDEBUG_INSN_P (insn))
17345 continue;
17346
17347 if (!CALL_P (insn))
17348 continue;
17349
17350 pat = PATTERN (insn);
17351 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17352 call = XVECEXP (pat, 0, 0);
17353
17354 /* Get the real call RTX if the insn sets a value, ie. returns. */
17355 if (GET_CODE (call) == SET)
17356 call = SET_SRC (call);
17357
17358 /* Check if it is a cmse_nonsecure_call. */
17359 unspec = XEXP (call, 0);
17360 if (GET_CODE (unspec) != UNSPEC
17361 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17362 continue;
17363
17364 /* Determine the caller-saved registers we need to clear. */
17365 bitmap_clear (to_clear_bitmap);
17366 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17367
17368 /* Only look at the caller-saved floating point registers in case of
17369 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17370 lazy store and loads which clear both caller- and callee-saved
17371 registers. */
17372 if (TARGET_HARD_FLOAT_ABI)
17373 {
17374 auto_sbitmap float_bitmap (maxregno + 1);
17375
17376 bitmap_clear (float_bitmap);
17377 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17378 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17379 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17380 }
17381
17382 /* Make sure the register used to hold the function address is not
17383 cleared. */
17384 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17385 gcc_assert (MEM_P (address));
17386 gcc_assert (REG_P (XEXP (address, 0)));
17387 address_regnum = REGNO (XEXP (address, 0));
17388 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17389 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17390
17391 /* Set basic block of call insn so that df rescan is performed on
17392 insns inserted here. */
17393 set_block_for_insn (insn, bb);
17394 df_set_flags (DF_DEFER_INSN_RESCAN);
17395 start_sequence ();
17396
17397 /* Make sure the scheduler doesn't schedule other insns beyond
17398 here. */
17399 emit_insn (gen_blockage ());
17400
17401 /* Walk through all arguments and clear registers
17402 appropriately. */
17403 fntype = TREE_TYPE (MEM_EXPR (address));
17404 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17405 NULL_TREE);
17406 args_so_far = pack_cumulative_args (&args_so_far_v);
17407 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17408 {
17409 rtx arg_rtx;
17410 uint64_t to_clear_args_mask;
17411 machine_mode arg_mode = TYPE_MODE (arg_type);
17412
17413 if (VOID_TYPE_P (arg_type))
17414 continue;
17415
17416 if (!first_param)
17417 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17418 true);
17419
17420 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17421 true);
17422 gcc_assert (REG_P (arg_rtx));
17423 to_clear_args_mask
17424 = compute_not_to_clear_mask (arg_type, arg_rtx,
17425 REGNO (arg_rtx),
17426 &padding_bits_to_clear[0]);
17427 if (to_clear_args_mask)
17428 {
17429 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17430 {
17431 if (to_clear_args_mask & (1ULL << regno))
17432 bitmap_clear_bit (to_clear_bitmap, regno);
17433 }
17434 }
17435
17436 first_param = false;
17437 }
17438
17439 /* We use right shift and left shift to clear the LSB of the address
17440 we jump to instead of using bic, to avoid having to use an extra
17441 register on Thumb-1. */
17442 clearing_reg = XEXP (address, 0);
17443 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17444 emit_insn (gen_rtx_SET (clearing_reg, shift));
17445 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17446 emit_insn (gen_rtx_SET (clearing_reg, shift));
17447
17448 /* Clear caller-saved registers that leak before doing a non-secure
17449 call. */
17450 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17451 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17452 NUM_ARG_REGS, ip_reg, clearing_reg);
17453
17454 seq = get_insns ();
17455 end_sequence ();
17456 emit_insn_before (seq, insn);
17457 }
17458 }
17459 }
17460
17461 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17462 be useful in the next conditional jump insn. */
17463
17464 static void
17465 thumb1_reorg (void)
17466 {
17467 basic_block bb;
17468
17469 FOR_EACH_BB_FN (bb, cfun)
17470 {
17471 rtx dest, src;
17472 rtx cmp, op0, op1, set = NULL;
17473 rtx_insn *prev, *insn = BB_END (bb);
17474 bool insn_clobbered = false;
17475
17476 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17477 insn = PREV_INSN (insn);
17478
17479 /* Find the last cbranchsi4_insn in basic block BB. */
17480 if (insn == BB_HEAD (bb)
17481 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17482 continue;
17483
17484 /* Get the register with which we are comparing. */
17485 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17486 op0 = XEXP (cmp, 0);
17487 op1 = XEXP (cmp, 1);
17488
17489 /* Check that comparison is against ZERO. */
17490 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17491 continue;
17492
17493 /* Find the first flag setting insn before INSN in basic block BB. */
17494 gcc_assert (insn != BB_HEAD (bb));
17495 for (prev = PREV_INSN (insn);
17496 (!insn_clobbered
17497 && prev != BB_HEAD (bb)
17498 && (NOTE_P (prev)
17499 || DEBUG_INSN_P (prev)
17500 || ((set = single_set (prev)) != NULL
17501 && get_attr_conds (prev) == CONDS_NOCOND)));
17502 prev = PREV_INSN (prev))
17503 {
17504 if (reg_set_p (op0, prev))
17505 insn_clobbered = true;
17506 }
17507
17508 /* Skip if op0 is clobbered by an insn other than PREV. */
17509 if (insn_clobbered)
17510 continue;
17511
17512 if (!set)
17513 continue;
17514
17515 dest = SET_DEST (set);
17516 src = SET_SRC (set);
17517 if (!low_register_operand (dest, SImode)
17518 || !low_register_operand (src, SImode))
17519 continue;
17520
17521 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17522 in INSN. Both src and dest of the move insn are checked. */
17523 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17524 {
17525 dest = copy_rtx (dest);
17526 src = copy_rtx (src);
17527 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17528 PATTERN (prev) = gen_rtx_SET (dest, src);
17529 INSN_CODE (prev) = -1;
17530 /* Set test register in INSN to dest. */
17531 XEXP (cmp, 0) = copy_rtx (dest);
17532 INSN_CODE (insn) = -1;
17533 }
17534 }
17535 }
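
/* In RTL terms the transformation above is (registers made up):

     (set (reg:SI r3) (reg:SI r2))			<- prev
     ...
     cbranchsi4_insn comparing (reg:SI r2) with 0	<- insn

   becomes

     (set (reg:SI r3) (minus:SI (reg:SI r2) (const_int 0)))
     ...
     cbranchsi4_insn comparing (reg:SI r3) with 0

   so the condition codes set by the flag-setting subtract can be
   reused by the conditional branch.  */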
17536
17537 /* Convert instructions to their cc-clobbering variant if possible, since
17538 that allows us to use smaller encodings. */
17539
17540 static void
17541 thumb2_reorg (void)
17542 {
17543 basic_block bb;
17544 regset_head live;
17545
17546 INIT_REG_SET (&live);
17547
17548 /* We are freeing block_for_insn in the toplev to keep compatibility
17549 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17550 compute_bb_for_insn ();
17551 df_analyze ();
17552
17553 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17554
17555 FOR_EACH_BB_FN (bb, cfun)
17556 {
17557 if ((current_tune->disparage_flag_setting_t16_encodings
17558 == tune_params::DISPARAGE_FLAGS_ALL)
17559 && optimize_bb_for_speed_p (bb))
17560 continue;
17561
17562 rtx_insn *insn;
17563 Convert_Action action = SKIP;
17564 Convert_Action action_for_partial_flag_setting
17565 = ((current_tune->disparage_flag_setting_t16_encodings
17566 != tune_params::DISPARAGE_FLAGS_NEITHER)
17567 && optimize_bb_for_speed_p (bb))
17568 ? SKIP : CONV;
17569
17570 COPY_REG_SET (&live, DF_LR_OUT (bb));
17571 df_simulate_initialize_backwards (bb, &live);
17572 FOR_BB_INSNS_REVERSE (bb, insn)
17573 {
17574 if (NONJUMP_INSN_P (insn)
17575 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17576 && GET_CODE (PATTERN (insn)) == SET)
17577 {
17578 action = SKIP;
17579 rtx pat = PATTERN (insn);
17580 rtx dst = XEXP (pat, 0);
17581 rtx src = XEXP (pat, 1);
17582 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17583
17584 if (UNARY_P (src) || BINARY_P (src))
17585 op0 = XEXP (src, 0);
17586
17587 if (BINARY_P (src))
17588 op1 = XEXP (src, 1);
17589
17590 if (low_register_operand (dst, SImode))
17591 {
17592 switch (GET_CODE (src))
17593 {
17594 case PLUS:
17595 /* Adding two registers and storing the result
17596 in the first source is already a 16-bit
17597 operation. */
17598 if (rtx_equal_p (dst, op0)
17599 && register_operand (op1, SImode))
17600 break;
17601
17602 if (low_register_operand (op0, SImode))
17603 {
17604 /* ADDS <Rd>,<Rn>,<Rm> */
17605 if (low_register_operand (op1, SImode))
17606 action = CONV;
17607 /* ADDS <Rdn>,#<imm8> */
17608 /* SUBS <Rdn>,#<imm8> */
17609 else if (rtx_equal_p (dst, op0)
17610 && CONST_INT_P (op1)
17611 && IN_RANGE (INTVAL (op1), -255, 255))
17612 action = CONV;
17613 /* ADDS <Rd>,<Rn>,#<imm3> */
17614 /* SUBS <Rd>,<Rn>,#<imm3> */
17615 else if (CONST_INT_P (op1)
17616 && IN_RANGE (INTVAL (op1), -7, 7))
17617 action = CONV;
17618 }
17619 /* ADCS <Rd>, <Rn> */
17620 else if (GET_CODE (XEXP (src, 0)) == PLUS
17621 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17622 && low_register_operand (XEXP (XEXP (src, 0), 1),
17623 SImode)
17624 && COMPARISON_P (op1)
17625 && cc_register (XEXP (op1, 0), VOIDmode)
17626 && maybe_get_arm_condition_code (op1) == ARM_CS
17627 && XEXP (op1, 1) == const0_rtx)
17628 action = CONV;
17629 break;
17630
17631 case MINUS:
17632 /* RSBS <Rd>,<Rn>,#0
17633 Not handled here: see NEG below. */
17634 /* SUBS <Rd>,<Rn>,#<imm3>
17635 SUBS <Rdn>,#<imm8>
17636 Not handled here: see PLUS above. */
17637 /* SUBS <Rd>,<Rn>,<Rm> */
17638 if (low_register_operand (op0, SImode)
17639 && low_register_operand (op1, SImode))
17640 action = CONV;
17641 break;
17642
17643 case MULT:
17644 /* MULS <Rdm>,<Rn>,<Rdm>
17645 As an exception to the rule, this is only used
17646 when optimizing for size since MULS is slow on all
17647 known implementations. We do not even want to use
17648 MULS in cold code, if optimizing for speed, so we
17649 test the global flag here. */
17650 if (!optimize_size)
17651 break;
17652 /* Fall through. */
17653 case AND:
17654 case IOR:
17655 case XOR:
17656 /* ANDS <Rdn>,<Rm> */
17657 if (rtx_equal_p (dst, op0)
17658 && low_register_operand (op1, SImode))
17659 action = action_for_partial_flag_setting;
17660 else if (rtx_equal_p (dst, op1)
17661 && low_register_operand (op0, SImode))
17662 action = action_for_partial_flag_setting == SKIP
17663 ? SKIP : SWAP_CONV;
17664 break;
17665
17666 case ASHIFTRT:
17667 case ASHIFT:
17668 case LSHIFTRT:
17669 /* ASRS <Rdn>,<Rm> */
17670 /* LSRS <Rdn>,<Rm> */
17671 /* LSLS <Rdn>,<Rm> */
17672 if (rtx_equal_p (dst, op0)
17673 && low_register_operand (op1, SImode))
17674 action = action_for_partial_flag_setting;
17675 /* ASRS <Rd>,<Rm>,#<imm5> */
17676 /* LSRS <Rd>,<Rm>,#<imm5> */
17677 /* LSLS <Rd>,<Rm>,#<imm5> */
17678 else if (low_register_operand (op0, SImode)
17679 && CONST_INT_P (op1)
17680 && IN_RANGE (INTVAL (op1), 0, 31))
17681 action = action_for_partial_flag_setting;
17682 break;
17683
17684 case ROTATERT:
17685 /* RORS <Rdn>,<Rm> */
17686 if (rtx_equal_p (dst, op0)
17687 && low_register_operand (op1, SImode))
17688 action = action_for_partial_flag_setting;
17689 break;
17690
17691 case NOT:
17692 /* MVNS <Rd>,<Rm> */
17693 if (low_register_operand (op0, SImode))
17694 action = action_for_partial_flag_setting;
17695 break;
17696
17697 case NEG:
17698 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17699 if (low_register_operand (op0, SImode))
17700 action = CONV;
17701 break;
17702
17703 case CONST_INT:
17704 /* MOVS <Rd>,#<imm8> */
17705 if (CONST_INT_P (src)
17706 && IN_RANGE (INTVAL (src), 0, 255))
17707 action = action_for_partial_flag_setting;
17708 break;
17709
17710 case REG:
17711 /* MOVS and MOV<c> with registers have different
17712 encodings, so are not relevant here. */
17713 break;
17714
17715 default:
17716 break;
17717 }
17718 }
17719
17720 if (action != SKIP)
17721 {
17722 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17723 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17724 rtvec vec;
17725
17726 if (action == SWAP_CONV)
17727 {
17728 src = copy_rtx (src);
17729 XEXP (src, 0) = op1;
17730 XEXP (src, 1) = op0;
17731 pat = gen_rtx_SET (dst, src);
17732 vec = gen_rtvec (2, pat, clobber);
17733 }
17734 else /* action == CONV */
17735 vec = gen_rtvec (2, pat, clobber);
17736
17737 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17738 INSN_CODE (insn) = -1;
17739 }
17740 }
17741
17742 if (NONDEBUG_INSN_P (insn))
17743 df_simulate_one_insn_backwards (bb, insn, &live);
17744 }
17745 }
17746
17747 CLEAR_REG_SET (&live);
17748 }
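
/* For example (illustrative): when the condition codes are dead,

     (set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))

   is rewritten as

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))
		(clobber (reg:CC CC_REGNUM))])

   which lets the output routines use the 16-bit flag-setting
   ADDS encoding instead of the 32-bit ADD.  */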
17749
17750 /* GCC puts the pool in the wrong place for ARM, since we can only
17751 load addresses a limited distance around the pc. We do some
17752 special munging to move the constant pool values to the correct
17753 point in the code. */
17754 static void
17755 arm_reorg (void)
17756 {
17757 rtx_insn *insn;
17758 HOST_WIDE_INT address = 0;
17759 Mfix * fix;
17760
17761 if (use_cmse)
17762 cmse_nonsecure_call_clear_caller_saved ();
17763
17764 /* We cannot run the Thumb passes for thunks because there is no CFG. */
17765 if (cfun->is_thunk)
17766 ;
17767 else if (TARGET_THUMB1)
17768 thumb1_reorg ();
17769 else if (TARGET_THUMB2)
17770 thumb2_reorg ();
17771
17772 /* Ensure all insns that must be split have been split at this point.
17773 Otherwise, the pool placement code below may compute incorrect
17774 insn lengths. Note that when optimizing, all insns have already
17775 been split at this point. */
17776 if (!optimize)
17777 split_all_insns_noflow ();
17778
17779 /* Literal pools are disabled, so no pool should be needed at this point;
17780 make sure we do not attempt to create one. */
17781 if (arm_disable_literal_pool)
17782 return;
17783
17784 minipool_fix_head = minipool_fix_tail = NULL;
17785
17786 /* The first insn must always be a note, or the code below won't
17787 scan it properly. */
17788 insn = get_insns ();
17789 gcc_assert (NOTE_P (insn));
17790 minipool_pad = 0;
17791
17792 /* Scan all the insns and record the operands that will need fixing. */
17793 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17794 {
17795 if (BARRIER_P (insn))
17796 push_minipool_barrier (insn, address);
17797 else if (INSN_P (insn))
17798 {
17799 rtx_jump_table_data *table;
17800
17801 note_invalid_constants (insn, address, true);
17802 address += get_attr_length (insn);
17803
17804 /* If the insn jumps through a dispatch table, add the size of the
17805 table and skip over it. */
17806 if (tablejump_p (insn, NULL, &table))
17807 {
17808 address += get_jump_table_size (table);
17809 insn = table;
17810 }
17811 }
17812 else if (LABEL_P (insn))
17813 /* Add the worst-case padding due to alignment. We don't add
17814 the _current_ padding because the minipool insertions
17815 themselves might change it. */
17816 address += get_label_padding (insn);
17817 }
17818
17819 fix = minipool_fix_head;
17820
17821 /* Now scan the fixups and perform the required changes. */
17822 while (fix)
17823 {
17824 Mfix * ftmp;
17825 Mfix * fdel;
17826 Mfix * last_added_fix;
17827 Mfix * last_barrier = NULL;
17828 Mfix * this_fix;
17829
17830 /* Skip any further barriers before the next fix. */
17831 while (fix && BARRIER_P (fix->insn))
17832 fix = fix->next;
17833
17834 /* No more fixes. */
17835 if (fix == NULL)
17836 break;
17837
17838 last_added_fix = NULL;
17839
17840 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17841 {
17842 if (BARRIER_P (ftmp->insn))
17843 {
17844 if (ftmp->address >= minipool_vector_head->max_address)
17845 break;
17846
17847 last_barrier = ftmp;
17848 }
17849 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17850 break;
17851
17852 last_added_fix = ftmp; /* Keep track of the last fix added. */
17853 }
17854
17855 /* If we found a barrier, drop back to that; any fixes that we
17856 could have reached but come after the barrier will now go in
17857 the next mini-pool. */
17858 if (last_barrier != NULL)
17859 {
17860 /* Reduce the refcount for those fixes that won't go into this
17861 pool after all. */
17862 for (fdel = last_barrier->next;
17863 fdel && fdel != ftmp;
17864 fdel = fdel->next)
17865 {
17866 fdel->minipool->refcount--;
17867 fdel->minipool = NULL;
17868 }
17869
17870 ftmp = last_barrier;
17871 }
17872 else
17873 {
17874 /* ftmp is the first fix that we can't fit into this pool, and
17875 there are no natural barriers that we could use. Insert a
17876 new barrier in the code somewhere between the previous
17877 fix and this one, and arrange to jump around it. */
17878 HOST_WIDE_INT max_address;
17879
17880 /* The last item on the list of fixes must be a barrier, so
17881 we can never run off the end of the list of fixes without
17882 last_barrier being set. */
17883 gcc_assert (ftmp);
17884
17885 max_address = minipool_vector_head->max_address;
17886 /* Check that there isn't another fix that is in range that
17887 we couldn't fit into this pool because the pool was
17888 already too large: we need to put the pool before such an
17889 instruction. The pool itself may come just after the
17890 fix because create_fix_barrier also allows space for a
17891 jump instruction. */
17892 if (ftmp->address < max_address)
17893 max_address = ftmp->address + 1;
17894
17895 last_barrier = create_fix_barrier (last_added_fix, max_address);
17896 }
17897
17898 assign_minipool_offsets (last_barrier);
17899
17900 while (ftmp)
17901 {
17902 if (!BARRIER_P (ftmp->insn)
17903 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17904 == NULL))
17905 break;
17906
17907 ftmp = ftmp->next;
17908 }
17909
17910 /* Scan over the fixes we have identified for this pool, fixing them
17911 up and adding the constants to the pool itself. */
17912 for (this_fix = fix; this_fix && ftmp != this_fix;
17913 this_fix = this_fix->next)
17914 if (!BARRIER_P (this_fix->insn))
17915 {
17916 rtx addr
17917 = plus_constant (Pmode,
17918 gen_rtx_LABEL_REF (VOIDmode,
17919 minipool_vector_label),
17920 this_fix->minipool->offset);
17921 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17922 }
17923
17924 dump_minipool (last_barrier->insn);
17925 fix = ftmp;
17926 }
17927
17928 /* From now on we must synthesize any constants that we can't handle
17929 directly. This can happen if the RTL gets split during final
17930 instruction generation. */
17931 cfun->machine->after_arm_reorg = 1;
17932
17933 /* Free the minipool memory. */
17934 obstack_free (&minipool_obstack, minipool_startobj);
17935 }
17936 \f
17937 /* Routines to output assembly language. */
17938
17939 /* Return string representation of passed in real value. */
17940 static const char *
17941 fp_const_from_val (REAL_VALUE_TYPE *r)
17942 {
17943 if (!fp_consts_inited)
17944 init_fp_table ();
17945
17946 gcc_assert (real_equal (r, &value_fp0));
17947 return "0";
17948 }
17949
17950 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17951 OPERANDS[1] is the base register, RETURN_PC is true iff a return insn
17952 is in the list, and UPDATE is true iff the list contains an explicit
17953 update of the base register. */
17954 void
17955 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17956 bool update)
17957 {
17958 int i;
17959 char pattern[100];
17960 int offset;
17961 const char *conditional;
17962 int num_saves = XVECLEN (operands[0], 0);
17963 unsigned int regno;
17964 unsigned int regno_base = REGNO (operands[1]);
17965 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17966
17967 offset = 0;
17968 offset += update ? 1 : 0;
17969 offset += return_pc ? 1 : 0;
17970
17971 /* Is the base register in the list? */
17972 for (i = offset; i < num_saves; i++)
17973 {
17974 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17975 /* If SP is in the list, then the base register must be SP. */
17976 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17977 /* If base register is in the list, there must be no explicit update. */
17978 if (regno == regno_base)
17979 gcc_assert (!update);
17980 }
17981
17982 conditional = reverse ? "%?%D0" : "%?%d0";
17983 /* Can't use POP if returning from an interrupt. */
17984 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17985 sprintf (pattern, "pop%s\t{", conditional);
17986 else
17987 {
17988 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17989 It's just a convention; their semantics are identical. */
17990 if (regno_base == SP_REGNUM)
17991 sprintf (pattern, "ldmfd%s\t", conditional);
17992 else if (update)
17993 sprintf (pattern, "ldmia%s\t", conditional);
17994 else
17995 sprintf (pattern, "ldm%s\t", conditional);
17996
17997 strcat (pattern, reg_names[regno_base]);
17998 if (update)
17999 strcat (pattern, "!, {");
18000 else
18001 strcat (pattern, ", {");
18002 }
18003
18004 /* Output the first destination register. */
18005 strcat (pattern,
18006 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
18007
18008 /* Output the rest of the destination registers. */
18009 for (i = offset + 1; i < num_saves; i++)
18010 {
18011 strcat (pattern, ", ");
18012 strcat (pattern,
18013 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
18014 }
18015
18016 strcat (pattern, "}");
18017
18018 if (interrupt_p && return_pc)
18019 strcat (pattern, "^");
18020
18021 output_asm_insn (pattern, &cond);
18022 }
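
/* For illustration only (register list assumed): popping { r4, r5, r6 }
   with SP as the base register and writeback produces

        pop     {r4, r5, r6}

   while the same list based on, say, r7 without writeback is emitted as
   "ldm r7, {r4, r5, r6}".  */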
18023
18024
18025 /* Output the assembly for a VFP store multiple. */
18026
18027 const char *
18028 vfp_output_vstmd (rtx * operands)
18029 {
18030 char pattern[100];
18031 int p;
18032 int base;
18033 int i;
18034 rtx addr_reg = REG_P (XEXP (operands[0], 0))
18035 ? XEXP (operands[0], 0)
18036 : XEXP (XEXP (operands[0], 0), 0);
18037 bool push_p = REGNO (addr_reg) == SP_REGNUM;
18038
18039 if (push_p)
18040 strcpy (pattern, "vpush%?.64\t{%P1");
18041 else
18042 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
18043
18044 p = strlen (pattern);
18045
18046 gcc_assert (REG_P (operands[1]));
18047
18048 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
18049 for (i = 1; i < XVECLEN (operands[2], 0); i++)
18050 {
18051 p += sprintf (&pattern[p], ", d%d", base + i);
18052 }
18053 strcpy (&pattern[p], "}");
18054
18055 output_asm_insn (pattern, operands);
18056 return "";
18057 }
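
/* For illustration (operands assumed): a store of consecutive double
   registers starting at d8 has the form

        vpush.64        {d8, d9, ...}           @ when the base is SP
        vstmdb.64       rN!, {d8, d9, ...}      @ for any other base
   */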
18058
18059
18060 /* Emit RTL to save a block of VFP register pairs to the stack. Returns
18061 the number of bytes pushed. */
18062
18063 static int
18064 vfp_emit_fstmd (int base_reg, int count)
18065 {
18066 rtx par;
18067 rtx dwarf;
18068 rtx tmp, reg;
18069 int i;
18070
18071 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly
18072 two register pairs are stored by a store-multiple insn. We avoid this
18073 by pushing an extra pair. */
18074 if (count == 2 && !arm_arch6)
18075 {
18076 if (base_reg == LAST_VFP_REGNUM - 3)
18077 base_reg -= 2;
18078 count++;
18079 }
18080
18081 /* FSTMD may not store more than 16 doubleword registers at once. Split
18082 larger stores into multiple parts (up to a maximum of two, in
18083 practice). */
18084 if (count > 16)
18085 {
18086 int saved;
18087 /* NOTE: base_reg is an internal register number, so each D register
18088 counts as 2. */
18089 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
18090 saved += vfp_emit_fstmd (base_reg, 16);
18091 return saved;
18092 }
18093
18094 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18095 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18096
18097 reg = gen_rtx_REG (DFmode, base_reg);
18098 base_reg += 2;
18099
18100 XVECEXP (par, 0, 0)
18101 = gen_rtx_SET (gen_frame_mem
18102 (BLKmode,
18103 gen_rtx_PRE_MODIFY (Pmode,
18104 stack_pointer_rtx,
18105 plus_constant
18106 (Pmode, stack_pointer_rtx,
18107 - (count * 8)))
18108 ),
18109 gen_rtx_UNSPEC (BLKmode,
18110 gen_rtvec (1, reg),
18111 UNSPEC_PUSH_MULT));
18112
18113 tmp = gen_rtx_SET (stack_pointer_rtx,
18114 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18115 RTX_FRAME_RELATED_P (tmp) = 1;
18116 XVECEXP (dwarf, 0, 0) = tmp;
18117
18118 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18119 RTX_FRAME_RELATED_P (tmp) = 1;
18120 XVECEXP (dwarf, 0, 1) = tmp;
18121
18122 for (i = 1; i < count; i++)
18123 {
18124 reg = gen_rtx_REG (DFmode, base_reg);
18125 base_reg += 2;
18126 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18127
18128 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18129 plus_constant (Pmode,
18130 stack_pointer_rtx,
18131 i * 8)),
18132 reg);
18133 RTX_FRAME_RELATED_P (tmp) = 1;
18134 XVECEXP (dwarf, 0, i + 1) = tmp;
18135 }
18136
18137 par = emit_insn (par);
18138 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18139 RTX_FRAME_RELATED_P (par) = 1;
18140
18141 return count * 8;
18142 }
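
/* Worked example (registers assumed): saving d8-d9 on a pre-ARMv6 core hits
   the errata workaround above, so three pairs (d8-d10) are pushed instead;
   a single store-multiple that drops SP by 24 bytes is emitted and 24 is
   returned.  */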
18143
18144 /* Return true if -mcmse has been passed and the function pointed to by
18145 'addr' has the cmse_nonsecure_call attribute; return false otherwise. */
18146
18147 bool
18148 detect_cmse_nonsecure_call (tree addr)
18149 {
18150 if (!addr)
18151 return FALSE;
18152
18153 tree fntype = TREE_TYPE (addr);
18154 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18155 TYPE_ATTRIBUTES (fntype)))
18156 return TRUE;
18157 return FALSE;
18158 }
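
/* Source-level sketch of the construct being detected (names are
   hypothetical; the attribute is only accepted when compiling with -mcmse):

        typedef void __attribute__((cmse_nonsecure_call)) nsfunc (void);
        nsfunc *callback;
        ...
        callback ();        <-- a call through a non-secure function pointer
   */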
18159
18160
18161 /* Emit a call instruction with pattern PAT. ADDR is the address of
18162 the call target. */
18163
18164 void
18165 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18166 {
18167 rtx insn;
18168
18169 insn = emit_call_insn (pat);
18170
18171 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18172 If the call might use such an entry, add a use of the PIC register
18173 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18174 if (TARGET_VXWORKS_RTP
18175 && flag_pic
18176 && !sibcall
18177 && GET_CODE (addr) == SYMBOL_REF
18178 && (SYMBOL_REF_DECL (addr)
18179 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18180 : !SYMBOL_REF_LOCAL_P (addr)))
18181 {
18182 require_pic_register (NULL_RTX, false /*compute_now*/);
18183 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18184 }
18185
18186 if (TARGET_AAPCS_BASED)
18187 {
18188 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18189 linker. We need to add an IP clobber to allow setting
18190 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18191 is not needed since it's a fixed register. */
18192 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18193 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18194 }
18195 }
18196
18197 /* Output a 'call' insn. */
18198 const char *
18199 output_call (rtx *operands)
18200 {
18201 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18202
18203 /* Handle calls to LR using IP (which may be clobbered by the called subroutine anyway). */
18204 if (REGNO (operands[0]) == LR_REGNUM)
18205 {
18206 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18207 output_asm_insn ("mov%?\t%0, %|lr", operands);
18208 }
18209
18210 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18211
18212 if (TARGET_INTERWORK || arm_arch4t)
18213 output_asm_insn ("bx%?\t%0", operands);
18214 else
18215 output_asm_insn ("mov%?\t%|pc, %0", operands);
18216
18217 return "";
18218 }
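
/* For example (register assumed; this path is only used when blx is not
   available): a call through r2 comes out roughly as

        mov     lr, pc
        bx      r2          @ or "mov pc, r2" without interworking/ARMv4T
   */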
18219
18220 /* Output a move of a long double from ARM registers to ARM registers.
18221 OPERANDS[0] is the destination.
18222 OPERANDS[1] is the source. */
18223 const char *
18224 output_mov_long_double_arm_from_arm (rtx *operands)
18225 {
18226 /* We have to be careful here because the two might overlap. */
18227 int dest_start = REGNO (operands[0]);
18228 int src_start = REGNO (operands[1]);
18229 rtx ops[2];
18230 int i;
18231
18232 if (dest_start < src_start)
18233 {
18234 for (i = 0; i < 3; i++)
18235 {
18236 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18237 ops[1] = gen_rtx_REG (SImode, src_start + i);
18238 output_asm_insn ("mov%?\t%0, %1", ops);
18239 }
18240 }
18241 else
18242 {
18243 for (i = 2; i >= 0; i--)
18244 {
18245 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18246 ops[1] = gen_rtx_REG (SImode, src_start + i);
18247 output_asm_insn ("mov%?\t%0, %1", ops);
18248 }
18249 }
18250
18251 return "";
18252 }
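
/* Worked example (registers assumed): moving {r0, r1, r2} into {r1, r2, r3}
   overlaps, and since the destination starts above the source the copy is
   done highest word first:

        mov     r3, r2
        mov     r2, r1
        mov     r1, r0
   */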
18253
18254 void
18255 arm_emit_movpair (rtx dest, rtx src)
18256 {
18257 /* If the src is an immediate, simplify it. */
18258 if (CONST_INT_P (src))
18259 {
18260 HOST_WIDE_INT val = INTVAL (src);
18261 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18262 if ((val >> 16) & 0x0000ffff)
18263 {
18264 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18265 GEN_INT (16)),
18266 GEN_INT ((val >> 16) & 0x0000ffff));
18267 rtx_insn *insn = get_last_insn ();
18268 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18269 }
18270 return;
18271 }
18272 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18273 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18274 rtx_insn *insn = get_last_insn ();
18275 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18276 }
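
/* For illustration (value and register assumed): a constant such as
   0x12345678 typically ends up as the pair

        movw    r0, #0x5678
        movt    r0, #0x1234

   while a value whose upper half is zero needs only the first set.  */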
18277
18278 /* Output a move between double words. It must be REG<-MEM
18279 or MEM<-REG. */
18280 const char *
18281 output_move_double (rtx *operands, bool emit, int *count)
18282 {
18283 enum rtx_code code0 = GET_CODE (operands[0]);
18284 enum rtx_code code1 = GET_CODE (operands[1]);
18285 rtx otherops[3];
18286 if (count)
18287 *count = 1;
18288
18289 /* The only case when this might happen is when
18290 you are looking at the length of a DImode instruction
18291 that has an invalid constant in it. */
18292 if (code0 == REG && code1 != MEM)
18293 {
18294 gcc_assert (!emit);
18295 *count = 2;
18296 return "";
18297 }
18298
18299 if (code0 == REG)
18300 {
18301 unsigned int reg0 = REGNO (operands[0]);
18302
18303 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18304
18305 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18306
18307 switch (GET_CODE (XEXP (operands[1], 0)))
18308 {
18309 case REG:
18310
18311 if (emit)
18312 {
18313 if (TARGET_LDRD
18314 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18315 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18316 else
18317 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18318 }
18319 break;
18320
18321 case PRE_INC:
18322 gcc_assert (TARGET_LDRD);
18323 if (emit)
18324 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18325 break;
18326
18327 case PRE_DEC:
18328 if (emit)
18329 {
18330 if (TARGET_LDRD)
18331 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18332 else
18333 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18334 }
18335 break;
18336
18337 case POST_INC:
18338 if (emit)
18339 {
18340 if (TARGET_LDRD)
18341 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18342 else
18343 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18344 }
18345 break;
18346
18347 case POST_DEC:
18348 gcc_assert (TARGET_LDRD);
18349 if (emit)
18350 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18351 break;
18352
18353 case PRE_MODIFY:
18354 case POST_MODIFY:
18355 /* Autoincrement addressing modes should never have overlapping
18356 base and destination registers, and overlapping index registers
18357 are already prohibited, so this doesn't need to worry about
18358 fix_cm3_ldrd. */
18359 otherops[0] = operands[0];
18360 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18361 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18362
18363 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18364 {
18365 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18366 {
18367 /* Registers overlap so split out the increment. */
18368 if (emit)
18369 {
18370 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18371 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18372 }
18373 if (count)
18374 *count = 2;
18375 }
18376 else
18377 {
18378 /* Use a single insn if we can.
18379 FIXME: IWMMXT allows offsets larger than ldrd can
18380 handle, fix these up with a pair of ldr. */
18381 if (TARGET_THUMB2
18382 || !CONST_INT_P (otherops[2])
18383 || (INTVAL (otherops[2]) > -256
18384 && INTVAL (otherops[2]) < 256))
18385 {
18386 if (emit)
18387 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18388 }
18389 else
18390 {
18391 if (emit)
18392 {
18393 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18394 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18395 }
18396 if (count)
18397 *count = 2;
18398
18399 }
18400 }
18401 }
18402 else
18403 {
18404 /* Use a single insn if we can.
18405 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18406 fix these up with a pair of ldr. */
18407 if (TARGET_THUMB2
18408 || !CONST_INT_P (otherops[2])
18409 || (INTVAL (otherops[2]) > -256
18410 && INTVAL (otherops[2]) < 256))
18411 {
18412 if (emit)
18413 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18414 }
18415 else
18416 {
18417 if (emit)
18418 {
18419 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18420 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18421 }
18422 if (count)
18423 *count = 2;
18424 }
18425 }
18426 break;
18427
18428 case LABEL_REF:
18429 case CONST:
18430 /* We might be able to use ldrd %0, %1 here. However, the range is
18431 different from that of ldr/adr, and it is broken on some ARMv7-M
18432 implementations. */
18433 /* Use the second register of the pair to avoid problematic
18434 overlap. */
18435 otherops[1] = operands[1];
18436 if (emit)
18437 output_asm_insn ("adr%?\t%0, %1", otherops);
18438 operands[1] = otherops[0];
18439 if (emit)
18440 {
18441 if (TARGET_LDRD)
18442 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18443 else
18444 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18445 }
18446
18447 if (count)
18448 *count = 2;
18449 break;
18450
18451 /* ??? This needs checking for thumb2. */
18452 default:
18453 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18454 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18455 {
18456 otherops[0] = operands[0];
18457 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18458 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18459
18460 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18461 {
18462 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18463 {
18464 switch ((int) INTVAL (otherops[2]))
18465 {
18466 case -8:
18467 if (emit)
18468 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18469 return "";
18470 case -4:
18471 if (TARGET_THUMB2)
18472 break;
18473 if (emit)
18474 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18475 return "";
18476 case 4:
18477 if (TARGET_THUMB2)
18478 break;
18479 if (emit)
18480 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18481 return "";
18482 }
18483 }
18484 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18485 operands[1] = otherops[0];
18486 if (TARGET_LDRD
18487 && (REG_P (otherops[2])
18488 || TARGET_THUMB2
18489 || (CONST_INT_P (otherops[2])
18490 && INTVAL (otherops[2]) > -256
18491 && INTVAL (otherops[2]) < 256)))
18492 {
18493 if (reg_overlap_mentioned_p (operands[0],
18494 otherops[2]))
18495 {
18496 /* Swap base and index registers over to
18497 avoid a conflict. */
18498 std::swap (otherops[1], otherops[2]);
18499 }
18500 /* If both registers conflict, it will usually
18501 have been fixed by a splitter. */
18502 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18503 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18504 {
18505 if (emit)
18506 {
18507 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18508 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18509 }
18510 if (count)
18511 *count = 2;
18512 }
18513 else
18514 {
18515 otherops[0] = operands[0];
18516 if (emit)
18517 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18518 }
18519 return "";
18520 }
18521
18522 if (CONST_INT_P (otherops[2]))
18523 {
18524 if (emit)
18525 {
18526 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18527 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18528 else
18529 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18530 }
18531 }
18532 else
18533 {
18534 if (emit)
18535 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18536 }
18537 }
18538 else
18539 {
18540 if (emit)
18541 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18542 }
18543
18544 if (count)
18545 *count = 2;
18546
18547 if (TARGET_LDRD)
18548 return "ldrd%?\t%0, [%1]";
18549
18550 return "ldmia%?\t%1, %M0";
18551 }
18552 else
18553 {
18554 otherops[1] = adjust_address (operands[1], SImode, 4);
18555 /* Take care of overlapping base/data reg. */
18556 if (reg_mentioned_p (operands[0], operands[1]))
18557 {
18558 if (emit)
18559 {
18560 output_asm_insn ("ldr%?\t%0, %1", otherops);
18561 output_asm_insn ("ldr%?\t%0, %1", operands);
18562 }
18563 if (count)
18564 *count = 2;
18565
18566 }
18567 else
18568 {
18569 if (emit)
18570 {
18571 output_asm_insn ("ldr%?\t%0, %1", operands);
18572 output_asm_insn ("ldr%?\t%0, %1", otherops);
18573 }
18574 if (count)
18575 *count = 2;
18576 }
18577 }
18578 }
18579 }
18580 else
18581 {
18582 /* Constraints should ensure this. */
18583 gcc_assert (code0 == MEM && code1 == REG);
18584 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18585 || (TARGET_ARM && TARGET_LDRD));
18586
18587 /* For TARGET_ARM the first source register of an STRD
18588 must be even. This is usually the case for double-word
18589 values but user assembly constraints can force an odd
18590 starting register. */
18591 bool allow_strd = TARGET_LDRD
18592 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18593 switch (GET_CODE (XEXP (operands[0], 0)))
18594 {
18595 case REG:
18596 if (emit)
18597 {
18598 if (allow_strd)
18599 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18600 else
18601 output_asm_insn ("stm%?\t%m0, %M1", operands);
18602 }
18603 break;
18604
18605 case PRE_INC:
18606 gcc_assert (allow_strd);
18607 if (emit)
18608 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18609 break;
18610
18611 case PRE_DEC:
18612 if (emit)
18613 {
18614 if (allow_strd)
18615 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18616 else
18617 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18618 }
18619 break;
18620
18621 case POST_INC:
18622 if (emit)
18623 {
18624 if (allow_strd)
18625 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18626 else
18627 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18628 }
18629 break;
18630
18631 case POST_DEC:
18632 gcc_assert (allow_strd);
18633 if (emit)
18634 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18635 break;
18636
18637 case PRE_MODIFY:
18638 case POST_MODIFY:
18639 otherops[0] = operands[1];
18640 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18641 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18642
18643 /* IWMMXT allows offsets larger than strd can handle,
18644 fix these up with a pair of str. */
18645 if (!TARGET_THUMB2
18646 && CONST_INT_P (otherops[2])
18647 && (INTVAL(otherops[2]) <= -256
18648 || INTVAL(otherops[2]) >= 256))
18649 {
18650 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18651 {
18652 if (emit)
18653 {
18654 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18655 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18656 }
18657 if (count)
18658 *count = 2;
18659 }
18660 else
18661 {
18662 if (emit)
18663 {
18664 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18665 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18666 }
18667 if (count)
18668 *count = 2;
18669 }
18670 }
18671 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18672 {
18673 if (emit)
18674 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18675 }
18676 else
18677 {
18678 if (emit)
18679 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18680 }
18681 break;
18682
18683 case PLUS:
18684 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18685 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18686 {
18687 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18688 {
18689 case -8:
18690 if (emit)
18691 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18692 return "";
18693
18694 case -4:
18695 if (TARGET_THUMB2)
18696 break;
18697 if (emit)
18698 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18699 return "";
18700
18701 case 4:
18702 if (TARGET_THUMB2)
18703 break;
18704 if (emit)
18705 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18706 return "";
18707 }
18708 }
18709 if (allow_strd
18710 && (REG_P (otherops[2])
18711 || TARGET_THUMB2
18712 || (CONST_INT_P (otherops[2])
18713 && INTVAL (otherops[2]) > -256
18714 && INTVAL (otherops[2]) < 256)))
18715 {
18716 otherops[0] = operands[1];
18717 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18718 if (emit)
18719 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18720 return "";
18721 }
18722 /* Fall through */
18723
18724 default:
18725 otherops[0] = adjust_address (operands[0], SImode, 4);
18726 otherops[1] = operands[1];
18727 if (emit)
18728 {
18729 output_asm_insn ("str%?\t%1, %0", operands);
18730 output_asm_insn ("str%?\t%H1, %0", otherops);
18731 }
18732 if (count)
18733 *count = 2;
18734 }
18735 }
18736
18737 return "";
18738 }
18739
18740 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18741 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18742
18743 const char *
18744 output_move_quad (rtx *operands)
18745 {
18746 if (REG_P (operands[0]))
18747 {
18748 /* Load, or reg->reg move. */
18749
18750 if (MEM_P (operands[1]))
18751 {
18752 switch (GET_CODE (XEXP (operands[1], 0)))
18753 {
18754 case REG:
18755 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18756 break;
18757
18758 case LABEL_REF:
18759 case CONST:
18760 output_asm_insn ("adr%?\t%0, %1", operands);
18761 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18762 break;
18763
18764 default:
18765 gcc_unreachable ();
18766 }
18767 }
18768 else
18769 {
18770 rtx ops[2];
18771 int dest, src, i;
18772
18773 gcc_assert (REG_P (operands[1]));
18774
18775 dest = REGNO (operands[0]);
18776 src = REGNO (operands[1]);
18777
18778 /* This seems pretty dumb, but hopefully GCC won't try to do it
18779 very often. */
18780 if (dest < src)
18781 for (i = 0; i < 4; i++)
18782 {
18783 ops[0] = gen_rtx_REG (SImode, dest + i);
18784 ops[1] = gen_rtx_REG (SImode, src + i);
18785 output_asm_insn ("mov%?\t%0, %1", ops);
18786 }
18787 else
18788 for (i = 3; i >= 0; i--)
18789 {
18790 ops[0] = gen_rtx_REG (SImode, dest + i);
18791 ops[1] = gen_rtx_REG (SImode, src + i);
18792 output_asm_insn ("mov%?\t%0, %1", ops);
18793 }
18794 }
18795 }
18796 else
18797 {
18798 gcc_assert (MEM_P (operands[0]));
18799 gcc_assert (REG_P (operands[1]));
18800 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18801
18802 switch (GET_CODE (XEXP (operands[0], 0)))
18803 {
18804 case REG:
18805 output_asm_insn ("stm%?\t%m0, %M1", operands);
18806 break;
18807
18808 default:
18809 gcc_unreachable ();
18810 }
18811 }
18812
18813 return "";
18814 }
18815
18816 /* Output a VFP load or store instruction. */
18817
18818 const char *
18819 output_move_vfp (rtx *operands)
18820 {
18821 rtx reg, mem, addr, ops[2];
18822 int load = REG_P (operands[0]);
18823 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18824 int sp = (!TARGET_VFP_FP16INST
18825 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18826 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18827 const char *templ;
18828 char buff[50];
18829 machine_mode mode;
18830
18831 reg = operands[!load];
18832 mem = operands[load];
18833
18834 mode = GET_MODE (reg);
18835
18836 gcc_assert (REG_P (reg));
18837 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18838 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18839 || mode == SFmode
18840 || mode == DFmode
18841 || mode == HImode
18842 || mode == SImode
18843 || mode == DImode
18844 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18845 gcc_assert (MEM_P (mem));
18846
18847 addr = XEXP (mem, 0);
18848
18849 switch (GET_CODE (addr))
18850 {
18851 case PRE_DEC:
18852 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18853 ops[0] = XEXP (addr, 0);
18854 ops[1] = reg;
18855 break;
18856
18857 case POST_INC:
18858 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18859 ops[0] = XEXP (addr, 0);
18860 ops[1] = reg;
18861 break;
18862
18863 default:
18864 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18865 ops[0] = reg;
18866 ops[1] = mem;
18867 break;
18868 }
18869
18870 sprintf (buff, templ,
18871 load ? "ld" : "st",
18872 dp ? "64" : sp ? "32" : "16",
18873 dp ? "P" : "",
18874 integer_p ? "\t%@ int" : "");
18875 output_asm_insn (buff, ops);
18876
18877 return "";
18878 }
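
/* Typical outputs (operands assumed): a DFmode load and an SFmode store
   through a plain register address come out as

        vldr.64 d0, [r0]
        vstr.32 s0, [r1]

   and integer-mode transfers get an "@ int" comment appended.  */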
18879
18880 /* Output a Neon double-word or quad-word load or store, or a load
18881 or store for larger structure modes.
18882
18883 WARNING: The ordering of elements is weird in big-endian mode,
18884 because the EABI requires that vectors stored in memory appear
18885 as though they were stored by a VSTM.
18886 GCC RTL defines element ordering based on in-memory order.
18887 This can be different from the architectural ordering of elements
18888 within a NEON register. The intrinsics defined in arm_neon.h use the
18889 NEON register element ordering, not the GCC RTL element ordering.
18890
18891 For example, the in-memory ordering of a big-endian quadword
18892 vector with 16-bit elements when stored from register pair {d0,d1}
18893 will be (lowest address first, d0[N] is NEON register element N):
18894
18895 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18896
18897 When necessary, quadword registers (dN, dN+1) are moved to ARM
18898 registers starting at rN in the order:
18899
18900 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18901
18902 So that STM/LDM can be used on vectors in ARM registers, and the
18903 same memory layout will result as if VSTM/VLDM were used.
18904
18905 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18906 possible, which allows use of appropriate alignment tags.
18907 Note that the choice of "64" is independent of the actual vector
18908 element size; this size simply ensures that the behavior is
18909 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18910
18911 Due to limitations of those instructions, use of VST1.64/VLD1.64
18912 is not possible if:
18913 - the address contains PRE_DEC, or
18914 - the mode refers to more than 4 double-word registers
18915
18916 In those cases, it would be possible to replace VSTM/VLDM by a
18917 sequence of instructions; this is not currently implemented since
18918 this is not certain to actually improve performance. */
18919
18920 const char *
18921 output_move_neon (rtx *operands)
18922 {
18923 rtx reg, mem, addr, ops[2];
18924 int regno, nregs, load = REG_P (operands[0]);
18925 const char *templ;
18926 char buff[50];
18927 machine_mode mode;
18928
18929 reg = operands[!load];
18930 mem = operands[load];
18931
18932 mode = GET_MODE (reg);
18933
18934 gcc_assert (REG_P (reg));
18935 regno = REGNO (reg);
18936 nregs = REG_NREGS (reg) / 2;
18937 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18938 || NEON_REGNO_OK_FOR_QUAD (regno));
18939 gcc_assert (VALID_NEON_DREG_MODE (mode)
18940 || VALID_NEON_QREG_MODE (mode)
18941 || VALID_NEON_STRUCT_MODE (mode));
18942 gcc_assert (MEM_P (mem));
18943
18944 addr = XEXP (mem, 0);
18945
18946 /* Strip off const from addresses like (const (plus (...))). */
18947 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18948 addr = XEXP (addr, 0);
18949
18950 switch (GET_CODE (addr))
18951 {
18952 case POST_INC:
18953 /* We have to use vldm / vstm for too-large modes. */
18954 if (nregs > 4)
18955 {
18956 templ = "v%smia%%?\t%%0!, %%h1";
18957 ops[0] = XEXP (addr, 0);
18958 }
18959 else
18960 {
18961 templ = "v%s1.64\t%%h1, %%A0";
18962 ops[0] = mem;
18963 }
18964 ops[1] = reg;
18965 break;
18966
18967 case PRE_DEC:
18968 /* We have to use vldm / vstm in this case, since there is no
18969 pre-decrement form of the vld1 / vst1 instructions. */
18970 templ = "v%smdb%%?\t%%0!, %%h1";
18971 ops[0] = XEXP (addr, 0);
18972 ops[1] = reg;
18973 break;
18974
18975 case POST_MODIFY:
18976 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18977 gcc_unreachable ();
18978
18979 case REG:
18980 /* We have to use vldm / vstm for too-large modes. */
18981 if (nregs > 1)
18982 {
18983 if (nregs > 4)
18984 templ = "v%smia%%?\t%%m0, %%h1";
18985 else
18986 templ = "v%s1.64\t%%h1, %%A0";
18987
18988 ops[0] = mem;
18989 ops[1] = reg;
18990 break;
18991 }
18992 /* Fall through. */
18993 case LABEL_REF:
18994 case PLUS:
18995 {
18996 int i;
18997 int overlap = -1;
18998 for (i = 0; i < nregs; i++)
18999 {
19000 /* We're only using DImode here because it's a convenient size. */
19001 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
19002 ops[1] = adjust_address (mem, DImode, 8 * i);
19003 if (reg_overlap_mentioned_p (ops[0], mem))
19004 {
19005 gcc_assert (overlap == -1);
19006 overlap = i;
19007 }
19008 else
19009 {
19010 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19011 output_asm_insn (buff, ops);
19012 }
19013 }
19014 if (overlap != -1)
19015 {
19016 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
19017 ops[1] = adjust_address (mem, SImode, 8 * overlap);
19018 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19019 output_asm_insn (buff, ops);
19020 }
19021
19022 return "";
19023 }
19024
19025 default:
19026 gcc_unreachable ();
19027 }
19028
19029 sprintf (buff, templ, load ? "ld" : "st");
19030 output_asm_insn (buff, ops);
19031
19032 return "";
19033 }
19034
19035 /* Compute and return the length of neon_mov<mode>, where <mode> is
19036 one of VSTRUCT modes: EI, OI, CI or XI. */
19037 int
19038 arm_attr_length_move_neon (rtx_insn *insn)
19039 {
19040 rtx reg, mem, addr;
19041 int load;
19042 machine_mode mode;
19043
19044 extract_insn_cached (insn);
19045
19046 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
19047 {
19048 mode = GET_MODE (recog_data.operand[0]);
19049 switch (mode)
19050 {
19051 case E_EImode:
19052 case E_OImode:
19053 return 8;
19054 case E_CImode:
19055 return 12;
19056 case E_XImode:
19057 return 16;
19058 default:
19059 gcc_unreachable ();
19060 }
19061 }
19062
19063 load = REG_P (recog_data.operand[0]);
19064 reg = recog_data.operand[!load];
19065 mem = recog_data.operand[load];
19066
19067 gcc_assert (MEM_P (mem));
19068
19069 addr = XEXP (mem, 0);
19070
19071 /* Strip off const from addresses like (const (plus (...))). */
19072 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19073 addr = XEXP (addr, 0);
19074
19075 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
19076 {
19077 int insns = REG_NREGS (reg) / 2;
19078 return insns * 4;
19079 }
19080 else
19081 return 4;
19082 }
19083
19084 /* Return nonzero if the address is a plain register or a register plus an
19085 immediate offset; otherwise return zero. */
19086
19087 int
19088 arm_address_offset_is_imm (rtx_insn *insn)
19089 {
19090 rtx mem, addr;
19091
19092 extract_insn_cached (insn);
19093
19094 if (REG_P (recog_data.operand[0]))
19095 return 0;
19096
19097 mem = recog_data.operand[0];
19098
19099 gcc_assert (MEM_P (mem));
19100
19101 addr = XEXP (mem, 0);
19102
19103 if (REG_P (addr)
19104 || (GET_CODE (addr) == PLUS
19105 && REG_P (XEXP (addr, 0))
19106 && CONST_INT_P (XEXP (addr, 1))))
19107 return 1;
19108 else
19109 return 0;
19110 }
19111
19112 /* Output an ADD r, s, #n where n may be too big for one instruction. If n
19113 is zero and the source and destination registers are the same, output nothing. */
19114 const char *
19115 output_add_immediate (rtx *operands)
19116 {
19117 HOST_WIDE_INT n = INTVAL (operands[2]);
19118
19119 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19120 {
19121 if (n < 0)
19122 output_multi_immediate (operands,
19123 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19124 -n);
19125 else
19126 output_multi_immediate (operands,
19127 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19128 n);
19129 }
19130
19131 return "";
19132 }
19133
19134 /* Output a multiple immediate operation.
19135 OPERANDS is the vector of operands referred to in the output patterns.
19136 INSTR1 is the output pattern to use for the first constant.
19137 INSTR2 is the output pattern to use for subsequent constants.
19138 IMMED_OP is the index of the constant slot in OPERANDS.
19139 N is the constant value. */
19140 static const char *
19141 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19142 int immed_op, HOST_WIDE_INT n)
19143 {
19144 #if HOST_BITS_PER_WIDE_INT > 32
19145 n &= 0xffffffff;
19146 #endif
19147
19148 if (n == 0)
19149 {
19150 /* Quick and easy output. */
19151 operands[immed_op] = const0_rtx;
19152 output_asm_insn (instr1, operands);
19153 }
19154 else
19155 {
19156 int i;
19157 const char * instr = instr1;
19158
19159 /* Note that n is never zero here (which would give no output). */
19160 for (i = 0; i < 32; i += 2)
19161 {
19162 if (n & (3 << i))
19163 {
19164 operands[immed_op] = GEN_INT (n & (255 << i));
19165 output_asm_insn (instr, operands);
19166 instr = instr2;
19167 i += 6;
19168 }
19169 }
19170 }
19171
19172 return "";
19173 }
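
/* Worked example (registers assumed, called from output_add_immediate
   above): N = 0x10004 is not a valid ARM immediate, so the loop peels off
   one rotated 8-bit chunk at a time and two instructions are emitted:

        add     r0, r1, #4
        add     r0, r0, #65536      @ 0x10000
   */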
19174
19175 /* Return the name of a shifter operation. */
19176 static const char *
19177 arm_shift_nmem(enum rtx_code code)
19178 {
19179 switch (code)
19180 {
19181 case ASHIFT:
19182 return ARM_LSL_NAME;
19183
19184 case ASHIFTRT:
19185 return "asr";
19186
19187 case LSHIFTRT:
19188 return "lsr";
19189
19190 case ROTATERT:
19191 return "ror";
19192
19193 default:
19194 abort();
19195 }
19196 }
19197
19198 /* Return the appropriate ARM instruction for the operation code.
19199 The returned result should not be overwritten. OP is the rtx of the
19200 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19201 was shifted. */
19202 const char *
19203 arithmetic_instr (rtx op, int shift_first_arg)
19204 {
19205 switch (GET_CODE (op))
19206 {
19207 case PLUS:
19208 return "add";
19209
19210 case MINUS:
19211 return shift_first_arg ? "rsb" : "sub";
19212
19213 case IOR:
19214 return "orr";
19215
19216 case XOR:
19217 return "eor";
19218
19219 case AND:
19220 return "and";
19221
19222 case ASHIFT:
19223 case ASHIFTRT:
19224 case LSHIFTRT:
19225 case ROTATERT:
19226 return arm_shift_nmem(GET_CODE(op));
19227
19228 default:
19229 gcc_unreachable ();
19230 }
19231 }
19232
19233 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19234 for the operation code. The returned result should not be overwritten.
19235 OP is the rtx of the shift.
19236 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19237 constant shift amount otherwise. */
19238 static const char *
19239 shift_op (rtx op, HOST_WIDE_INT *amountp)
19240 {
19241 const char * mnem;
19242 enum rtx_code code = GET_CODE (op);
19243
19244 switch (code)
19245 {
19246 case ROTATE:
19247 if (!CONST_INT_P (XEXP (op, 1)))
19248 {
19249 output_operand_lossage ("invalid shift operand");
19250 return NULL;
19251 }
19252
19253 code = ROTATERT;
19254 *amountp = 32 - INTVAL (XEXP (op, 1));
19255 mnem = "ror";
19256 break;
19257
19258 case ASHIFT:
19259 case ASHIFTRT:
19260 case LSHIFTRT:
19261 case ROTATERT:
19262 mnem = arm_shift_nmem(code);
19263 if (CONST_INT_P (XEXP (op, 1)))
19264 {
19265 *amountp = INTVAL (XEXP (op, 1));
19266 }
19267 else if (REG_P (XEXP (op, 1)))
19268 {
19269 *amountp = -1;
19270 return mnem;
19271 }
19272 else
19273 {
19274 output_operand_lossage ("invalid shift operand");
19275 return NULL;
19276 }
19277 break;
19278
19279 case MULT:
19280 /* We never have to worry about the amount being other than a
19281 power of 2, since this case can never be reloaded from a reg. */
19282 if (!CONST_INT_P (XEXP (op, 1)))
19283 {
19284 output_operand_lossage ("invalid shift operand");
19285 return NULL;
19286 }
19287
19288 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19289
19290 /* Amount must be a power of two. */
19291 if (*amountp & (*amountp - 1))
19292 {
19293 output_operand_lossage ("invalid shift operand");
19294 return NULL;
19295 }
19296
19297 *amountp = exact_log2 (*amountp);
19298 gcc_assert (IN_RANGE (*amountp, 0, 31));
19299 return ARM_LSL_NAME;
19300
19301 default:
19302 output_operand_lossage ("invalid shift operand");
19303 return NULL;
19304 }
19305
19306 /* This is not 100% correct, but follows from the desire to merge
19307 multiplication by a power of 2 with the recognizer for a
19308 shift. >=32 is not a valid shift for "lsl", so we must try to
19309 output a shift that produces the correct arithmetical result.
19310 Using lsr #32 is identical except for the fact that the carry bit
19311 is not set correctly if we set the flags; but we never use the
19312 carry bit from such an operation, so we can ignore that. */
19313 if (code == ROTATERT)
19314 /* Rotate is just modulo 32. */
19315 *amountp &= 31;
19316 else if (*amountp != (*amountp & 31))
19317 {
19318 if (code == ASHIFT)
19319 mnem = "lsr";
19320 *amountp = 32;
19321 }
19322
19323 /* Shifts of 0 are no-ops. */
19324 if (*amountp == 0)
19325 return NULL;
19326
19327 return mnem;
19328 }
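
/* Examples (illustrative only): (mult x 8) comes back as "lsl" with
   *AMOUNTP == 3; (rotate x 8) is rewritten as "ror" with *AMOUNTP == 24;
   and (ashift x 34) falls into the >= 32 handling above and is emitted as
   "lsr" #32, which still produces a zero result.  */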
19329
19330 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19331 because /bin/as is horribly restrictive. The judgement about
19332 whether or not each character is 'printable' (and can be output as
19333 is) or not (and must be printed with an octal escape) must be made
19334 with reference to the *host* character set -- the situation is
19335 similar to that discussed in the comments above pp_c_char in
19336 c-pretty-print.c. */
19337
19338 #define MAX_ASCII_LEN 51
19339
19340 void
19341 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19342 {
19343 int i;
19344 int len_so_far = 0;
19345
19346 fputs ("\t.ascii\t\"", stream);
19347
19348 for (i = 0; i < len; i++)
19349 {
19350 int c = p[i];
19351
19352 if (len_so_far >= MAX_ASCII_LEN)
19353 {
19354 fputs ("\"\n\t.ascii\t\"", stream);
19355 len_so_far = 0;
19356 }
19357
19358 if (ISPRINT (c))
19359 {
19360 if (c == '\\' || c == '\"')
19361 {
19362 putc ('\\', stream);
19363 len_so_far++;
19364 }
19365 putc (c, stream);
19366 len_so_far++;
19367 }
19368 else
19369 {
19370 fprintf (stream, "\\%03o", c);
19371 len_so_far += 4;
19372 }
19373 }
19374
19375 fputs ("\"\n", stream);
19376 }
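
/* Example (input assumed): the bytes 'H' 'i' '"' '\n' are emitted as

        .ascii  "Hi\"\012"

   with the string closed and restarted whenever it grows past
   MAX_ASCII_LEN characters.  */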
19377 \f
19378 /* Whether a register is callee saved or not. This is necessary because high
19379 registers are marked as caller saved when optimizing for size on Thumb-1
19380 targets, despite actually being callee saved, in order to avoid using them. */
19381 #define callee_saved_reg_p(reg) \
19382 (!call_used_regs[reg] \
19383 || (TARGET_THUMB1 && optimize_size \
19384 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19385
19386 /* Compute the register save mask for registers 0 through 12
19387 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19388
19389 static unsigned long
19390 arm_compute_save_reg0_reg12_mask (void)
19391 {
19392 unsigned long func_type = arm_current_func_type ();
19393 unsigned long save_reg_mask = 0;
19394 unsigned int reg;
19395
19396 if (IS_INTERRUPT (func_type))
19397 {
19398 unsigned int max_reg;
19399 /* Interrupt functions must not corrupt any registers,
19400 even call clobbered ones. If this is a leaf function
19401 we can just examine the registers used by the RTL, but
19402 otherwise we have to assume that whatever function is
19403 called might clobber anything, and so we have to save
19404 all the call-clobbered registers as well. */
19405 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19406 /* FIQ handlers have registers r8 - r12 banked, so
19407 we only need to check r0 - r7; normal ISRs only
19408 bank r14 and r15, so we must check up to r12.
19409 r13 is the stack pointer, which is always preserved,
19410 so we do not need to consider it here. */
19411 max_reg = 7;
19412 else
19413 max_reg = 12;
19414
19415 for (reg = 0; reg <= max_reg; reg++)
19416 if (df_regs_ever_live_p (reg)
19417 || (! crtl->is_leaf && call_used_regs[reg]))
19418 save_reg_mask |= (1 << reg);
19419
19420 /* Also save the pic base register if necessary. */
19421 if (flag_pic
19422 && !TARGET_SINGLE_PIC_BASE
19423 && arm_pic_register != INVALID_REGNUM
19424 && crtl->uses_pic_offset_table)
19425 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19426 }
19427 else if (IS_VOLATILE(func_type))
19428 {
19429 /* For noreturn functions we historically omitted register saves
19430 altogether. However, this really messes up debugging. As a
19431 compromise, save just the frame pointers. Combined with the link
19432 register saved elsewhere, this should be sufficient to get
19433 a backtrace. */
19434 if (frame_pointer_needed)
19435 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19436 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19437 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19438 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19439 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19440 }
19441 else
19442 {
19443 /* In the normal case we only need to save those registers
19444 which are call saved and which are used by this function. */
19445 for (reg = 0; reg <= 11; reg++)
19446 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19447 save_reg_mask |= (1 << reg);
19448
19449 /* Handle the frame pointer as a special case. */
19450 if (frame_pointer_needed)
19451 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19452
19453 /* If we aren't loading the PIC register,
19454 don't stack it even though it may be live. */
19455 if (flag_pic
19456 && !TARGET_SINGLE_PIC_BASE
19457 && arm_pic_register != INVALID_REGNUM
19458 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19459 || crtl->uses_pic_offset_table))
19460 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19461
19462 /* The prologue will copy SP into R0, so save it. */
19463 if (IS_STACKALIGN (func_type))
19464 save_reg_mask |= 1;
19465 }
19466
19467 /* Save registers so the exception handler can modify them. */
19468 if (crtl->calls_eh_return)
19469 {
19470 unsigned int i;
19471
19472 for (i = 0; ; i++)
19473 {
19474 reg = EH_RETURN_DATA_REGNO (i);
19475 if (reg == INVALID_REGNUM)
19476 break;
19477 save_reg_mask |= 1 << reg;
19478 }
19479 }
19480
19481 return save_reg_mask;
19482 }
19483
19484 /* Return true if r3 is live at the start of the function. */
19485
19486 static bool
19487 arm_r3_live_at_start_p (void)
19488 {
19489 /* Just look at cfg info, which is still close enough to correct at this
19490 point. This gives false positives for broken functions that might use
19491 uninitialized data that happens to be allocated in r3, but who cares? */
19492 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19493 }
19494
19495 /* Compute the number of bytes used to store the static chain register on the
19496 stack, above the stack frame. We need to know this accurately to get the
19497 alignment of the rest of the stack frame correct. */
19498
19499 static int
19500 arm_compute_static_chain_stack_bytes (void)
19501 {
19502 /* Once the value is updated from the init value of -1, do not
19503 re-compute. */
19504 if (cfun->machine->static_chain_stack_bytes != -1)
19505 return cfun->machine->static_chain_stack_bytes;
19506
19507 /* See the defining assertion in arm_expand_prologue. */
19508 if (IS_NESTED (arm_current_func_type ())
19509 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19510 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19511 || flag_stack_clash_protection)
19512 && !df_regs_ever_live_p (LR_REGNUM)))
19513 && arm_r3_live_at_start_p ()
19514 && crtl->args.pretend_args_size == 0)
19515 return 4;
19516
19517 return 0;
19518 }
19519
19520 /* Compute a bit mask of which core registers need to be
19521 saved on the stack for the current function.
19522 This is used by arm_compute_frame_layout, which may add extra registers. */
19523
19524 static unsigned long
19525 arm_compute_save_core_reg_mask (void)
19526 {
19527 unsigned int save_reg_mask = 0;
19528 unsigned long func_type = arm_current_func_type ();
19529 unsigned int reg;
19530
19531 if (IS_NAKED (func_type))
19532 /* This should never really happen. */
19533 return 0;
19534
19535 /* If we are creating a stack frame, then we must save the frame pointer,
19536 IP (which will hold the old stack pointer), LR and the PC. */
19537 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19538 save_reg_mask |=
19539 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19540 | (1 << IP_REGNUM)
19541 | (1 << LR_REGNUM)
19542 | (1 << PC_REGNUM);
19543
19544 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19545
19546 /* Decide if we need to save the link register.
19547 Interrupt routines have their own banked link register,
19548 so they never need to save it.
19549 Otherwise if we do not use the link register we do not need to save
19550 it. If we are pushing other registers onto the stack however, we
19551 can save an instruction in the epilogue by pushing the link register
19552 now and then popping it back into the PC. This incurs extra memory
19553 accesses though, so we only do it when optimizing for size, and only
19554 if we know that we will not need a fancy return sequence. */
19555 if (df_regs_ever_live_p (LR_REGNUM)
19556 || (save_reg_mask
19557 && optimize_size
19558 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19559 && !crtl->tail_call_emit
19560 && !crtl->calls_eh_return))
19561 save_reg_mask |= 1 << LR_REGNUM;
19562
19563 if (cfun->machine->lr_save_eliminated)
19564 save_reg_mask &= ~ (1 << LR_REGNUM);
19565
19566 if (TARGET_REALLY_IWMMXT
19567 && ((bit_count (save_reg_mask)
19568 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19569 arm_compute_static_chain_stack_bytes())
19570 ) % 2) != 0)
19571 {
19572 /* The total number of registers that are going to be pushed
19573 onto the stack is odd. We need to ensure that the stack
19574 is 64-bit aligned before we start to save iWMMXt registers,
19575 and also before we start to create locals. (A local variable
19576 might be a double or long long which we will load/store using
19577 an iWMMXt instruction). Therefore we need to push another
19578 ARM register, so that the stack will be 64-bit aligned. We
19579 try to avoid using the arg registers (r0-r3) as they might be
19580 used to pass values in a tail call. */
19581 for (reg = 4; reg <= 12; reg++)
19582 if ((save_reg_mask & (1 << reg)) == 0)
19583 break;
19584
19585 if (reg <= 12)
19586 save_reg_mask |= (1 << reg);
19587 else
19588 {
19589 cfun->machine->sibcall_blocked = 1;
19590 save_reg_mask |= (1 << 3);
19591 }
19592 }
19593
19594 /* We may need to push an additional register for use in initializing the
19595 PIC base register. */
19596 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19597 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19598 {
19599 reg = thumb_find_work_register (1 << 4);
19600 if (!call_used_regs[reg])
19601 save_reg_mask |= (1 << reg);
19602 }
19603
19604 return save_reg_mask;
19605 }
19606
19607 /* Compute a bit mask of which core registers need to be
19608 saved on the stack for the current function. */
19609 static unsigned long
19610 thumb1_compute_save_core_reg_mask (void)
19611 {
19612 unsigned long mask;
19613 unsigned reg;
19614
19615 mask = 0;
19616 for (reg = 0; reg < 12; reg ++)
19617 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19618 mask |= 1 << reg;
19619
19620 /* Handle the frame pointer as a special case. */
19621 if (frame_pointer_needed)
19622 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19623
19624 if (flag_pic
19625 && !TARGET_SINGLE_PIC_BASE
19626 && arm_pic_register != INVALID_REGNUM
19627 && crtl->uses_pic_offset_table)
19628 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19629
19630 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19631 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19632 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19633
19634 /* LR will also be pushed if any lo regs are pushed. */
19635 if (mask & 0xff || thumb_force_lr_save ())
19636 mask |= (1 << LR_REGNUM);
19637
19638 /* Make sure we have a low work register if we need one.
19639 We will need one if we are going to push a high register,
19640 but we are not currently intending to push a low register. */
19641 if ((mask & 0xff) == 0
19642 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19643 {
19644 /* Use thumb_find_work_register to choose which register
19645 we will use. If the register is live then we will
19646 have to push it. Use LAST_LO_REGNUM as our fallback
19647 choice for the register to select. */
19648 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19649 /* Make sure the register returned by thumb_find_work_register is
19650 not part of the return value. */
19651 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19652 reg = LAST_LO_REGNUM;
19653
19654 if (callee_saved_reg_p (reg))
19655 mask |= 1 << reg;
19656 }
19657
19658 /* The 504 below is 8 bytes less than 512 because there are two possible
19659 alignment words. We can't tell here if they will be present or not, so we
19660 have to play it safe and assume that they are. */
19661 if ((CALLER_INTERWORKING_SLOT_SIZE +
19662 ROUND_UP_WORD (get_frame_size ()) +
19663 crtl->outgoing_args_size) >= 504)
19664 {
19665 /* This is the same as the code in thumb1_expand_prologue() which
19666 determines which register to use for stack decrement. */
19667 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19668 if (mask & (1 << reg))
19669 break;
19670
19671 if (reg > LAST_LO_REGNUM)
19672 {
19673 /* Make sure we have a register available for stack decrement. */
19674 mask |= 1 << LAST_LO_REGNUM;
19675 }
19676 }
19677
19678 return mask;
19679 }
19680
19681
19682 /* Return the number of bytes required to save VFP registers. */
19683 static int
19684 arm_get_vfp_saved_size (void)
19685 {
19686 unsigned int regno;
19687 int count;
19688 int saved;
19689
19690 saved = 0;
19691 /* Space for saved VFP registers. */
19692 if (TARGET_HARD_FLOAT)
19693 {
19694 count = 0;
19695 for (regno = FIRST_VFP_REGNUM;
19696 regno < LAST_VFP_REGNUM;
19697 regno += 2)
19698 {
19699 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19700 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19701 {
19702 if (count > 0)
19703 {
19704 /* Workaround ARM10 VFPr1 bug. */
19705 if (count == 2 && !arm_arch6)
19706 count++;
19707 saved += count * 8;
19708 }
19709 count = 0;
19710 }
19711 else
19712 count++;
19713 }
19714 if (count > 0)
19715 {
19716 if (count == 2 && !arm_arch6)
19717 count++;
19718 saved += count * 8;
19719 }
19720 }
19721 return saved;
19722 }
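/* A hypothetical worked example of the above: the loop walks the VFP bank in
   S-register pairs (one D register per step), accumulating runs of live
   call-saved registers in COUNT.  If d8..d11 are the only such registers
   that are live, a single run of four D registers is found and the function
   returns 4 * 8 == 32 bytes.  When arm_arch6 is false, the ARM10 VFPr1
   workaround pads a run of exactly two D registers to three, so a run of
   d8..d9 would be accounted as 24 bytes rather than 16.  */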
19723
19724
19725 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19726 everything bar the final return instruction. If simple_return is true,
19727 then do not output the epilogue, because it has already been emitted in RTL.
19728
19729 Note: do not forget to update the length attribute of the corresponding insn
19730 pattern when changing assembly output (e.g. the length attribute of
19731 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19732 register clearing sequences). */
19733 const char *
19734 output_return_instruction (rtx operand, bool really_return, bool reverse,
19735 bool simple_return)
19736 {
19737 char conditional[10];
19738 char instr[100];
19739 unsigned reg;
19740 unsigned long live_regs_mask;
19741 unsigned long func_type;
19742 arm_stack_offsets *offsets;
19743
19744 func_type = arm_current_func_type ();
19745
19746 if (IS_NAKED (func_type))
19747 return "";
19748
19749 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19750 {
19751 /* If this function was declared non-returning, and we have
19752 found a tail call, then we have to trust that the called
19753 function won't return. */
19754 if (really_return)
19755 {
19756 rtx ops[2];
19757
19758 /* Otherwise, trap an attempted return by aborting. */
19759 ops[0] = operand;
19760 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19761 : "abort");
19762 assemble_external_libcall (ops[1]);
19763 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19764 }
19765
19766 return "";
19767 }
19768
19769 gcc_assert (!cfun->calls_alloca || really_return);
19770
19771 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19772
19773 cfun->machine->return_used_this_function = 1;
19774
19775 offsets = arm_get_frame_offsets ();
19776 live_regs_mask = offsets->saved_regs_mask;
19777
19778 if (!simple_return && live_regs_mask)
19779 {
19780 const char * return_reg;
19781
19782 /* If we do not have any special requirements for function exit
19783 (e.g. interworking) then we can load the return address
19784 directly into the PC. Otherwise we must load it into LR. */
19785 if (really_return
19786 && !IS_CMSE_ENTRY (func_type)
19787 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19788 return_reg = reg_names[PC_REGNUM];
19789 else
19790 return_reg = reg_names[LR_REGNUM];
19791
19792 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19793 {
19794 /* There are three possible reasons for the IP register
19795 being saved. 1) a stack frame was created, in which case
19796 IP contains the old stack pointer, or 2) an ISR routine
19797 corrupted it, or 3) it was saved to align the stack on
19798 iWMMXt. In case 1, restore IP into SP, otherwise just
19799 restore IP. */
19800 if (frame_pointer_needed)
19801 {
19802 live_regs_mask &= ~ (1 << IP_REGNUM);
19803 live_regs_mask |= (1 << SP_REGNUM);
19804 }
19805 else
19806 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19807 }
19808
19809 /* On some ARM architectures it is faster to use LDR rather than
19810 LDM to load a single register. On other architectures, the
19811 cost is the same. In 26 bit mode, or for exception handlers,
19812 we have to use LDM to load the PC so that the CPSR is also
19813 restored. */
19814 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19815 if (live_regs_mask == (1U << reg))
19816 break;
19817
19818 if (reg <= LAST_ARM_REGNUM
19819 && (reg != LR_REGNUM
19820 || ! really_return
19821 || ! IS_INTERRUPT (func_type)))
19822 {
19823 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19824 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19825 }
19826 else
19827 {
19828 char *p;
19829 int first = 1;
19830
19831 /* Generate the load multiple instruction to restore the
19832 registers. Note we can get here, even if
19833 frame_pointer_needed is true, but only if sp already
19834 points to the base of the saved core registers. */
19835 if (live_regs_mask & (1 << SP_REGNUM))
19836 {
19837 unsigned HOST_WIDE_INT stack_adjust;
19838
19839 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19840 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19841
19842 if (stack_adjust && arm_arch5t && TARGET_ARM)
19843 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19844 else
19845 {
19846 /* If we can't use ldmib (SA110 bug),
19847 then try to pop r3 instead. */
19848 if (stack_adjust)
19849 live_regs_mask |= 1 << 3;
19850
19851 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19852 }
19853 }
19854 /* For interrupt returns we have to use an LDM rather than
19855 a POP so that we can use the exception return variant. */
19856 else if (IS_INTERRUPT (func_type))
19857 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19858 else
19859 sprintf (instr, "pop%s\t{", conditional);
19860
19861 p = instr + strlen (instr);
19862
19863 for (reg = 0; reg <= SP_REGNUM; reg++)
19864 if (live_regs_mask & (1 << reg))
19865 {
19866 int l = strlen (reg_names[reg]);
19867
19868 if (first)
19869 first = 0;
19870 else
19871 {
19872 memcpy (p, ", ", 2);
19873 p += 2;
19874 }
19875
19876 memcpy (p, "%|", 2);
19877 memcpy (p + 2, reg_names[reg], l);
19878 p += l + 2;
19879 }
19880
19881 if (live_regs_mask & (1 << LR_REGNUM))
19882 {
19883 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19884 /* If returning from an interrupt, restore the CPSR. */
19885 if (IS_INTERRUPT (func_type))
19886 strcat (p, "^");
19887 }
19888 else
19889 strcpy (p, "}");
19890 }
19891
19892 output_asm_insn (instr, & operand);
19893
19894 /* See if we need to generate an extra instruction to
19895 perform the actual function return. */
19896 if (really_return
19897 && func_type != ARM_FT_INTERWORKED
19898 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19899 {
19900 /* The return has already been handled
19901 by loading the LR into the PC. */
19902 return "";
19903 }
19904 }
19905
19906 if (really_return)
19907 {
19908 switch ((int) ARM_FUNC_TYPE (func_type))
19909 {
19910 case ARM_FT_ISR:
19911 case ARM_FT_FIQ:
19912 /* ??? This is wrong for unified assembly syntax. */
19913 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19914 break;
19915
19916 case ARM_FT_INTERWORKED:
19917 gcc_assert (arm_arch5t || arm_arch4t);
19918 sprintf (instr, "bx%s\t%%|lr", conditional);
19919 break;
19920
19921 case ARM_FT_EXCEPTION:
19922 /* ??? This is wrong for unified assembly syntax. */
19923 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19924 break;
19925
19926 default:
19927 if (IS_CMSE_ENTRY (func_type))
19928 {
19929 /* Check if we have to clear the 'GE bits', which are only used if
19930 the parallel add and subtract instructions are available. */
19931 if (TARGET_INT_SIMD)
19932 snprintf (instr, sizeof (instr),
19933 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19934 else
19935 snprintf (instr, sizeof (instr),
19936 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19937
19938 output_asm_insn (instr, & operand);
19939 if (TARGET_HARD_FLOAT)
19940 {
19941 /* Clear the cumulative exception-status bits (0-4,7) and the
19942 condition code bits (28-31) of the FPSCR. We need to
19943 remember to clear the first scratch register used (IP) and
19944 save and restore the second (r4). */
19945 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19946 output_asm_insn (instr, & operand);
19947 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19948 output_asm_insn (instr, & operand);
19949 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19950 output_asm_insn (instr, & operand);
19951 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19952 output_asm_insn (instr, & operand);
19953 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19954 output_asm_insn (instr, & operand);
19955 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19956 output_asm_insn (instr, & operand);
19957 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19958 output_asm_insn (instr, & operand);
19959 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19960 output_asm_insn (instr, & operand);
19961 }
19962 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19963 }
19964 /* Use bx if it's available. */
19965 else if (arm_arch5t || arm_arch4t)
19966 sprintf (instr, "bx%s\t%%|lr", conditional);
19967 else
19968 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19969 break;
19970 }
19971
19972 output_asm_insn (instr, & operand);
19973 }
19974
19975 return "";
19976 }
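/* For illustration only; the exact assembly depends on the target options
   and on saved_regs_mask.  For an ARM-mode function with saved_regs_mask of
   {r4, r5, lr} and no special exit requirements, the code above emits a
   combined restore-and-return such as

     pop {r4, r5, pc}

   whereas an ISR restores with an exception-return LDM ("ldmfd sp!, {...}^")
   and, if needed, returns separately with "subs pc, lr, #4" so that the
   CPSR is restored as well.  */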
19977
19978 /* Output in FILE asm statements needed to declare the NAME of the function
19979 defined by its DECL node. */
19980
19981 void
19982 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19983 {
19984 size_t cmse_name_len;
19985 char *cmse_name = 0;
19986 char cmse_prefix[] = "__acle_se_";
19987
19988 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19989 extra function label for each function with the 'cmse_nonsecure_entry'
19990 attribute. This extra function label should be prepended with
19991 '__acle_se_', telling the linker that it needs to create secure gateway
19992 veneers for this function. */
19993 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19994 DECL_ATTRIBUTES (decl)))
19995 {
19996 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19997 cmse_name = XALLOCAVEC (char, cmse_name_len);
19998 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19999 targetm.asm_out.globalize_label (file, cmse_name);
20000
20001 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
20002 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
20003 }
20004
20005 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
20006 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20007 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20008 ASM_OUTPUT_LABEL (file, name);
20009
20010 if (cmse_name)
20011 ASM_OUTPUT_LABEL (file, cmse_name);
20012
20013 ARM_OUTPUT_FN_UNWIND (file, TRUE);
20014 }
20015
20016 /* Write the function name into the code section, directly preceding
20017 the function prologue.
20018
20019 Code will be output similar to this:
20020 t0
20021 .ascii "arm_poke_function_name", 0
20022 .align
20023 t1
20024 .word 0xff000000 + (t1 - t0)
20025 arm_poke_function_name
20026 mov ip, sp
20027 stmfd sp!, {fp, ip, lr, pc}
20028 sub fp, ip, #4
20029
20030 When performing a stack backtrace, code can inspect the value
20031 of 'pc' stored at 'fp' + 0. If the trace function then looks
20032 at location pc - 12 and the top 8 bits are set, then we know
20033 that there is a function name embedded immediately preceding this
20034 location, and that its length is (pc[-3] & ~0xff000000).
20035
20036 We assume that pc is declared as a pointer to an unsigned long.
20037
20038 It is of no benefit to output the function name if we are assembling
20039 a leaf function. These function types will not contain a stack
20040 backtrace structure, therefore it is not possible to determine the
20041 function name. */
20042 void
20043 arm_poke_function_name (FILE *stream, const char *name)
20044 {
20045 unsigned long alignlength;
20046 unsigned long length;
20047 rtx x;
20048
20049 length = strlen (name) + 1;
20050 alignlength = ROUND_UP_WORD (length);
20051
20052 ASM_OUTPUT_ASCII (stream, name, length);
20053 ASM_OUTPUT_ALIGN (stream, 2);
20054 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
20055 assemble_aligned_integer (UNITS_PER_WORD, x);
20056 }
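/* A hypothetical worked example of the above: for NAME == "foo", LENGTH is 4
   (including the trailing NUL) and ALIGNLENGTH rounds that up to 4, so the
   marker word emitted after the string is 0xff000000 + 4.  A backtracer that
   finds the top eight bits set at pc - 12 can then recover the name length
   as pc[-3] & ~0xff000000.  */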
20057
20058 /* Place some comments into the assembler stream
20059 describing the current function. */
20060 static void
20061 arm_output_function_prologue (FILE *f)
20062 {
20063 unsigned long func_type;
20064
20065 /* Sanity check. */
20066 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
20067
20068 func_type = arm_current_func_type ();
20069
20070 switch ((int) ARM_FUNC_TYPE (func_type))
20071 {
20072 default:
20073 case ARM_FT_NORMAL:
20074 break;
20075 case ARM_FT_INTERWORKED:
20076 asm_fprintf (f, "\t%@ Function supports interworking.\n");
20077 break;
20078 case ARM_FT_ISR:
20079 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
20080 break;
20081 case ARM_FT_FIQ:
20082 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
20083 break;
20084 case ARM_FT_EXCEPTION:
20085 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
20086 break;
20087 }
20088
20089 if (IS_NAKED (func_type))
20090 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20091
20092 if (IS_VOLATILE (func_type))
20093 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
20094
20095 if (IS_NESTED (func_type))
20096 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
20097 if (IS_STACKALIGN (func_type))
20098 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20099 if (IS_CMSE_ENTRY (func_type))
20100 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
20101
20102 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20103 (HOST_WIDE_INT) crtl->args.size,
20104 crtl->args.pretend_args_size,
20105 (HOST_WIDE_INT) get_frame_size ());
20106
20107 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20108 frame_pointer_needed,
20109 cfun->machine->uses_anonymous_args);
20110
20111 if (cfun->machine->lr_save_eliminated)
20112 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20113
20114 if (crtl->calls_eh_return)
20115 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20116
20117 }
20118
20119 static void
20120 arm_output_function_epilogue (FILE *)
20121 {
20122 arm_stack_offsets *offsets;
20123
20124 if (TARGET_THUMB1)
20125 {
20126 int regno;
20127
20128 /* Emit any call-via-reg trampolines that are needed for v4t support
20129 of call_reg and call_value_reg type insns. */
20130 for (regno = 0; regno < LR_REGNUM; regno++)
20131 {
20132 rtx label = cfun->machine->call_via[regno];
20133
20134 if (label != NULL)
20135 {
20136 switch_to_section (function_section (current_function_decl));
20137 targetm.asm_out.internal_label (asm_out_file, "L",
20138 CODE_LABEL_NUMBER (label));
20139 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20140 }
20141 }
20142
20143 /* ??? Probably not safe to set this here, since it assumes that a
20144 function will be emitted as assembly immediately after we generate
20145 RTL for it. This does not happen for inline functions. */
20146 cfun->machine->return_used_this_function = 0;
20147 }
20148 else /* TARGET_32BIT */
20149 {
20150 /* We need to take into account any stack-frame rounding. */
20151 offsets = arm_get_frame_offsets ();
20152
20153 gcc_assert (!use_return_insn (FALSE, NULL)
20154 || (cfun->machine->return_used_this_function != 0)
20155 || offsets->saved_regs == offsets->outgoing_args
20156 || frame_pointer_needed);
20157 }
20158 }
20159
20160 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20161 STR and STRD. If an even number of registers is being pushed, an
20162 STRD pattern is created for each register pair. If an
20163 odd number of registers is pushed, emit an initial STR followed by
20164 as many STRD instructions as are needed. This works best when the
20165 stack is initially 64-bit aligned (the normal case), since it
20166 ensures that each STRD is also 64-bit aligned. */
20167 static void
20168 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20169 {
20170 int num_regs = 0;
20171 int i;
20172 int regno;
20173 rtx par = NULL_RTX;
20174 rtx dwarf = NULL_RTX;
20175 rtx tmp;
20176 bool first = true;
20177
20178 num_regs = bit_count (saved_regs_mask);
20179
20180 /* Must be at least one register to save, and can't save SP or PC. */
20181 gcc_assert (num_regs > 0 && num_regs <= 14);
20182 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20183 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20184
20185 /* Create sequence for DWARF info. All the frame-related data for
20186 debugging is held in this wrapper. */
20187 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20188
20189 /* Describe the stack adjustment. */
20190 tmp = gen_rtx_SET (stack_pointer_rtx,
20191 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20192 RTX_FRAME_RELATED_P (tmp) = 1;
20193 XVECEXP (dwarf, 0, 0) = tmp;
20194
20195 /* Find the first register. */
20196 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20197 ;
20198
20199 i = 0;
20200
20201 /* If there's an odd number of registers to push, start off by
20202 pushing a single register. This ensures that subsequent strd
20203 operations are dword aligned (assuming that SP was originally
20204 64-bit aligned). */
20205 if ((num_regs & 1) != 0)
20206 {
20207 rtx reg, mem, insn;
20208
20209 reg = gen_rtx_REG (SImode, regno);
20210 if (num_regs == 1)
20211 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20212 stack_pointer_rtx));
20213 else
20214 mem = gen_frame_mem (Pmode,
20215 gen_rtx_PRE_MODIFY
20216 (Pmode, stack_pointer_rtx,
20217 plus_constant (Pmode, stack_pointer_rtx,
20218 -4 * num_regs)));
20219
20220 tmp = gen_rtx_SET (mem, reg);
20221 RTX_FRAME_RELATED_P (tmp) = 1;
20222 insn = emit_insn (tmp);
20223 RTX_FRAME_RELATED_P (insn) = 1;
20224 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20225 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20226 RTX_FRAME_RELATED_P (tmp) = 1;
20227 i++;
20228 regno++;
20229 XVECEXP (dwarf, 0, i) = tmp;
20230 first = false;
20231 }
20232
20233 while (i < num_regs)
20234 if (saved_regs_mask & (1 << regno))
20235 {
20236 rtx reg1, reg2, mem1, mem2;
20237 rtx tmp0, tmp1, tmp2;
20238 int regno2;
20239
20240 /* Find the register to pair with this one. */
20241 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20242 regno2++)
20243 ;
20244
20245 reg1 = gen_rtx_REG (SImode, regno);
20246 reg2 = gen_rtx_REG (SImode, regno2);
20247
20248 if (first)
20249 {
20250 rtx insn;
20251
20252 first = false;
20253 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20254 stack_pointer_rtx,
20255 -4 * num_regs));
20256 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20257 stack_pointer_rtx,
20258 -4 * (num_regs - 1)));
20259 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20260 plus_constant (Pmode, stack_pointer_rtx,
20261 -4 * (num_regs)));
20262 tmp1 = gen_rtx_SET (mem1, reg1);
20263 tmp2 = gen_rtx_SET (mem2, reg2);
20264 RTX_FRAME_RELATED_P (tmp0) = 1;
20265 RTX_FRAME_RELATED_P (tmp1) = 1;
20266 RTX_FRAME_RELATED_P (tmp2) = 1;
20267 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20268 XVECEXP (par, 0, 0) = tmp0;
20269 XVECEXP (par, 0, 1) = tmp1;
20270 XVECEXP (par, 0, 2) = tmp2;
20271 insn = emit_insn (par);
20272 RTX_FRAME_RELATED_P (insn) = 1;
20273 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20274 }
20275 else
20276 {
20277 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20278 stack_pointer_rtx,
20279 4 * i));
20280 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20281 stack_pointer_rtx,
20282 4 * (i + 1)));
20283 tmp1 = gen_rtx_SET (mem1, reg1);
20284 tmp2 = gen_rtx_SET (mem2, reg2);
20285 RTX_FRAME_RELATED_P (tmp1) = 1;
20286 RTX_FRAME_RELATED_P (tmp2) = 1;
20287 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20288 XVECEXP (par, 0, 0) = tmp1;
20289 XVECEXP (par, 0, 1) = tmp2;
20290 emit_insn (par);
20291 }
20292
20293 /* Create unwind information. This is an approximation. */
20294 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20295 plus_constant (Pmode,
20296 stack_pointer_rtx,
20297 4 * i)),
20298 reg1);
20299 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20300 plus_constant (Pmode,
20301 stack_pointer_rtx,
20302 4 * (i + 1))),
20303 reg2);
20304
20305 RTX_FRAME_RELATED_P (tmp1) = 1;
20306 RTX_FRAME_RELATED_P (tmp2) = 1;
20307 XVECEXP (dwarf, 0, i + 1) = tmp1;
20308 XVECEXP (dwarf, 0, i + 2) = tmp2;
20309 i += 2;
20310 regno = regno2 + 1;
20311 }
20312 else
20313 regno++;
20314
20315 return;
20316 }
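/* Illustrative sketch of the sequence produced above (register numbers are
   hypothetical): for saved_regs_mask == {r4, r5, r6}, NUM_REGS is odd, so
   the first store allocates the whole area with writeback and the remaining
   pair uses STRD at a doubleword-aligned offset:

     str  r4, [sp, #-12]!
     strd r5, r6, [sp, #4]

   For an even count, e.g. {r4, r5, r6, r7}, the first STRD itself performs
   the writeback and the rest use plain offset addressing.  */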
20317
20318 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20319 whenever possible, otherwise it emits single-word stores. The first store
20320 also allocates stack space for all saved registers, using writeback with
20321 post-addressing mode. All other stores use offset addressing. If no STRD
20322 can be emitted, this function emits a sequence of single-word stores,
20323 and not an STM as before, because single-word stores provide more
20324 scheduling freedom and can be turned into an STM by peephole optimizations. */
20325 static void
20326 arm_emit_strd_push (unsigned long saved_regs_mask)
20327 {
20328 int num_regs = 0;
20329 int i, j, dwarf_index = 0;
20330 int offset = 0;
20331 rtx dwarf = NULL_RTX;
20332 rtx insn = NULL_RTX;
20333 rtx tmp, mem;
20334
20335 /* TODO: More efficient code could be emitted by changing the
20336 layout, e.g., first push all pairs that can use STRD to keep the
20337 stack aligned, and then push all other registers. */
20338 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20339 if (saved_regs_mask & (1 << i))
20340 num_regs++;
20341
20342 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20343 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20344 gcc_assert (num_regs > 0);
20345
20346 /* Create sequence for DWARF info. */
20347 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20348
20349 /* For dwarf info, we generate explicit stack update. */
20350 tmp = gen_rtx_SET (stack_pointer_rtx,
20351 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20352 RTX_FRAME_RELATED_P (tmp) = 1;
20353 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20354
20355 /* Save registers. */
20356 offset = - 4 * num_regs;
20357 j = 0;
20358 while (j <= LAST_ARM_REGNUM)
20359 if (saved_regs_mask & (1 << j))
20360 {
20361 if ((j % 2 == 0)
20362 && (saved_regs_mask & (1 << (j + 1))))
20363 {
20364 /* The current register and the next register form a register pair
20365 for which STRD can be generated. */
20366 if (offset < 0)
20367 {
20368 /* Allocate stack space for all saved registers. */
20369 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20370 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20371 mem = gen_frame_mem (DImode, tmp);
20372 offset = 0;
20373 }
20374 else if (offset > 0)
20375 mem = gen_frame_mem (DImode,
20376 plus_constant (Pmode,
20377 stack_pointer_rtx,
20378 offset));
20379 else
20380 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20381
20382 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20383 RTX_FRAME_RELATED_P (tmp) = 1;
20384 tmp = emit_insn (tmp);
20385
20386 /* Record the first store insn. */
20387 if (dwarf_index == 1)
20388 insn = tmp;
20389
20390 /* Generate dwarf info. */
20391 mem = gen_frame_mem (SImode,
20392 plus_constant (Pmode,
20393 stack_pointer_rtx,
20394 offset));
20395 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20396 RTX_FRAME_RELATED_P (tmp) = 1;
20397 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20398
20399 mem = gen_frame_mem (SImode,
20400 plus_constant (Pmode,
20401 stack_pointer_rtx,
20402 offset + 4));
20403 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20404 RTX_FRAME_RELATED_P (tmp) = 1;
20405 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20406
20407 offset += 8;
20408 j += 2;
20409 }
20410 else
20411 {
20412 /* Emit a single word store. */
20413 if (offset < 0)
20414 {
20415 /* Allocate stack space for all saved registers. */
20416 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20417 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20418 mem = gen_frame_mem (SImode, tmp);
20419 offset = 0;
20420 }
20421 else if (offset > 0)
20422 mem = gen_frame_mem (SImode,
20423 plus_constant (Pmode,
20424 stack_pointer_rtx,
20425 offset));
20426 else
20427 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20428
20429 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20430 RTX_FRAME_RELATED_P (tmp) = 1;
20431 tmp = emit_insn (tmp);
20432
20433 /* Record the first store insn. */
20434 if (dwarf_index == 1)
20435 insn = tmp;
20436
20437 /* Generate dwarf info. */
20438 mem = gen_frame_mem (SImode,
20439 plus_constant(Pmode,
20440 stack_pointer_rtx,
20441 offset));
20442 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20443 RTX_FRAME_RELATED_P (tmp) = 1;
20444 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20445
20446 offset += 4;
20447 j += 1;
20448 }
20449 }
20450 else
20451 j++;
20452
20453 /* Attach dwarf info to the first insn we generate. */
20454 gcc_assert (insn != NULL_RTX);
20455 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20456 RTX_FRAME_RELATED_P (insn) = 1;
20457 }
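/* Illustrative sketch (hypothetical register set): for saved_regs_mask ==
   {r4, r5, r7} in ARM mode, the first pair is even-numbered and consecutive,
   so it is stored with an STRD that also allocates the whole 12-byte area,
   and r7 falls back to a single-word store:

     strd r4, r5, [sp, #-12]!
     str  r7, [sp, #8]

   The DWARF note attached to the first insn describes the full stack
   decrement plus the three individual word stores.  */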
20458
20459 /* Generate and emit an insn that we will recognize as a push_multi.
20460 Unfortunately, since this insn does not reflect very well the actual
20461 semantics of the operation, we need to annotate the insn for the benefit
20462 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20463 MASK for registers that should be annotated for DWARF2 frame unwind
20464 information. */
20465 static rtx
20466 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20467 {
20468 int num_regs = 0;
20469 int num_dwarf_regs = 0;
20470 int i, j;
20471 rtx par;
20472 rtx dwarf;
20473 int dwarf_par_index;
20474 rtx tmp, reg;
20475
20476 /* We don't record the PC in the dwarf frame information. */
20477 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20478
20479 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20480 {
20481 if (mask & (1 << i))
20482 num_regs++;
20483 if (dwarf_regs_mask & (1 << i))
20484 num_dwarf_regs++;
20485 }
20486
20487 gcc_assert (num_regs && num_regs <= 16);
20488 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20489
20490 /* For the body of the insn we are going to generate an UNSPEC in
20491 parallel with several USEs. This allows the insn to be recognized
20492 by the push_multi pattern in the arm.md file.
20493
20494 The body of the insn looks something like this:
20495
20496 (parallel [
20497 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20498 (const_int:SI <num>)))
20499 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20500 (use (reg:SI XX))
20501 (use (reg:SI YY))
20502 ...
20503 ])
20504
20505 For the frame note however, we try to be more explicit and actually
20506 show each register being stored into the stack frame, plus a (single)
20507 decrement of the stack pointer. We do it this way in order to be
20508 friendly to the stack unwinding code, which only wants to see a single
20509 stack decrement per instruction. The RTL we generate for the note looks
20510 something like this:
20511
20512 (sequence [
20513 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20514 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20515 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20516 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20517 ...
20518 ])
20519
20520 FIXME: In an ideal world the PRE_MODIFY would not exist and
20521 instead we'd have a parallel expression detailing all
20522 the stores to the various memory addresses so that debug
20523 information is more up-to-date. Remember however while writing
20524 this to take care of the constraints with the push instruction.
20525
20526 Note also that this has to be taken care of for the VFP registers.
20527
20528 For more see PR43399. */
20529
20530 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20531 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20532 dwarf_par_index = 1;
20533
20534 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20535 {
20536 if (mask & (1 << i))
20537 {
20538 reg = gen_rtx_REG (SImode, i);
20539
20540 XVECEXP (par, 0, 0)
20541 = gen_rtx_SET (gen_frame_mem
20542 (BLKmode,
20543 gen_rtx_PRE_MODIFY (Pmode,
20544 stack_pointer_rtx,
20545 plus_constant
20546 (Pmode, stack_pointer_rtx,
20547 -4 * num_regs))
20548 ),
20549 gen_rtx_UNSPEC (BLKmode,
20550 gen_rtvec (1, reg),
20551 UNSPEC_PUSH_MULT));
20552
20553 if (dwarf_regs_mask & (1 << i))
20554 {
20555 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20556 reg);
20557 RTX_FRAME_RELATED_P (tmp) = 1;
20558 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20559 }
20560
20561 break;
20562 }
20563 }
20564
20565 for (j = 1, i++; j < num_regs; i++)
20566 {
20567 if (mask & (1 << i))
20568 {
20569 reg = gen_rtx_REG (SImode, i);
20570
20571 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20572
20573 if (dwarf_regs_mask & (1 << i))
20574 {
20575 tmp
20576 = gen_rtx_SET (gen_frame_mem
20577 (SImode,
20578 plus_constant (Pmode, stack_pointer_rtx,
20579 4 * j)),
20580 reg);
20581 RTX_FRAME_RELATED_P (tmp) = 1;
20582 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20583 }
20584
20585 j++;
20586 }
20587 }
20588
20589 par = emit_insn (par);
20590
20591 tmp = gen_rtx_SET (stack_pointer_rtx,
20592 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20593 RTX_FRAME_RELATED_P (tmp) = 1;
20594 XVECEXP (dwarf, 0, 0) = tmp;
20595
20596 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20597
20598 return par;
20599 }
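/* For illustration (a hypothetical call, not taken from the sources): a call
   such as

     emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << LR_REGNUM),
                          (1 << 4) | (1 << 5) | (1 << LR_REGNUM));

   produces a single push_multi insn that assembles to something like
   "push {r4, r5, lr}" (or "stmfd sp!, {r4, r5, lr}"), with a
   REG_FRAME_RELATED_EXPR note describing the 12-byte stack decrement and
   the three word stores.  */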
20600
20601 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20602 SIZE is the offset to be adjusted.
20603 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20604 static void
20605 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20606 {
20607 rtx dwarf;
20608
20609 RTX_FRAME_RELATED_P (insn) = 1;
20610 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20611 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20612 }
20613
20614 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20615 SAVED_REGS_MASK shows which registers need to be restored.
20616
20617 Unfortunately, since this insn does not reflect very well the actual
20618 semantics of the operation, we need to annotate the insn for the benefit
20619 of DWARF2 frame unwind information. */
20620 static void
20621 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20622 {
20623 int num_regs = 0;
20624 int i, j;
20625 rtx par;
20626 rtx dwarf = NULL_RTX;
20627 rtx tmp, reg;
20628 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20629 int offset_adj;
20630 int emit_update;
20631
20632 offset_adj = return_in_pc ? 1 : 0;
20633 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20634 if (saved_regs_mask & (1 << i))
20635 num_regs++;
20636
20637 gcc_assert (num_regs && num_regs <= 16);
20638
20639 /* If SP is in the register list, then we don't emit the SP update insn. */
20640 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20641
20642 /* The parallel needs to hold num_regs SETs
20643 and one SET for the stack update. */
20644 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20645
20646 if (return_in_pc)
20647 XVECEXP (par, 0, 0) = ret_rtx;
20648
20649 if (emit_update)
20650 {
20651 /* Increment the stack pointer, based on there being
20652 num_regs 4-byte registers to restore. */
20653 tmp = gen_rtx_SET (stack_pointer_rtx,
20654 plus_constant (Pmode,
20655 stack_pointer_rtx,
20656 4 * num_regs));
20657 RTX_FRAME_RELATED_P (tmp) = 1;
20658 XVECEXP (par, 0, offset_adj) = tmp;
20659 }
20660
20661 /* Now restore every reg, which may include PC. */
20662 for (j = 0, i = 0; j < num_regs; i++)
20663 if (saved_regs_mask & (1 << i))
20664 {
20665 reg = gen_rtx_REG (SImode, i);
20666 if ((num_regs == 1) && emit_update && !return_in_pc)
20667 {
20668 /* Emit single load with writeback. */
20669 tmp = gen_frame_mem (SImode,
20670 gen_rtx_POST_INC (Pmode,
20671 stack_pointer_rtx));
20672 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20673 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20674 return;
20675 }
20676
20677 tmp = gen_rtx_SET (reg,
20678 gen_frame_mem
20679 (SImode,
20680 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20681 RTX_FRAME_RELATED_P (tmp) = 1;
20682 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20683
20684 /* We also need to maintain the DWARF restore notes. The DWARF info
20685 must not include the PC, so skip it. */
20686 if (i != PC_REGNUM)
20687 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20688
20689 j++;
20690 }
20691
20692 if (return_in_pc)
20693 par = emit_jump_insn (par);
20694 else
20695 par = emit_insn (par);
20696
20697 REG_NOTES (par) = dwarf;
20698 if (!return_in_pc)
20699 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20700 stack_pointer_rtx, stack_pointer_rtx);
20701 }
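/* For illustration: with saved_regs_mask == {r4, r5, pc} the PARALLEL built
   above holds the return, the 12-byte stack increment and the three register
   loads, and is emitted as a jump insn that the pop_multi patterns render as
   something like "pop {r4, r5, pc}".  With a single register and no PC, the
   early-out above instead emits one load with post-increment, e.g.
   "ldr r4, [sp], #4".  */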
20702
20703 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20704 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20705
20706 Unfortunately, since this insn does not reflect very well the actual
20707 semantics of the operation, we need to annotate the insn for the benefit
20708 of DWARF2 frame unwind information. */
20709 static void
20710 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20711 {
20712 int i, j;
20713 rtx par;
20714 rtx dwarf = NULL_RTX;
20715 rtx tmp, reg;
20716
20717 gcc_assert (num_regs && num_regs <= 32);
20718
20719 /* Workaround ARM10 VFPr1 bug. */
20720 if (num_regs == 2 && !arm_arch6)
20721 {
20722 if (first_reg == 15)
20723 first_reg--;
20724
20725 num_regs++;
20726 }
20727
20728 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20729 there could be up to 32 D-registers to restore.
20730 If there are more than 16 D-registers, make two recursive calls,
20731 each of which emits one pop_multi instruction. */
20732 if (num_regs > 16)
20733 {
20734 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20735 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20736 return;
20737 }
20738
20739 /* The parallel needs to hold num_regs SETs
20740 and one SET for the stack update. */
20741 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20742
20743 /* Increment the stack pointer, based on there being
20744 num_regs 8-byte registers to restore. */
20745 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20746 RTX_FRAME_RELATED_P (tmp) = 1;
20747 XVECEXP (par, 0, 0) = tmp;
20748
20749 /* Now show every reg that will be restored, using a SET for each. */
20750 for (j = 0, i=first_reg; j < num_regs; i += 2)
20751 {
20752 reg = gen_rtx_REG (DFmode, i);
20753
20754 tmp = gen_rtx_SET (reg,
20755 gen_frame_mem
20756 (DFmode,
20757 plus_constant (Pmode, base_reg, 8 * j)));
20758 RTX_FRAME_RELATED_P (tmp) = 1;
20759 XVECEXP (par, 0, j + 1) = tmp;
20760
20761 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20762
20763 j++;
20764 }
20765
20766 par = emit_insn (par);
20767 REG_NOTES (par) = dwarf;
20768
20769 /* Make sure the CFA doesn't stay based on IP_REGNUM, so that unwinding from the FP is possible. */
20770 if (REGNO (base_reg) == IP_REGNUM)
20771 {
20772 RTX_FRAME_RELATED_P (par) = 1;
20773 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20774 }
20775 else
20776 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20777 base_reg, base_reg);
20778 }
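/* Illustrative sketch (register choice hypothetical): restoring four
   D registers, say d8-d11, from the stack pointer corresponds to a single
   VLDM such as "vldm sp!, {d8-d11}", with BASE_REG advancing by
   4 * 8 == 32 bytes.  A request for more than 16 D registers is simply
   split into two such pop_multi insns by the recursive calls above.  */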
20779
20780 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20781 an even number of registers is being popped, an LDRD pattern is created for
20782 each register pair. If an odd number of registers is popped, the last
20783 register is loaded using an LDR pattern. */
20784 static void
20785 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20786 {
20787 int num_regs = 0;
20788 int i, j;
20789 rtx par = NULL_RTX;
20790 rtx dwarf = NULL_RTX;
20791 rtx tmp, reg, tmp1;
20792 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20793
20794 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20795 if (saved_regs_mask & (1 << i))
20796 num_regs++;
20797
20798 gcc_assert (num_regs && num_regs <= 16);
20799
20800 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20801 to be popped. So, if num_regs is even, now it will become odd,
20802 and we can generate pop with PC. If num_regs is odd, it will be
20803 even now, and ldr with return can be generated for PC. */
20804 if (return_in_pc)
20805 num_regs--;
20806
20807 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20808
20809 /* Var J iterates over all the register numbers to find the registers in
20810 saved_regs_mask; var I gives the index of each saved register in the
20811 stack frame. A PARALLEL RTX holding a register pair is created here so
20812 that the LDRD pattern can be matched. PC is always the last register to
20813 be popped, and num_regs has already been decremented if PC is in the
20814 mask, so we don't have to worry about PC in this loop. */
20815 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20816 if (saved_regs_mask & (1 << j))
20817 {
20818 /* Create RTX for memory load. */
20819 reg = gen_rtx_REG (SImode, j);
20820 tmp = gen_rtx_SET (reg,
20821 gen_frame_mem (SImode,
20822 plus_constant (Pmode,
20823 stack_pointer_rtx, 4 * i)));
20824 RTX_FRAME_RELATED_P (tmp) = 1;
20825
20826 if (i % 2 == 0)
20827 {
20828 /* When saved-register index (i) is even, the RTX to be emitted is
20829 yet to be created. Hence create it first. The LDRD pattern we
20830 are generating is :
20831 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20832 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20833 where target registers need not be consecutive. */
20834 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20835 dwarf = NULL_RTX;
20836 }
20837
20838 /* The I-th register is added to the PARALLEL RTX: if I is even, reg_i is
20839 added as the 0th element, and if I is odd, it is added as the 1st element
20840 of the LDRD pattern shown above. */
20841 XVECEXP (par, 0, (i % 2)) = tmp;
20842 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20843
20844 if ((i % 2) == 1)
20845 {
20846 /* When the saved-register index (i) is odd, the RTXs for both registers
20847 of the LDRD pattern above have been generated, so the pattern can be
20848 emitted now. */
20849 par = emit_insn (par);
20850 REG_NOTES (par) = dwarf;
20851 RTX_FRAME_RELATED_P (par) = 1;
20852 }
20853
20854 i++;
20855 }
20856
20857 /* If the number of registers popped is odd and return_in_pc is false, or
20858 the number of registers is even and return_in_pc is true, the last
20859 register is popped using LDR. It can be PC as well. Hence, adjust the
20860 stack first and then use LDR with post increment. */
20861
20862 /* Increment the stack pointer, based on there being
20863 num_regs 4-byte registers to restore. */
20864 tmp = gen_rtx_SET (stack_pointer_rtx,
20865 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20866 RTX_FRAME_RELATED_P (tmp) = 1;
20867 tmp = emit_insn (tmp);
20868 if (!return_in_pc)
20869 {
20870 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20871 stack_pointer_rtx, stack_pointer_rtx);
20872 }
20873
20874 dwarf = NULL_RTX;
20875
20876 if (((num_regs % 2) == 1 && !return_in_pc)
20877 || ((num_regs % 2) == 0 && return_in_pc))
20878 {
20879 /* Scan for the single register to be popped. Skip until the saved
20880 register is found. */
20881 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20882
20883 /* Gen LDR with post increment here. */
20884 tmp1 = gen_rtx_MEM (SImode,
20885 gen_rtx_POST_INC (SImode,
20886 stack_pointer_rtx));
20887 set_mem_alias_set (tmp1, get_frame_alias_set ());
20888
20889 reg = gen_rtx_REG (SImode, j);
20890 tmp = gen_rtx_SET (reg, tmp1);
20891 RTX_FRAME_RELATED_P (tmp) = 1;
20892 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20893
20894 if (return_in_pc)
20895 {
20896 /* If return_in_pc, j must be PC_REGNUM. */
20897 gcc_assert (j == PC_REGNUM);
20898 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20899 XVECEXP (par, 0, 0) = ret_rtx;
20900 XVECEXP (par, 0, 1) = tmp;
20901 par = emit_jump_insn (par);
20902 }
20903 else
20904 {
20905 par = emit_insn (tmp);
20906 REG_NOTES (par) = dwarf;
20907 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20908 stack_pointer_rtx, stack_pointer_rtx);
20909 }
20910
20911 }
20912 else if ((num_regs % 2) == 1 && return_in_pc)
20913 {
20914 /* There are 2 registers to be popped. So, generate the pattern
20915 pop_multiple_with_stack_update_and_return to pop in PC. */
20916 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20917 }
20918
20919 return;
20920 }
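/* Illustrative sketch (hypothetical register set): for saved_regs_mask ==
   {r4, r5, r6} the loop above pairs r4/r5 into one LDRD, the stack is then
   adjusted past the paired slots, and the leftover register is popped with a
   post-incrementing LDR:

     ldrd r4, r5, [sp]
     add  sp, sp, #8
     ldr  r6, [sp], #4

   Note that in Thumb-2 the two LDRD destinations need not be consecutive
   registers, unlike the ARM-mode variant below.  */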
20921
20922 /* LDRD in ARM mode needs consecutive registers as operands. This function
20923 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20924 offset addressing and then generates one separate stack update. This provides
20925 more scheduling freedom, compared to writeback on every load. However,
20926 if the function returns by loading directly into PC
20927 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20928 before the last load. TODO: Add a peephole optimization to recognize
20929 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20930 a peephole optimization to merge the load at stack-offset zero
20931 with the stack update instruction using load with writeback
20932 in post-index addressing mode. */
20933 static void
20934 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20935 {
20936 int j = 0;
20937 int offset = 0;
20938 rtx par = NULL_RTX;
20939 rtx dwarf = NULL_RTX;
20940 rtx tmp, mem;
20941
20942 /* Restore saved registers. */
20943 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20944 j = 0;
20945 while (j <= LAST_ARM_REGNUM)
20946 if (saved_regs_mask & (1 << j))
20947 {
20948 if ((j % 2) == 0
20949 && (saved_regs_mask & (1 << (j + 1)))
20950 && (j + 1) != PC_REGNUM)
20951 {
20952 /* The current register and the next register form a register pair for
20953 which LDRD can be generated. PC is always the last register popped, and
20954 we handle it separately. */
20955 if (offset > 0)
20956 mem = gen_frame_mem (DImode,
20957 plus_constant (Pmode,
20958 stack_pointer_rtx,
20959 offset));
20960 else
20961 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20962
20963 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20964 tmp = emit_insn (tmp);
20965 RTX_FRAME_RELATED_P (tmp) = 1;
20966
20967 /* Generate dwarf info. */
20968
20969 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20970 gen_rtx_REG (SImode, j),
20971 NULL_RTX);
20972 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20973 gen_rtx_REG (SImode, j + 1),
20974 dwarf);
20975
20976 REG_NOTES (tmp) = dwarf;
20977
20978 offset += 8;
20979 j += 2;
20980 }
20981 else if (j != PC_REGNUM)
20982 {
20983 /* Emit a single word load. */
20984 if (offset > 0)
20985 mem = gen_frame_mem (SImode,
20986 plus_constant (Pmode,
20987 stack_pointer_rtx,
20988 offset));
20989 else
20990 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20991
20992 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20993 tmp = emit_insn (tmp);
20994 RTX_FRAME_RELATED_P (tmp) = 1;
20995
20996 /* Generate dwarf info. */
20997 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20998 gen_rtx_REG (SImode, j),
20999 NULL_RTX);
21000
21001 offset += 4;
21002 j += 1;
21003 }
21004 else /* j == PC_REGNUM */
21005 j++;
21006 }
21007 else
21008 j++;
21009
21010 /* Update the stack. */
21011 if (offset > 0)
21012 {
21013 tmp = gen_rtx_SET (stack_pointer_rtx,
21014 plus_constant (Pmode,
21015 stack_pointer_rtx,
21016 offset));
21017 tmp = emit_insn (tmp);
21018 arm_add_cfa_adjust_cfa_note (tmp, offset,
21019 stack_pointer_rtx, stack_pointer_rtx);
21020 offset = 0;
21021 }
21022
21023 if (saved_regs_mask & (1 << PC_REGNUM))
21024 {
21025 /* Only PC is to be popped. */
21026 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21027 XVECEXP (par, 0, 0) = ret_rtx;
21028 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
21029 gen_frame_mem (SImode,
21030 gen_rtx_POST_INC (SImode,
21031 stack_pointer_rtx)));
21032 RTX_FRAME_RELATED_P (tmp) = 1;
21033 XVECEXP (par, 0, 1) = tmp;
21034 par = emit_jump_insn (par);
21035
21036 /* Generate dwarf info. */
21037 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21038 gen_rtx_REG (SImode, PC_REGNUM),
21039 NULL_RTX);
21040 REG_NOTES (par) = dwarf;
21041 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
21042 stack_pointer_rtx, stack_pointer_rtx);
21043 }
21044 }
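/* Illustrative sketch (hypothetical register set): for saved_regs_mask ==
   {r4, r5, pc} the code above emits

     ldrd r4, r5, [sp]
     add  sp, sp, #8
     ldr  pc, [sp], #4

   with the final load doubling as the return.  */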
21045
21046 /* Calculate the size of the return value that is passed in registers. */
21047 static unsigned
21048 arm_size_return_regs (void)
21049 {
21050 machine_mode mode;
21051
21052 if (crtl->return_rtx != 0)
21053 mode = GET_MODE (crtl->return_rtx);
21054 else
21055 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21056
21057 return GET_MODE_SIZE (mode);
21058 }
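/* For example, a function returning "long long" has a DImode return rtx, so
   the function above yields 8 (the value comes back in r0/r1), while a
   function returning "int" yields 4.  thumb1_compute_save_core_reg_mask uses
   this to avoid choosing a work register that overlaps the value being
   returned.  */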
21059
21060 /* Return true if the current function needs to save/restore LR. */
21061 static bool
21062 thumb_force_lr_save (void)
21063 {
21064 return !cfun->machine->lr_save_eliminated
21065 && (!crtl->is_leaf
21066 || thumb_far_jump_used_p ()
21067 || df_regs_ever_live_p (LR_REGNUM));
21068 }
21069
21070 /* Return true if CALL is an indirect tail call. In that case we cannot
21071 know whether r3 will be available at the call site. */
21073 static bool
21074 is_indirect_tailcall_p (rtx call)
21075 {
21076 rtx pat = PATTERN (call);
21077
21078 /* Indirect tail call. */
21079 pat = XVECEXP (pat, 0, 0);
21080 if (GET_CODE (pat) == SET)
21081 pat = SET_SRC (pat);
21082
21083 pat = XEXP (XEXP (pat, 0), 0);
21084 return REG_P (pat);
21085 }
21086
21087 /* Return true if r3 is used by any of the tail call insns in the
21088 current function. */
21089 static bool
21090 any_sibcall_could_use_r3 (void)
21091 {
21092 edge_iterator ei;
21093 edge e;
21094
21095 if (!crtl->tail_call_emit)
21096 return false;
21097 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21098 if (e->flags & EDGE_SIBCALL)
21099 {
21100 rtx_insn *call = BB_END (e->src);
21101 if (!CALL_P (call))
21102 call = prev_nonnote_nondebug_insn (call);
21103 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
21104 if (find_regno_fusage (call, USE, 3)
21105 || is_indirect_tailcall_p (call))
21106 return true;
21107 }
21108 return false;
21109 }
21110
21111
21112 /* Compute the distance from register FROM to register TO.
21113 These can be the arg pointer (26), the soft frame pointer (25),
21114 the stack pointer (13) or the hard frame pointer (11).
21115 In thumb mode r7 is used as the soft frame pointer, if needed.
21116 Typical stack layout looks like this:
21117
21118        old stack pointer -> |    |
21119                              ----
21120                             |    | \
21121                             |    |   saved arguments for
21122                             |    |   vararg functions
21123                             |    | /
21124                               --
21125    hard FP & arg pointer -> |    | \
21126                             |    |   stack
21127                             |    |   frame
21128                             |    | /
21129                               --
21130                             |    | \
21131                             |    |   call saved
21132                             |    |   registers
21133       soft frame pointer -> |    | /
21134                               --
21135                             |    | \
21136                             |    |   local
21137                             |    |   variables
21138      locals base pointer -> |    | /
21139                               --
21140                             |    | \
21141                             |    |   outgoing
21142                             |    |   arguments
21143    current stack pointer -> |    | /
21144                               --
21145
21146 For a given function some or all of these stack components
21147 may not be needed, giving rise to the possibility of
21148 eliminating some of the registers.
21149
21150 The values returned by this function must reflect the behavior
21151 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21152
21153 The sign of the number returned reflects the direction of stack
21154 growth, so the values are positive for all eliminations except
21155 from the soft frame pointer to the hard frame pointer.
21156
21157 SFP may point just inside the local variables block to ensure correct
21158 alignment. */
21159
21160
21161 /* Return cached stack offsets. */
21162
21163 static arm_stack_offsets *
21164 arm_get_frame_offsets (void)
21165 {
21166 struct arm_stack_offsets *offsets;
21167
21168 offsets = &cfun->machine->stack_offsets;
21169
21170 return offsets;
21171 }
21172
21173
21174 /* Calculate stack offsets. These are used to calculate register elimination
21175 offsets and in prologue/epilogue code. Also calculates which registers
21176 should be saved. */
21177
21178 static void
21179 arm_compute_frame_layout (void)
21180 {
21181 struct arm_stack_offsets *offsets;
21182 unsigned long func_type;
21183 int saved;
21184 int core_saved;
21185 HOST_WIDE_INT frame_size;
21186 int i;
21187
21188 offsets = &cfun->machine->stack_offsets;
21189
21190 /* Initially this is the size of the local variables. It will be translated
21191 into an offset once we have determined the size of the preceding data. */
21192 frame_size = ROUND_UP_WORD (get_frame_size ());
21193
21194 /* Space for variadic functions. */
21195 offsets->saved_args = crtl->args.pretend_args_size;
21196
21197 /* In Thumb mode this is incorrect, but never used. */
21198 offsets->frame
21199 = (offsets->saved_args
21200 + arm_compute_static_chain_stack_bytes ()
21201 + (frame_pointer_needed ? 4 : 0));
21202
21203 if (TARGET_32BIT)
21204 {
21205 unsigned int regno;
21206
21207 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21208 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21209 saved = core_saved;
21210
21211 /* We know that SP will be doubleword aligned on entry, and we must
21212 preserve that condition at any subroutine call. We also require the
21213 soft frame pointer to be doubleword aligned. */
21214
21215 if (TARGET_REALLY_IWMMXT)
21216 {
21217 /* Check for the call-saved iWMMXt registers. */
21218 for (regno = FIRST_IWMMXT_REGNUM;
21219 regno <= LAST_IWMMXT_REGNUM;
21220 regno++)
21221 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21222 saved += 8;
21223 }
21224
21225 func_type = arm_current_func_type ();
21226 /* Space for saved VFP registers. */
21227 if (! IS_VOLATILE (func_type)
21228 && TARGET_HARD_FLOAT)
21229 saved += arm_get_vfp_saved_size ();
21230 }
21231 else /* TARGET_THUMB1 */
21232 {
21233 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21234 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21235 saved = core_saved;
21236 if (TARGET_BACKTRACE)
21237 saved += 16;
21238 }
21239
21240 /* Saved registers include the stack frame. */
21241 offsets->saved_regs
21242 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21243 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21244
21245 /* A leaf function does not need any stack alignment if it has nothing
21246 on the stack. */
21247 if (crtl->is_leaf && frame_size == 0
21248 /* However if it calls alloca(), we have a dynamically allocated
21249 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21250 && ! cfun->calls_alloca)
21251 {
21252 offsets->outgoing_args = offsets->soft_frame;
21253 offsets->locals_base = offsets->soft_frame;
21254 return;
21255 }
21256
21257 /* Ensure SFP has the correct alignment. */
21258 if (ARM_DOUBLEWORD_ALIGN
21259 && (offsets->soft_frame & 7))
21260 {
21261 offsets->soft_frame += 4;
21262 /* Try to align stack by pushing an extra reg. Don't bother doing this
21263 when there is a stack frame as the alignment will be rolled into
21264 the normal stack adjustment. */
21265 if (frame_size + crtl->outgoing_args_size == 0)
21266 {
21267 int reg = -1;
21268
21269 /* Register r3 is caller-saved. Normally it does not need to be
21270 saved on entry by the prologue. However if we choose to save
21271 it for padding then we may confuse the compiler into thinking
21272 a prologue sequence is required when in fact it is not. This
21273 will occur when shrink-wrapping if r3 is used as a scratch
21274 register and there are no other callee-saved writes.
21275
21276 This situation can be avoided when other callee-saved registers
21277 are available and r3 is not mandatory if we choose a callee-saved
21278 register for padding. */
21279 bool prefer_callee_reg_p = false;
21280
21281 /* If it is safe to use r3, then do so. This sometimes
21282 generates better code on Thumb-2 by avoiding the need to
21283 use 32-bit push/pop instructions. */
21284 if (! any_sibcall_could_use_r3 ()
21285 && arm_size_return_regs () <= 12
21286 && (offsets->saved_regs_mask & (1 << 3)) == 0
21287 && (TARGET_THUMB2
21288 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21289 {
21290 reg = 3;
21291 if (!TARGET_THUMB2)
21292 prefer_callee_reg_p = true;
21293 }
21294 if (reg == -1
21295 || prefer_callee_reg_p)
21296 {
21297 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21298 {
21299 /* Avoid fixed registers; they may be changed at
21300 arbitrary times so it's unsafe to restore them
21301 during the epilogue. */
21302 if (!fixed_regs[i]
21303 && (offsets->saved_regs_mask & (1 << i)) == 0)
21304 {
21305 reg = i;
21306 break;
21307 }
21308 }
21309 }
21310
21311 if (reg != -1)
21312 {
21313 offsets->saved_regs += 4;
21314 offsets->saved_regs_mask |= (1 << reg);
21315 }
21316 }
21317 }
21318
21319 offsets->locals_base = offsets->soft_frame + frame_size;
21320 offsets->outgoing_args = (offsets->locals_base
21321 + crtl->outgoing_args_size);
21322
21323 if (ARM_DOUBLEWORD_ALIGN)
21324 {
21325 /* Ensure SP remains doubleword aligned. */
21326 if (offsets->outgoing_args & 7)
21327 offsets->outgoing_args += 4;
21328 gcc_assert (!(offsets->outgoing_args & 7));
21329 }
21330 }
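/* A hypothetical worked example of the layout computed above, assuming a
   32-bit (ARM/Thumb-2) function with no pretend args, no static chain, no
   frame pointer, CALLER_INTERWORKING_SLOT_SIZE == 0, saved_regs_mask ==
   {r4, lr}, 16 bytes of locals and no outgoing arguments:

     saved_args    = 0
     frame         = 0
     saved_regs    = 8          (two core registers)
     soft_frame    = 8          (already doubleword aligned)
     locals_base   = 24
     outgoing_args = 24         (already doubleword aligned)

   so the prologue lowers SP by 24 bytes in total: 8 for the register push
   plus 16 for the locals.  */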
21331
21332
21333 /* Calculate the relative offsets for the different stack pointers. Positive
21334 offsets are in the direction of stack growth. */
21335
21336 HOST_WIDE_INT
21337 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21338 {
21339 arm_stack_offsets *offsets;
21340
21341 offsets = arm_get_frame_offsets ();
21342
21343 /* OK, now we have enough information to compute the distances.
21344 There must be an entry in these switch tables for each pair
21345 of registers in ELIMINABLE_REGS, even if some of the entries
21346 seem to be redundant or useless. */
21347 switch (from)
21348 {
21349 case ARG_POINTER_REGNUM:
21350 switch (to)
21351 {
21352 case THUMB_HARD_FRAME_POINTER_REGNUM:
21353 return 0;
21354
21355 case FRAME_POINTER_REGNUM:
21356 /* This is the reverse of the soft frame pointer
21357 to hard frame pointer elimination below. */
21358 return offsets->soft_frame - offsets->saved_args;
21359
21360 case ARM_HARD_FRAME_POINTER_REGNUM:
21361 /* This is only non-zero in the case where the static chain register
21362 is stored above the frame. */
21363 return offsets->frame - offsets->saved_args - 4;
21364
21365 case STACK_POINTER_REGNUM:
21366 /* If nothing has been pushed on the stack at all
21367 then this will return -4. This *is* correct! */
21368 return offsets->outgoing_args - (offsets->saved_args + 4);
21369
21370 default:
21371 gcc_unreachable ();
21372 }
21373 gcc_unreachable ();
21374
21375 case FRAME_POINTER_REGNUM:
21376 switch (to)
21377 {
21378 case THUMB_HARD_FRAME_POINTER_REGNUM:
21379 return 0;
21380
21381 case ARM_HARD_FRAME_POINTER_REGNUM:
21382 /* The hard frame pointer points to the top entry in the
21383 stack frame. The soft frame pointer to the bottom entry
21384 in the stack frame. If there is no stack frame at all,
21385 then they are identical. */
21386
21387 return offsets->frame - offsets->soft_frame;
21388
21389 case STACK_POINTER_REGNUM:
21390 return offsets->outgoing_args - offsets->soft_frame;
21391
21392 default:
21393 gcc_unreachable ();
21394 }
21395 gcc_unreachable ();
21396
21397 default:
21398 /* You cannot eliminate from the stack pointer.
21399 In theory you could eliminate from the hard frame
21400 pointer to the stack pointer, but this will never
21401 happen, since if a stack frame is not needed the
21402 hard frame pointer will never be used. */
21403 gcc_unreachable ();
21404 }
21405 }
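/* Continuing the hypothetical example above (saved_args == 0,
   soft_frame == 8, outgoing_args == 24), the eliminations would give:

     ARG_POINTER   -> FRAME_POINTER : 8  - 0       == 8
     ARG_POINTER   -> STACK_POINTER : 24 - (0 + 4) == 20
     FRAME_POINTER -> STACK_POINTER : 24 - 8       == 16

   which must agree with what arm_expand_prologue actually emits, as noted
   in the comment before the stack layout diagram.  */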
21406
21407 /* Given FROM and TO register numbers, say whether this elimination is
21408 allowed. Frame pointer elimination is automatically handled.
21409
21410 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21411 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21412 pointer, we must eliminate FRAME_POINTER_REGNUM into
21413 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21414 ARG_POINTER_REGNUM. */
21415
21416 bool
21417 arm_can_eliminate (const int from, const int to)
21418 {
21419 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21420 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21421 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21422 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21423 true);
21424 }
21425
21426 /* Emit RTL to save coprocessor registers on function entry. Returns the
21427 number of bytes pushed. */
21428
21429 static int
21430 arm_save_coproc_regs(void)
21431 {
21432 int saved_size = 0;
21433 unsigned reg;
21434 unsigned start_reg;
21435 rtx insn;
21436
21437 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21438 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21439 {
21440 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21441 insn = gen_rtx_MEM (V2SImode, insn);
21442 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21443 RTX_FRAME_RELATED_P (insn) = 1;
21444 saved_size += 8;
21445 }
21446
21447 if (TARGET_HARD_FLOAT)
21448 {
21449 start_reg = FIRST_VFP_REGNUM;
21450
21451 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21452 {
21453 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21454 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21455 {
21456 if (start_reg != reg)
21457 saved_size += vfp_emit_fstmd (start_reg,
21458 (reg - start_reg) / 2);
21459 start_reg = reg + 2;
21460 }
21461 }
21462 if (start_reg != reg)
21463 saved_size += vfp_emit_fstmd (start_reg,
21464 (reg - start_reg) / 2);
21465 }
21466 return saved_size;
21467 }
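
/* Illustration of the RTL generated above (a summary of the code paths
   for the reader, not additional behaviour): for every live call-saved
   iWMMXt register the function emits

       (set (mem:V2SI (pre_dec:SI sp)) (reg:V2SI wrN))

   accounting for 8 bytes of stack each, and for VFP it scans the S
   registers in pairs (one D register per pair) and saves each maximal
   run of live pairs with a single vfp_emit_fstmd call covering
   (reg - start_reg) / 2 double registers.  */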
21468
21469
21470 /* Set the Thumb frame pointer from the stack pointer. */
21471
21472 static void
21473 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21474 {
21475 HOST_WIDE_INT amount;
21476 rtx insn, dwarf;
21477
21478 amount = offsets->outgoing_args - offsets->locals_base;
21479 if (amount < 1024)
21480 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21481 stack_pointer_rtx, GEN_INT (amount)));
21482 else
21483 {
21484 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21485 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21486 expects the first two operands to be the same. */
21487 if (TARGET_THUMB2)
21488 {
21489 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21490 stack_pointer_rtx,
21491 hard_frame_pointer_rtx));
21492 }
21493 else
21494 {
21495 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21496 hard_frame_pointer_rtx,
21497 stack_pointer_rtx));
21498 }
21499 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21500 plus_constant (Pmode, stack_pointer_rtx, amount));
21501 RTX_FRAME_RELATED_P (dwarf) = 1;
21502 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21503 }
21504
21505 RTX_FRAME_RELATED_P (insn) = 1;
21506 }
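
/* A sketch of the two cases above (the exact insns come from gen_addsi3
   and gen_movsi): for a small frame, say AMOUNT == 16, a single

       (set hard_frame_pointer (plus stack_pointer (const_int 16)))

   is emitted; for AMOUNT >= 1024 the constant is first moved into the
   hard frame pointer and then added to SP, and a REG_FRAME_RELATED_EXPR
   note records the combined effect FP = SP + AMOUNT for the unwinder.  */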
21507
21508 struct scratch_reg {
21509 rtx reg;
21510 bool saved;
21511 };
21512
21513 /* Return a short-lived scratch register for use as a 2nd scratch register on
21514 function entry after the registers are saved in the prologue. This register
21515 must be released by means of release_scratch_register_on_entry. IP is not
21516 considered since it is always used as the 1st scratch register if available.
21517
21518 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21519 mask of live registers. */
21520
21521 static void
21522 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21523 unsigned long live_regs)
21524 {
21525 int regno = -1;
21526
21527 sr->saved = false;
21528
21529 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21530 regno = LR_REGNUM;
21531 else
21532 {
21533 unsigned int i;
21534
21535 for (i = 4; i < 11; i++)
21536 if (regno1 != i && (live_regs & (1 << i)) != 0)
21537 {
21538 regno = i;
21539 break;
21540 }
21541
21542 if (regno < 0)
21543 {
21544 /* If IP is used as the 1st scratch register for a nested function,
21545 then either r3 wasn't available or it is used to preserve IP. */
21546 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21547 regno1 = 3;
21548 regno = (regno1 == 3 ? 2 : 3);
21549 sr->saved
21550 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21551 regno);
21552 }
21553 }
21554
21555 sr->reg = gen_rtx_REG (SImode, regno);
21556 if (sr->saved)
21557 {
21558 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21559 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21560 rtx x = gen_rtx_SET (stack_pointer_rtx,
21561 plus_constant (Pmode, stack_pointer_rtx, -4));
21562 RTX_FRAME_RELATED_P (insn) = 1;
21563 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21564 }
21565 }
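
/* Selection order, summarised from the code above: LR is used if it is
   live and different from REGNO1; otherwise the first of r4-r10 that is
   live and different from REGNO1; otherwise r2 or r3 is taken and, if it
   is live on entry, it is spilled with a pre-decrement store so that
   release_scratch_register_on_entry can reload it afterwards.  */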
21566
21567 /* Release a scratch register obtained from the preceding function. */
21568
21569 static void
21570 release_scratch_register_on_entry (struct scratch_reg *sr)
21571 {
21572 if (sr->saved)
21573 {
21574 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21575 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21576 rtx x = gen_rtx_SET (stack_pointer_rtx,
21577 plus_constant (Pmode, stack_pointer_rtx, 4));
21578 RTX_FRAME_RELATED_P (insn) = 1;
21579 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21580 }
21581 }
21582
21583 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21584
21585 #if PROBE_INTERVAL > 4096
21586 #error Cannot use indexed addressing mode for stack probing
21587 #endif
21588
21589 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21590 inclusive. These are offsets from the current stack pointer. REGNO1
21591 is the index number of the 1st scratch register and LIVE_REGS is the
21592 mask of live registers. */
21593
21594 static void
21595 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21596 unsigned int regno1, unsigned long live_regs)
21597 {
21598 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21599
21600 /* See if we have a constant small number of probes to generate. If so,
21601 that's the easy case. */
21602 if (size <= PROBE_INTERVAL)
21603 {
21604 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21605 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21606 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21607 }
21608
21609 /* The run-time loop is made up of 10 insns in the generic case while the
21610 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
21611 else if (size <= 5 * PROBE_INTERVAL)
21612 {
21613 HOST_WIDE_INT i, rem;
21614
21615 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21616 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21617 emit_stack_probe (reg1);
21618
21619 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21620 it exceeds SIZE. If only two probes are needed, this will not
21621 generate any code. Then probe at FIRST + SIZE. */
21622 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21623 {
21624 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21625 emit_stack_probe (reg1);
21626 }
21627
21628 rem = size - (i - PROBE_INTERVAL);
21629 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21630 {
21631 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21632 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21633 }
21634 else
21635 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21636 }
21637
21638 /* Otherwise, do the same as above, but in a loop. Note that we must be
21639 extra careful with variables wrapping around because we might be at
21640 the very top (or the very bottom) of the address space and we have
21641 to be able to handle this case properly; in particular, we use an
21642 equality test for the loop condition. */
21643 else
21644 {
21645 HOST_WIDE_INT rounded_size;
21646 struct scratch_reg sr;
21647
21648 get_scratch_register_on_entry (&sr, regno1, live_regs);
21649
21650 emit_move_insn (reg1, GEN_INT (first));
21651
21652
21653 /* Step 1: round SIZE to the previous multiple of the interval. */
21654
21655 rounded_size = size & -PROBE_INTERVAL;
21656 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21657
21658
21659 /* Step 2: compute initial and final value of the loop counter. */
21660
21661 /* TEST_ADDR = SP + FIRST. */
21662 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21663
21664 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21665 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21666
21667
21668 /* Step 3: the loop
21669
21670 do
21671 {
21672 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21673 probe at TEST_ADDR
21674 }
21675 while (TEST_ADDR != LAST_ADDR)
21676
21677 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21678 until it is equal to ROUNDED_SIZE. */
21679
21680 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21681
21682
21683 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21684 that SIZE is equal to ROUNDED_SIZE. */
21685
21686 if (size != rounded_size)
21687 {
21688 HOST_WIDE_INT rem = size - rounded_size;
21689
21690 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21691 {
21692 emit_set_insn (sr.reg,
21693 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21694 emit_stack_probe (plus_constant (Pmode, sr.reg,
21695 PROBE_INTERVAL - rem));
21696 }
21697 else
21698 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21699 }
21700
21701 release_scratch_register_on_entry (&sr);
21702 }
21703
21704 /* Make sure nothing is scheduled before we are done. */
21705 emit_insn (gen_blockage ());
21706 }
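
/* Summary of the three strategies above, assuming the default 4096-byte
   PROBE_INTERVAL:

     - SIZE <= PROBE_INTERVAL: a single probe at SP - FIRST - SIZE;
     - SIZE <= 5 * PROBE_INTERVAL: unrolled probes at SP - FIRST - N*4096
       for N = 1, 2, ..., plus a final probe at SP - FIRST - SIZE;
     - otherwise: a run-time loop (see output_probe_stack_range) that
       steps TEST_ADDR down by 4096 per iteration until it equals
       LAST_ADDR, plus a trailing probe when SIZE is not a multiple of
       the interval.  */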
21707
21708 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21709 absolute addresses. */
21710
21711 const char *
21712 output_probe_stack_range (rtx reg1, rtx reg2)
21713 {
21714 static int labelno = 0;
21715 char loop_lab[32];
21716 rtx xops[2];
21717
21718 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21719
21720 /* Loop. */
21721 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21722
21723 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21724 xops[0] = reg1;
21725 xops[1] = GEN_INT (PROBE_INTERVAL);
21726 output_asm_insn ("sub\t%0, %0, %1", xops);
21727
21728 /* Probe at TEST_ADDR. */
21729 output_asm_insn ("str\tr0, [%0, #0]", xops);
21730
21731 /* Test if TEST_ADDR == LAST_ADDR. */
21732 xops[1] = reg2;
21733 output_asm_insn ("cmp\t%0, %1", xops);
21734
21735 /* Branch. */
21736 fputs ("\tbne\t", asm_out_file);
21737 assemble_name_raw (asm_out_file, loop_lab);
21738 fputc ('\n', asm_out_file);
21739
21740 return "";
21741 }
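
/* With the default 4096-byte PROBE_INTERVAL, the sequence emitted by the
   function above looks roughly like this (the label spelling is internal
   and the r4/r5 register choice is made up for the example):

       .LPSRL0:
               sub     r4, r4, #4096    @ TEST_ADDR -= PROBE_INTERVAL
               str     r0, [r4, #0]     @ probe at TEST_ADDR
               cmp     r4, r5           @ TEST_ADDR == LAST_ADDR?
               bne     .LPSRL0
*/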
21742
21743 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21744 function. */
21745 void
21746 arm_expand_prologue (void)
21747 {
21748 rtx amount;
21749 rtx insn;
21750 rtx ip_rtx;
21751 unsigned long live_regs_mask;
21752 unsigned long func_type;
21753 int fp_offset = 0;
21754 int saved_pretend_args = 0;
21755 int saved_regs = 0;
21756 unsigned HOST_WIDE_INT args_to_push;
21757 HOST_WIDE_INT size;
21758 arm_stack_offsets *offsets;
21759 bool clobber_ip;
21760
21761 func_type = arm_current_func_type ();
21762
21763 /* Naked functions don't have prologues. */
21764 if (IS_NAKED (func_type))
21765 {
21766 if (flag_stack_usage_info)
21767 current_function_static_stack_size = 0;
21768 return;
21769 }
21770
21771 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21772 args_to_push = crtl->args.pretend_args_size;
21773
21774 /* Compute which registers we will have to save onto the stack. */
21775 offsets = arm_get_frame_offsets ();
21776 live_regs_mask = offsets->saved_regs_mask;
21777
21778 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21779
21780 if (IS_STACKALIGN (func_type))
21781 {
21782 rtx r0, r1;
21783
21784 /* Handle a word-aligned stack pointer. We generate the following:
21785
21786 mov r0, sp
21787 bic r1, r0, #7
21788 mov sp, r1
21789 <save and restore r0 in normal prologue/epilogue>
21790 mov sp, r0
21791 bx lr
21792
21793 The unwinder doesn't need to know about the stack realignment.
21794 Just tell it we saved SP in r0. */
21795 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21796
21797 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21798 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21799
21800 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21801 RTX_FRAME_RELATED_P (insn) = 1;
21802 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21803
21804 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21805
21806 /* ??? The CFA changes here, which may cause GDB to conclude that it
21807 has entered a different function. That said, the unwind info is
21808 correct, individually, before and after this instruction because
21809 we've described the save of SP, which will override the default
21810 handling of SP as restoring from the CFA. */
21811 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21812 }
21813
21814 /* Let's compute the static_chain_stack_bytes required and store it. Right
21815 now the value must be -1 as stored by arm_init_machine_status (). */
21816 cfun->machine->static_chain_stack_bytes
21817 = arm_compute_static_chain_stack_bytes ();
21818
21819 /* The static chain register is the same as the IP register. If it is
21820 clobbered when creating the frame, we need to save and restore it. */
21821 clobber_ip = IS_NESTED (func_type)
21822 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21823 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21824 || flag_stack_clash_protection)
21825 && !df_regs_ever_live_p (LR_REGNUM)
21826 && arm_r3_live_at_start_p ()));
21827
21828 /* Find somewhere to store IP whilst the frame is being created.
21829 We try the following places in order:
21830
21831 1. The last argument register r3 if it is available.
21832 2. A slot on the stack above the frame if there are no
21833 arguments to push onto the stack.
21834 3. Register r3 again, after pushing the argument registers
21835 onto the stack, if this is a varargs function.
21836 4. The last slot on the stack created for the arguments to
21837 push, if this isn't a varargs function.
21838
21839 Note - we only need to tell the dwarf2 backend about the SP
21840 adjustment in the second variant; the static chain register
21841 doesn't need to be unwound, as it doesn't contain a value
21842 inherited from the caller. */
21843 if (clobber_ip)
21844 {
21845 if (!arm_r3_live_at_start_p ())
21846 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21847 else if (args_to_push == 0)
21848 {
21849 rtx addr, dwarf;
21850
21851 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21852 saved_regs += 4;
21853
21854 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21855 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21856 fp_offset = 4;
21857
21858 /* Just tell the dwarf backend that we adjusted SP. */
21859 dwarf = gen_rtx_SET (stack_pointer_rtx,
21860 plus_constant (Pmode, stack_pointer_rtx,
21861 -fp_offset));
21862 RTX_FRAME_RELATED_P (insn) = 1;
21863 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21864 }
21865 else
21866 {
21867 /* Store the args on the stack. */
21868 if (cfun->machine->uses_anonymous_args)
21869 {
21870 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21871 (0xf0 >> (args_to_push / 4)) & 0xf);
21872 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21873 saved_pretend_args = 1;
21874 }
21875 else
21876 {
21877 rtx addr, dwarf;
21878
21879 if (args_to_push == 4)
21880 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21881 else
21882 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21883 plus_constant (Pmode,
21884 stack_pointer_rtx,
21885 -args_to_push));
21886
21887 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21888
21889 /* Just tell the dwarf backend that we adjusted SP. */
21890 dwarf = gen_rtx_SET (stack_pointer_rtx,
21891 plus_constant (Pmode, stack_pointer_rtx,
21892 -args_to_push));
21893 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21894 }
21895
21896 RTX_FRAME_RELATED_P (insn) = 1;
21897 fp_offset = args_to_push;
21898 args_to_push = 0;
21899 }
21900 }
21901
21902 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21903 {
21904 if (IS_INTERRUPT (func_type))
21905 {
21906 /* Interrupt functions must not corrupt any registers.
21907 Creating a frame pointer however, corrupts the IP
21908 register, so we must push it first. */
21909 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21910
21911 /* Do not set RTX_FRAME_RELATED_P on this insn.
21912 The dwarf stack unwinding code only wants to see one
21913 stack decrement per function, and this is not it. If
21914 this instruction is labeled as being part of the frame
21915 creation sequence then dwarf2out_frame_debug_expr will
21916 die when it encounters the assignment of IP to FP
21917 later on, since the use of SP here establishes SP as
21918 the CFA register and not IP.
21919
21920 Anyway this instruction is not really part of the stack
21921 frame creation although it is part of the prologue. */
21922 }
21923
21924 insn = emit_set_insn (ip_rtx,
21925 plus_constant (Pmode, stack_pointer_rtx,
21926 fp_offset));
21927 RTX_FRAME_RELATED_P (insn) = 1;
21928 }
21929
21930 if (args_to_push)
21931 {
21932 /* Push the argument registers, or reserve space for them. */
21933 if (cfun->machine->uses_anonymous_args)
21934 insn = emit_multi_reg_push
21935 ((0xf0 >> (args_to_push / 4)) & 0xf,
21936 (0xf0 >> (args_to_push / 4)) & 0xf);
21937 else
21938 insn = emit_insn
21939 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21940 GEN_INT (- args_to_push)));
21941 RTX_FRAME_RELATED_P (insn) = 1;
21942 }
21943
21944 /* If this is an interrupt service routine, and the link register
21945 is going to be pushed, and we're not generating the extra
21946 push of IP (needed when a frame is required and the frame layout
21947 is APCS), then subtracting four from LR now means that the
21948 function return can be done with a single instruction. */
21949 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21950 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21951 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21952 && TARGET_ARM)
21953 {
21954 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21955
21956 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21957 }
21958
21959 if (live_regs_mask)
21960 {
21961 unsigned long dwarf_regs_mask = live_regs_mask;
21962
21963 saved_regs += bit_count (live_regs_mask) * 4;
21964 if (optimize_size && !frame_pointer_needed
21965 && saved_regs == offsets->saved_regs - offsets->saved_args)
21966 {
21967 /* If no coprocessor registers are being pushed and we don't have
21968 to worry about a frame pointer then push extra registers to
21969 create the stack frame. This is done in a way that does not
21970 alter the frame layout, so is independent of the epilogue. */
21971 int n;
21972 int frame;
21973 n = 0;
21974 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21975 n++;
21976 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21977 if (frame && n * 4 >= frame)
21978 {
21979 n = frame / 4;
21980 live_regs_mask |= (1 << n) - 1;
21981 saved_regs += frame;
21982 }
21983 }
21984
21985 if (TARGET_LDRD
21986 && current_tune->prefer_ldrd_strd
21987 && !optimize_function_for_size_p (cfun))
21988 {
21989 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21990 if (TARGET_THUMB2)
21991 thumb2_emit_strd_push (live_regs_mask);
21992 else if (TARGET_ARM
21993 && !TARGET_APCS_FRAME
21994 && !IS_INTERRUPT (func_type))
21995 arm_emit_strd_push (live_regs_mask);
21996 else
21997 {
21998 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21999 RTX_FRAME_RELATED_P (insn) = 1;
22000 }
22001 }
22002 else
22003 {
22004 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
22005 RTX_FRAME_RELATED_P (insn) = 1;
22006 }
22007 }
22008
22009 if (! IS_VOLATILE (func_type))
22010 saved_regs += arm_save_coproc_regs ();
22011
22012 if (frame_pointer_needed && TARGET_ARM)
22013 {
22014 /* Create the new frame pointer. */
22015 if (TARGET_APCS_FRAME)
22016 {
22017 insn = GEN_INT (-(4 + args_to_push + fp_offset));
22018 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
22019 RTX_FRAME_RELATED_P (insn) = 1;
22020 }
22021 else
22022 {
22023 insn = GEN_INT (saved_regs - (4 + fp_offset));
22024 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22025 stack_pointer_rtx, insn));
22026 RTX_FRAME_RELATED_P (insn) = 1;
22027 }
22028 }
22029
22030 size = offsets->outgoing_args - offsets->saved_args;
22031 if (flag_stack_usage_info)
22032 current_function_static_stack_size = size;
22033
22034 /* If this isn't an interrupt service routine and we have a frame, then do
22035 stack checking. We use IP as the first scratch register, except for the
22036 non-APCS nested functions, where LR or r3 is used instead if available (see clobber_ip). */
22037 if (!IS_INTERRUPT (func_type)
22038 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
22039 || flag_stack_clash_protection))
22040 {
22041 unsigned int regno;
22042
22043 if (!IS_NESTED (func_type) || clobber_ip)
22044 regno = IP_REGNUM;
22045 else if (df_regs_ever_live_p (LR_REGNUM))
22046 regno = LR_REGNUM;
22047 else
22048 regno = 3;
22049
22050 if (crtl->is_leaf && !cfun->calls_alloca)
22051 {
22052 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
22053 arm_emit_probe_stack_range (get_stack_check_protect (),
22054 size - get_stack_check_protect (),
22055 regno, live_regs_mask);
22056 }
22057 else if (size > 0)
22058 arm_emit_probe_stack_range (get_stack_check_protect (), size,
22059 regno, live_regs_mask);
22060 }
22061
22062 /* Recover the static chain register. */
22063 if (clobber_ip)
22064 {
22065 if (!arm_r3_live_at_start_p () || saved_pretend_args)
22066 insn = gen_rtx_REG (SImode, 3);
22067 else
22068 {
22069 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
22070 insn = gen_frame_mem (SImode, insn);
22071 }
22072 emit_set_insn (ip_rtx, insn);
22073 emit_insn (gen_force_register_use (ip_rtx));
22074 }
22075
22076 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
22077 {
22078 /* This add can produce multiple insns for a large constant, so we
22079 need to get tricky. */
22080 rtx_insn *last = get_last_insn ();
22081
22082 amount = GEN_INT (offsets->saved_args + saved_regs
22083 - offsets->outgoing_args);
22084
22085 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22086 amount));
22087 do
22088 {
22089 last = last ? NEXT_INSN (last) : get_insns ();
22090 RTX_FRAME_RELATED_P (last) = 1;
22091 }
22092 while (last != insn);
22093
22094 /* If the frame pointer is needed, emit a special barrier that
22095 will prevent the scheduler from moving stores to the frame
22096 before the stack adjustment. */
22097 if (frame_pointer_needed)
22098 emit_insn (gen_stack_tie (stack_pointer_rtx,
22099 hard_frame_pointer_rtx));
22100 }
22101
22102
22103 if (frame_pointer_needed && TARGET_THUMB2)
22104 thumb_set_frame_pointer (offsets);
22105
22106 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22107 {
22108 unsigned long mask;
22109
22110 mask = live_regs_mask;
22111 mask &= THUMB2_WORK_REGS;
22112 if (!IS_NESTED (func_type))
22113 mask |= (1 << IP_REGNUM);
22114 arm_load_pic_register (mask, NULL_RTX);
22115 }
22116
22117 /* If we are profiling, make sure no instructions are scheduled before
22118 the call to mcount. Similarly if the user has requested no
22119 scheduling in the prolog. Similarly if we want non-call exceptions
22120 using the EABI unwinder, to prevent faulting instructions from being
22121 swapped with a stack adjustment. */
22122 if (crtl->profile || !TARGET_SCHED_PROLOG
22123 || (arm_except_unwind_info (&global_options) == UI_TARGET
22124 && cfun->can_throw_non_call_exceptions))
22125 emit_insn (gen_blockage ());
22126
22127 /* If the link register is being kept alive, with the return address in it,
22128 then make sure that it does not get reused by the ce2 pass. */
22129 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22130 cfun->machine->lr_save_eliminated = 1;
22131 }
22132 \f
22133 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22134 static void
22135 arm_print_condition (FILE *stream)
22136 {
22137 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22138 {
22139 /* Branch conversion is not implemented for Thumb-2. */
22140 if (TARGET_THUMB)
22141 {
22142 output_operand_lossage ("predicated Thumb instruction");
22143 return;
22144 }
22145 if (current_insn_predicate != NULL)
22146 {
22147 output_operand_lossage
22148 ("predicated instruction in conditional sequence");
22149 return;
22150 }
22151
22152 fputs (arm_condition_codes[arm_current_cc], stream);
22153 }
22154 else if (current_insn_predicate)
22155 {
22156 enum arm_cond_code code;
22157
22158 if (TARGET_THUMB1)
22159 {
22160 output_operand_lossage ("predicated Thumb instruction");
22161 return;
22162 }
22163
22164 code = get_arm_condition_code (current_insn_predicate);
22165 fputs (arm_condition_codes[code], stream);
22166 }
22167 }
22168
22169
22170 /* Globally reserved letters: acln
22171 Punctuation letters currently used: @_|?().!#
22172 Lower case letters currently used: bcdefhimpqtvwxyz
22173 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22174 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22175
22176 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22177
22178 If CODE is 'd', then X is a condition operand and the instruction
22179 should only be executed if the condition is true.
22180 If CODE is 'D', then X is a condition operand and the instruction
22181 should only be executed if the condition is false: however, if the mode
22182 of the comparison is CCFPEmode, then always execute the instruction -- we
22183 do this because in these circumstances !GE does not necessarily imply LT;
22184 in these cases the instruction pattern will take care to make sure that
22185 an instruction containing %d will follow, thereby undoing the effects of
22186 doing this instruction unconditionally.
22187 If CODE is 'N' then X is a floating point operand that must be negated
22188 before output.
22189 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22190 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
22191 static void
22192 arm_print_operand (FILE *stream, rtx x, int code)
22193 {
22194 switch (code)
22195 {
22196 case '@':
22197 fputs (ASM_COMMENT_START, stream);
22198 return;
22199
22200 case '_':
22201 fputs (user_label_prefix, stream);
22202 return;
22203
22204 case '|':
22205 fputs (REGISTER_PREFIX, stream);
22206 return;
22207
22208 case '?':
22209 arm_print_condition (stream);
22210 return;
22211
22212 case '.':
22213 /* The current condition code for a condition code setting instruction.
22214 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22215 fputc('s', stream);
22216 arm_print_condition (stream);
22217 return;
22218
22219 case '!':
22220 /* If the instruction is conditionally executed then print
22221 the current condition code, otherwise print 's'. */
22222 gcc_assert (TARGET_THUMB2);
22223 if (current_insn_predicate)
22224 arm_print_condition (stream);
22225 else
22226 fputc('s', stream);
22227 break;
22228
22229 /* %# is a "break" sequence. It doesn't output anything, but is used to
22230 separate e.g. operand numbers from following text, if that text consists
22231 of further digits which we don't want to be part of the operand
22232 number. */
22233 case '#':
22234 return;
22235
22236 case 'N':
22237 {
22238 REAL_VALUE_TYPE r;
22239 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22240 fprintf (stream, "%s", fp_const_from_val (&r));
22241 }
22242 return;
22243
22244 /* An integer or symbol address without a preceding # sign. */
22245 case 'c':
22246 switch (GET_CODE (x))
22247 {
22248 case CONST_INT:
22249 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22250 break;
22251
22252 case SYMBOL_REF:
22253 output_addr_const (stream, x);
22254 break;
22255
22256 case CONST:
22257 if (GET_CODE (XEXP (x, 0)) == PLUS
22258 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22259 {
22260 output_addr_const (stream, x);
22261 break;
22262 }
22263 /* Fall through. */
22264
22265 default:
22266 output_operand_lossage ("Unsupported operand for code '%c'", code);
22267 }
22268 return;
22269
22270 /* An integer that we want to print in HEX. */
22271 case 'x':
22272 switch (GET_CODE (x))
22273 {
22274 case CONST_INT:
22275 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22276 break;
22277
22278 default:
22279 output_operand_lossage ("Unsupported operand for code '%c'", code);
22280 }
22281 return;
22282
22283 case 'B':
22284 if (CONST_INT_P (x))
22285 {
22286 HOST_WIDE_INT val;
22287 val = ARM_SIGN_EXTEND (~INTVAL (x));
22288 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22289 }
22290 else
22291 {
22292 putc ('~', stream);
22293 output_addr_const (stream, x);
22294 }
22295 return;
22296
22297 case 'b':
22298 /* Print the log2 of a CONST_INT. */
22299 {
22300 HOST_WIDE_INT val;
22301
22302 if (!CONST_INT_P (x)
22303 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22304 output_operand_lossage ("Unsupported operand for code '%c'", code);
22305 else
22306 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22307 }
22308 return;
22309
22310 case 'L':
22311 /* The low 16 bits of an immediate constant. */
22312 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22313 return;
22314
22315 case 'i':
22316 fprintf (stream, "%s", arithmetic_instr (x, 1));
22317 return;
22318
22319 case 'I':
22320 fprintf (stream, "%s", arithmetic_instr (x, 0));
22321 return;
22322
22323 case 'S':
22324 {
22325 HOST_WIDE_INT val;
22326 const char *shift;
22327
22328 shift = shift_op (x, &val);
22329
22330 if (shift)
22331 {
22332 fprintf (stream, ", %s ", shift);
22333 if (val == -1)
22334 arm_print_operand (stream, XEXP (x, 1), 0);
22335 else
22336 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22337 }
22338 }
22339 return;
22340
22341 /* An explanation of the 'Q', 'R' and 'H' register operands:
22342
22343 In a pair of registers containing a DI or DF value the 'Q'
22344 operand returns the register number of the register containing
22345 the least significant part of the value. The 'R' operand returns
22346 the register number of the register containing the most
22347 significant part of the value.
22348
22349 The 'H' operand returns the higher of the two register numbers.
22350 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22351 same as the 'Q' operand, since the most significant part of the
22352 value is held in the lower number register. The reverse is true
22353 on systems where WORDS_BIG_ENDIAN is false.
22354
22355 The purpose of these operands is to distinguish between cases
22356 where the endian-ness of the values is important (for example
22357 when they are added together), and cases where the endian-ness
22358 is irrelevant, but the order of register operations is important.
22359 For example when loading a value from memory into a register
22360 pair, the endian-ness does not matter. Provided that the value
22361 from the lower memory address is put into the lower numbered
22362 register, and the value from the higher address is put into the
22363 higher numbered register, the load will work regardless of whether
22364 the value being loaded is big-wordian or little-wordian. The
22365 order of the two register loads can matter however, if the address
22366 of the memory location is actually held in one of the registers
22367 being overwritten by the load.
22368
22369 The 'Q' and 'R' constraints are also available for 64-bit
22370 constants. */
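
    /* A concrete example of the above, assuming a DImode value held in
       the register pair {r0, r1}: on a little-endian target
       (WORDS_BIG_ENDIAN false) %Q prints r0, %R prints r1 and %H prints
       r1; with WORDS_BIG_ENDIAN true, %Q prints r1, %R prints r0 and %H
       still prints r1.  */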
22371 case 'Q':
22372 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22373 {
22374 rtx part = gen_lowpart (SImode, x);
22375 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22376 return;
22377 }
22378
22379 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22380 {
22381 output_operand_lossage ("invalid operand for code '%c'", code);
22382 return;
22383 }
22384
22385 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22386 return;
22387
22388 case 'R':
22389 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22390 {
22391 machine_mode mode = GET_MODE (x);
22392 rtx part;
22393
22394 if (mode == VOIDmode)
22395 mode = DImode;
22396 part = gen_highpart_mode (SImode, mode, x);
22397 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22398 return;
22399 }
22400
22401 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22402 {
22403 output_operand_lossage ("invalid operand for code '%c'", code);
22404 return;
22405 }
22406
22407 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22408 return;
22409
22410 case 'H':
22411 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22412 {
22413 output_operand_lossage ("invalid operand for code '%c'", code);
22414 return;
22415 }
22416
22417 asm_fprintf (stream, "%r", REGNO (x) + 1);
22418 return;
22419
22420 case 'J':
22421 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22422 {
22423 output_operand_lossage ("invalid operand for code '%c'", code);
22424 return;
22425 }
22426
22427 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22428 return;
22429
22430 case 'K':
22431 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22432 {
22433 output_operand_lossage ("invalid operand for code '%c'", code);
22434 return;
22435 }
22436
22437 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22438 return;
22439
22440 case 'm':
22441 asm_fprintf (stream, "%r",
22442 REG_P (XEXP (x, 0))
22443 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22444 return;
22445
22446 case 'M':
22447 asm_fprintf (stream, "{%r-%r}",
22448 REGNO (x),
22449 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22450 return;
22451
22452 /* Like 'M', but writing doubleword vector registers, for use by Neon
22453 insns. */
22454 case 'h':
22455 {
22456 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22457 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22458 if (numregs == 1)
22459 asm_fprintf (stream, "{d%d}", regno);
22460 else
22461 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22462 }
22463 return;
22464
22465 case 'd':
22466 /* CONST_TRUE_RTX means always -- that's the default. */
22467 if (x == const_true_rtx)
22468 return;
22469
22470 if (!COMPARISON_P (x))
22471 {
22472 output_operand_lossage ("invalid operand for code '%c'", code);
22473 return;
22474 }
22475
22476 fputs (arm_condition_codes[get_arm_condition_code (x)],
22477 stream);
22478 return;
22479
22480 case 'D':
22481 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22482 want to do that. */
22483 if (x == const_true_rtx)
22484 {
22485 output_operand_lossage ("instruction never executed");
22486 return;
22487 }
22488 if (!COMPARISON_P (x))
22489 {
22490 output_operand_lossage ("invalid operand for code '%c'", code);
22491 return;
22492 }
22493
22494 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22495 (get_arm_condition_code (x))],
22496 stream);
22497 return;
22498
22499 case 's':
22500 case 'V':
22501 case 'W':
22502 case 'X':
22503 case 'Y':
22504 case 'Z':
22505 /* Former Maverick support, removed after GCC-4.7. */
22506 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22507 return;
22508
22509 case 'U':
22510 if (!REG_P (x)
22511 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22512 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22513 /* Bad value for wCG register number. */
22514 {
22515 output_operand_lossage ("invalid operand for code '%c'", code);
22516 return;
22517 }
22518
22519 else
22520 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22521 return;
22522
22523 /* Print an iWMMXt control register name. */
22524 case 'w':
22525 if (!CONST_INT_P (x)
22526 || INTVAL (x) < 0
22527 || INTVAL (x) >= 16)
22528 /* Bad value for wC register number. */
22529 {
22530 output_operand_lossage ("invalid operand for code '%c'", code);
22531 return;
22532 }
22533
22534 else
22535 {
22536 static const char * wc_reg_names [16] =
22537 {
22538 "wCID", "wCon", "wCSSF", "wCASF",
22539 "wC4", "wC5", "wC6", "wC7",
22540 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22541 "wC12", "wC13", "wC14", "wC15"
22542 };
22543
22544 fputs (wc_reg_names [INTVAL (x)], stream);
22545 }
22546 return;
22547
22548 /* Print the high single-precision register of a VFP double-precision
22549 register. */
22550 case 'p':
22551 {
22552 machine_mode mode = GET_MODE (x);
22553 int regno;
22554
22555 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22556 {
22557 output_operand_lossage ("invalid operand for code '%c'", code);
22558 return;
22559 }
22560
22561 regno = REGNO (x);
22562 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22563 {
22564 output_operand_lossage ("invalid operand for code '%c'", code);
22565 return;
22566 }
22567
22568 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22569 }
22570 return;
22571
22572 /* Print a VFP/Neon double precision or quad precision register name. */
22573 case 'P':
22574 case 'q':
22575 {
22576 machine_mode mode = GET_MODE (x);
22577 int is_quad = (code == 'q');
22578 int regno;
22579
22580 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22581 {
22582 output_operand_lossage ("invalid operand for code '%c'", code);
22583 return;
22584 }
22585
22586 if (!REG_P (x)
22587 || !IS_VFP_REGNUM (REGNO (x)))
22588 {
22589 output_operand_lossage ("invalid operand for code '%c'", code);
22590 return;
22591 }
22592
22593 regno = REGNO (x);
22594 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22595 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22596 {
22597 output_operand_lossage ("invalid operand for code '%c'", code);
22598 return;
22599 }
22600
22601 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22602 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22603 }
22604 return;
22605
22606 /* These two codes print the low/high doubleword register of a Neon quad
22607 register, respectively. For pair-structure types, can also print
22608 low/high quadword registers. */
22609 case 'e':
22610 case 'f':
22611 {
22612 machine_mode mode = GET_MODE (x);
22613 int regno;
22614
22615 if ((GET_MODE_SIZE (mode) != 16
22616 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22617 {
22618 output_operand_lossage ("invalid operand for code '%c'", code);
22619 return;
22620 }
22621
22622 regno = REGNO (x);
22623 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22624 {
22625 output_operand_lossage ("invalid operand for code '%c'", code);
22626 return;
22627 }
22628
22629 if (GET_MODE_SIZE (mode) == 16)
22630 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22631 + (code == 'f' ? 1 : 0));
22632 else
22633 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22634 + (code == 'f' ? 1 : 0));
22635 }
22636 return;
22637
22638 /* Print a VFPv3 floating-point constant, represented as an integer
22639 index. */
22640 case 'G':
22641 {
22642 int index = vfp3_const_double_index (x);
22643 gcc_assert (index != -1);
22644 fprintf (stream, "%d", index);
22645 }
22646 return;
22647
22648 /* Print bits representing opcode features for Neon.
22649
22650 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22651 and polynomials as unsigned.
22652
22653 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22654
22655 Bit 2 is 1 for rounding functions, 0 otherwise. */
22656
22657 /* Identify the type as 's', 'u', 'p' or 'f'. */
22658 case 'T':
22659 {
22660 HOST_WIDE_INT bits = INTVAL (x);
22661 fputc ("uspf"[bits & 3], stream);
22662 }
22663 return;
22664
22665 /* Likewise, but signed and unsigned integers are both 'i'. */
22666 case 'F':
22667 {
22668 HOST_WIDE_INT bits = INTVAL (x);
22669 fputc ("iipf"[bits & 3], stream);
22670 }
22671 return;
22672
22673 /* As for 'T', but emit 'u' instead of 'p'. */
22674 case 't':
22675 {
22676 HOST_WIDE_INT bits = INTVAL (x);
22677 fputc ("usuf"[bits & 3], stream);
22678 }
22679 return;
22680
22681 /* Bit 2: rounding (vs none). */
22682 case 'O':
22683 {
22684 HOST_WIDE_INT bits = INTVAL (x);
22685 fputs ((bits & 4) != 0 ? "r" : "", stream);
22686 }
22687 return;
22688
22689 /* Memory operand for vld1/vst1 instruction. */
22690 case 'A':
22691 {
22692 rtx addr;
22693 bool postinc = false;
22694 rtx postinc_reg = NULL;
22695 unsigned align, memsize, align_bits;
22696
22697 gcc_assert (MEM_P (x));
22698 addr = XEXP (x, 0);
22699 if (GET_CODE (addr) == POST_INC)
22700 {
22701 postinc = true;
22702 addr = XEXP (addr, 0);
22703 }
22704 if (GET_CODE (addr) == POST_MODIFY)
22705 {
22706 postinc_reg = XEXP( XEXP (addr, 1), 1);
22707 addr = XEXP (addr, 0);
22708 }
22709 asm_fprintf (stream, "[%r", REGNO (addr));
22710
22711 /* We know the alignment of this access, so we can emit a hint in the
22712 instruction (for some alignments) as an aid to the memory subsystem
22713 of the target. */
22714 align = MEM_ALIGN (x) >> 3;
22715 memsize = MEM_SIZE (x);
22716
22717 /* Only certain alignment specifiers are supported by the hardware. */
22718 if (memsize == 32 && (align % 32) == 0)
22719 align_bits = 256;
22720 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22721 align_bits = 128;
22722 else if (memsize >= 8 && (align % 8) == 0)
22723 align_bits = 64;
22724 else
22725 align_bits = 0;
22726
22727 if (align_bits != 0)
22728 asm_fprintf (stream, ":%d", align_bits);
22729
22730 asm_fprintf (stream, "]");
22731
22732 if (postinc)
22733 fputs("!", stream);
22734 if (postinc_reg)
22735 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22736 }
22737 return;
22738
22739 case 'C':
22740 {
22741 rtx addr;
22742
22743 gcc_assert (MEM_P (x));
22744 addr = XEXP (x, 0);
22745 gcc_assert (REG_P (addr));
22746 asm_fprintf (stream, "[%r]", REGNO (addr));
22747 }
22748 return;
22749
22750 /* Translate an S register number into a D register number and element index. */
22751 case 'y':
22752 {
22753 machine_mode mode = GET_MODE (x);
22754 int regno;
22755
22756 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22757 {
22758 output_operand_lossage ("invalid operand for code '%c'", code);
22759 return;
22760 }
22761
22762 regno = REGNO (x);
22763 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22764 {
22765 output_operand_lossage ("invalid operand for code '%c'", code);
22766 return;
22767 }
22768
22769 regno = regno - FIRST_VFP_REGNUM;
22770 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22771 }
22772 return;
22773
22774 case 'v':
22775 gcc_assert (CONST_DOUBLE_P (x));
22776 int result;
22777 result = vfp3_const_double_for_fract_bits (x);
22778 if (result == 0)
22779 result = vfp3_const_double_for_bits (x);
22780 fprintf (stream, "#%d", result);
22781 return;
22782
22783 /* Register specifier for vld1.16/vst1.16. Translate the S register
22784 number into a D register number and element index. */
22785 case 'z':
22786 {
22787 machine_mode mode = GET_MODE (x);
22788 int regno;
22789
22790 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22791 {
22792 output_operand_lossage ("invalid operand for code '%c'", code);
22793 return;
22794 }
22795
22796 regno = REGNO (x);
22797 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22798 {
22799 output_operand_lossage ("invalid operand for code '%c'", code);
22800 return;
22801 }
22802
22803 regno = regno - FIRST_VFP_REGNUM;
22804 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22805 }
22806 return;
22807
22808 default:
22809 if (x == 0)
22810 {
22811 output_operand_lossage ("missing operand");
22812 return;
22813 }
22814
22815 switch (GET_CODE (x))
22816 {
22817 case REG:
22818 asm_fprintf (stream, "%r", REGNO (x));
22819 break;
22820
22821 case MEM:
22822 output_address (GET_MODE (x), XEXP (x, 0));
22823 break;
22824
22825 case CONST_DOUBLE:
22826 {
22827 char fpstr[20];
22828 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22829 sizeof (fpstr), 0, 1);
22830 fprintf (stream, "#%s", fpstr);
22831 }
22832 break;
22833
22834 default:
22835 gcc_assert (GET_CODE (x) != NEG);
22836 fputc ('#', stream);
22837 if (GET_CODE (x) == HIGH)
22838 {
22839 fputs (":lower16:", stream);
22840 x = XEXP (x, 0);
22841 }
22842
22843 output_addr_const (stream, x);
22844 break;
22845 }
22846 }
22847 }
22848 \f
22849 /* Target hook for printing a memory address. */
22850 static void
22851 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22852 {
22853 if (TARGET_32BIT)
22854 {
22855 int is_minus = GET_CODE (x) == MINUS;
22856
22857 if (REG_P (x))
22858 asm_fprintf (stream, "[%r]", REGNO (x));
22859 else if (GET_CODE (x) == PLUS || is_minus)
22860 {
22861 rtx base = XEXP (x, 0);
22862 rtx index = XEXP (x, 1);
22863 HOST_WIDE_INT offset = 0;
22864 if (!REG_P (base)
22865 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22866 {
22867 /* Ensure that BASE is a register -- one of them must be --
22868 and also ensure that the stack pointer is not used as
22869 an index register. */
22870 std::swap (base, index);
22871 }
22872 switch (GET_CODE (index))
22873 {
22874 case CONST_INT:
22875 offset = INTVAL (index);
22876 if (is_minus)
22877 offset = -offset;
22878 asm_fprintf (stream, "[%r, #%wd]",
22879 REGNO (base), offset);
22880 break;
22881
22882 case REG:
22883 asm_fprintf (stream, "[%r, %s%r]",
22884 REGNO (base), is_minus ? "-" : "",
22885 REGNO (index));
22886 break;
22887
22888 case MULT:
22889 case ASHIFTRT:
22890 case LSHIFTRT:
22891 case ASHIFT:
22892 case ROTATERT:
22893 {
22894 asm_fprintf (stream, "[%r, %s%r",
22895 REGNO (base), is_minus ? "-" : "",
22896 REGNO (XEXP (index, 0)));
22897 arm_print_operand (stream, index, 'S');
22898 fputs ("]", stream);
22899 break;
22900 }
22901
22902 default:
22903 gcc_unreachable ();
22904 }
22905 }
22906 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22907 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22908 {
22909 gcc_assert (REG_P (XEXP (x, 0)));
22910
22911 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22912 asm_fprintf (stream, "[%r, #%s%d]!",
22913 REGNO (XEXP (x, 0)),
22914 GET_CODE (x) == PRE_DEC ? "-" : "",
22915 GET_MODE_SIZE (mode));
22916 else
22917 asm_fprintf (stream, "[%r], #%s%d",
22918 REGNO (XEXP (x, 0)),
22919 GET_CODE (x) == POST_DEC ? "-" : "",
22920 GET_MODE_SIZE (mode));
22921 }
22922 else if (GET_CODE (x) == PRE_MODIFY)
22923 {
22924 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22925 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22926 asm_fprintf (stream, "#%wd]!",
22927 INTVAL (XEXP (XEXP (x, 1), 1)));
22928 else
22929 asm_fprintf (stream, "%r]!",
22930 REGNO (XEXP (XEXP (x, 1), 1)));
22931 }
22932 else if (GET_CODE (x) == POST_MODIFY)
22933 {
22934 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22935 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22936 asm_fprintf (stream, "#%wd",
22937 INTVAL (XEXP (XEXP (x, 1), 1)));
22938 else
22939 asm_fprintf (stream, "%r",
22940 REGNO (XEXP (XEXP (x, 1), 1)));
22941 }
22942 else output_addr_const (stream, x);
22943 }
22944 else
22945 {
22946 if (REG_P (x))
22947 asm_fprintf (stream, "[%r]", REGNO (x));
22948 else if (GET_CODE (x) == POST_INC)
22949 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22950 else if (GET_CODE (x) == PLUS)
22951 {
22952 gcc_assert (REG_P (XEXP (x, 0)));
22953 if (CONST_INT_P (XEXP (x, 1)))
22954 asm_fprintf (stream, "[%r, #%wd]",
22955 REGNO (XEXP (x, 0)),
22956 INTVAL (XEXP (x, 1)));
22957 else
22958 asm_fprintf (stream, "[%r, %r]",
22959 REGNO (XEXP (x, 0)),
22960 REGNO (XEXP (x, 1)));
22961 }
22962 else
22963 output_addr_const (stream, x);
22964 }
22965 }
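
/* Examples of the 32-bit address forms printed above (register numbers
   are made up; immediates in the auto-modify forms scale with
   GET_MODE_SIZE, and the shift mnemonic comes from shift_op):

       (reg r0)                            ->  [r0]
       (plus r0 (const_int 4))             ->  [r0, #4]
       (minus r0 (reg r1))                 ->  [r0, -r1]
       (plus r0 (ashift r1 (const_int 2))) ->  [r0, r1, lsl #2]
       (pre_dec r0), SImode access         ->  [r0, #-4]!
       (post_inc r0), SImode access        ->  [r0], #4
*/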
22966 \f
22967 /* Target hook for indicating whether a punctuation character for
22968 TARGET_PRINT_OPERAND is valid. */
22969 static bool
22970 arm_print_operand_punct_valid_p (unsigned char code)
22971 {
22972 return (code == '@' || code == '|' || code == '.'
22973 || code == '(' || code == ')' || code == '#'
22974 || (TARGET_32BIT && (code == '?'))
22975 || (TARGET_THUMB2 && (code == '!'))
22976 || (TARGET_THUMB && (code == '_')));
22977 }
22978 \f
22979 /* Target hook for assembling integer objects. The ARM version needs to
22980 handle word-sized values specially. */
22981 static bool
22982 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22983 {
22984 machine_mode mode;
22985
22986 if (size == UNITS_PER_WORD && aligned_p)
22987 {
22988 fputs ("\t.word\t", asm_out_file);
22989 output_addr_const (asm_out_file, x);
22990
22991 /* Mark symbols as position independent. We only do this in the
22992 .text segment, not in the .data segment. */
22993 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22994 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22995 {
22996 /* See legitimize_pic_address for an explanation of the
22997 TARGET_VXWORKS_RTP check. */
22998 /* References to weak symbols cannot be resolved locally:
22999 they may be overridden by a non-weak definition at link
23000 time. */
23001 if (!arm_pic_data_is_text_relative
23002 || (GET_CODE (x) == SYMBOL_REF
23003 && (!SYMBOL_REF_LOCAL_P (x)
23004 || (SYMBOL_REF_DECL (x)
23005 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
23006 fputs ("(GOT)", asm_out_file);
23007 else
23008 fputs ("(GOTOFF)", asm_out_file);
23009 }
23010 fputc ('\n', asm_out_file);
23011 return true;
23012 }
23013
23014 mode = GET_MODE (x);
23015
23016 if (arm_vector_mode_supported_p (mode))
23017 {
23018 int i, units;
23019
23020 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23021
23022 units = CONST_VECTOR_NUNITS (x);
23023 size = GET_MODE_UNIT_SIZE (mode);
23024
23025 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23026 for (i = 0; i < units; i++)
23027 {
23028 rtx elt = CONST_VECTOR_ELT (x, i);
23029 assemble_integer
23030 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
23031 }
23032 else
23033 for (i = 0; i < units; i++)
23034 {
23035 rtx elt = CONST_VECTOR_ELT (x, i);
23036 assemble_real
23037 (*CONST_DOUBLE_REAL_VALUE (elt),
23038 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
23039 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
23040 }
23041
23042 return true;
23043 }
23044
23045 return default_assemble_integer (x, size, aligned_p);
23046 }
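
/* Illustration of the word-sized PIC path above (symbol names made up):
   while emitting the constant table with -fpic, a reference to a
   locally-bound, non-weak symbol becomes

       .word   local_sym(GOTOFF)

   whereas a reference that may be preempted -- not local, weak, or with
   arm_pic_data_is_text_relative clear -- becomes

       .word   global_sym(GOT)
*/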
23047
23048 static void
23049 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
23050 {
23051 section *s;
23052
23053 if (!TARGET_AAPCS_BASED)
23054 {
23055 (is_ctor ?
23056 default_named_section_asm_out_constructor
23057 : default_named_section_asm_out_destructor) (symbol, priority);
23058 return;
23059 }
23060
23061 /* Put these in the .init_array section, using a special relocation. */
23062 if (priority != DEFAULT_INIT_PRIORITY)
23063 {
23064 char buf[18];
23065 sprintf (buf, "%s.%.5u",
23066 is_ctor ? ".init_array" : ".fini_array",
23067 priority);
23068 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23069 }
23070 else if (is_ctor)
23071 s = ctors_section;
23072 else
23073 s = dtors_section;
23074
23075 switch_to_section (s);
23076 assemble_align (POINTER_SIZE);
23077 fputs ("\t.word\t", asm_out_file);
23078 output_addr_const (asm_out_file, symbol);
23079 fputs ("(target1)\n", asm_out_file);
23080 }
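
/* For example (a sketch of the AAPCS path above, with a made-up symbol
   name): a constructor registered with priority 65 is placed in a
   section named ".init_array.00065" and emitted as

       .word   my_ctor(target1)

   while default-priority entries simply go through ctors_section or
   dtors_section.  */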
23081
23082 /* Add a function to the list of static constructors. */
23083
23084 static void
23085 arm_elf_asm_constructor (rtx symbol, int priority)
23086 {
23087 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
23088 }
23089
23090 /* Add a function to the list of static destructors. */
23091
23092 static void
23093 arm_elf_asm_destructor (rtx symbol, int priority)
23094 {
23095 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23096 }
23097 \f
23098 /* A finite state machine takes care of noticing whether or not instructions
23099 can be conditionally executed, thus decreasing execution time and code
23100 size by deleting branch instructions. The fsm is controlled by
23101 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23102
23103 /* The states of the fsm controlling condition codes are:
23104 0: normal, do nothing special
23105 1: make ASM_OUTPUT_OPCODE not output this instruction
23106 2: make ASM_OUTPUT_OPCODE not output this instruction
23107 3: make instructions conditional
23108 4: make instructions conditional
23109
23110 State transitions (state->state by whom under condition):
23111 0 -> 1 final_prescan_insn if the `target' is a label
23112 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23113 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23114 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23115 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23116 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23117 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23118 (the target insn is arm_target_insn).
23119
23120 If the jump clobbers the conditions then we use states 2 and 4.
23121
23122 A similar thing can be done with conditional return insns.
23123
23124 XXX In case the `target' is an unconditional branch, this conditionalising
23125 of the instructions always reduces code size, but not always execution
23126 time. But then, I want to reduce the code size to somewhere near what
23127 /bin/cc produces. */
23128
23129 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23130 instructions. When a COND_EXEC instruction is seen the subsequent
23131 instructions are scanned so that multiple conditional instructions can be
23132 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23133 specify the length and true/false mask for the IT block. These will be
23134 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
23135
23136 /* Returns the index of the ARM condition code string in
23137 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23138 COMPARISON should be an rtx like `(eq (...) (...))'. */
23139
23140 enum arm_cond_code
23141 maybe_get_arm_condition_code (rtx comparison)
23142 {
23143 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23144 enum arm_cond_code code;
23145 enum rtx_code comp_code = GET_CODE (comparison);
23146
23147 if (GET_MODE_CLASS (mode) != MODE_CC)
23148 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23149 XEXP (comparison, 1));
23150
23151 switch (mode)
23152 {
23153 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23154 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23155 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23156 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23157 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23158 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23159 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23160 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23161 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23162 case E_CC_DLTUmode: code = ARM_CC;
23163
23164 dominance:
23165 if (comp_code == EQ)
23166 return ARM_INVERSE_CONDITION_CODE (code);
23167 if (comp_code == NE)
23168 return code;
23169 return ARM_NV;
23170
23171 case E_CC_NOOVmode:
23172 switch (comp_code)
23173 {
23174 case NE: return ARM_NE;
23175 case EQ: return ARM_EQ;
23176 case GE: return ARM_PL;
23177 case LT: return ARM_MI;
23178 default: return ARM_NV;
23179 }
23180
23181 case E_CC_Zmode:
23182 switch (comp_code)
23183 {
23184 case NE: return ARM_NE;
23185 case EQ: return ARM_EQ;
23186 default: return ARM_NV;
23187 }
23188
23189 case E_CC_Nmode:
23190 switch (comp_code)
23191 {
23192 case NE: return ARM_MI;
23193 case EQ: return ARM_PL;
23194 default: return ARM_NV;
23195 }
23196
23197 case E_CCFPEmode:
23198 case E_CCFPmode:
23199 /* We can handle all cases except UNEQ and LTGT. */
23200 switch (comp_code)
23201 {
23202 case GE: return ARM_GE;
23203 case GT: return ARM_GT;
23204 case LE: return ARM_LS;
23205 case LT: return ARM_MI;
23206 case NE: return ARM_NE;
23207 case EQ: return ARM_EQ;
23208 case ORDERED: return ARM_VC;
23209 case UNORDERED: return ARM_VS;
23210 case UNLT: return ARM_LT;
23211 case UNLE: return ARM_LE;
23212 case UNGT: return ARM_HI;
23213 case UNGE: return ARM_PL;
23214 /* UNEQ and LTGT do not have a representation. */
23215 case UNEQ: /* Fall through. */
23216 case LTGT: /* Fall through. */
23217 default: return ARM_NV;
23218 }
23219
23220 case E_CC_SWPmode:
23221 switch (comp_code)
23222 {
23223 case NE: return ARM_NE;
23224 case EQ: return ARM_EQ;
23225 case GE: return ARM_LE;
23226 case GT: return ARM_LT;
23227 case LE: return ARM_GE;
23228 case LT: return ARM_GT;
23229 case GEU: return ARM_LS;
23230 case GTU: return ARM_CC;
23231 case LEU: return ARM_CS;
23232 case LTU: return ARM_HI;
23233 default: return ARM_NV;
23234 }
23235
23236 case E_CC_Cmode:
23237 switch (comp_code)
23238 {
23239 case LTU: return ARM_CS;
23240 case GEU: return ARM_CC;
23241 case NE: return ARM_CS;
23242 case EQ: return ARM_CC;
23243 default: return ARM_NV;
23244 }
23245
23246 case E_CC_CZmode:
23247 switch (comp_code)
23248 {
23249 case NE: return ARM_NE;
23250 case EQ: return ARM_EQ;
23251 case GEU: return ARM_CS;
23252 case GTU: return ARM_HI;
23253 case LEU: return ARM_LS;
23254 case LTU: return ARM_CC;
23255 default: return ARM_NV;
23256 }
23257
23258 case E_CC_NCVmode:
23259 switch (comp_code)
23260 {
23261 case GE: return ARM_GE;
23262 case LT: return ARM_LT;
23263 case GEU: return ARM_CS;
23264 case LTU: return ARM_CC;
23265 default: return ARM_NV;
23266 }
23267
23268 case E_CC_Vmode:
23269 switch (comp_code)
23270 {
23271 case NE: return ARM_VS;
23272 case EQ: return ARM_VC;
23273 default: return ARM_NV;
23274 }
23275
23276 case E_CCmode:
23277 switch (comp_code)
23278 {
23279 case NE: return ARM_NE;
23280 case EQ: return ARM_EQ;
23281 case GE: return ARM_GE;
23282 case GT: return ARM_GT;
23283 case LE: return ARM_LE;
23284 case LT: return ARM_LT;
23285 case GEU: return ARM_CS;
23286 case GTU: return ARM_HI;
23287 case LEU: return ARM_LS;
23288 case LTU: return ARM_CC;
23289 default: return ARM_NV;
23290 }
23291
23292 default: gcc_unreachable ();
23293 }
23294 }
23295
23296 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23297 static enum arm_cond_code
23298 get_arm_condition_code (rtx comparison)
23299 {
23300 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23301 gcc_assert (code != ARM_NV);
23302 return code;
23303 }
23304
23305 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23306 code registers when not targeting Thumb1. The VFP condition register
23307 only exists when generating hard-float code. */
23308 static bool
23309 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23310 {
23311 if (!TARGET_32BIT)
23312 return false;
23313
23314 *p1 = CC_REGNUM;
23315 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23316 return true;
23317 }
23318
23319 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23320 instructions. */
23321 void
23322 thumb2_final_prescan_insn (rtx_insn *insn)
23323 {
23324 rtx_insn *first_insn = insn;
23325 rtx body = PATTERN (insn);
23326 rtx predicate;
23327 enum arm_cond_code code;
23328 int n;
23329 int mask;
23330 int max;
23331
23332 /* max_insns_skipped in the tune was already taken into account in the
23333 cost model of the ifcvt pass when generating COND_EXEC insns. At this
23334 stage just emit the IT blocks as they come; it does not make sense to
23335 split the IT blocks. */
23336 max = MAX_INSN_PER_IT_BLOCK;
23337
23338 /* Remove the previous insn from the count of insns to be output. */
23339 if (arm_condexec_count)
23340 arm_condexec_count--;
23341
23342 /* Nothing to do if we are already inside a conditional block. */
23343 if (arm_condexec_count)
23344 return;
23345
23346 if (GET_CODE (body) != COND_EXEC)
23347 return;
23348
23349 /* Conditional jumps are implemented directly. */
23350 if (JUMP_P (insn))
23351 return;
23352
23353 predicate = COND_EXEC_TEST (body);
23354 arm_current_cc = get_arm_condition_code (predicate);
23355
23356 n = get_attr_ce_count (insn);
23357 arm_condexec_count = 1;
23358 arm_condexec_mask = (1 << n) - 1;
23359 arm_condexec_masklen = n;
23360 /* See if subsequent instructions can be combined into the same block. */
23361 for (;;)
23362 {
23363 insn = next_nonnote_insn (insn);
23364
23365 /* Jumping into the middle of an IT block is illegal, so a label or
23366 barrier terminates the block. */
23367 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23368 break;
23369
23370 body = PATTERN (insn);
23371 /* USE and CLOBBER aren't really insns, so just skip them. */
23372 if (GET_CODE (body) == USE
23373 || GET_CODE (body) == CLOBBER)
23374 continue;
23375
23376 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23377 if (GET_CODE (body) != COND_EXEC)
23378 break;
23379 /* Maximum number of conditionally executed instructions in a block. */
23380 n = get_attr_ce_count (insn);
23381 if (arm_condexec_masklen + n > max)
23382 break;
23383
23384 predicate = COND_EXEC_TEST (body);
23385 code = get_arm_condition_code (predicate);
23386 mask = (1 << n) - 1;
23387 if (arm_current_cc == code)
23388 arm_condexec_mask |= (mask << arm_condexec_masklen);
23389 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23390 break;
23391
23392 arm_condexec_count++;
23393 arm_condexec_masklen += n;
23394
23395 /* A jump must be the last instruction in a conditional block. */
23396 if (JUMP_P (insn))
23397 break;
23398 }
23399 /* Restore recog_data (getting the attributes of other insns can
23400 destroy this array, but final.c assumes that it remains intact
23401 across this call). */
23402 extract_constrain_insn_cached (first_insn);
23403 }
23404
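/* A rough summary of the ccfsm state machine driven by this function, as
   inferred from the code below: state 4 means conditional insns are
   currently being output until ARM_TARGET_INSN is reached; state 3 means
   the trick may be repeated with the sense of the condition reversed;
   states 1 and 2 are set when the scan below succeeds (1 when the branch
   target is the label itself, 2 when it is a branch or return insn).  */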
23405 void
23406 arm_final_prescan_insn (rtx_insn *insn)
23407 {
23408 /* BODY will hold the body of INSN. */
23409 rtx body = PATTERN (insn);
23410
23411 /* This will be 1 if we are trying to repeat the trick (reversing the
23412 sense of the branch), and things need to be reversed if it appears to fail. */
23413 int reverse = 0;
23414
23415 /* If we start with a return insn, we only succeed if we find another one. */
23416 int seeking_return = 0;
23417 enum rtx_code return_code = UNKNOWN;
23418
23419 /* START_INSN will hold the insn from where we start looking. This is the
23420 first insn after the following code_label if REVERSE is true. */
23421 rtx_insn *start_insn = insn;
23422
23423 /* If in state 4, check if the target branch is reached, in order to
23424 change back to state 0. */
23425 if (arm_ccfsm_state == 4)
23426 {
23427 if (insn == arm_target_insn)
23428 {
23429 arm_target_insn = NULL;
23430 arm_ccfsm_state = 0;
23431 }
23432 return;
23433 }
23434
23435 /* If in state 3, it is possible to repeat the trick, if this insn is an
23436 unconditional branch to a label, and immediately following this branch
23437 is the previous target label which is only used once, and the label this
23438 branch jumps to is not too far off. */
23439 if (arm_ccfsm_state == 3)
23440 {
23441 if (simplejump_p (insn))
23442 {
23443 start_insn = next_nonnote_insn (start_insn);
23444 if (BARRIER_P (start_insn))
23445 {
23446 /* XXX Isn't this always a barrier? */
23447 start_insn = next_nonnote_insn (start_insn);
23448 }
23449 if (LABEL_P (start_insn)
23450 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23451 && LABEL_NUSES (start_insn) == 1)
23452 reverse = TRUE;
23453 else
23454 return;
23455 }
23456 else if (ANY_RETURN_P (body))
23457 {
23458 start_insn = next_nonnote_insn (start_insn);
23459 if (BARRIER_P (start_insn))
23460 start_insn = next_nonnote_insn (start_insn);
23461 if (LABEL_P (start_insn)
23462 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23463 && LABEL_NUSES (start_insn) == 1)
23464 {
23465 reverse = TRUE;
23466 seeking_return = 1;
23467 return_code = GET_CODE (body);
23468 }
23469 else
23470 return;
23471 }
23472 else
23473 return;
23474 }
23475
23476 gcc_assert (!arm_ccfsm_state || reverse);
23477 if (!JUMP_P (insn))
23478 return;
23479
23480 /* This jump might be paralleled with a clobber of the condition codes;
23481 the jump should always come first. */
23482 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23483 body = XVECEXP (body, 0, 0);
23484
23485 if (reverse
23486 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23487 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23488 {
23489 int insns_skipped;
23490 int fail = FALSE, succeed = FALSE;
23491 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23492 int then_not_else = TRUE;
23493 rtx_insn *this_insn = start_insn;
23494 rtx label = 0;
23495
23496 /* Register the insn jumped to. */
23497 if (reverse)
23498 {
23499 if (!seeking_return)
23500 label = XEXP (SET_SRC (body), 0);
23501 }
23502 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23503 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23504 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23505 {
23506 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23507 then_not_else = FALSE;
23508 }
23509 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23510 {
23511 seeking_return = 1;
23512 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23513 }
23514 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23515 {
23516 seeking_return = 1;
23517 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23518 then_not_else = FALSE;
23519 }
23520 else
23521 gcc_unreachable ();
23522
23523 /* See how many insns this branch skips, and what kind of insns. If all
23524 insns are okay, and the label or unconditional branch to the same
23525 label is not too far away, succeed. */
23526 for (insns_skipped = 0;
23527 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23528 {
23529 rtx scanbody;
23530
23531 this_insn = next_nonnote_insn (this_insn);
23532 if (!this_insn)
23533 break;
23534
23535 switch (GET_CODE (this_insn))
23536 {
23537 case CODE_LABEL:
23538 /* Succeed if it is the target label, otherwise fail since
23539 control falls in from somewhere else. */
23540 if (this_insn == label)
23541 {
23542 arm_ccfsm_state = 1;
23543 succeed = TRUE;
23544 }
23545 else
23546 fail = TRUE;
23547 break;
23548
23549 case BARRIER:
23550 /* Succeed if the following insn is the target label.
23551 Otherwise fail.
23552 If return insns are used then the last insn in a function
23553 will be a barrier. */
23554 this_insn = next_nonnote_insn (this_insn);
23555 if (this_insn && this_insn == label)
23556 {
23557 arm_ccfsm_state = 1;
23558 succeed = TRUE;
23559 }
23560 else
23561 fail = TRUE;
23562 break;
23563
23564 case CALL_INSN:
23565 /* The AAPCS says that conditional calls should not be
23566 used since they make interworking inefficient (the
23567 linker can't transform BL<cond> into BLX). That's
23568 only a problem if the machine has BLX. */
23569 if (arm_arch5t)
23570 {
23571 fail = TRUE;
23572 break;
23573 }
23574
23575 /* Succeed if the following insn is the target label, or
23576 if the following two insns are a barrier and the
23577 target label. */
23578 this_insn = next_nonnote_insn (this_insn);
23579 if (this_insn && BARRIER_P (this_insn))
23580 this_insn = next_nonnote_insn (this_insn);
23581
23582 if (this_insn && this_insn == label
23583 && insns_skipped < max_insns_skipped)
23584 {
23585 arm_ccfsm_state = 1;
23586 succeed = TRUE;
23587 }
23588 else
23589 fail = TRUE;
23590 break;
23591
23592 case JUMP_INSN:
23593 /* If this is an unconditional branch to the same label, succeed.
23594 If it is to another label, do nothing. If it is conditional,
23595 fail. */
23596 /* XXX Probably, the tests for SET and the PC are
23597 unnecessary. */
23598
23599 scanbody = PATTERN (this_insn);
23600 if (GET_CODE (scanbody) == SET
23601 && GET_CODE (SET_DEST (scanbody)) == PC)
23602 {
23603 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23604 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23605 {
23606 arm_ccfsm_state = 2;
23607 succeed = TRUE;
23608 }
23609 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23610 fail = TRUE;
23611 }
23612 /* Fail if a conditional return is undesirable (e.g. on a
23613 StrongARM), but still allow this if optimizing for size. */
23614 else if (GET_CODE (scanbody) == return_code
23615 && !use_return_insn (TRUE, NULL)
23616 && !optimize_size)
23617 fail = TRUE;
23618 else if (GET_CODE (scanbody) == return_code)
23619 {
23620 arm_ccfsm_state = 2;
23621 succeed = TRUE;
23622 }
23623 else if (GET_CODE (scanbody) == PARALLEL)
23624 {
23625 switch (get_attr_conds (this_insn))
23626 {
23627 case CONDS_NOCOND:
23628 break;
23629 default:
23630 fail = TRUE;
23631 break;
23632 }
23633 }
23634 else
23635 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23636
23637 break;
23638
23639 case INSN:
23640 /* Instructions using or affecting the condition codes make it
23641 fail. */
23642 scanbody = PATTERN (this_insn);
23643 if (!(GET_CODE (scanbody) == SET
23644 || GET_CODE (scanbody) == PARALLEL)
23645 || get_attr_conds (this_insn) != CONDS_NOCOND)
23646 fail = TRUE;
23647 break;
23648
23649 default:
23650 break;
23651 }
23652 }
23653 if (succeed)
23654 {
23655 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23656 arm_target_label = CODE_LABEL_NUMBER (label);
23657 else
23658 {
23659 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23660
23661 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23662 {
23663 this_insn = next_nonnote_insn (this_insn);
23664 gcc_assert (!this_insn
23665 || (!BARRIER_P (this_insn)
23666 && !LABEL_P (this_insn)));
23667 }
23668 if (!this_insn)
23669 {
23670 /* Oh, dear! We ran off the end... give up. */
23671 extract_constrain_insn_cached (insn);
23672 arm_ccfsm_state = 0;
23673 arm_target_insn = NULL;
23674 return;
23675 }
23676 arm_target_insn = this_insn;
23677 }
23678
23679 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23680 what it was. */
23681 if (!reverse)
23682 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23683
23684 if (reverse || then_not_else)
23685 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23686 }
23687
23688 /* Restore recog_data (getting the attributes of other insns can
23689 destroy this array, but final.c assumes that it remains intact
23690 across this call). */
23691 extract_constrain_insn_cached (insn);
23692 }
23693 }
23694
23695 /* Output IT instructions. */
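/* For illustration: with arm_current_cc == ARM_EQ, arm_condexec_masklen == 3
   and arm_condexec_mask == 0b011 (the first two insns use the condition, the
   third its inverse), the loop below builds "tte" and the function prints
   "itte eq" immediately before the first conditional instruction.  */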
23696 void
23697 thumb2_asm_output_opcode (FILE * stream)
23698 {
23699 char buff[5];
23700 int n;
23701
23702 if (arm_condexec_mask)
23703 {
23704 for (n = 0; n < arm_condexec_masklen; n++)
23705 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23706 buff[n] = 0;
23707 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23708 arm_condition_codes[arm_current_cc]);
23709 arm_condexec_mask = 0;
23710 }
23711 }
23712
23713 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM, core regs are
23714 UNITS_PER_WORD bytes wide. */
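/* For example, ARM_NUM_REGS (DImode) is 2, so a DImode value occupies two
   consecutive core registers, while a special register such as CC_REGNUM
   always counts as a single register here.  */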
23715 static unsigned int
23716 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23717 {
23718 if (TARGET_32BIT
23719 && regno > PC_REGNUM
23720 && regno != FRAME_POINTER_REGNUM
23721 && regno != ARG_POINTER_REGNUM
23722 && !IS_VFP_REGNUM (regno))
23723 return 1;
23724
23725 return ARM_NUM_REGS (mode);
23726 }
23727
23728 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23729 static bool
23730 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23731 {
23732 if (GET_MODE_CLASS (mode) == MODE_CC)
23733 return (regno == CC_REGNUM
23734 || (TARGET_HARD_FLOAT
23735 && regno == VFPCC_REGNUM));
23736
23737 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23738 return false;
23739
23740 if (TARGET_THUMB1)
23741 /* For the Thumb we only allow values bigger than SImode in
23742 registers 0 - 6, so that there is always a second low
23743 register available to hold the upper part of the value.
23744 We probably ought to ensure that the register is the
23745 start of an even numbered register pair. */
23746 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23747
23748 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23749 {
23750 if (mode == SFmode || mode == SImode)
23751 return VFP_REGNO_OK_FOR_SINGLE (regno);
23752
23753 if (mode == DFmode)
23754 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23755
23756 if (mode == HFmode)
23757 return VFP_REGNO_OK_FOR_SINGLE (regno);
23758
23759 /* VFP registers can hold HImode values. */
23760 if (mode == HImode)
23761 return VFP_REGNO_OK_FOR_SINGLE (regno);
23762
23763 if (TARGET_NEON)
23764 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23765 || (VALID_NEON_QREG_MODE (mode)
23766 && NEON_REGNO_OK_FOR_QUAD (regno))
23767 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23768 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23769 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23770 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23771 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23772
23773 return false;
23774 }
23775
23776 if (TARGET_REALLY_IWMMXT)
23777 {
23778 if (IS_IWMMXT_GR_REGNUM (regno))
23779 return mode == SImode;
23780
23781 if (IS_IWMMXT_REGNUM (regno))
23782 return VALID_IWMMXT_REG_MODE (mode);
23783 }
23784
23785 /* We allow almost any value to be stored in the general registers.
23786 Restrict doubleword quantities to even register pairs in ARM state
23787 so that we can use ldrd. Do not allow very large Neon structure
23788 opaque modes in general registers; they would use too many. */
23789 if (regno <= LAST_ARM_REGNUM)
23790 {
23791 if (ARM_NUM_REGS (mode) > 4)
23792 return false;
23793
23794 if (TARGET_THUMB2)
23795 return true;
23796
23797 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23798 }
23799
23800 if (regno == FRAME_POINTER_REGNUM
23801 || regno == ARG_POINTER_REGNUM)
23802 /* We only allow integers in the fake hard registers. */
23803 return GET_MODE_CLASS (mode) == MODE_INT;
23804
23805 return false;
23806 }
23807
23808 /* Implement TARGET_MODES_TIEABLE_P. */
23809
23810 static bool
23811 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23812 {
23813 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23814 return true;
23815
23816 /* We specifically want to allow elements of "structure" modes to
23817 be tieable to the structure. This more general condition allows
23818 other rarer situations too. */
23819 if (TARGET_NEON
23820 && (VALID_NEON_DREG_MODE (mode1)
23821 || VALID_NEON_QREG_MODE (mode1)
23822 || VALID_NEON_STRUCT_MODE (mode1))
23823 && (VALID_NEON_DREG_MODE (mode2)
23824 || VALID_NEON_QREG_MODE (mode2)
23825 || VALID_NEON_STRUCT_MODE (mode2)))
23826 return true;
23827
23828 return false;
23829 }
23830
23831 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23832 not used in arm mode. */
23833
23834 enum reg_class
23835 arm_regno_class (int regno)
23836 {
23837 if (regno == PC_REGNUM)
23838 return NO_REGS;
23839
23840 if (TARGET_THUMB1)
23841 {
23842 if (regno == STACK_POINTER_REGNUM)
23843 return STACK_REG;
23844 if (regno == CC_REGNUM)
23845 return CC_REG;
23846 if (regno < 8)
23847 return LO_REGS;
23848 return HI_REGS;
23849 }
23850
23851 if (TARGET_THUMB2 && regno < 8)
23852 return LO_REGS;
23853
23854 if ( regno <= LAST_ARM_REGNUM
23855 || regno == FRAME_POINTER_REGNUM
23856 || regno == ARG_POINTER_REGNUM)
23857 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23858
23859 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23860 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23861
23862 if (IS_VFP_REGNUM (regno))
23863 {
23864 if (regno <= D7_VFP_REGNUM)
23865 return VFP_D0_D7_REGS;
23866 else if (regno <= LAST_LO_VFP_REGNUM)
23867 return VFP_LO_REGS;
23868 else
23869 return VFP_HI_REGS;
23870 }
23871
23872 if (IS_IWMMXT_REGNUM (regno))
23873 return IWMMXT_REGS;
23874
23875 if (IS_IWMMXT_GR_REGNUM (regno))
23876 return IWMMXT_GR_REGS;
23877
23878 return NO_REGS;
23879 }
23880
23881 /* Handle a special case when computing the offset
23882 of an argument from the frame pointer. */
23883 int
23884 arm_debugger_arg_offset (int value, rtx addr)
23885 {
23886 rtx_insn *insn;
23887
23888 /* We are only interested if dbxout_parms() failed to compute the offset. */
23889 if (value != 0)
23890 return 0;
23891
23892 /* We can only cope with the case where the address is held in a register. */
23893 if (!REG_P (addr))
23894 return 0;
23895
23896 /* If we are using the frame pointer to point at the argument, then
23897 an offset of 0 is correct. */
23898 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23899 return 0;
23900
23901 /* If we are using the stack pointer to point at the
23902 argument, then an offset of 0 is correct. */
23903 /* ??? Check this is consistent with thumb2 frame layout. */
23904 if ((TARGET_THUMB || !frame_pointer_needed)
23905 && REGNO (addr) == SP_REGNUM)
23906 return 0;
23907
23908 /* Oh dear. The argument is pointed to by a register rather
23909 than being held in a register, or being stored at a known
23910 offset from the frame pointer. Since GDB only understands
23911 those two kinds of argument we must translate the address
23912 held in the register into an offset from the frame pointer.
23913 We do this by searching through the insns for the function
23914 looking to see where this register gets its value. If the
23915 register is initialized from the frame pointer plus an offset
23916 then we are in luck and we can continue, otherwise we give up.
23917
23918 This code is exercised by producing debugging information
23919 for a function with arguments like this:
23920
23921 double func (double a, double b, int c, double d) {return d;}
23922
23923 Without this code the stab for parameter 'd' will be set to
23924 an offset of 0 from the frame pointer, rather than 8. */
23925
23926 /* The if() statement says:
23927
23928 If the insn is a normal instruction
23929 and if the insn is setting the value in a register
23930 and if the register being set is the register holding the address of the argument
23931 and if the address is computed by an addition
23932 that involves adding to a register
23933 which is the frame pointer
23934 a constant integer
23935
23936 then... */
23937
23938 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23939 {
23940 if ( NONJUMP_INSN_P (insn)
23941 && GET_CODE (PATTERN (insn)) == SET
23942 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23943 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23944 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23945 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23946 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23947 )
23948 {
23949 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23950
23951 break;
23952 }
23953 }
23954
23955 if (value == 0)
23956 {
23957 debug_rtx (addr);
23958 warning (0, "unable to compute real location of stacked parameter");
23959 value = 8; /* XXX magic hack */
23960 }
23961
23962 return value;
23963 }
23964 \f
23965 /* Implement TARGET_PROMOTED_TYPE. */
23966
23967 static tree
23968 arm_promoted_type (const_tree t)
23969 {
23970 if (SCALAR_FLOAT_TYPE_P (t)
23971 && TYPE_PRECISION (t) == 16
23972 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23973 return float_type_node;
23974 return NULL_TREE;
23975 }
23976
23977 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23978 This simply adds HFmode as a supported mode; even though we don't
23979 implement arithmetic on this type directly, it's supported by
23980 optabs conversions, much the way the double-word arithmetic is
23981 special-cased in the default hook. */
23982
23983 static bool
23984 arm_scalar_mode_supported_p (scalar_mode mode)
23985 {
23986 if (mode == HFmode)
23987 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23988 else if (ALL_FIXED_POINT_MODE_P (mode))
23989 return true;
23990 else
23991 return default_scalar_mode_supported_p (mode);
23992 }
23993
23994 /* Set the value of FLT_EVAL_METHOD.
23995 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23996
23997 0: evaluate all operations and constants, whose semantic type has at
23998 most the range and precision of type float, to the range and
23999 precision of float; evaluate all other operations and constants to
24000 the range and precision of the semantic type;
24001
24002 N, where _FloatN is a supported interchange floating type:
24003 evaluate all operations and constants, whose semantic type has at
24004 most the range and precision of _FloatN type, to the range and
24005 precision of the _FloatN type; evaluate all other operations and
24006 constants to the range and precision of the semantic type;
24007
24008 If we have the ARMv8.2-A extensions then we support _Float16 in native
24009 precision, so we should set this to 16. Otherwise, we support the type,
24010 but want to evaluate expressions in float precision, so set this to
24011 0. */
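/* As an illustration (assuming -mfp16-format=ieee so that _Float16 is
   available): for _Float16 a, b, the expression a + b is evaluated in
   'float' and rounded back when the ARMv8.2-A FP16 instructions are not
   available, but is evaluated directly in half precision when
   TARGET_VFP_FP16INST holds.  */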
24012
24013 static enum flt_eval_method
24014 arm_excess_precision (enum excess_precision_type type)
24015 {
24016 switch (type)
24017 {
24018 case EXCESS_PRECISION_TYPE_FAST:
24019 case EXCESS_PRECISION_TYPE_STANDARD:
24020 /* We can calculate either in 16-bit range and precision or
24021 32-bit range and precision. Make that decision based on whether
24022 we have native support for the ARMv8.2-A 16-bit floating-point
24023 instructions or not. */
24024 return (TARGET_VFP_FP16INST
24025 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24026 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
24027 case EXCESS_PRECISION_TYPE_IMPLICIT:
24028 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24029 default:
24030 gcc_unreachable ();
24031 }
24032 return FLT_EVAL_METHOD_UNPREDICTABLE;
24033 }
24034
24035
24036 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
24037 _Float16 if we are using anything other than ieee format for 16-bit
24038 floating point. Otherwise, punt to the default implementation. */
24039 static opt_scalar_float_mode
24040 arm_floatn_mode (int n, bool extended)
24041 {
24042 if (!extended && n == 16)
24043 {
24044 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
24045 return HFmode;
24046 return opt_scalar_float_mode ();
24047 }
24048
24049 return default_floatn_mode (n, extended);
24050 }
24051
24052
24053 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24054 not to early-clobber SRC registers in the process.
24055
24056 We assume that the operands described by SRC and DEST represent a
24057 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24058 number of components into which the copy has been decomposed. */
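/* For instance, a two-part copy of {d0,d1} into {d1,d2} overlaps and the
   destination starts at a higher register number, so the moves below are
   emitted in reverse order (d2 <- d1 first, then d1 <- d0) to avoid
   clobbering d1 before it has been read.  */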
24059 void
24060 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24061 {
24062 unsigned int i;
24063
24064 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24065 || REGNO (operands[0]) < REGNO (operands[1]))
24066 {
24067 for (i = 0; i < count; i++)
24068 {
24069 operands[2 * i] = dest[i];
24070 operands[2 * i + 1] = src[i];
24071 }
24072 }
24073 else
24074 {
24075 for (i = 0; i < count; i++)
24076 {
24077 operands[2 * i] = dest[count - i - 1];
24078 operands[2 * i + 1] = src[count - i - 1];
24079 }
24080 }
24081 }
24082
24083 /* Split operands into moves from op[1] + op[2] into op[0]. */
24084
24085 void
24086 neon_split_vcombine (rtx operands[3])
24087 {
24088 unsigned int dest = REGNO (operands[0]);
24089 unsigned int src1 = REGNO (operands[1]);
24090 unsigned int src2 = REGNO (operands[2]);
24091 machine_mode halfmode = GET_MODE (operands[1]);
24092 unsigned int halfregs = REG_NREGS (operands[1]);
24093 rtx destlo, desthi;
24094
24095 if (src1 == dest && src2 == dest + halfregs)
24096 {
24097 /* No-op move. Can't split to nothing; emit something. */
24098 emit_note (NOTE_INSN_DELETED);
24099 return;
24100 }
24101
24102 /* Preserve register attributes for variable tracking. */
24103 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24104 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24105 GET_MODE_SIZE (halfmode));
24106
24107 /* Special case of reversed high/low parts. Use VSWP. */
24108 if (src2 == dest && src1 == dest + halfregs)
24109 {
24110 rtx x = gen_rtx_SET (destlo, operands[1]);
24111 rtx y = gen_rtx_SET (desthi, operands[2]);
24112 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24113 return;
24114 }
24115
24116 if (!reg_overlap_mentioned_p (operands[2], destlo))
24117 {
24118 /* Try to avoid unnecessary moves if part of the result
24119 is in the right place already. */
24120 if (src1 != dest)
24121 emit_move_insn (destlo, operands[1]);
24122 if (src2 != dest + halfregs)
24123 emit_move_insn (desthi, operands[2]);
24124 }
24125 else
24126 {
24127 if (src2 != dest + halfregs)
24128 emit_move_insn (desthi, operands[2]);
24129 if (src1 != dest)
24130 emit_move_insn (destlo, operands[1]);
24131 }
24132 }
24133 \f
24134 /* Return the number (counting from 0) of
24135 the least significant set bit in MASK. */
24136
24137 inline static int
24138 number_of_first_bit_set (unsigned mask)
24139 {
24140 return ctz_hwi (mask);
24141 }
24142
24143 /* Like emit_multi_reg_push, but allowing for a different set of
24144 registers to be described as saved. MASK is the set of registers
24145 to be saved; REAL_REGS is the set of registers to be described as
24146 saved. If REAL_REGS is 0, only describe the stack adjustment. */
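/* For example, MASK = REAL_REGS = {r4, r5, lr} emits a single
   "push {r4, r5, lr}", and the REG_FRAME_RELATED_EXPR note describes the
   12-byte stack adjustment plus one store per register for the unwinder.  */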
24147
24148 static rtx_insn *
24149 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24150 {
24151 unsigned long regno;
24152 rtx par[10], tmp, reg;
24153 rtx_insn *insn;
24154 int i, j;
24155
24156 /* Build the parallel of the registers actually being stored. */
24157 for (i = 0; mask; ++i, mask &= mask - 1)
24158 {
24159 regno = ctz_hwi (mask);
24160 reg = gen_rtx_REG (SImode, regno);
24161
24162 if (i == 0)
24163 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24164 else
24165 tmp = gen_rtx_USE (VOIDmode, reg);
24166
24167 par[i] = tmp;
24168 }
24169
24170 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24171 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24172 tmp = gen_frame_mem (BLKmode, tmp);
24173 tmp = gen_rtx_SET (tmp, par[0]);
24174 par[0] = tmp;
24175
24176 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24177 insn = emit_insn (tmp);
24178
24179 /* Always build the stack adjustment note for unwind info. */
24180 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24181 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24182 par[0] = tmp;
24183
24184 /* Build the parallel of the registers recorded as saved for unwind. */
24185 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24186 {
24187 regno = ctz_hwi (real_regs);
24188 reg = gen_rtx_REG (SImode, regno);
24189
24190 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24191 tmp = gen_frame_mem (SImode, tmp);
24192 tmp = gen_rtx_SET (tmp, reg);
24193 RTX_FRAME_RELATED_P (tmp) = 1;
24194 par[j + 1] = tmp;
24195 }
24196
24197 if (j == 0)
24198 tmp = par[0];
24199 else
24200 {
24201 RTX_FRAME_RELATED_P (par[0]) = 1;
24202 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24203 }
24204
24205 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24206
24207 return insn;
24208 }
24209
24210 /* Emit code to push or pop registers to or from the stack. F is the
24211 assembly file. MASK is the registers to pop. */
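/* For example, MASK == 0x000f produces "pop {r0, r1, r2, r3}"; if the PC bit
   is also set and no interworking, backtrace, EH-return or CMSE sequence is
   needed, "pc" is simply appended to the register list.  */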
24212 static void
24213 thumb_pop (FILE *f, unsigned long mask)
24214 {
24215 int regno;
24216 int lo_mask = mask & 0xFF;
24217
24218 gcc_assert (mask);
24219
24220 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24221 {
24222 /* Special case. Do not generate a POP PC statement here, do it in
24223 thumb_exit(). */
24224 thumb_exit (f, -1);
24225 return;
24226 }
24227
24228 fprintf (f, "\tpop\t{");
24229
24230 /* Look at the low registers first. */
24231 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24232 {
24233 if (lo_mask & 1)
24234 {
24235 asm_fprintf (f, "%r", regno);
24236
24237 if ((lo_mask & ~1) != 0)
24238 fprintf (f, ", ");
24239 }
24240 }
24241
24242 if (mask & (1 << PC_REGNUM))
24243 {
24244 /* Catch popping the PC. */
24245 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24246 || IS_CMSE_ENTRY (arm_current_func_type ()))
24247 {
24248 /* The PC is never popped directly; instead
24249 it is popped into r3 and then BX is used. */
24250 fprintf (f, "}\n");
24251
24252 thumb_exit (f, -1);
24253
24254 return;
24255 }
24256 else
24257 {
24258 if (mask & 0xFF)
24259 fprintf (f, ", ");
24260
24261 asm_fprintf (f, "%r", PC_REGNUM);
24262 }
24263 }
24264
24265 fprintf (f, "}\n");
24266 }
24267
24268 /* Generate code to return from a thumb function.
24269 If 'reg_containing_return_addr' is -1, then the return address is
24270 actually on the stack, at the stack pointer.
24271
24272 Note: do not forget to update the length attribute of the corresponding insn
24273 pattern when changing assembly output (e.g. the length attribute of epilogue_insns when
24274 updating Armv8-M Baseline Security Extensions register clearing
24275 sequences). */
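/* A rough example of the output, assuming interworking is required and the
   return address of a void function is on the stack: the code below pops it
   into the lowest free argument register and branches through it, e.g.
   "pop {r0}" followed by "bx r0".  */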
24276 static void
24277 thumb_exit (FILE *f, int reg_containing_return_addr)
24278 {
24279 unsigned regs_available_for_popping;
24280 unsigned regs_to_pop;
24281 int pops_needed;
24282 unsigned available;
24283 unsigned required;
24284 machine_mode mode;
24285 int size;
24286 int restore_a4 = FALSE;
24287
24288 /* Compute the registers we need to pop. */
24289 regs_to_pop = 0;
24290 pops_needed = 0;
24291
24292 if (reg_containing_return_addr == -1)
24293 {
24294 regs_to_pop |= 1 << LR_REGNUM;
24295 ++pops_needed;
24296 }
24297
24298 if (TARGET_BACKTRACE)
24299 {
24300 /* Restore the (ARM) frame pointer and stack pointer. */
24301 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24302 pops_needed += 2;
24303 }
24304
24305 /* If there is nothing to pop then just emit the BX instruction and
24306 return. */
24307 if (pops_needed == 0)
24308 {
24309 if (crtl->calls_eh_return)
24310 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24311
24312 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24313 {
24314 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24315 reg_containing_return_addr);
24316 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24317 }
24318 else
24319 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24320 return;
24321 }
24322 /* Otherwise if we are not supporting interworking and we have not created
24323 a backtrace structure and the function was not entered in ARM mode then
24324 just pop the return address straight into the PC. */
24325 else if (!TARGET_INTERWORK
24326 && !TARGET_BACKTRACE
24327 && !is_called_in_ARM_mode (current_function_decl)
24328 && !crtl->calls_eh_return
24329 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24330 {
24331 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24332 return;
24333 }
24334
24335 /* Find out how many of the (return) argument registers we can corrupt. */
24336 regs_available_for_popping = 0;
24337
24338 /* If returning via __builtin_eh_return, the bottom three registers
24339 all contain information needed for the return. */
24340 if (crtl->calls_eh_return)
24341 size = 12;
24342 else
24343 {
24344 /* Deduce the registers used from the function's
24345 return value. This is more reliable than examining
24346 df_regs_ever_live_p () because that will be set if the register is
24347 ever used in the function, not just if the register is used
24348 to hold a return value. */
24349
24350 if (crtl->return_rtx != 0)
24351 mode = GET_MODE (crtl->return_rtx);
24352 else
24353 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24354
24355 size = GET_MODE_SIZE (mode);
24356
24357 if (size == 0)
24358 {
24359 /* In a void function we can use any argument register.
24360 In a function that returns a structure on the stack
24361 we can use the second and third argument registers. */
24362 if (mode == VOIDmode)
24363 regs_available_for_popping =
24364 (1 << ARG_REGISTER (1))
24365 | (1 << ARG_REGISTER (2))
24366 | (1 << ARG_REGISTER (3));
24367 else
24368 regs_available_for_popping =
24369 (1 << ARG_REGISTER (2))
24370 | (1 << ARG_REGISTER (3));
24371 }
24372 else if (size <= 4)
24373 regs_available_for_popping =
24374 (1 << ARG_REGISTER (2))
24375 | (1 << ARG_REGISTER (3));
24376 else if (size <= 8)
24377 regs_available_for_popping =
24378 (1 << ARG_REGISTER (3));
24379 }
24380
24381 /* Match registers to be popped with registers into which we pop them. */
24382 for (available = regs_available_for_popping,
24383 required = regs_to_pop;
24384 required != 0 && available != 0;
24385 available &= ~(available & - available),
24386 required &= ~(required & - required))
24387 -- pops_needed;
24388
24389 /* If we have any popping registers left over, remove them. */
24390 if (available > 0)
24391 regs_available_for_popping &= ~available;
24392
24393 /* Otherwise if we need another popping register we can use
24394 the fourth argument register. */
24395 else if (pops_needed)
24396 {
24397 /* If we have not found any free argument registers and
24398 reg a4 contains the return address, we must move it. */
24399 if (regs_available_for_popping == 0
24400 && reg_containing_return_addr == LAST_ARG_REGNUM)
24401 {
24402 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24403 reg_containing_return_addr = LR_REGNUM;
24404 }
24405 else if (size > 12)
24406 {
24407 /* Register a4 is being used to hold part of the return value,
24408 but we have dire need of a free, low register. */
24409 restore_a4 = TRUE;
24410
24411 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24412 }
24413
24414 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24415 {
24416 /* The fourth argument register is available. */
24417 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24418
24419 --pops_needed;
24420 }
24421 }
24422
24423 /* Pop as many registers as we can. */
24424 thumb_pop (f, regs_available_for_popping);
24425
24426 /* Process the registers we popped. */
24427 if (reg_containing_return_addr == -1)
24428 {
24429 /* The return address was popped into the lowest numbered register. */
24430 regs_to_pop &= ~(1 << LR_REGNUM);
24431
24432 reg_containing_return_addr =
24433 number_of_first_bit_set (regs_available_for_popping);
24434
24435 /* Remove this register from the mask of available registers, so that
24436 the return address will not be corrupted by further pops. */
24437 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24438 }
24439
24440 /* If we popped other registers then handle them here. */
24441 if (regs_available_for_popping)
24442 {
24443 int frame_pointer;
24444
24445 /* Work out which register currently contains the frame pointer. */
24446 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24447
24448 /* Move it into the correct place. */
24449 asm_fprintf (f, "\tmov\t%r, %r\n",
24450 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24451
24452 /* (Temporarily) remove it from the mask of popped registers. */
24453 regs_available_for_popping &= ~(1 << frame_pointer);
24454 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24455
24456 if (regs_available_for_popping)
24457 {
24458 int stack_pointer;
24459
24460 /* We popped the stack pointer as well,
24461 find the register that contains it. */
24462 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24463
24464 /* Move it into the stack register. */
24465 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24466
24467 /* At this point we have popped all necessary registers, so
24468 do not worry about restoring regs_available_for_popping
24469 to its correct value:
24470
24471 assert (pops_needed == 0)
24472 assert (regs_available_for_popping == (1 << frame_pointer))
24473 assert (regs_to_pop == (1 << STACK_POINTER)) */
24474 }
24475 else
24476 {
24477 /* Since we have just moved the popped value into the frame
24478 pointer, the popping register is available for reuse, and
24479 we know that we still have the stack pointer left to pop. */
24480 regs_available_for_popping |= (1 << frame_pointer);
24481 }
24482 }
24483
24484 /* If we still have registers left on the stack, but we no longer have
24485 any registers into which we can pop them, then we must move the return
24486 address into the link register and make available the register that
24487 contained it. */
24488 if (regs_available_for_popping == 0 && pops_needed > 0)
24489 {
24490 regs_available_for_popping |= 1 << reg_containing_return_addr;
24491
24492 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24493 reg_containing_return_addr);
24494
24495 reg_containing_return_addr = LR_REGNUM;
24496 }
24497
24498 /* If we have registers left on the stack then pop some more.
24499 We know that at most we will want to pop FP and SP. */
24500 if (pops_needed > 0)
24501 {
24502 int popped_into;
24503 int move_to;
24504
24505 thumb_pop (f, regs_available_for_popping);
24506
24507 /* We have popped either FP or SP.
24508 Move whichever one it is into the correct register. */
24509 popped_into = number_of_first_bit_set (regs_available_for_popping);
24510 move_to = number_of_first_bit_set (regs_to_pop);
24511
24512 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24513 --pops_needed;
24514 }
24515
24516 /* If we still have not popped everything then we must have only
24517 had one register available to us and we are now popping the SP. */
24518 if (pops_needed > 0)
24519 {
24520 int popped_into;
24521
24522 thumb_pop (f, regs_available_for_popping);
24523
24524 popped_into = number_of_first_bit_set (regs_available_for_popping);
24525
24526 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24527 /*
24528 assert (regs_to_pop == (1 << STACK_POINTER))
24529 assert (pops_needed == 1)
24530 */
24531 }
24532
24533 /* If necessary restore the a4 register. */
24534 if (restore_a4)
24535 {
24536 if (reg_containing_return_addr != LR_REGNUM)
24537 {
24538 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24539 reg_containing_return_addr = LR_REGNUM;
24540 }
24541
24542 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24543 }
24544
24545 if (crtl->calls_eh_return)
24546 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24547
24548 /* Return to caller. */
24549 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24550 {
24551 /* This is for the cases where LR is not being used to contain the return
24552 address. It may therefore contain information that we might not want
24553 to leak, hence it must be cleared. The value in R0 will never be a
24554 secret at this point, so it is safe to use it, see the clearing code
24555 in 'cmse_nonsecure_entry_clear_before_return'. */
24556 if (reg_containing_return_addr != LR_REGNUM)
24557 asm_fprintf (f, "\tmov\tlr, r0\n");
24558
24559 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24560 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24561 }
24562 else
24563 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24564 }
24565 \f
24566 /* Scan INSN just before assembler is output for it.
24567 For Thumb-1, we track the status of the condition codes; this
24568 information is used in the cbranchsi4_insn pattern. */
24569 void
24570 thumb1_final_prescan_insn (rtx_insn *insn)
24571 {
24572 if (flag_print_asm_name)
24573 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24574 INSN_ADDRESSES (INSN_UID (insn)));
24575 /* Don't overwrite the previous setter when we get to a cbranch. */
24576 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24577 {
24578 enum attr_conds conds;
24579
24580 if (cfun->machine->thumb1_cc_insn)
24581 {
24582 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24583 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24584 CC_STATUS_INIT;
24585 }
24586 conds = get_attr_conds (insn);
24587 if (conds == CONDS_SET)
24588 {
24589 rtx set = single_set (insn);
24590 cfun->machine->thumb1_cc_insn = insn;
24591 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24592 cfun->machine->thumb1_cc_op1 = const0_rtx;
24593 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24594 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24595 {
24596 rtx src1 = XEXP (SET_SRC (set), 1);
24597 if (src1 == const0_rtx)
24598 cfun->machine->thumb1_cc_mode = CCmode;
24599 }
24600 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24601 {
24602 /* Record the src register operand instead of dest because
24603 cprop_hardreg pass propagates src. */
24604 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24605 }
24606 }
24607 else if (conds != CONDS_NOCOND)
24608 cfun->machine->thumb1_cc_insn = NULL_RTX;
24609 }
24610
24611 /* Check if unexpected far jump is used. */
24612 if (cfun->machine->lr_save_eliminated
24613 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24614 internal_error ("Unexpected thumb1 far jump");
24615 }
24616
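/* Return 1 if VAL can be expressed as an 8-bit constant shifted left by
   0 to 24 bits, i.e. all of its set bits fit within one window of eight
   consecutive bit positions.  */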
24617 int
24618 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24619 {
24620 unsigned HOST_WIDE_INT mask = 0xff;
24621 int i;
24622
24623 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24624 if (val == 0) /* XXX */
24625 return 0;
24626
24627 for (i = 0; i < 25; i++)
24628 if ((val & (mask << i)) == val)
24629 return 1;
24630
24631 return 0;
24632 }
24633
24634 /* Returns nonzero if the current function contains,
24635 or might contain a far jump. */
24636 static int
24637 thumb_far_jump_used_p (void)
24638 {
24639 rtx_insn *insn;
24640 bool far_jump = false;
24641 unsigned int func_size = 0;
24642
24643 /* If we have already decided that far jumps may be used,
24644 do not bother checking again, and always return true even if
24645 it turns out that they are not being used. Once we have made
24646 the decision that far jumps are present (and that hence the link
24647 register will be pushed onto the stack) we cannot go back on it. */
24648 if (cfun->machine->far_jump_used)
24649 return 1;
24650
24651 /* If this function is not being called from the prologue/epilogue
24652 generation code then it must be being called from the
24653 INITIAL_ELIMINATION_OFFSET macro. */
24654 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24655 {
24656 /* In this case we know that we are being asked about the elimination
24657 of the arg pointer register. If that register is not being used,
24658 then there are no arguments on the stack, and we do not have to
24659 worry that a far jump might force the prologue to push the link
24660 register, changing the stack offsets. In this case we can just
24661 return false, since the presence of far jumps in the function will
24662 not affect stack offsets.
24663
24664 If the arg pointer is live (or if it was live, but has now been
24665 eliminated and so set to dead) then we do have to test to see if
24666 the function might contain a far jump. This test can lead to some
24667 false positives, since before reload is completed, the length of
24668 branch instructions is not known, so gcc defaults to returning their
24669 longest length, which in turn sets the far jump attribute to true.
24670
24671 A false positive will not result in bad code being generated, but it
24672 will result in a needless push and pop of the link register. We
24673 hope that this does not occur too often.
24674
24675 If we need doubleword stack alignment this could affect the other
24676 elimination offsets so we can't risk getting it wrong. */
24677 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24678 cfun->machine->arg_pointer_live = 1;
24679 else if (!cfun->machine->arg_pointer_live)
24680 return 0;
24681 }
24682
24683 /* We should not change far_jump_used during or after reload, as there is
24684 no chance to change stack frame layout. */
24685 if (reload_in_progress || reload_completed)
24686 return 0;
24687
24688 /* Check to see if the function contains a branch
24689 insn with the far jump attribute set. */
24690 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24691 {
24692 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24693 {
24694 far_jump = true;
24695 }
24696 func_size += get_attr_length (insn);
24697 }
24698
24699 /* The far_jump attribute is always true for thumb1 before the
24700 shorten_branches pass, so checking it before that pass is not
24701 very useful.
24702
24703 The following heuristic tries to estimate more accurately whether a far
24704 jump may finally be used. The heuristic is very conservative, as there
24705 is no chance to roll back a decision not to use a far jump.
24706
24707 The Thumb1 long branch offset range is -2048 to 2046. In the worst case
24708 each 2-byte insn is associated with a 4-byte constant pool entry, so using
24709 a function size of 2048/3 as the threshold is conservative enough. */
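/* A worked example of that threshold: a function whose insns total roughly
   683 bytes could, in the worst case described above, grow to about
   683 * 3 = 2049 bytes once constant pools are counted, which is just
   beyond the reach of the Thumb1 long branch.  */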
24710 if (far_jump)
24711 {
24712 if ((func_size * 3) >= 2048)
24713 {
24714 /* Record the fact that we have decided that
24715 the function does use far jumps. */
24716 cfun->machine->far_jump_used = 1;
24717 return 1;
24718 }
24719 }
24720
24721 return 0;
24722 }
24723
24724 /* Return nonzero if FUNC must be entered in ARM mode. */
24725 static bool
24726 is_called_in_ARM_mode (tree func)
24727 {
24728 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24729
24730 /* Ignore the problem about functions whose address is taken. */
24731 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24732 return true;
24733
24734 #ifdef ARM_PE
24735 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24736 #else
24737 return false;
24738 #endif
24739 }
24740
24741 /* Given the stack offsets and register mask in OFFSETS, decide how
24742 many additional registers to push instead of subtracting a constant
24743 from SP. For epilogues the principle is the same except we use pop.
24744 FOR_PROLOGUE indicates which we're generating. */
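/* For example, a frame needing exactly 512 bytes cannot be allocated with a
   single Thumb1 "sub sp, #imm" (whose limit is 508); pushing one extra low
   register here shrinks the explicit adjustment to 508 and saves an
   instruction.  */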
24745 static int
24746 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24747 {
24748 HOST_WIDE_INT amount;
24749 unsigned long live_regs_mask = offsets->saved_regs_mask;
24750 /* Extract a mask of the ones we can give to the Thumb's push/pop
24751 instruction. */
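/* 0x40ff covers r0-r7 (bits 0-7) plus lr (bit 14), which PUSH can store
   directly; POP can only restore r0-r7 (and pc), hence the 0xff mask for
   the epilogue case.  */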
24752 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24753 /* Then count how many other high registers will need to be pushed. */
24754 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24755 int n_free, reg_base, size;
24756
24757 if (!for_prologue && frame_pointer_needed)
24758 amount = offsets->locals_base - offsets->saved_regs;
24759 else
24760 amount = offsets->outgoing_args - offsets->saved_regs;
24761
24762 /* If the stack frame size is 512 exactly, we can save one load
24763 instruction, which should make this a win even when optimizing
24764 for speed. */
24765 if (!optimize_size && amount != 512)
24766 return 0;
24767
24768 /* Can't do this if there are high registers to push. */
24769 if (high_regs_pushed != 0)
24770 return 0;
24771
24772 /* Shouldn't do it in the prologue if no registers would normally
24773 be pushed at all. In the epilogue, also allow it if we'll have
24774 a pop insn for the PC. */
24775 if (l_mask == 0
24776 && (for_prologue
24777 || TARGET_BACKTRACE
24778 || (live_regs_mask & 1 << LR_REGNUM) == 0
24779 || TARGET_INTERWORK
24780 || crtl->args.pretend_args_size != 0))
24781 return 0;
24782
24783 /* Don't do this if thumb_expand_prologue wants to emit instructions
24784 between the push and the stack frame allocation. */
24785 if (for_prologue
24786 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24787 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24788 return 0;
24789
24790 reg_base = 0;
24791 n_free = 0;
24792 if (!for_prologue)
24793 {
24794 size = arm_size_return_regs ();
24795 reg_base = ARM_NUM_INTS (size);
24796 live_regs_mask >>= reg_base;
24797 }
24798
24799 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24800 && (for_prologue || call_used_regs[reg_base + n_free]))
24801 {
24802 live_regs_mask >>= 1;
24803 n_free++;
24804 }
24805
24806 if (n_free == 0)
24807 return 0;
24808 gcc_assert (amount / 4 * 4 == amount);
24809
24810 if (amount >= 512 && (amount - n_free * 4) < 512)
24811 return (amount - 508) / 4;
24812 if (amount <= n_free * 4)
24813 return amount / 4;
24814 return 0;
24815 }
24816
24817 /* The parts of the epilogue which aren't usefully expanded as rtl. */
24818 const char *
24819 thumb1_unexpanded_epilogue (void)
24820 {
24821 arm_stack_offsets *offsets;
24822 int regno;
24823 unsigned long live_regs_mask = 0;
24824 int high_regs_pushed = 0;
24825 int extra_pop;
24826 int had_to_push_lr;
24827 int size;
24828
24829 if (cfun->machine->return_used_this_function != 0)
24830 return "";
24831
24832 if (IS_NAKED (arm_current_func_type ()))
24833 return "";
24834
24835 offsets = arm_get_frame_offsets ();
24836 live_regs_mask = offsets->saved_regs_mask;
24837 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24838
24839 /* Deduce the registers used from the function's return value.
24840 This is more reliable than examining df_regs_ever_live_p () because that
24841 will be set if the register is ever used in the function, not just if
24842 the register is used to hold a return value. */
24843 size = arm_size_return_regs ();
24844
24845 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24846 if (extra_pop > 0)
24847 {
24848 unsigned long extra_mask = (1 << extra_pop) - 1;
24849 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24850 }
24851
24852 /* The prologue may have pushed some high registers to use as
24853 work registers, e.g. the testsuite file:
24854 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24855 compiles to produce:
24856 push {r4, r5, r6, r7, lr}
24857 mov r7, r9
24858 mov r6, r8
24859 push {r6, r7}
24860 as part of the prologue. We have to undo that pushing here. */
24861
24862 if (high_regs_pushed)
24863 {
24864 unsigned long mask = live_regs_mask & 0xff;
24865 int next_hi_reg;
24866
24867 /* The available low registers depend on the size of the value we are
24868 returning. */
24869 if (size <= 12)
24870 mask |= 1 << 3;
24871 if (size <= 8)
24872 mask |= 1 << 2;
24873
24874 if (mask == 0)
24875 /* Oh dear! We have no low registers into which we can pop
24876 high registers! */
24877 internal_error
24878 ("no low registers available for popping high registers");
24879
24880 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24881 if (live_regs_mask & (1 << next_hi_reg))
24882 break;
24883
24884 while (high_regs_pushed)
24885 {
24886 /* Find lo register(s) into which the high register(s) can
24887 be popped. */
24888 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24889 {
24890 if (mask & (1 << regno))
24891 high_regs_pushed--;
24892 if (high_regs_pushed == 0)
24893 break;
24894 }
24895
24896 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24897
24898 /* Pop the values into the low register(s). */
24899 thumb_pop (asm_out_file, mask);
24900
24901 /* Move the value(s) into the high registers. */
24902 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24903 {
24904 if (mask & (1 << regno))
24905 {
24906 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24907 regno);
24908
24909 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24910 if (live_regs_mask & (1 << next_hi_reg))
24911 break;
24912 }
24913 }
24914 }
24915 live_regs_mask &= ~0x0f00;
24916 }
24917
24918 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24919 live_regs_mask &= 0xff;
24920
24921 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24922 {
24923 /* Pop the return address into the PC. */
24924 if (had_to_push_lr)
24925 live_regs_mask |= 1 << PC_REGNUM;
24926
24927 /* Either no argument registers were pushed or a backtrace
24928 structure was created which includes an adjusted stack
24929 pointer, so just pop everything. */
24930 if (live_regs_mask)
24931 thumb_pop (asm_out_file, live_regs_mask);
24932
24933 /* We have either just popped the return address into the
24934 PC or it was kept in LR for the entire function.
24935 Note that thumb_pop has already called thumb_exit if the
24936 PC was in the list. */
24937 if (!had_to_push_lr)
24938 thumb_exit (asm_out_file, LR_REGNUM);
24939 }
24940 else
24941 {
24942 /* Pop everything but the return address. */
24943 if (live_regs_mask)
24944 thumb_pop (asm_out_file, live_regs_mask);
24945
24946 if (had_to_push_lr)
24947 {
24948 if (size > 12)
24949 {
24950 /* We have no free low regs, so save one. */
24951 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24952 LAST_ARG_REGNUM);
24953 }
24954
24955 /* Get the return address into a temporary register. */
24956 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24957
24958 if (size > 12)
24959 {
24960 /* Move the return address to lr. */
24961 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24962 LAST_ARG_REGNUM);
24963 /* Restore the low register. */
24964 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24965 IP_REGNUM);
24966 regno = LR_REGNUM;
24967 }
24968 else
24969 regno = LAST_ARG_REGNUM;
24970 }
24971 else
24972 regno = LR_REGNUM;
24973
24974 /* Remove the argument registers that were pushed onto the stack. */
24975 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24976 SP_REGNUM, SP_REGNUM,
24977 crtl->args.pretend_args_size);
24978
24979 thumb_exit (asm_out_file, regno);
24980 }
24981
24982 return "";
24983 }
24984
24985 /* Functions to save and restore machine-specific function data. */
24986 static struct machine_function *
24987 arm_init_machine_status (void)
24988 {
24989 struct machine_function *machine;
24990 machine = ggc_cleared_alloc<machine_function> ();
24991
24992 #if ARM_FT_UNKNOWN != 0
24993 machine->func_type = ARM_FT_UNKNOWN;
24994 #endif
24995 machine->static_chain_stack_bytes = -1;
24996 return machine;
24997 }
24998
24999 /* Return an RTX indicating where the return address to the
25000 calling function can be found. */
25001 rtx
25002 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
25003 {
25004 if (count != 0)
25005 return NULL_RTX;
25006
25007 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
25008 }
25009
25010 /* Do anything needed before RTL is emitted for each function. */
25011 void
25012 arm_init_expanders (void)
25013 {
25014 /* Arrange to initialize and mark the machine per-function status. */
25015 init_machine_status = arm_init_machine_status;
25016
25017 /* This is to stop the combine pass optimizing away the alignment
25018 adjustment of va_arg. */
25019 /* ??? It is claimed that this should not be necessary. */
25020 if (cfun)
25021 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
25022 }
25023
25024 /* Return true if FUNC will be compiled in a different mode (ARM/Thumb) from the current mode. */
25025
25026 bool
25027 arm_change_mode_p (tree func)
25028 {
25029 if (TREE_CODE (func) != FUNCTION_DECL)
25030 return false;
25031
25032 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
25033
25034 if (!callee_tree)
25035 callee_tree = target_option_default_node;
25036
25037 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25038 int flags = callee_opts->x_target_flags;
25039
25040 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
25041 }
25042
25043 /* Like arm_compute_initial_elimination_offset. Simpler because there
25044 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25045 to point at the base of the local variables after static stack
25046 space for a function has been allocated. */
25047
25048 HOST_WIDE_INT
25049 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
25050 {
25051 arm_stack_offsets *offsets;
25052
25053 offsets = arm_get_frame_offsets ();
25054
25055 switch (from)
25056 {
25057 case ARG_POINTER_REGNUM:
25058 switch (to)
25059 {
25060 case STACK_POINTER_REGNUM:
25061 return offsets->outgoing_args - offsets->saved_args;
25062
25063 case FRAME_POINTER_REGNUM:
25064 return offsets->soft_frame - offsets->saved_args;
25065
25066 case ARM_HARD_FRAME_POINTER_REGNUM:
25067 return offsets->saved_regs - offsets->saved_args;
25068
25069 case THUMB_HARD_FRAME_POINTER_REGNUM:
25070 return offsets->locals_base - offsets->saved_args;
25071
25072 default:
25073 gcc_unreachable ();
25074 }
25075 break;
25076
25077 case FRAME_POINTER_REGNUM:
25078 switch (to)
25079 {
25080 case STACK_POINTER_REGNUM:
25081 return offsets->outgoing_args - offsets->soft_frame;
25082
25083 case ARM_HARD_FRAME_POINTER_REGNUM:
25084 return offsets->saved_regs - offsets->soft_frame;
25085
25086 case THUMB_HARD_FRAME_POINTER_REGNUM:
25087 return offsets->locals_base - offsets->soft_frame;
25088
25089 default:
25090 gcc_unreachable ();
25091 }
25092 break;
25093
25094 default:
25095 gcc_unreachable ();
25096 }
25097 }
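
/* Worked example (hypothetical numbers): with saved_args = 0, saved_regs = 8,
   soft_frame = 8 and outgoing_args = 24, eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM yields 24 - 0 = 24, while eliminating
   FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM yields 24 - 8 = 16.  */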
25098
25099 /* Generate the function's prologue. */
25100
25101 void
25102 thumb1_expand_prologue (void)
25103 {
25104 rtx_insn *insn;
25105
25106 HOST_WIDE_INT amount;
25107 HOST_WIDE_INT size;
25108 arm_stack_offsets *offsets;
25109 unsigned long func_type;
25110 int regno;
25111 unsigned long live_regs_mask;
25112 unsigned long l_mask;
25113 unsigned high_regs_pushed = 0;
25114 bool lr_needs_saving;
25115
25116 func_type = arm_current_func_type ();
25117
25118 /* Naked functions don't have prologues. */
25119 if (IS_NAKED (func_type))
25120 {
25121 if (flag_stack_usage_info)
25122 current_function_static_stack_size = 0;
25123 return;
25124 }
25125
25126 if (IS_INTERRUPT (func_type))
25127 {
25128 error ("interrupt Service Routines cannot be coded in Thumb mode");
25129 return;
25130 }
25131
25132 if (is_called_in_ARM_mode (current_function_decl))
25133 emit_insn (gen_prologue_thumb1_interwork ());
25134
25135 offsets = arm_get_frame_offsets ();
25136 live_regs_mask = offsets->saved_regs_mask;
25137 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25138
25139 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25140 l_mask = live_regs_mask & 0x40ff;
25141 /* Then count how many other high registers will need to be pushed. */
25142 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25143
25144 if (crtl->args.pretend_args_size)
25145 {
25146 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25147
25148 if (cfun->machine->uses_anonymous_args)
25149 {
25150 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25151 unsigned long mask;
25152
25153 mask = 1ul << (LAST_ARG_REGNUM + 1);
25154 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25155
25156 insn = thumb1_emit_multi_reg_push (mask, 0);
25157 }
25158 else
25159 {
25160 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25161 stack_pointer_rtx, x));
25162 }
25163 RTX_FRAME_RELATED_P (insn) = 1;
25164 }
25165
25166 if (TARGET_BACKTRACE)
25167 {
25168 HOST_WIDE_INT offset = 0;
25169 unsigned work_register;
25170 rtx work_reg, x, arm_hfp_rtx;
25171
25172 /* We have been asked to create a stack backtrace structure.
25173 The code looks like this:
25174
25175 0 .align 2
25176 0 func:
25177 0 sub SP, #16 Reserve space for 4 registers.
25178 2 push {R7} Push low registers.
25179 4 add R7, SP, #20 Get the stack pointer before the push.
25180 6 str R7, [SP, #8] Store the stack pointer
25181 (before reserving the space).
25182 8 mov R7, PC Get hold of the start of this code + 12.
25183 10 str R7, [SP, #16] Store it.
25184 12 mov R7, FP Get hold of the current frame pointer.
25185 14 str R7, [SP, #4] Store it.
25186 16 mov R7, LR Get hold of the current return address.
25187 18 str R7, [SP, #12] Store it.
25188 20 add R7, SP, #16 Point at the start of the
25189 backtrace structure.
25190 22 mov FP, R7 Put this value into the frame pointer. */
25191
25192 work_register = thumb_find_work_register (live_regs_mask);
25193 work_reg = gen_rtx_REG (SImode, work_register);
25194 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25195
25196 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25197 stack_pointer_rtx, GEN_INT (-16)));
25198 RTX_FRAME_RELATED_P (insn) = 1;
25199
25200 if (l_mask)
25201 {
25202 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25203 RTX_FRAME_RELATED_P (insn) = 1;
25204 lr_needs_saving = false;
25205
25206 offset = bit_count (l_mask) * UNITS_PER_WORD;
25207 }
25208
25209 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25210 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25211
25212 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25213 x = gen_frame_mem (SImode, x);
25214 emit_move_insn (x, work_reg);
25215
25216 /* Make sure that the instruction fetching the PC is in the right place
25217 to calculate "start of backtrace creation code + 12". */
25218 /* ??? The stores using the common WORK_REG ought to be enough to
25219 prevent the scheduler from doing anything weird. Failing that
25220 we could always move all of the following into an UNSPEC_VOLATILE. */
25221 if (l_mask)
25222 {
25223 x = gen_rtx_REG (SImode, PC_REGNUM);
25224 emit_move_insn (work_reg, x);
25225
25226 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25227 x = gen_frame_mem (SImode, x);
25228 emit_move_insn (x, work_reg);
25229
25230 emit_move_insn (work_reg, arm_hfp_rtx);
25231
25232 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25233 x = gen_frame_mem (SImode, x);
25234 emit_move_insn (x, work_reg);
25235 }
25236 else
25237 {
25238 emit_move_insn (work_reg, arm_hfp_rtx);
25239
25240 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25241 x = gen_frame_mem (SImode, x);
25242 emit_move_insn (x, work_reg);
25243
25244 x = gen_rtx_REG (SImode, PC_REGNUM);
25245 emit_move_insn (work_reg, x);
25246
25247 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25248 x = gen_frame_mem (SImode, x);
25249 emit_move_insn (x, work_reg);
25250 }
25251
25252 x = gen_rtx_REG (SImode, LR_REGNUM);
25253 emit_move_insn (work_reg, x);
25254
25255 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25256 x = gen_frame_mem (SImode, x);
25257 emit_move_insn (x, work_reg);
25258
25259 x = GEN_INT (offset + 12);
25260 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25261
25262 emit_move_insn (arm_hfp_rtx, work_reg);
25263 }
25264 /* Optimization: If we are not pushing any low registers but we are going
25265 to push some high registers then delay our first push. This will just
25266 be a push of LR and we can combine it with the push of the first high
25267 register. */
25268 else if ((l_mask & 0xff) != 0
25269 || (high_regs_pushed == 0 && lr_needs_saving))
25270 {
25271 unsigned long mask = l_mask;
25272 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25273 insn = thumb1_emit_multi_reg_push (mask, mask);
25274 RTX_FRAME_RELATED_P (insn) = 1;
25275 lr_needs_saving = false;
25276 }
25277
25278 if (high_regs_pushed)
25279 {
25280 unsigned pushable_regs;
25281 unsigned next_hi_reg;
25282 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25283 : crtl->args.info.nregs;
25284 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25285
25286 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25287 if (live_regs_mask & (1 << next_hi_reg))
25288 break;
25289
25290 /* Here we need to mask out registers used for passing arguments
25291 even if they could otherwise be pushed. This avoids using them to
25292 stash the high registers, which could clobber still-live argument values. */
25293 pushable_regs = l_mask & (~arg_regs_mask);
25294 if (lr_needs_saving)
25295 pushable_regs &= ~(1 << LR_REGNUM);
25296
25297 if (pushable_regs == 0)
25298 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25299
25300 while (high_regs_pushed > 0)
25301 {
25302 unsigned long real_regs_mask = 0;
25303 unsigned long push_mask = 0;
25304
25305 for (regno = LR_REGNUM; regno >= 0; regno --)
25306 {
25307 if (pushable_regs & (1 << regno))
25308 {
25309 emit_move_insn (gen_rtx_REG (SImode, regno),
25310 gen_rtx_REG (SImode, next_hi_reg));
25311
25312 high_regs_pushed --;
25313 real_regs_mask |= (1 << next_hi_reg);
25314 push_mask |= (1 << regno);
25315
25316 if (high_regs_pushed)
25317 {
25318 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25319 next_hi_reg --)
25320 if (live_regs_mask & (1 << next_hi_reg))
25321 break;
25322 }
25323 else
25324 break;
25325 }
25326 }
25327
25328 /* If we had to find a work register and we have not yet
25329 saved the LR then add it to the list of regs to push. */
25330 if (lr_needs_saving)
25331 {
25332 push_mask |= 1 << LR_REGNUM;
25333 real_regs_mask |= 1 << LR_REGNUM;
25334 lr_needs_saving = false;
25335 }
25336
25337 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25338 RTX_FRAME_RELATED_P (insn) = 1;
25339 }
25340 }
25341
25342 /* Load the pic register before setting the frame pointer,
25343 so we can use r7 as a temporary work register. */
25344 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25345 arm_load_pic_register (live_regs_mask, NULL_RTX);
25346
25347 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25348 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25349 stack_pointer_rtx);
25350
25351 size = offsets->outgoing_args - offsets->saved_args;
25352 if (flag_stack_usage_info)
25353 current_function_static_stack_size = size;
25354
25355 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25356 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25357 || flag_stack_clash_protection)
25358 && size)
25359 sorry ("-fstack-check=specific for Thumb-1");
25360
25361 amount = offsets->outgoing_args - offsets->saved_regs;
25362 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25363 if (amount)
25364 {
25365 if (amount < 512)
25366 {
25367 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25368 GEN_INT (- amount)));
25369 RTX_FRAME_RELATED_P (insn) = 1;
25370 }
25371 else
25372 {
25373 rtx reg, dwarf;
25374
25375 /* The stack decrement is too big for an immediate value in a single
25376 insn. In theory we could issue multiple subtracts, but after
25377 three of them it becomes more space efficient to place the full
25378 value in the constant pool and load into a register. (Also the
25379 ARM debugger really likes to see only one stack decrement per
25380 function). So instead we look for a scratch register into which
25381 we can load the decrement, and then we subtract this from the
25382 stack pointer. Unfortunately on the thumb the only available
25383 scratch registers are the argument registers, and we cannot use
25384 these as they may hold arguments to the function. Instead we
25385 attempt to locate a call preserved register which is used by this
25386 function. If we can find one, then we know that it will have
25387 been pushed at the start of the prologue and so we can corrupt
25388 it now. */
25389 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25390 if (live_regs_mask & (1 << regno))
25391 break;
25392
25393 gcc_assert(regno <= LAST_LO_REGNUM);
25394
25395 reg = gen_rtx_REG (SImode, regno);
25396
25397 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25398
25399 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25400 stack_pointer_rtx, reg));
25401
25402 dwarf = gen_rtx_SET (stack_pointer_rtx,
25403 plus_constant (Pmode, stack_pointer_rtx,
25404 -amount));
25405 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25406 RTX_FRAME_RELATED_P (insn) = 1;
25407 }
25408 }
25409
25410 if (frame_pointer_needed)
25411 thumb_set_frame_pointer (offsets);
25412
25413 /* If we are profiling, make sure no instructions are scheduled before
25414 the call to mcount. Likewise if the user has requested no
25415 scheduling in the prologue. Likewise if we want non-call exceptions
25416 using the EABI unwinder, to prevent faulting instructions from being
25417 swapped with a stack adjustment. */
25418 if (crtl->profile || !TARGET_SCHED_PROLOG
25419 || (arm_except_unwind_info (&global_options) == UI_TARGET
25420 && cfun->can_throw_non_call_exceptions))
25421 emit_insn (gen_blockage ());
25422
25423 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25424 if (live_regs_mask & 0xff)
25425 cfun->machine->lr_save_eliminated = 0;
25426 }
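
/* As a rough illustration (not generated verbatim here): a small Thumb-1
   function that must save r4-r7 and lr and needs 16 bytes of locals would
   typically end up with a prologue along the lines of

	push	{r4, r5, r6, r7, lr}
	sub	sp, sp, #16

   where the single SUB comes from the "amount < 512" path above and the
   PUSH from thumb1_emit_multi_reg_push.  */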
25427
25428 /* Clear caller saved registers not used to pass return values and leaked
25429 condition flags before exiting a cmse_nonsecure_entry function. */
25430
25431 void
25432 cmse_nonsecure_entry_clear_before_return (void)
25433 {
25434 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25435 uint32_t padding_bits_to_clear = 0;
25436 auto_sbitmap to_clear_bitmap (maxregno + 1);
25437 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25438 tree result_type;
25439
25440 bitmap_clear (to_clear_bitmap);
25441 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25442 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25443
25444 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25445 registers. */
25446 if (TARGET_HARD_FLOAT)
25447 {
25448 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25449
25450 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25451
25452 /* Make sure we don't clear the two scratch registers used to clear the
25453 relevant FPSCR bits in output_return_instruction. */
25454 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25455 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25456 emit_use (gen_rtx_REG (SImode, 4));
25457 bitmap_clear_bit (to_clear_bitmap, 4);
25458 }
25459
25460 /* If the user has defined registers to be caller saved, these are no longer
25461 restored by the function before returning and must thus be cleared for
25462 security purposes. */
25463 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25464 {
25465 /* We do not touch registers that can be used to pass arguments as per
25466 the AAPCS, since these should never be made callee-saved by user
25467 options. */
25468 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25469 continue;
25470 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25471 continue;
25472 if (call_used_regs[regno])
25473 bitmap_set_bit (to_clear_bitmap, regno);
25474 }
25475
25476 /* Make sure we do not clear the registers used to return the result in. */
25477 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25478 if (!VOID_TYPE_P (result_type))
25479 {
25480 uint64_t to_clear_return_mask;
25481 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25482
25483 /* No need to check that we return in registers, because we don't
25484 support returning on stack yet. */
25485 gcc_assert (REG_P (result_rtl));
25486 to_clear_return_mask
25487 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25488 &padding_bits_to_clear);
25489 if (to_clear_return_mask)
25490 {
25491 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25492 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25493 {
25494 if (to_clear_return_mask & (1ULL << regno))
25495 bitmap_clear_bit (to_clear_bitmap, regno);
25496 }
25497 }
25498 }
25499
25500 if (padding_bits_to_clear != 0)
25501 {
25502 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25503 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25504
25505 /* Padding_bits_to_clear is not 0 so we know we are dealing with
25506 returning a composite type, which only uses r0. Let's make sure that
25507 r1-r3 are cleared too. */
25508 bitmap_clear (to_clear_arg_regs_bitmap);
25509 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25510 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25511 }
25512
25513 /* Clear full registers that leak before returning. */
25514 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25515 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25516 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25517 clearing_reg);
25518 }
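
/* Illustrative example: for a cmse_nonsecure_entry function returning an
   "int" with -mfloat-abi=soft, the result comes back in r0, so the registers
   cleared here are r1-r3 and ip (plus any call-clobbered register the user
   has turned into a caller-saved one).  With a hard-float ABI the bitmap
   also covers s0-s15, minus whatever the return value occupies, while ip
   and r4 are left alone as scratch for the FPSCR clearing.  */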
25519
25520 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25521 single POP instruction can be generated. LR should be replaced by PC. All
25522 the checks required are already done by USE_RETURN_INSN (). Hence,
25523 all we really need to check here is whether a single register or
25524 multiple registers are being returned. */
25525 void
25526 thumb2_expand_return (bool simple_return)
25527 {
25528 int i, num_regs;
25529 unsigned long saved_regs_mask;
25530 arm_stack_offsets *offsets;
25531
25532 offsets = arm_get_frame_offsets ();
25533 saved_regs_mask = offsets->saved_regs_mask;
25534
25535 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25536 if (saved_regs_mask & (1 << i))
25537 num_regs++;
25538
25539 if (!simple_return && saved_regs_mask)
25540 {
25541 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25542 functions or adapt code to handle according to ACLE. This path should
25543 not be reachable for cmse_nonsecure_entry functions though we prefer
25544 to assert it for now to ensure that future code changes do not silently
25545 change this behavior. */
25546 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25547 if (num_regs == 1)
25548 {
25549 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25550 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25551 rtx addr = gen_rtx_MEM (SImode,
25552 gen_rtx_POST_INC (SImode,
25553 stack_pointer_rtx));
25554 set_mem_alias_set (addr, get_frame_alias_set ());
25555 XVECEXP (par, 0, 0) = ret_rtx;
25556 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25557 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25558 emit_jump_insn (par);
25559 }
25560 else
25561 {
25562 saved_regs_mask &= ~ (1 << LR_REGNUM);
25563 saved_regs_mask |= (1 << PC_REGNUM);
25564 arm_emit_multi_reg_pop (saved_regs_mask);
25565 }
25566 }
25567 else
25568 {
25569 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25570 cmse_nonsecure_entry_clear_before_return ();
25571 emit_jump_insn (simple_return_rtx);
25572 }
25573 }
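
/* For illustration: a Thumb-2 function whose only saved registers were
   {r4, lr} returns through the multi-register path as "pop {r4, pc}",
   while one that saved just lr takes the single-register path and reloads
   the PC with a post-incremented SP load.  A function with nothing saved,
   or one expanded as a simple return, falls through to the plain return
   branch at the end.  */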
25574
25575 void
25576 thumb1_expand_epilogue (void)
25577 {
25578 HOST_WIDE_INT amount;
25579 arm_stack_offsets *offsets;
25580 int regno;
25581
25582 /* Naked functions don't have epilogues. */
25583 if (IS_NAKED (arm_current_func_type ()))
25584 return;
25585
25586 offsets = arm_get_frame_offsets ();
25587 amount = offsets->outgoing_args - offsets->saved_regs;
25588
25589 if (frame_pointer_needed)
25590 {
25591 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25592 amount = offsets->locals_base - offsets->saved_regs;
25593 }
25594 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25595
25596 gcc_assert (amount >= 0);
25597 if (amount)
25598 {
25599 emit_insn (gen_blockage ());
25600
25601 if (amount < 512)
25602 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25603 GEN_INT (amount)));
25604 else
25605 {
25606 /* r3 is always free in the epilogue. */
25607 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25608
25609 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25610 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25611 }
25612 }
25613
25614 /* Emit a USE (stack_pointer_rtx), so that
25615 the stack adjustment will not be deleted. */
25616 emit_insn (gen_force_register_use (stack_pointer_rtx));
25617
25618 if (crtl->profile || !TARGET_SCHED_PROLOG)
25619 emit_insn (gen_blockage ());
25620
25621 /* Emit a clobber for each insn that will be restored in the epilogue,
25622 so that flow2 will get register lifetimes correct. */
25623 for (regno = 0; regno < 13; regno++)
25624 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25625 emit_clobber (gen_rtx_REG (SImode, regno));
25626
25627 if (! df_regs_ever_live_p (LR_REGNUM))
25628 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25629
25630 /* Clear all caller-saved regs that are not used to return. */
25631 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25632 cmse_nonsecure_entry_clear_before_return ();
25633 }
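
/* Continuing the prologue illustration above, the matching epilogue for
   such a function would typically be

	add	sp, sp, #16
	pop	{r4, r5, r6, r7, pc}

   although only the stack adjustment is emitted here; the register pops
   are produced later, at output time, by thumb1_unexpanded_epilogue.  */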
25634
25635 /* Epilogue code for APCS frame. */
25636 static void
25637 arm_expand_epilogue_apcs_frame (bool really_return)
25638 {
25639 unsigned long func_type;
25640 unsigned long saved_regs_mask;
25641 int num_regs = 0;
25642 int i;
25643 int floats_from_frame = 0;
25644 arm_stack_offsets *offsets;
25645
25646 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25647 func_type = arm_current_func_type ();
25648
25649 /* Get frame offsets for ARM. */
25650 offsets = arm_get_frame_offsets ();
25651 saved_regs_mask = offsets->saved_regs_mask;
25652
25653 /* Find the offset of the floating-point save area in the frame. */
25654 floats_from_frame
25655 = (offsets->saved_args
25656 + arm_compute_static_chain_stack_bytes ()
25657 - offsets->frame);
25658
25659 /* Compute how many core registers are saved and how far away the floats are. */
25660 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25661 if (saved_regs_mask & (1 << i))
25662 {
25663 num_regs++;
25664 floats_from_frame += 4;
25665 }
25666
25667 if (TARGET_HARD_FLOAT)
25668 {
25669 int start_reg;
25670 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25671
25672 /* The offset is from IP_REGNUM. */
25673 int saved_size = arm_get_vfp_saved_size ();
25674 if (saved_size > 0)
25675 {
25676 rtx_insn *insn;
25677 floats_from_frame += saved_size;
25678 insn = emit_insn (gen_addsi3 (ip_rtx,
25679 hard_frame_pointer_rtx,
25680 GEN_INT (-floats_from_frame)));
25681 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25682 ip_rtx, hard_frame_pointer_rtx);
25683 }
25684
25685 /* Generate VFP register multi-pop. */
25686 start_reg = FIRST_VFP_REGNUM;
25687
25688 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25689 /* Look for a case where a reg does not need restoring. */
25690 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25691 && (!df_regs_ever_live_p (i + 1)
25692 || call_used_regs[i + 1]))
25693 {
25694 if (start_reg != i)
25695 arm_emit_vfp_multi_reg_pop (start_reg,
25696 (i - start_reg) / 2,
25697 gen_rtx_REG (SImode,
25698 IP_REGNUM));
25699 start_reg = i + 2;
25700 }
25701
25702 /* Restore the remaining regs that we have discovered (or possibly
25703 even all of them, if the conditional in the for loop never
25704 fired). */
25705 if (start_reg != i)
25706 arm_emit_vfp_multi_reg_pop (start_reg,
25707 (i - start_reg) / 2,
25708 gen_rtx_REG (SImode, IP_REGNUM));
25709 }
25710
25711 if (TARGET_IWMMXT)
25712 {
25713 /* The frame pointer is guaranteed to be non-double-word aligned, as
25714 it is set to double-word-aligned old_stack_pointer - 4. */
25715 rtx_insn *insn;
25716 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25717
25718 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25719 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25720 {
25721 rtx addr = gen_frame_mem (V2SImode,
25722 plus_constant (Pmode, hard_frame_pointer_rtx,
25723 - lrm_count * 4));
25724 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25725 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25726 gen_rtx_REG (V2SImode, i),
25727 NULL_RTX);
25728 lrm_count += 2;
25729 }
25730 }
25731
25732 /* saved_regs_mask should contain IP, which holds the old stack pointer
25733 from the time the activation record was created. Since SP and IP are adjacent registers,
25734 we can restore the value directly into SP. */
25735 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25736 saved_regs_mask &= ~(1 << IP_REGNUM);
25737 saved_regs_mask |= (1 << SP_REGNUM);
25738
25739 /* There are two registers left in saved_regs_mask - LR and PC. We
25740 only need to restore LR (the return address), but to
25741 save time we can load it directly into PC, unless we need a
25742 special function exit sequence, or we are not really returning. */
25743 if (really_return
25744 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25745 && !crtl->calls_eh_return)
25746 /* Delete LR from the register mask, so that LR on
25747 the stack is loaded into the PC in the register mask. */
25748 saved_regs_mask &= ~(1 << LR_REGNUM);
25749 else
25750 saved_regs_mask &= ~(1 << PC_REGNUM);
25751
25752 num_regs = bit_count (saved_regs_mask);
25753 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25754 {
25755 rtx_insn *insn;
25756 emit_insn (gen_blockage ());
25757 /* Unwind the stack to just below the saved registers. */
25758 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25759 hard_frame_pointer_rtx,
25760 GEN_INT (- 4 * num_regs)));
25761
25762 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25763 stack_pointer_rtx, hard_frame_pointer_rtx);
25764 }
25765
25766 arm_emit_multi_reg_pop (saved_regs_mask);
25767
25768 if (IS_INTERRUPT (func_type))
25769 {
25770 /* Interrupt handlers will have pushed the
25771 IP onto the stack, so restore it now. */
25772 rtx_insn *insn;
25773 rtx addr = gen_rtx_MEM (SImode,
25774 gen_rtx_POST_INC (SImode,
25775 stack_pointer_rtx));
25776 set_mem_alias_set (addr, get_frame_alias_set ());
25777 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25778 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25779 gen_rtx_REG (SImode, IP_REGNUM),
25780 NULL_RTX);
25781 }
25782
25783 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25784 return;
25785
25786 if (crtl->calls_eh_return)
25787 emit_insn (gen_addsi3 (stack_pointer_rtx,
25788 stack_pointer_rtx,
25789 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25790
25791 if (IS_STACKALIGN (func_type))
25792 /* Restore the original stack pointer. Before prologue, the stack was
25793 realigned and the original stack pointer saved in r0. For details,
25794 see comment in arm_expand_prologue. */
25795 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25796
25797 emit_jump_insn (simple_return_rtx);
25798 }
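
/* Rough sketch (assuming a normal return with no VFP or iWMMXt saves and no
   extra callee-saved core registers): the sequence produced here amounts to

	sub	sp, fp, #12
	ldm	sp, {fp, sp, pc}

   i.e. the stack is unwound to just below the saved registers and fp, sp
   and the return address are reloaded in a single multi-register pop.  */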
25799
25800 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25801 function is not a sibcall. */
25802 void
25803 arm_expand_epilogue (bool really_return)
25804 {
25805 unsigned long func_type;
25806 unsigned long saved_regs_mask;
25807 int num_regs = 0;
25808 int i;
25809 int amount;
25810 arm_stack_offsets *offsets;
25811
25812 func_type = arm_current_func_type ();
25813
25814 /* Naked functions don't have epilogues. Hence, generate a return pattern and
25815 let output_return_instruction take care of any instruction emission. */
25816 if (IS_NAKED (func_type)
25817 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25818 {
25819 if (really_return)
25820 emit_jump_insn (simple_return_rtx);
25821 return;
25822 }
25823
25824 /* If we are throwing an exception, then we really must be doing a
25825 return, so we can't tail-call. */
25826 gcc_assert (!crtl->calls_eh_return || really_return);
25827
25828 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25829 {
25830 arm_expand_epilogue_apcs_frame (really_return);
25831 return;
25832 }
25833
25834 /* Get frame offsets for ARM. */
25835 offsets = arm_get_frame_offsets ();
25836 saved_regs_mask = offsets->saved_regs_mask;
25837 num_regs = bit_count (saved_regs_mask);
25838
25839 if (frame_pointer_needed)
25840 {
25841 rtx_insn *insn;
25842 /* Restore stack pointer if necessary. */
25843 if (TARGET_ARM)
25844 {
25845 /* In ARM mode, the frame pointer points to the first saved register.
25846 Restore the stack pointer to the last saved register. */
25847 amount = offsets->frame - offsets->saved_regs;
25848
25849 /* Force out any pending memory operations that reference stacked data
25850 before stack de-allocation occurs. */
25851 emit_insn (gen_blockage ());
25852 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25853 hard_frame_pointer_rtx,
25854 GEN_INT (amount)));
25855 arm_add_cfa_adjust_cfa_note (insn, amount,
25856 stack_pointer_rtx,
25857 hard_frame_pointer_rtx);
25858
25859 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25860 deleted. */
25861 emit_insn (gen_force_register_use (stack_pointer_rtx));
25862 }
25863 else
25864 {
25865 /* In Thumb-2 mode, the frame pointer points to the last saved
25866 register. */
25867 amount = offsets->locals_base - offsets->saved_regs;
25868 if (amount)
25869 {
25870 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25871 hard_frame_pointer_rtx,
25872 GEN_INT (amount)));
25873 arm_add_cfa_adjust_cfa_note (insn, amount,
25874 hard_frame_pointer_rtx,
25875 hard_frame_pointer_rtx);
25876 }
25877
25878 /* Force out any pending memory operations that reference stacked data
25879 before stack de-allocation occurs. */
25880 emit_insn (gen_blockage ());
25881 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25882 hard_frame_pointer_rtx));
25883 arm_add_cfa_adjust_cfa_note (insn, 0,
25884 stack_pointer_rtx,
25885 hard_frame_pointer_rtx);
25886 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25887 deleted. */
25888 emit_insn (gen_force_register_use (stack_pointer_rtx));
25889 }
25890 }
25891 else
25892 {
25893 /* Pop off outgoing args and local frame to adjust stack pointer to
25894 last saved register. */
25895 amount = offsets->outgoing_args - offsets->saved_regs;
25896 if (amount)
25897 {
25898 rtx_insn *tmp;
25899 /* Force out any pending memory operations that reference stacked data
25900 before stack de-allocation occurs. */
25901 emit_insn (gen_blockage ());
25902 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25903 stack_pointer_rtx,
25904 GEN_INT (amount)));
25905 arm_add_cfa_adjust_cfa_note (tmp, amount,
25906 stack_pointer_rtx, stack_pointer_rtx);
25907 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25908 not deleted. */
25909 emit_insn (gen_force_register_use (stack_pointer_rtx));
25910 }
25911 }
25912
25913 if (TARGET_HARD_FLOAT)
25914 {
25915 /* Generate VFP register multi-pop. */
25916 int end_reg = LAST_VFP_REGNUM + 1;
25917
25918 /* Scan the registers in reverse order. We need to match
25919 any groupings made in the prologue and generate matching
25920 vldm operations. The need to match groups is because,
25921 unlike pop, vldm can only do consecutive regs. */
25922 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25923 /* Look for a case where a reg does not need restoring. */
25924 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25925 && (!df_regs_ever_live_p (i + 1)
25926 || call_used_regs[i + 1]))
25927 {
25928 /* Restore the regs discovered so far (from reg+2 to
25929 end_reg). */
25930 if (end_reg > i + 2)
25931 arm_emit_vfp_multi_reg_pop (i + 2,
25932 (end_reg - (i + 2)) / 2,
25933 stack_pointer_rtx);
25934 end_reg = i;
25935 }
25936
25937 /* Restore the remaining regs that we have discovered (or possibly
25938 even all of them, if the conditional in the for loop never
25939 fired). */
25940 if (end_reg > i + 2)
25941 arm_emit_vfp_multi_reg_pop (i + 2,
25942 (end_reg - (i + 2)) / 2,
25943 stack_pointer_rtx);
25944 }
25945
25946 if (TARGET_IWMMXT)
25947 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25948 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25949 {
25950 rtx_insn *insn;
25951 rtx addr = gen_rtx_MEM (V2SImode,
25952 gen_rtx_POST_INC (SImode,
25953 stack_pointer_rtx));
25954 set_mem_alias_set (addr, get_frame_alias_set ());
25955 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25956 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25957 gen_rtx_REG (V2SImode, i),
25958 NULL_RTX);
25959 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25960 stack_pointer_rtx, stack_pointer_rtx);
25961 }
25962
25963 if (saved_regs_mask)
25964 {
25965 rtx insn;
25966 bool return_in_pc = false;
25967
25968 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25969 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25970 && !IS_CMSE_ENTRY (func_type)
25971 && !IS_STACKALIGN (func_type)
25972 && really_return
25973 && crtl->args.pretend_args_size == 0
25974 && saved_regs_mask & (1 << LR_REGNUM)
25975 && !crtl->calls_eh_return)
25976 {
25977 saved_regs_mask &= ~(1 << LR_REGNUM);
25978 saved_regs_mask |= (1 << PC_REGNUM);
25979 return_in_pc = true;
25980 }
25981
25982 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25983 {
25984 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25985 if (saved_regs_mask & (1 << i))
25986 {
25987 rtx addr = gen_rtx_MEM (SImode,
25988 gen_rtx_POST_INC (SImode,
25989 stack_pointer_rtx));
25990 set_mem_alias_set (addr, get_frame_alias_set ());
25991
25992 if (i == PC_REGNUM)
25993 {
25994 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25995 XVECEXP (insn, 0, 0) = ret_rtx;
25996 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25997 addr);
25998 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25999 insn = emit_jump_insn (insn);
26000 }
26001 else
26002 {
26003 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
26004 addr));
26005 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26006 gen_rtx_REG (SImode, i),
26007 NULL_RTX);
26008 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26009 stack_pointer_rtx,
26010 stack_pointer_rtx);
26011 }
26012 }
26013 }
26014 else
26015 {
26016 if (TARGET_LDRD
26017 && current_tune->prefer_ldrd_strd
26018 && !optimize_function_for_size_p (cfun))
26019 {
26020 if (TARGET_THUMB2)
26021 thumb2_emit_ldrd_pop (saved_regs_mask);
26022 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
26023 arm_emit_ldrd_pop (saved_regs_mask);
26024 else
26025 arm_emit_multi_reg_pop (saved_regs_mask);
26026 }
26027 else
26028 arm_emit_multi_reg_pop (saved_regs_mask);
26029 }
26030
26031 if (return_in_pc)
26032 return;
26033 }
26034
26035 amount
26036 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
26037 if (amount)
26038 {
26039 int i, j;
26040 rtx dwarf = NULL_RTX;
26041 rtx_insn *tmp =
26042 emit_insn (gen_addsi3 (stack_pointer_rtx,
26043 stack_pointer_rtx,
26044 GEN_INT (amount)));
26045
26046 RTX_FRAME_RELATED_P (tmp) = 1;
26047
26048 if (cfun->machine->uses_anonymous_args)
26049 {
26050 /* Restore pretend args. Refer to arm_expand_prologue for how the
26051 pretend args are saved on the stack. */
26052 int num_regs = crtl->args.pretend_args_size / 4;
26053 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
26054 for (j = 0, i = 0; j < num_regs; i++)
26055 if (saved_regs_mask & (1 << i))
26056 {
26057 rtx reg = gen_rtx_REG (SImode, i);
26058 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
26059 j++;
26060 }
26061 REG_NOTES (tmp) = dwarf;
26062 }
26063 arm_add_cfa_adjust_cfa_note (tmp, amount,
26064 stack_pointer_rtx, stack_pointer_rtx);
26065 }
26066
26067 /* Clear all caller-saved regs that are not used to return. */
26068 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26069 {
26070 /* CMSE_ENTRY always returns. */
26071 gcc_assert (really_return);
26072 cmse_nonsecure_entry_clear_before_return ();
26073 }
26074
26075 if (!really_return)
26076 return;
26077
26078 if (crtl->calls_eh_return)
26079 emit_insn (gen_addsi3 (stack_pointer_rtx,
26080 stack_pointer_rtx,
26081 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26082
26083 if (IS_STACKALIGN (func_type))
26084 /* Restore the original stack pointer. Before prologue, the stack was
26085 realigned and the original stack pointer saved in r0. For details,
26086 see comment in arm_expand_prologue. */
26087 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
26088
26089 emit_jump_insn (simple_return_rtx);
26090 }
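
/* Illustrative only: for an ARM-mode function compiled without a frame
   pointer that saved {r4, r5, lr} and allocated 8 bytes of outgoing/local
   stack, the epilogue expanded here boils down to

	add	sp, sp, #8
	pop	{r4, r5, pc}

   with lr folded into pc by the return_in_pc transformation above.  */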
26091
26092 /* Implementation of insn prologue_thumb1_interwork. This is the first
26093 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26094
26095 const char *
26096 thumb1_output_interwork (void)
26097 {
26098 const char * name;
26099 FILE *f = asm_out_file;
26100
26101 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26102 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26103 == SYMBOL_REF);
26104 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26105
26106 /* Generate code sequence to switch us into Thumb mode. */
26107 /* The .code 32 directive has already been emitted by
26108 ASM_DECLARE_FUNCTION_NAME. */
26109 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26110 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26111
26112 /* Generate a label, so that the debugger will notice the
26113 change in instruction sets. This label is also used by
26114 the assembler to bypass the ARM code when this function
26115 is called from a Thumb encoded function elsewhere in the
26116 same file. Hence the definition of STUB_NAME here must
26117 agree with the definition in gas/config/tc-arm.c. */
26118
26119 #define STUB_NAME ".real_start_of"
26120
26121 fprintf (f, "\t.code\t16\n");
26122 #ifdef ARM_PE
26123 if (arm_dllexport_name_p (name))
26124 name = arm_strip_name_encoding (name);
26125 #endif
26126 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26127 fprintf (f, "\t.thumb_func\n");
26128 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26129
26130 return "";
26131 }
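
/* Putting the pieces together, the stub emitted here switches the caller
   into Thumb state with

	orr	ip, pc, #1
	bx	ip
	.code	16

   and then defines a ".real_start_of"-prefixed Thumb label for the real
   body, which gas also uses to let Thumb callers bypass the ARM entry.  */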
26132
26133 /* Handle the case of a double word load into a low register from
26134 a computed memory address. The computed address may involve a
26135 register which is overwritten by the load. */
26136 const char *
26137 thumb_load_double_from_address (rtx *operands)
26138 {
26139 rtx addr;
26140 rtx base;
26141 rtx offset;
26142 rtx arg1;
26143 rtx arg2;
26144
26145 gcc_assert (REG_P (operands[0]));
26146 gcc_assert (MEM_P (operands[1]));
26147
26148 /* Get the memory address. */
26149 addr = XEXP (operands[1], 0);
26150
26151 /* Work out how the memory address is computed. */
26152 switch (GET_CODE (addr))
26153 {
26154 case REG:
26155 operands[2] = adjust_address (operands[1], SImode, 4);
26156
26157 if (REGNO (operands[0]) == REGNO (addr))
26158 {
26159 output_asm_insn ("ldr\t%H0, %2", operands);
26160 output_asm_insn ("ldr\t%0, %1", operands);
26161 }
26162 else
26163 {
26164 output_asm_insn ("ldr\t%0, %1", operands);
26165 output_asm_insn ("ldr\t%H0, %2", operands);
26166 }
26167 break;
26168
26169 case CONST:
26170 /* Compute <address> + 4 for the high order load. */
26171 operands[2] = adjust_address (operands[1], SImode, 4);
26172
26173 output_asm_insn ("ldr\t%0, %1", operands);
26174 output_asm_insn ("ldr\t%H0, %2", operands);
26175 break;
26176
26177 case PLUS:
26178 arg1 = XEXP (addr, 0);
26179 arg2 = XEXP (addr, 1);
26180
26181 if (CONSTANT_P (arg1))
26182 base = arg2, offset = arg1;
26183 else
26184 base = arg1, offset = arg2;
26185
26186 gcc_assert (REG_P (base));
26187
26188 /* Catch the case of <address> = <reg> + <reg> */
26189 if (REG_P (offset))
26190 {
26191 int reg_offset = REGNO (offset);
26192 int reg_base = REGNO (base);
26193 int reg_dest = REGNO (operands[0]);
26194
26195 /* Add the base and offset registers together into the
26196 higher destination register. */
26197 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26198 reg_dest + 1, reg_base, reg_offset);
26199
26200 /* Load the lower destination register from the address in
26201 the higher destination register. */
26202 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26203 reg_dest, reg_dest + 1);
26204
26205 /* Load the higher destination register from its own address
26206 plus 4. */
26207 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26208 reg_dest + 1, reg_dest + 1);
26209 }
26210 else
26211 {
26212 /* Compute <address> + 4 for the high order load. */
26213 operands[2] = adjust_address (operands[1], SImode, 4);
26214
26215 /* If the computed address is held in the low order register
26216 then load the high order register first, otherwise always
26217 load the low order register first. */
26218 if (REGNO (operands[0]) == REGNO (base))
26219 {
26220 output_asm_insn ("ldr\t%H0, %2", operands);
26221 output_asm_insn ("ldr\t%0, %1", operands);
26222 }
26223 else
26224 {
26225 output_asm_insn ("ldr\t%0, %1", operands);
26226 output_asm_insn ("ldr\t%H0, %2", operands);
26227 }
26228 }
26229 break;
26230
26231 case LABEL_REF:
26232 /* With no registers to worry about we can just load the value
26233 directly. */
26234 operands[2] = adjust_address (operands[1], SImode, 4);
26235
26236 output_asm_insn ("ldr\t%H0, %2", operands);
26237 output_asm_insn ("ldr\t%0, %1", operands);
26238 break;
26239
26240 default:
26241 gcc_unreachable ();
26242 }
26243
26244 return "";
26245 }
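
/* Example (illustrative register choice): loading the pair r2:r3 from the
   address r3 + r4 goes through the PLUS/REG case above and emits

	add	r3, r3, r4
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]

   so the high half of the destination holds the computed address and is
   only overwritten by the final load.  */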
26246
26247 const char *
26248 thumb_output_move_mem_multiple (int n, rtx *operands)
26249 {
26250 switch (n)
26251 {
26252 case 2:
26253 if (REGNO (operands[4]) > REGNO (operands[5]))
26254 std::swap (operands[4], operands[5]);
26255
26256 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26257 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26258 break;
26259
26260 case 3:
26261 if (REGNO (operands[4]) > REGNO (operands[5]))
26262 std::swap (operands[4], operands[5]);
26263 if (REGNO (operands[5]) > REGNO (operands[6]))
26264 std::swap (operands[5], operands[6]);
26265 if (REGNO (operands[4]) > REGNO (operands[5]))
26266 std::swap (operands[4], operands[5]);
26267
26268 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26269 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26270 break;
26271
26272 default:
26273 gcc_unreachable ();
26274 }
26275
26276 return "";
26277 }
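
/* The swaps above exist because ldmia/stmia expect ascending register
   numbers.  For example, scratch registers r5, r3, r4 in the three-word
   case are reordered so that the output is

	ldmia	r1!, {r3, r4, r5}
	stmia	r0!, {r3, r4, r5}

   (the register numbers here are purely illustrative).  */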
26278
26279 /* Output a call-via instruction for thumb state. */
26280 const char *
26281 thumb_call_via_reg (rtx reg)
26282 {
26283 int regno = REGNO (reg);
26284 rtx *labelp;
26285
26286 gcc_assert (regno < LR_REGNUM);
26287
26288 /* If we are in the normal text section we can use a single instance
26289 per compilation unit. If we are doing function sections, then we need
26290 an entry per section, since we can't rely on reachability. */
26291 if (in_section == text_section)
26292 {
26293 thumb_call_reg_needed = 1;
26294
26295 if (thumb_call_via_label[regno] == NULL)
26296 thumb_call_via_label[regno] = gen_label_rtx ();
26297 labelp = thumb_call_via_label + regno;
26298 }
26299 else
26300 {
26301 if (cfun->machine->call_via[regno] == NULL)
26302 cfun->machine->call_via[regno] = gen_label_rtx ();
26303 labelp = cfun->machine->call_via + regno;
26304 }
26305
26306 output_asm_insn ("bl\t%a0", labelp);
26307 return "";
26308 }
26309
26310 /* Routines for generating rtl. */
26311 void
26312 thumb_expand_movmemqi (rtx *operands)
26313 {
26314 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26315 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26316 HOST_WIDE_INT len = INTVAL (operands[2]);
26317 HOST_WIDE_INT offset = 0;
26318
26319 while (len >= 12)
26320 {
26321 emit_insn (gen_movmem12b (out, in, out, in));
26322 len -= 12;
26323 }
26324
26325 if (len >= 8)
26326 {
26327 emit_insn (gen_movmem8b (out, in, out, in));
26328 len -= 8;
26329 }
26330
26331 if (len >= 4)
26332 {
26333 rtx reg = gen_reg_rtx (SImode);
26334 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26335 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26336 len -= 4;
26337 offset += 4;
26338 }
26339
26340 if (len >= 2)
26341 {
26342 rtx reg = gen_reg_rtx (HImode);
26343 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26344 plus_constant (Pmode, in,
26345 offset))));
26346 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26347 offset)),
26348 reg));
26349 len -= 2;
26350 offset += 2;
26351 }
26352
26353 if (len)
26354 {
26355 rtx reg = gen_reg_rtx (QImode);
26356 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26357 plus_constant (Pmode, in,
26358 offset))));
26359 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26360 offset)),
26361 reg));
26362 }
26363 }
26364
26365 void
26366 thumb_reload_out_hi (rtx *operands)
26367 {
26368 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26369 }
26370
26371 /* Return the length of a function name prefix
26372 that starts with the character 'c'. */
26373 static int
26374 arm_get_strip_length (int c)
26375 {
26376 switch (c)
26377 {
26378 ARM_NAME_ENCODING_LENGTHS
26379 default: return 0;
26380 }
26381 }
26382
26383 /* Return a pointer to a function's name with any
26384 and all prefix encodings stripped from it. */
26385 const char *
26386 arm_strip_name_encoding (const char *name)
26387 {
26388 int skip;
26389
26390 while ((skip = arm_get_strip_length (* name)))
26391 name += skip;
26392
26393 return name;
26394 }
26395
26396 /* If there is a '*' anywhere in the name's prefix, then
26397 emit the stripped name verbatim, otherwise prepend an
26398 underscore if leading underscores are being used. */
26399 void
26400 arm_asm_output_labelref (FILE *stream, const char *name)
26401 {
26402 int skip;
26403 int verbatim = 0;
26404
26405 while ((skip = arm_get_strip_length (* name)))
26406 {
26407 verbatim |= (*name == '*');
26408 name += skip;
26409 }
26410
26411 if (verbatim)
26412 fputs (name, stream);
26413 else
26414 asm_fprintf (stream, "%U%s", name);
26415 }
26416
26417 /* This function is used to emit an EABI tag and its associated value.
26418 We emit the numerical value of the tag in case the assembler does not
26419 support textual tags. (Eg gas prior to 2.20). If requested we include
26420 the tag name in a comment so that anyone reading the assembler output
26421 will know which tag is being set.
26422
26423 This function is not static because arm-c.c needs it too. */
26424
26425 void
26426 arm_emit_eabi_attribute (const char *name, int num, int val)
26427 {
26428 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26429 if (flag_verbose_asm || flag_debug_asm)
26430 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26431 asm_fprintf (asm_out_file, "\n");
26432 }
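
/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1)
   writes roughly

	.eabi_attribute 19, 1	@ Tag_ABI_FP_rounding

   with the trailing comment only present under -fverbose-asm or -dA.  */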
26433
26434 /* This function is used to print CPU tuning information as comment
26435 in assembler file. Pointers are not printed for now. */
26436
26437 void
26438 arm_print_tune_info (void)
26439 {
26440 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26441 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26442 current_tune->constant_limit);
26443 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26444 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26445 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26446 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26447 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26448 "prefetch.l1_cache_size:\t%d\n",
26449 current_tune->prefetch.l1_cache_size);
26450 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26451 "prefetch.l1_cache_line_size:\t%d\n",
26452 current_tune->prefetch.l1_cache_line_size);
26453 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26454 "prefer_constant_pool:\t%d\n",
26455 (int) current_tune->prefer_constant_pool);
26456 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26457 "branch_cost:\t(s:speed, p:predictable)\n");
26458 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26459 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26460 current_tune->branch_cost (false, false));
26461 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26462 current_tune->branch_cost (false, true));
26463 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26464 current_tune->branch_cost (true, false));
26465 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26466 current_tune->branch_cost (true, true));
26467 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26468 "prefer_ldrd_strd:\t%d\n",
26469 (int) current_tune->prefer_ldrd_strd);
26470 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26471 "logical_op_non_short_circuit:\t[%d,%d]\n",
26472 (int) current_tune->logical_op_non_short_circuit_thumb,
26473 (int) current_tune->logical_op_non_short_circuit_arm);
26474 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26475 "prefer_neon_for_64bits:\t%d\n",
26476 (int) current_tune->prefer_neon_for_64bits);
26477 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26478 "disparage_flag_setting_t16_encodings:\t%d\n",
26479 (int) current_tune->disparage_flag_setting_t16_encodings);
26480 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26481 "string_ops_prefer_neon:\t%d\n",
26482 (int) current_tune->string_ops_prefer_neon);
26483 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26484 "max_insns_inline_memset:\t%d\n",
26485 current_tune->max_insns_inline_memset);
26486 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26487 current_tune->fusible_ops);
26488 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26489 (int) current_tune->sched_autopref);
26490 }
26491
26492 /* Print .arch and .arch_extension directives corresponding to the
26493 current architecture configuration. */
26494 static void
26495 arm_print_asm_arch_directives ()
26496 {
26497 const arch_option *arch
26498 = arm_parse_arch_option_name (all_architectures, "-march",
26499 arm_active_target.arch_name);
26500 auto_sbitmap opt_bits (isa_num_bits);
26501
26502 gcc_assert (arch);
26503
26504 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26505 arm_last_printed_arch_string = arm_active_target.arch_name;
26506 if (!arch->common.extensions)
26507 return;
26508
26509 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26510 opt->name != NULL;
26511 opt++)
26512 {
26513 if (!opt->remove)
26514 {
26515 arm_initialize_isa (opt_bits, opt->isa_bits);
26516
26517 /* If every feature bit of this option is set in the target
26518 ISA specification, print out the option name. However,
26519 don't print anything if all the bits are part of the
26520 FPU specification. */
26521 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26522 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26523 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26524 }
26525 }
26526 }
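
/* As an illustration, with -march=armv8-a+crc the output is along the
   lines of

	.arch armv8-a
	.arch_extension crc

   while extensions whose feature bits belong entirely to the FPU are left
   to the .fpu directive instead.  */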
26527
26528 static void
26529 arm_file_start (void)
26530 {
26531 int val;
26532
26533 if (TARGET_BPABI)
26534 {
26535 /* We don't have a specified CPU. Use the architecture to
26536 generate the tags.
26537
26538 Note: it might be better to do this unconditionally, then the
26539 assembler would not need to know about all new CPU names as
26540 they are added. */
26541 if (!arm_active_target.core_name)
26542 {
26543 /* armv7ve doesn't support any extensions. */
26544 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26545 {
26546 /* Keep backward compatibility for assemblers
26547 which don't support armv7ve. */
26548 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26549 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26550 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26551 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26552 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26553 arm_last_printed_arch_string = "armv7ve";
26554 }
26555 else
26556 arm_print_asm_arch_directives ();
26557 }
26558 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26559 {
26560 asm_fprintf (asm_out_file, "\t.arch %s\n",
26561 arm_active_target.core_name + 8);
26562 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26563 }
26564 else
26565 {
26566 const char* truncated_name
26567 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26568 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26569 }
26570
26571 if (print_tune_info)
26572 arm_print_tune_info ();
26573
26574 if (! TARGET_SOFT_FLOAT)
26575 {
26576 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26577 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26578
26579 if (TARGET_HARD_FLOAT_ABI)
26580 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26581 }
26582
26583 /* Some of these attributes only apply when the corresponding features
26584 are used. However we don't have any easy way of figuring this out.
26585 Conservatively record the setting that would have been used. */
26586
26587 if (flag_rounding_math)
26588 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26589
26590 if (!flag_unsafe_math_optimizations)
26591 {
26592 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26593 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26594 }
26595 if (flag_signaling_nans)
26596 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26597
26598 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26599 flag_finite_math_only ? 1 : 3);
26600
26601 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26602 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26603 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26604 flag_short_enums ? 1 : 2);
26605
26606 /* Tag_ABI_optimization_goals. */
26607 if (optimize_size)
26608 val = 4;
26609 else if (optimize >= 2)
26610 val = 2;
26611 else if (optimize)
26612 val = 1;
26613 else
26614 val = 6;
26615 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26616
26617 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26618 unaligned_access);
26619
26620 if (arm_fp16_format)
26621 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26622 (int) arm_fp16_format);
26623
26624 if (arm_lang_output_object_attributes_hook)
26625 arm_lang_output_object_attributes_hook();
26626 }
26627
26628 default_file_start ();
26629 }
26630
26631 static void
26632 arm_file_end (void)
26633 {
26634 int regno;
26635
26636 if (NEED_INDICATE_EXEC_STACK)
26637 /* Add .note.GNU-stack. */
26638 file_end_indicate_exec_stack ();
26639
26640 if (! thumb_call_reg_needed)
26641 return;
26642
26643 switch_to_section (text_section);
26644 asm_fprintf (asm_out_file, "\t.code 16\n");
26645 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26646
26647 for (regno = 0; regno < LR_REGNUM; regno++)
26648 {
26649 rtx label = thumb_call_via_label[regno];
26650
26651 if (label != 0)
26652 {
26653 targetm.asm_out.internal_label (asm_out_file, "L",
26654 CODE_LABEL_NUMBER (label));
26655 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26656 }
26657 }
26658 }
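
/* Taken together with thumb_call_via_reg above, the result for, say, r3 is
   a per-file trampoline of roughly this shape at the end of the text
   section:

	.code 16
	.align	1
   .L<n>:
	bx	r3

   with every indirect call through r3 in the normal text section emitted
   as "bl .L<n>" (used on cores that lack the BLX instruction).  */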
26659
26660 #ifndef ARM_PE
26661 /* Symbols in the text segment can be accessed without indirecting via the
26662 constant pool; it may take an extra binary operation, but this is still
26663 faster than indirecting via memory. Don't do this when not optimizing,
26664 since we won't be calculating all of the offsets necessary to do this
26665 simplification. */
26666
26667 static void
26668 arm_encode_section_info (tree decl, rtx rtl, int first)
26669 {
26670 if (optimize > 0 && TREE_CONSTANT (decl))
26671 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26672
26673 default_encode_section_info (decl, rtl, first);
26674 }
26675 #endif /* !ARM_PE */
26676
26677 static void
26678 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26679 {
26680 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26681 && !strcmp (prefix, "L"))
26682 {
26683 arm_ccfsm_state = 0;
26684 arm_target_insn = NULL;
26685 }
26686 default_internal_label (stream, prefix, labelno);
26687 }
26688
26689 /* Output code to add DELTA to the first argument, and then jump
26690 to FUNCTION. Used for C++ multiple inheritance. */
26691
26692 static void
26693 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26694 HOST_WIDE_INT, tree function)
26695 {
26696 static int thunk_label = 0;
26697 char label[256];
26698 char labelpc[256];
26699 int mi_delta = delta;
26700 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26701 int shift = 0;
26702 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26703 ? 1 : 0);
26704 if (mi_delta < 0)
26705 mi_delta = - mi_delta;
26706
26707 final_start_function (emit_barrier (), file, 1);
26708
26709 if (TARGET_THUMB1)
26710 {
26711 int labelno = thunk_label++;
26712 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26713 /* Thunks are entered in arm mode when available. */
26714 if (TARGET_THUMB1_ONLY)
26715 {
26716 /* push r3 so we can use it as a temporary. */
26717 /* TODO: Omit this save if r3 is not used. */
26718 fputs ("\tpush {r3}\n", file);
26719 fputs ("\tldr\tr3, ", file);
26720 }
26721 else
26722 {
26723 fputs ("\tldr\tr12, ", file);
26724 }
26725 assemble_name (file, label);
26726 fputc ('\n', file);
26727 if (flag_pic)
26728 {
26729 /* If we are generating PIC, the ldr instruction below loads
26730 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26731 the address of the add + 8, so we have:
26732
26733 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26734 = target + 1.
26735
26736 Note that we have "+ 1" because some versions of GNU ld
26737 don't set the low bit of the result for R_ARM_REL32
26738 relocations against thumb function symbols.
26739 On ARMv6M this is +4, not +8. */
26740 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26741 assemble_name (file, labelpc);
26742 fputs (":\n", file);
26743 if (TARGET_THUMB1_ONLY)
26744 {
26745 /* This is 2 insns after the start of the thunk, so we know it
26746 is 4-byte aligned. */
26747 fputs ("\tadd\tr3, pc, r3\n", file);
26748 fputs ("\tmov r12, r3\n", file);
26749 }
26750 else
26751 fputs ("\tadd\tr12, pc, r12\n", file);
26752 }
26753 else if (TARGET_THUMB1_ONLY)
26754 fputs ("\tmov r12, r3\n", file);
26755 }
26756 if (TARGET_THUMB1_ONLY)
26757 {
26758 if (mi_delta > 255)
26759 {
26760 fputs ("\tldr\tr3, ", file);
26761 assemble_name (file, label);
26762 fputs ("+4\n", file);
26763 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26764 mi_op, this_regno, this_regno);
26765 }
26766 else if (mi_delta != 0)
26767 {
26768 /* Thumb1 unified syntax requires an s suffix in the instruction name when
26769 one of the operands is an immediate. */
26770 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26771 mi_op, this_regno, this_regno,
26772 mi_delta);
26773 }
26774 }
26775 else
26776 {
26777 /* TODO: Use movw/movt for large constants when available. */
26778 while (mi_delta != 0)
26779 {
26780 if ((mi_delta & (3 << shift)) == 0)
26781 shift += 2;
26782 else
26783 {
26784 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26785 mi_op, this_regno, this_regno,
26786 mi_delta & (0xff << shift));
26787 mi_delta &= ~(0xff << shift);
26788 shift += 8;
26789 }
26790 }
26791 }
26792 if (TARGET_THUMB1)
26793 {
26794 if (TARGET_THUMB1_ONLY)
26795 fputs ("\tpop\t{r3}\n", file);
26796
26797 fprintf (file, "\tbx\tr12\n");
26798 ASM_OUTPUT_ALIGN (file, 2);
26799 assemble_name (file, label);
26800 fputs (":\n", file);
26801 if (flag_pic)
26802 {
26803 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26804 rtx tem = XEXP (DECL_RTL (function), 0);
26805 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26806 pipeline offset is four rather than eight. Adjust the offset
26807 accordingly. */
26808 tem = plus_constant (GET_MODE (tem), tem,
26809 TARGET_THUMB1_ONLY ? -3 : -7);
26810 tem = gen_rtx_MINUS (GET_MODE (tem),
26811 tem,
26812 gen_rtx_SYMBOL_REF (Pmode,
26813 ggc_strdup (labelpc)));
26814 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26815 }
26816 else
26817 /* Output ".word .LTHUNKn". */
26818 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26819
26820 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26821 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26822 }
26823 else
26824 {
26825 fputs ("\tb\t", file);
26826 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26827 if (NEED_PLT_RELOC)
26828 fputs ("(PLT)", file);
26829 fputc ('\n', file);
26830 }
26831
26832 final_end_function ();
26833 }
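/* For illustration (the register numbers and label names below are just an
   assumed example): a Thumb-1-only, non-PIC thunk with DELTA == 8 and
   `this' arriving in r0 would be emitted roughly as:

	push {r3}
	ldr	r3, .LTHUMBFUNC0
	mov r12, r3
	adds	r0, r0, #8
	pop	{r3}
	bx	r12
	.align	2
   .LTHUMBFUNC0:
	.word	<target function>

   For DELTA > 255 the constant is instead loaded from a second literal
   word placed after the label.  */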
26834
26835 /* MI thunk handling for TARGET_32BIT. */
26836
26837 static void
26838 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26839 HOST_WIDE_INT vcall_offset, tree function)
26840 {
26841 const bool long_call_p = arm_is_long_call_p (function);
26842
26843 /* On ARM, this_regno is R0 or R1 depending on
26844 whether the function returns an aggregate or not.
26845 */
26846 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26847 function)
26848 ? R1_REGNUM : R0_REGNUM);
26849
26850 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26851 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26852 reload_completed = 1;
26853 emit_note (NOTE_INSN_PROLOGUE_END);
26854
26855 /* Add DELTA to THIS_RTX. */
26856 if (delta != 0)
26857 arm_split_constant (PLUS, Pmode, NULL_RTX,
26858 delta, this_rtx, this_rtx, false);
26859
26860 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26861 if (vcall_offset != 0)
26862 {
26863 /* Load *THIS_RTX. */
26864 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26865 /* Compute *THIS_RTX + VCALL_OFFSET. */
26866 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26867 false);
26868 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26869 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26870 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26871 }
26872
26873 /* Generate a tail call to the target function. */
26874 if (!TREE_USED (function))
26875 {
26876 assemble_external (function);
26877 TREE_USED (function) = 1;
26878 }
26879 rtx funexp = XEXP (DECL_RTL (function), 0);
26880 if (long_call_p)
26881 {
26882 emit_move_insn (temp, funexp);
26883 funexp = temp;
26884 }
26885 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26886 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26887 SIBLING_CALL_P (insn) = 1;
26888 emit_barrier ();
26889
26890 /* Indirect calls require a bit of fixup in PIC mode. */
26891 if (long_call_p)
26892 {
26893 split_all_insns_noflow ();
26894 arm_reorg ();
26895 }
26896
26897 insn = get_insns ();
26898 shorten_branches (insn);
26899 final_start_function (insn, file, 1);
26900 final (insn, file, 1);
26901 final_end_function ();
26902
26903 /* Stop pretending this is a post-reload pass. */
26904 reload_completed = 0;
26905 }
26906
26907 /* Output code to add DELTA to the first argument, and then jump
26908 to FUNCTION. Used for C++ multiple inheritance. */
26909
26910 static void
26911 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26912 HOST_WIDE_INT vcall_offset, tree function)
26913 {
26914 if (TARGET_32BIT)
26915 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26916 else
26917 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26918 }
26919
26920 int
26921 arm_emit_vector_const (FILE *file, rtx x)
26922 {
26923 int i;
26924 const char * pattern;
26925
26926 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26927
26928 switch (GET_MODE (x))
26929 {
26930 case E_V2SImode: pattern = "%08x"; break;
26931 case E_V4HImode: pattern = "%04x"; break;
26932 case E_V8QImode: pattern = "%02x"; break;
26933 default: gcc_unreachable ();
26934 }
26935
26936 fprintf (file, "0x");
26937 for (i = CONST_VECTOR_NUNITS (x); i--;)
26938 {
26939 rtx element;
26940
26941 element = CONST_VECTOR_ELT (x, i);
26942 fprintf (file, pattern, INTVAL (element));
26943 }
26944
26945 return 1;
26946 }
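/* For example, a V4HImode CONST_VECTOR holding the elements { 1, 2, 3, 4 }
   would be printed as "0x0004000300020001": the "%04x" pattern is applied
   to each element, starting from the highest-numbered lane.  */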
26947
26948 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26949 HFmode constant pool entries are actually loaded with ldr. */
26950 void
26951 arm_emit_fp16_const (rtx c)
26952 {
26953 long bits;
26954
26955 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26956 if (WORDS_BIG_ENDIAN)
26957 assemble_zeros (2);
26958 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26959 if (!WORDS_BIG_ENDIAN)
26960 assemble_zeros (2);
26961 }
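/* For example, the HFmode constant 1.0 has the bit pattern 0x3c00; on a
   little-endian target this emits roughly ".short 0x3c00" followed by two
   bytes of zero padding, so the value occupies a full 4-byte word.  */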
26962
26963 const char *
26964 arm_output_load_gr (rtx *operands)
26965 {
26966 rtx reg;
26967 rtx offset;
26968 rtx wcgr;
26969 rtx sum;
26970
26971 if (!MEM_P (operands [1])
26972 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26973 || !REG_P (reg = XEXP (sum, 0))
26974 || !CONST_INT_P (offset = XEXP (sum, 1))
26975 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26976 return "wldrw%?\t%0, %1";
26977
26978 /* Fix up an out-of-range load of a GR register. */
26979 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26980 wcgr = operands[0];
26981 operands[0] = reg;
26982 output_asm_insn ("ldr%?\t%0, %1", operands);
26983
26984 operands[0] = wcgr;
26985 operands[1] = reg;
26986 output_asm_insn ("tmcr%?\t%0, %1", operands);
26987 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26988
26989 return "";
26990 }
26991
26992 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26993
26994 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26995 named arg and all anonymous args onto the stack.
26996 XXX I know the prologue shouldn't be pushing registers, but it is faster
26997 that way. */
26998
26999 static void
27000 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
27001 machine_mode mode,
27002 tree type,
27003 int *pretend_size,
27004 int second_time ATTRIBUTE_UNUSED)
27005 {
27006 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
27007 int nregs;
27008
27009 cfun->machine->uses_anonymous_args = 1;
27010 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
27011 {
27012 nregs = pcum->aapcs_ncrn;
27013 if (nregs & 1)
27014 {
27015 int res = arm_needs_doubleword_align (mode, type);
27016 if (res < 0 && warn_psabi)
27017 inform (input_location, "parameter passing for argument of "
27018 "type %qT changed in GCC 7.1", type);
27019 else if (res > 0)
27020 {
27021 nregs++;
27022 if (res > 1 && warn_psabi)
27023 inform (input_location,
27024 "parameter passing for argument of type "
27025 "%qT changed in GCC 9.1", type);
27026 }
27027 }
27028 }
27029 else
27030 nregs = pcum->nregs;
27031
27032 if (nregs < NUM_ARG_REGS)
27033 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
27034 }
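/* As a worked example (AAPCS, no doubleword alignment involved): for a
   function such as "int f (int a, ...)" the named argument consumes r0,
   so nregs is 1 and *pretend_size becomes (4 - 1) * 4 = 12 bytes, which
   makes the prologue push r1-r3 so that va_arg can find the anonymous
   arguments contiguously on the stack.  */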
27035
27036 /* We can't rely on the caller doing the proper promotion when
27037 using APCS or ATPCS. */
27038
27039 static bool
27040 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
27041 {
27042 return !TARGET_AAPCS_BASED;
27043 }
27044
27045 static machine_mode
27046 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
27047 machine_mode mode,
27048 int *punsignedp ATTRIBUTE_UNUSED,
27049 const_tree fntype ATTRIBUTE_UNUSED,
27050 int for_return ATTRIBUTE_UNUSED)
27051 {
27052 if (GET_MODE_CLASS (mode) == MODE_INT
27053 && GET_MODE_SIZE (mode) < 4)
27054 return SImode;
27055
27056 return mode;
27057 }
27058
27059
27060 static bool
27061 arm_default_short_enums (void)
27062 {
27063 return ARM_DEFAULT_SHORT_ENUMS;
27064 }
27065
27066
27067 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27068
27069 static bool
27070 arm_align_anon_bitfield (void)
27071 {
27072 return TARGET_AAPCS_BASED;
27073 }
27074
27075
27076 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27077
27078 static tree
27079 arm_cxx_guard_type (void)
27080 {
27081 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27082 }
27083
27084
27085 /* The EABI says test the least significant bit of a guard variable. */
27086
27087 static bool
27088 arm_cxx_guard_mask_bit (void)
27089 {
27090 return TARGET_AAPCS_BASED;
27091 }
27092
27093
27094 /* The EABI specifies that all array cookies are 8 bytes long. */
27095
27096 static tree
27097 arm_get_cookie_size (tree type)
27098 {
27099 tree size;
27100
27101 if (!TARGET_AAPCS_BASED)
27102 return default_cxx_get_cookie_size (type);
27103
27104 size = build_int_cst (sizetype, 8);
27105 return size;
27106 }
27107
27108
27109 /* The EABI says that array cookies should also contain the element size. */
27110
27111 static bool
27112 arm_cookie_has_size (void)
27113 {
27114 return TARGET_AAPCS_BASED;
27115 }
27116
27117
27118 /* The EABI says constructors and destructors should return a pointer to
27119 the object constructed/destroyed. */
27120
27121 static bool
27122 arm_cxx_cdtor_returns_this (void)
27123 {
27124 return TARGET_AAPCS_BASED;
27125 }
27126
27127 /* The EABI says that an inline function may never be the key
27128 method. */
27129
27130 static bool
27131 arm_cxx_key_method_may_be_inline (void)
27132 {
27133 return !TARGET_AAPCS_BASED;
27134 }
27135
27136 static void
27137 arm_cxx_determine_class_data_visibility (tree decl)
27138 {
27139 if (!TARGET_AAPCS_BASED
27140 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27141 return;
27142
27143 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27144 is exported. However, on systems without dynamic vague linkage,
27145 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27146 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27147 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27148 else
27149 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27150 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27151 }
27152
27153 static bool
27154 arm_cxx_class_data_always_comdat (void)
27155 {
27156 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27157 vague linkage if the class has no key function. */
27158 return !TARGET_AAPCS_BASED;
27159 }
27160
27161
27162 /* The EABI says __aeabi_atexit should be used to register static
27163 destructors. */
27164
27165 static bool
27166 arm_cxx_use_aeabi_atexit (void)
27167 {
27168 return TARGET_AAPCS_BASED;
27169 }
27170
27171
27172 void
27173 arm_set_return_address (rtx source, rtx scratch)
27174 {
27175 arm_stack_offsets *offsets;
27176 HOST_WIDE_INT delta;
27177 rtx addr, mem;
27178 unsigned long saved_regs;
27179
27180 offsets = arm_get_frame_offsets ();
27181 saved_regs = offsets->saved_regs_mask;
27182
27183 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27184 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27185 else
27186 {
27187 if (frame_pointer_needed)
27188 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27189 else
27190 {
27191 /* LR will be the first saved register. */
27192 delta = offsets->outgoing_args - (offsets->frame + 4);
27193
27194
27195 if (delta >= 4096)
27196 {
27197 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27198 GEN_INT (delta & ~4095)));
27199 addr = scratch;
27200 delta &= 4095;
27201 }
27202 else
27203 addr = stack_pointer_rtx;
27204
27205 addr = plus_constant (Pmode, addr, delta);
27206 }
27207
27208 /* The store needs to be marked to prevent DSE from deleting
27209 it as dead if it is based on fp. */
27210 mem = gen_frame_mem (Pmode, addr);
27211 MEM_VOLATILE_P (mem) = true;
27212 emit_move_insn (mem, source);
27213 }
27214 }
27215
27216
27217 void
27218 thumb_set_return_address (rtx source, rtx scratch)
27219 {
27220 arm_stack_offsets *offsets;
27221 HOST_WIDE_INT delta;
27222 HOST_WIDE_INT limit;
27223 int reg;
27224 rtx addr, mem;
27225 unsigned long mask;
27226
27227 emit_use (source);
27228
27229 offsets = arm_get_frame_offsets ();
27230 mask = offsets->saved_regs_mask;
27231 if (mask & (1 << LR_REGNUM))
27232 {
27233 limit = 1024;
27234 /* Find the saved regs. */
27235 if (frame_pointer_needed)
27236 {
27237 delta = offsets->soft_frame - offsets->saved_args;
27238 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27239 if (TARGET_THUMB1)
27240 limit = 128;
27241 }
27242 else
27243 {
27244 delta = offsets->outgoing_args - offsets->saved_args;
27245 reg = SP_REGNUM;
27246 }
27247 /* Allow for the stack frame. */
27248 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27249 delta -= 16;
27250 /* The link register is always the first saved register. */
27251 delta -= 4;
27252
27253 /* Construct the address. */
27254 addr = gen_rtx_REG (SImode, reg);
27255 if (delta > limit)
27256 {
27257 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27258 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27259 addr = scratch;
27260 }
27261 else
27262 addr = plus_constant (Pmode, addr, delta);
27263
27264 /* The store needs to be marked to prevent DSE from deleting
27265 it as dead if it is based on fp. */
27266 mem = gen_frame_mem (Pmode, addr);
27267 MEM_VOLATILE_P (mem) = true;
27268 emit_move_insn (mem, source);
27269 }
27270 else
27271 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27272 }
27273
27274 /* Implements target hook vector_mode_supported_p. */
27275 bool
27276 arm_vector_mode_supported_p (machine_mode mode)
27277 {
27278 /* Neon also supports V2SImode, etc. listed in the clause below. */
27279 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27280 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27281 || mode == V2DImode || mode == V8HFmode))
27282 return true;
27283
27284 if ((TARGET_NEON || TARGET_IWMMXT)
27285 && ((mode == V2SImode)
27286 || (mode == V4HImode)
27287 || (mode == V8QImode)))
27288 return true;
27289
27290 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27291 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27292 || mode == V2HAmode))
27293 return true;
27294
27295 return false;
27296 }
27297
27298 /* Implements target hook array_mode_supported_p. */
27299
27300 static bool
27301 arm_array_mode_supported_p (machine_mode mode,
27302 unsigned HOST_WIDE_INT nelems)
27303 {
27304 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27305 for now, as the lane-swapping logic needs to be extended in the expanders.
27306 See PR target/82518. */
27307 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27308 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27309 && (nelems >= 2 && nelems <= 4))
27310 return true;
27311
27312 return false;
27313 }
27314
27315 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27316 registers when autovectorizing for Neon, at least until multiple vector
27317 widths are supported properly by the middle-end. */
27318
27319 static machine_mode
27320 arm_preferred_simd_mode (scalar_mode mode)
27321 {
27322 if (TARGET_NEON)
27323 switch (mode)
27324 {
27325 case E_SFmode:
27326 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27327 case E_SImode:
27328 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27329 case E_HImode:
27330 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27331 case E_QImode:
27332 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27333 case E_DImode:
27334 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27335 return V2DImode;
27336 break;
27337
27338 default:;
27339 }
27340
27341 if (TARGET_REALLY_IWMMXT)
27342 switch (mode)
27343 {
27344 case E_SImode:
27345 return V2SImode;
27346 case E_HImode:
27347 return V4HImode;
27348 case E_QImode:
27349 return V8QImode;
27350
27351 default:;
27352 }
27353
27354 return word_mode;
27355 }
27356
27357 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27358
27359 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27360 using r0-r4 for function arguments, r7 for the stack frame and don't have
27361 enough left over to do doubleword arithmetic. For Thumb-2 all the
27362 potentially problematic instructions accept high registers so this is not
27363 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27364 that require many low registers. */
27365 static bool
27366 arm_class_likely_spilled_p (reg_class_t rclass)
27367 {
27368 if ((TARGET_THUMB1 && rclass == LO_REGS)
27369 || rclass == CC_REG)
27370 return true;
27371
27372 return false;
27373 }
27374
27375 /* Implements target hook small_register_classes_for_mode_p. */
27376 bool
27377 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27378 {
27379 return TARGET_THUMB1;
27380 }
27381
27382 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27383 ARM insns and therefore guarantee that the shift count is modulo 256.
27384 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27385 guarantee no particular behavior for out-of-range counts. */
27386
27387 static unsigned HOST_WIDE_INT
27388 arm_shift_truncation_mask (machine_mode mode)
27389 {
27390 return mode == SImode ? 255 : 0;
27391 }
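/* For example, for SImode the mask is 255, so a variable shift count of 257
   behaves like a shift by 1; for DImode the mask is 0, meaning no truncation
   of out-of-range counts may be assumed.  */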
27392
27393
27394 /* Map internal gcc register numbers to DWARF2 register numbers. */
27395
27396 unsigned int
27397 arm_dbx_register_number (unsigned int regno)
27398 {
27399 if (regno < 16)
27400 return regno;
27401
27402 if (IS_VFP_REGNUM (regno))
27403 {
27404 /* See comment in arm_dwarf_register_span. */
27405 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27406 return 64 + regno - FIRST_VFP_REGNUM;
27407 else
27408 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27409 }
27410
27411 if (IS_IWMMXT_GR_REGNUM (regno))
27412 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27413
27414 if (IS_IWMMXT_REGNUM (regno))
27415 return 112 + regno - FIRST_IWMMXT_REGNUM;
27416
27417 return DWARF_FRAME_REGISTERS;
27418 }
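/* A few example mappings: r7 -> 7, the register for s5 -> 64 + 5 = 69
   (legacy single-precision encoding), and the first register of d16 ->
   256 + 16 = 272.  Anything unrecognized maps to DWARF_FRAME_REGISTERS.  */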
27419
27420 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27421 GCC models them as 64 32-bit registers, so we need to describe this to
27422 the DWARF generation code. Other registers can use the default. */
27423 static rtx
27424 arm_dwarf_register_span (rtx rtl)
27425 {
27426 machine_mode mode;
27427 unsigned regno;
27428 rtx parts[16];
27429 int nregs;
27430 int i;
27431
27432 regno = REGNO (rtl);
27433 if (!IS_VFP_REGNUM (regno))
27434 return NULL_RTX;
27435
27436 /* XXX FIXME: The EABI defines two VFP register ranges:
27437 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27438 256-287: D0-D31
27439 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27440 corresponding D register. Until GDB supports this, we shall use the
27441 legacy encodings. We also use these encodings for D0-D15 for
27442 compatibility with older debuggers. */
27443 mode = GET_MODE (rtl);
27444 if (GET_MODE_SIZE (mode) < 8)
27445 return NULL_RTX;
27446
27447 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27448 {
27449 nregs = GET_MODE_SIZE (mode) / 4;
27450 for (i = 0; i < nregs; i += 2)
27451 if (TARGET_BIG_END)
27452 {
27453 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27454 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27455 }
27456 else
27457 {
27458 parts[i] = gen_rtx_REG (SImode, regno + i);
27459 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27460 }
27461 }
27462 else
27463 {
27464 nregs = GET_MODE_SIZE (mode) / 8;
27465 for (i = 0; i < nregs; i++)
27466 parts[i] = gen_rtx_REG (DImode, regno + i);
27467 }
27468
27469 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27470 }
27471
27472 #if ARM_UNWIND_INFO
27473 /* Emit unwind directives for a store-multiple instruction or stack pointer
27474 push during alignment.
27475 These should only ever be generated by the function prologue code, so
27476 expect them to have a particular form.
27477 The store-multiple instruction sometimes pushes pc as the last register,
27478 although it should not be tracked in the unwind information; for -Os it
27479 sometimes pushes dummy registers before the first register that needs
27480 to be tracked in the unwind information. Such dummy registers exist only
27481 to avoid a separate stack adjustment and will not be restored in the
27482 epilogue. */
27483
27484 static void
27485 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27486 {
27487 int i;
27488 HOST_WIDE_INT offset;
27489 HOST_WIDE_INT nregs;
27490 int reg_size;
27491 unsigned reg;
27492 unsigned lastreg;
27493 unsigned padfirst = 0, padlast = 0;
27494 rtx e;
27495
27496 e = XVECEXP (p, 0, 0);
27497 gcc_assert (GET_CODE (e) == SET);
27498
27499 /* First insn will adjust the stack pointer. */
27500 gcc_assert (GET_CODE (e) == SET
27501 && REG_P (SET_DEST (e))
27502 && REGNO (SET_DEST (e)) == SP_REGNUM
27503 && GET_CODE (SET_SRC (e)) == PLUS);
27504
27505 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27506 nregs = XVECLEN (p, 0) - 1;
27507 gcc_assert (nregs);
27508
27509 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27510 if (reg < 16)
27511 {
27512 /* For -Os dummy registers can be pushed at the beginning to
27513 avoid separate stack pointer adjustment. */
27514 e = XVECEXP (p, 0, 1);
27515 e = XEXP (SET_DEST (e), 0);
27516 if (GET_CODE (e) == PLUS)
27517 padfirst = INTVAL (XEXP (e, 1));
27518 gcc_assert (padfirst == 0 || optimize_size);
27519 /* The function prologue may also push pc, but not annotate it as it is
27520 never restored. We turn this into a stack pointer adjustment. */
27521 e = XVECEXP (p, 0, nregs);
27522 e = XEXP (SET_DEST (e), 0);
27523 if (GET_CODE (e) == PLUS)
27524 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27525 else
27526 padlast = offset - 4;
27527 gcc_assert (padlast == 0 || padlast == 4);
27528 if (padlast == 4)
27529 fprintf (asm_out_file, "\t.pad #4\n");
27530 reg_size = 4;
27531 fprintf (asm_out_file, "\t.save {");
27532 }
27533 else if (IS_VFP_REGNUM (reg))
27534 {
27535 reg_size = 8;
27536 fprintf (asm_out_file, "\t.vsave {");
27537 }
27538 else
27539 /* Unknown register type. */
27540 gcc_unreachable ();
27541
27542 /* If the stack increment doesn't match the size of the saved registers,
27543 something has gone horribly wrong. */
27544 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27545
27546 offset = padfirst;
27547 lastreg = 0;
27548 /* The remaining insns will describe the stores. */
27549 for (i = 1; i <= nregs; i++)
27550 {
27551 /* Expect (set (mem <addr>) (reg)).
27552 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27553 e = XVECEXP (p, 0, i);
27554 gcc_assert (GET_CODE (e) == SET
27555 && MEM_P (SET_DEST (e))
27556 && REG_P (SET_SRC (e)));
27557
27558 reg = REGNO (SET_SRC (e));
27559 gcc_assert (reg >= lastreg);
27560
27561 if (i != 1)
27562 fprintf (asm_out_file, ", ");
27563 /* We can't use %r for vfp because we need to use the
27564 double precision register names. */
27565 if (IS_VFP_REGNUM (reg))
27566 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27567 else
27568 asm_fprintf (asm_out_file, "%r", reg);
27569
27570 if (flag_checking)
27571 {
27572 /* Check that the addresses are consecutive. */
27573 e = XEXP (SET_DEST (e), 0);
27574 if (GET_CODE (e) == PLUS)
27575 gcc_assert (REG_P (XEXP (e, 0))
27576 && REGNO (XEXP (e, 0)) == SP_REGNUM
27577 && CONST_INT_P (XEXP (e, 1))
27578 && offset == INTVAL (XEXP (e, 1)));
27579 else
27580 gcc_assert (i == 1
27581 && REG_P (e)
27582 && REGNO (e) == SP_REGNUM);
27583 offset += reg_size;
27584 }
27585 }
27586 fprintf (asm_out_file, "}\n");
27587 if (padfirst)
27588 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27589 }
27590
27591 /* Emit unwind directives for a SET. */
27592
27593 static void
27594 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27595 {
27596 rtx e0;
27597 rtx e1;
27598 unsigned reg;
27599
27600 e0 = XEXP (p, 0);
27601 e1 = XEXP (p, 1);
27602 switch (GET_CODE (e0))
27603 {
27604 case MEM:
27605 /* Pushing a single register. */
27606 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27607 || !REG_P (XEXP (XEXP (e0, 0), 0))
27608 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27609 abort ();
27610
27611 asm_fprintf (asm_out_file, "\t.save ");
27612 if (IS_VFP_REGNUM (REGNO (e1)))
27613 asm_fprintf(asm_out_file, "{d%d}\n",
27614 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27615 else
27616 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27617 break;
27618
27619 case REG:
27620 if (REGNO (e0) == SP_REGNUM)
27621 {
27622 /* A stack increment. */
27623 if (GET_CODE (e1) != PLUS
27624 || !REG_P (XEXP (e1, 0))
27625 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27626 || !CONST_INT_P (XEXP (e1, 1)))
27627 abort ();
27628
27629 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27630 -INTVAL (XEXP (e1, 1)));
27631 }
27632 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27633 {
27634 HOST_WIDE_INT offset;
27635
27636 if (GET_CODE (e1) == PLUS)
27637 {
27638 if (!REG_P (XEXP (e1, 0))
27639 || !CONST_INT_P (XEXP (e1, 1)))
27640 abort ();
27641 reg = REGNO (XEXP (e1, 0));
27642 offset = INTVAL (XEXP (e1, 1));
27643 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27644 HARD_FRAME_POINTER_REGNUM, reg,
27645 offset);
27646 }
27647 else if (REG_P (e1))
27648 {
27649 reg = REGNO (e1);
27650 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27651 HARD_FRAME_POINTER_REGNUM, reg);
27652 }
27653 else
27654 abort ();
27655 }
27656 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27657 {
27658 /* Move from sp to reg. */
27659 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27660 }
27661 else if (GET_CODE (e1) == PLUS
27662 && REG_P (XEXP (e1, 0))
27663 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27664 && CONST_INT_P (XEXP (e1, 1)))
27665 {
27666 /* Set reg to offset from sp. */
27667 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27668 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27669 }
27670 else
27671 abort ();
27672 break;
27673
27674 default:
27675 abort ();
27676 }
27677 }
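/* For example, a single-register push such as
   (set (mem (pre_dec sp)) (reg r4)) produces "\t.save {r4}", and a stack
   adjustment such as (set sp (plus sp (const_int -16))) produces
   "\t.pad #16".  */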
27678
27679
27680 /* Emit unwind directives for the given insn. */
27681
27682 static void
27683 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27684 {
27685 rtx note, pat;
27686 bool handled_one = false;
27687
27688 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27689 return;
27690
27691 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27692 && (TREE_NOTHROW (current_function_decl)
27693 || crtl->all_throwers_are_sibcalls))
27694 return;
27695
27696 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27697 return;
27698
27699 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27700 {
27701 switch (REG_NOTE_KIND (note))
27702 {
27703 case REG_FRAME_RELATED_EXPR:
27704 pat = XEXP (note, 0);
27705 goto found;
27706
27707 case REG_CFA_REGISTER:
27708 pat = XEXP (note, 0);
27709 if (pat == NULL)
27710 {
27711 pat = PATTERN (insn);
27712 if (GET_CODE (pat) == PARALLEL)
27713 pat = XVECEXP (pat, 0, 0);
27714 }
27715
27716 /* Only emitted for IS_STACKALIGN re-alignment. */
27717 {
27718 rtx dest, src;
27719 unsigned reg;
27720
27721 src = SET_SRC (pat);
27722 dest = SET_DEST (pat);
27723
27724 gcc_assert (src == stack_pointer_rtx);
27725 reg = REGNO (dest);
27726 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27727 reg + 0x90, reg);
27728 }
27729 handled_one = true;
27730 break;
27731
27732 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27733 to get correct dwarf information for shrink-wrapping. We should not
27734 emit unwind information for it because these notes are used either for
27735 pretend arguments or to adjust sp and restore registers from the
27736 stack. */
27737 case REG_CFA_DEF_CFA:
27738 case REG_CFA_ADJUST_CFA:
27739 case REG_CFA_RESTORE:
27740 return;
27741
27742 case REG_CFA_EXPRESSION:
27743 case REG_CFA_OFFSET:
27744 /* ??? Only handling here what we actually emit. */
27745 gcc_unreachable ();
27746
27747 default:
27748 break;
27749 }
27750 }
27751 if (handled_one)
27752 return;
27753 pat = PATTERN (insn);
27754 found:
27755
27756 switch (GET_CODE (pat))
27757 {
27758 case SET:
27759 arm_unwind_emit_set (asm_out_file, pat);
27760 break;
27761
27762 case SEQUENCE:
27763 /* Store multiple. */
27764 arm_unwind_emit_sequence (asm_out_file, pat);
27765 break;
27766
27767 default:
27768 abort();
27769 }
27770 }
27771
27772
27773 /* Output a reference from a function exception table to the type_info
27774 object X. The EABI specifies that the symbol should be relocated by
27775 an R_ARM_TARGET2 relocation. */
27776
27777 static bool
27778 arm_output_ttype (rtx x)
27779 {
27780 fputs ("\t.word\t", asm_out_file);
27781 output_addr_const (asm_out_file, x);
27782 /* Use special relocations for symbol references. */
27783 if (!CONST_INT_P (x))
27784 fputs ("(TARGET2)", asm_out_file);
27785 fputc ('\n', asm_out_file);
27786
27787 return TRUE;
27788 }
27789
27790 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27791
27792 static void
27793 arm_asm_emit_except_personality (rtx personality)
27794 {
27795 fputs ("\t.personality\t", asm_out_file);
27796 output_addr_const (asm_out_file, personality);
27797 fputc ('\n', asm_out_file);
27798 }
27799 #endif /* ARM_UNWIND_INFO */
27800
27801 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27802
27803 static void
27804 arm_asm_init_sections (void)
27805 {
27806 #if ARM_UNWIND_INFO
27807 exception_section = get_unnamed_section (0, output_section_asm_op,
27808 "\t.handlerdata");
27809 #endif /* ARM_UNWIND_INFO */
27810
27811 #ifdef OBJECT_FORMAT_ELF
27812 if (target_pure_code)
27813 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27814 #endif
27815 }
27816
27817 /* Output unwind directives for the start/end of a function. */
27818
27819 void
27820 arm_output_fn_unwind (FILE * f, bool prologue)
27821 {
27822 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27823 return;
27824
27825 if (prologue)
27826 fputs ("\t.fnstart\n", f);
27827 else
27828 {
27829 /* If this function will never be unwound, then mark it as such.
27830 The same condition is used in arm_unwind_emit to suppress
27831 the frame annotations. */
27832 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27833 && (TREE_NOTHROW (current_function_decl)
27834 || crtl->all_throwers_are_sibcalls))
27835 fputs("\t.cantunwind\n", f);
27836
27837 fputs ("\t.fnend\n", f);
27838 }
27839 }
27840
27841 static bool
27842 arm_emit_tls_decoration (FILE *fp, rtx x)
27843 {
27844 enum tls_reloc reloc;
27845 rtx val;
27846
27847 val = XVECEXP (x, 0, 0);
27848 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27849
27850 output_addr_const (fp, val);
27851
27852 switch (reloc)
27853 {
27854 case TLS_GD32:
27855 fputs ("(tlsgd)", fp);
27856 break;
27857 case TLS_LDM32:
27858 fputs ("(tlsldm)", fp);
27859 break;
27860 case TLS_LDO32:
27861 fputs ("(tlsldo)", fp);
27862 break;
27863 case TLS_IE32:
27864 fputs ("(gottpoff)", fp);
27865 break;
27866 case TLS_LE32:
27867 fputs ("(tpoff)", fp);
27868 break;
27869 case TLS_DESCSEQ:
27870 fputs ("(tlsdesc)", fp);
27871 break;
27872 default:
27873 gcc_unreachable ();
27874 }
27875
27876 switch (reloc)
27877 {
27878 case TLS_GD32:
27879 case TLS_LDM32:
27880 case TLS_IE32:
27881 case TLS_DESCSEQ:
27882 fputs (" + (. - ", fp);
27883 output_addr_const (fp, XVECEXP (x, 0, 2));
27884 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27885 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27886 output_addr_const (fp, XVECEXP (x, 0, 3));
27887 fputc (')', fp);
27888 break;
27889 default:
27890 break;
27891 }
27892
27893 return TRUE;
27894 }
27895
27896 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27897
27898 static void
27899 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27900 {
27901 gcc_assert (size == 4);
27902 fputs ("\t.word\t", file);
27903 output_addr_const (file, x);
27904 fputs ("(tlsldo)", file);
27905 }
27906
27907 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27908
27909 static bool
27910 arm_output_addr_const_extra (FILE *fp, rtx x)
27911 {
27912 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27913 return arm_emit_tls_decoration (fp, x);
27914 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27915 {
27916 char label[256];
27917 int labelno = INTVAL (XVECEXP (x, 0, 0));
27918
27919 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27920 assemble_name_raw (fp, label);
27921
27922 return TRUE;
27923 }
27924 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27925 {
27926 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27927 if (GOT_PCREL)
27928 fputs ("+.", fp);
27929 fputs ("-(", fp);
27930 output_addr_const (fp, XVECEXP (x, 0, 0));
27931 fputc (')', fp);
27932 return TRUE;
27933 }
27934 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27935 {
27936 output_addr_const (fp, XVECEXP (x, 0, 0));
27937 if (GOT_PCREL)
27938 fputs ("+.", fp);
27939 fputs ("-(", fp);
27940 output_addr_const (fp, XVECEXP (x, 0, 1));
27941 fputc (')', fp);
27942 return TRUE;
27943 }
27944 else if (GET_CODE (x) == CONST_VECTOR)
27945 return arm_emit_vector_const (fp, x);
27946
27947 return FALSE;
27948 }
27949
27950 /* Output assembly for a shift instruction.
27951 SET_FLAGS determines how the instruction modifies the condition codes.
27952 0 - Do not set condition codes.
27953 1 - Set condition codes.
27954 2 - Use smallest instruction. */
27955 const char *
27956 arm_output_shift(rtx * operands, int set_flags)
27957 {
27958 char pattern[100];
27959 static const char flag_chars[3] = {'?', '.', '!'};
27960 const char *shift;
27961 HOST_WIDE_INT val;
27962 char c;
27963
27964 c = flag_chars[set_flags];
27965 shift = shift_op(operands[3], &val);
27966 if (shift)
27967 {
27968 if (val != -1)
27969 operands[2] = GEN_INT(val);
27970 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27971 }
27972 else
27973 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27974
27975 output_asm_insn (pattern, operands);
27976 return "";
27977 }
27978
27979 /* Output assembly for a WMMX immediate shift instruction. */
27980 const char *
27981 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27982 {
27983 int shift = INTVAL (operands[2]);
27984 char templ[50];
27985 machine_mode opmode = GET_MODE (operands[0]);
27986
27987 gcc_assert (shift >= 0);
27988
27989 /* Handle shift values larger than the maximum for the mode: > 63 (for the
27990 D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27991 if (((opmode == V4HImode) && (shift > 15))
27992 || ((opmode == V2SImode) && (shift > 31))
27993 || ((opmode == DImode) && (shift > 63)))
27994 {
27995 if (wror_or_wsra)
27996 {
27997 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27998 output_asm_insn (templ, operands);
27999 if (opmode == DImode)
28000 {
28001 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
28002 output_asm_insn (templ, operands);
28003 }
28004 }
28005 else
28006 {
28007 /* The destination register will contain all zeros. */
28008 sprintf (templ, "wzero\t%%0");
28009 output_asm_insn (templ, operands);
28010 }
28011 return "";
28012 }
28013
28014 if ((opmode == DImode) && (shift > 32))
28015 {
28016 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28017 output_asm_insn (templ, operands);
28018 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
28019 output_asm_insn (templ, operands);
28020 }
28021 else
28022 {
28023 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
28024 output_asm_insn (templ, operands);
28025 }
28026 return "";
28027 }
28028
28029 /* Output assembly for a WMMX tinsr instruction. */
28030 const char *
28031 arm_output_iwmmxt_tinsr (rtx *operands)
28032 {
28033 int mask = INTVAL (operands[3]);
28034 int i;
28035 char templ[50];
28036 int units = mode_nunits[GET_MODE (operands[0])];
28037 gcc_assert ((mask & (mask - 1)) == 0);
28038 for (i = 0; i < units; ++i)
28039 {
28040 if ((mask & 0x01) == 1)
28041 {
28042 break;
28043 }
28044 mask >>= 1;
28045 }
28046 gcc_assert (i < units);
28047 {
28048 switch (GET_MODE (operands[0]))
28049 {
28050 case E_V8QImode:
28051 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28052 break;
28053 case E_V4HImode:
28054 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28055 break;
28056 case E_V2SImode:
28057 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28058 break;
28059 default:
28060 gcc_unreachable ();
28061 break;
28062 }
28063 output_asm_insn (templ, operands);
28064 }
28065 return "";
28066 }
28067
28068 /* Output a Thumb-1 casesi dispatch sequence. */
28069 const char *
28070 thumb1_output_casesi (rtx *operands)
28071 {
28072 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
28073
28074 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28075
28076 switch (GET_MODE(diff_vec))
28077 {
28078 case E_QImode:
28079 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28080 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28081 case E_HImode:
28082 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28083 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28084 case E_SImode:
28085 return "bl\t%___gnu_thumb1_case_si";
28086 default:
28087 gcc_unreachable ();
28088 }
28089 }
28090
28091 /* Output a Thumb-2 casesi instruction. */
28092 const char *
28093 thumb2_output_casesi (rtx *operands)
28094 {
28095 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
28096
28097 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28098
28099 output_asm_insn ("cmp\t%0, %1", operands);
28100 output_asm_insn ("bhi\t%l3", operands);
28101 switch (GET_MODE(diff_vec))
28102 {
28103 case E_QImode:
28104 return "tbb\t[%|pc, %0]";
28105 case E_HImode:
28106 return "tbh\t[%|pc, %0, lsl #1]";
28107 case E_SImode:
28108 if (flag_pic)
28109 {
28110 output_asm_insn ("adr\t%4, %l2", operands);
28111 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28112 output_asm_insn ("add\t%4, %4, %5", operands);
28113 return "bx\t%4";
28114 }
28115 else
28116 {
28117 output_asm_insn ("adr\t%4, %l2", operands);
28118 return "ldr\t%|pc, [%4, %0, lsl #2]";
28119 }
28120 default:
28121 gcc_unreachable ();
28122 }
28123 }
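/* For illustration, with the index in r0 (operand 0), the bound in operand 1
   and a QImode dispatch table, the emitted sequence is roughly:

	cmp	r0, <bound>
	bhi	<default label>
	tbb	[pc, r0]
 */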
28124
28125 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
28126 per-core tuning structs. */
28127 static int
28128 arm_issue_rate (void)
28129 {
28130 return current_tune->issue_rate;
28131 }
28132
28133 /* Return how many instructions the scheduler should look ahead in order
28134 to choose the best one. */
28135 static int
28136 arm_first_cycle_multipass_dfa_lookahead (void)
28137 {
28138 int issue_rate = arm_issue_rate ();
28139
28140 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28141 }
28142
28143 /* Enable modeling of L2 auto-prefetcher. */
28144 static int
28145 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28146 {
28147 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28148 }
28149
28150 const char *
28151 arm_mangle_type (const_tree type)
28152 {
28153 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28154 has to be mangled as if it is in the "std" namespace. */
28155 if (TARGET_AAPCS_BASED
28156 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28157 return "St9__va_list";
28158
28159 /* Half-precision float. */
28160 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28161 return "Dh";
28162
28163 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
28164 builtin type. */
28165 if (TYPE_NAME (type) != NULL)
28166 return arm_mangle_builtin_type (type);
28167
28168 /* Use the default mangling. */
28169 return NULL;
28170 }
28171
28172 /* Order of allocation of core registers for Thumb: this allocation is
28173 written over the corresponding initial entries of the array
28174 initialized with REG_ALLOC_ORDER. We allocate all low registers
28175 first. Saving and restoring a low register is usually cheaper than
28176 using a call-clobbered high register. */
28177
28178 static const int thumb_core_reg_alloc_order[] =
28179 {
28180 3, 2, 1, 0, 4, 5, 6, 7,
28181 12, 14, 8, 9, 10, 11
28182 };
28183
28184 /* Adjust register allocation order when compiling for Thumb. */
28185
28186 void
28187 arm_order_regs_for_local_alloc (void)
28188 {
28189 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28190 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28191 if (TARGET_THUMB)
28192 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28193 sizeof (thumb_core_reg_alloc_order));
28194 }
28195
28196 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28197
28198 bool
28199 arm_frame_pointer_required (void)
28200 {
28201 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28202 return true;
28203
28204 /* If the function receives nonlocal gotos, it needs to save the frame
28205 pointer in the nonlocal_goto_save_area object. */
28206 if (cfun->has_nonlocal_label)
28207 return true;
28208
28209 /* The frame pointer is required for non-leaf APCS frames. */
28210 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28211 return true;
28212
28213 /* If we are probing the stack in the prologue, we will have a faulting
28214 instruction prior to the stack adjustment and this requires a frame
28215 pointer if we want to catch the exception using the EABI unwinder. */
28216 if (!IS_INTERRUPT (arm_current_func_type ())
28217 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28218 || flag_stack_clash_protection)
28219 && arm_except_unwind_info (&global_options) == UI_TARGET
28220 && cfun->can_throw_non_call_exceptions)
28221 {
28222 HOST_WIDE_INT size = get_frame_size ();
28223
28224 /* That's irrelevant if there is no stack adjustment. */
28225 if (size <= 0)
28226 return false;
28227
28228 /* That's relevant only if there is a stack probe. */
28229 if (crtl->is_leaf && !cfun->calls_alloca)
28230 {
28231 /* We don't have the final size of the frame so adjust. */
28232 size += 32 * UNITS_PER_WORD;
28233 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28234 return true;
28235 }
28236 else
28237 return true;
28238 }
28239
28240 return false;
28241 }
28242
28243 /* Only Thumb-1 lacks conditional execution, so return true if
28244 the target is not Thumb-1. */
28245 static bool
28246 arm_have_conditional_execution (void)
28247 {
28248 return !TARGET_THUMB1;
28249 }
28250
28251 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28252 static HOST_WIDE_INT
28253 arm_vector_alignment (const_tree type)
28254 {
28255 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28256
28257 if (TARGET_AAPCS_BASED)
28258 align = MIN (align, 64);
28259
28260 return align;
28261 }
28262
28263 static void
28264 arm_autovectorize_vector_sizes (vector_sizes *sizes)
28265 {
28266 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28267 {
28268 sizes->safe_push (16);
28269 sizes->safe_push (8);
28270 }
28271 }
28272
28273 static bool
28274 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28275 {
28276 /* Vectors which aren't in packed structures will not be less aligned than
28277 the natural alignment of their element type, so this is safe. */
28278 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28279 return !is_packed;
28280
28281 return default_builtin_vector_alignment_reachable (type, is_packed);
28282 }
28283
28284 static bool
28285 arm_builtin_support_vector_misalignment (machine_mode mode,
28286 const_tree type, int misalignment,
28287 bool is_packed)
28288 {
28289 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28290 {
28291 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28292
28293 if (is_packed)
28294 return align == 1;
28295
28296 /* If the misalignment is unknown, we should be able to handle the access
28297 so long as it is not to a member of a packed data structure. */
28298 if (misalignment == -1)
28299 return true;
28300
28301 /* Return true if the misalignment is a multiple of the natural alignment
28302 of the vector's element type. This is probably always going to be
28303 true in practice, since we've already established that this isn't a
28304 packed access. */
28305 return ((misalignment % align) == 0);
28306 }
28307
28308 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28309 is_packed);
28310 }
28311
28312 static void
28313 arm_conditional_register_usage (void)
28314 {
28315 int regno;
28316
28317 if (TARGET_THUMB1 && optimize_size)
28318 {
28319 /* When optimizing for size on Thumb-1, it's better not
28320 to use the HI regs, because of the overhead of
28321 stacking them. */
28322 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28323 fixed_regs[regno] = call_used_regs[regno] = 1;
28324 }
28325
28326 /* The link register can be clobbered by any branch insn,
28327 but we have no way to track that at present, so mark
28328 it as unavailable. */
28329 if (TARGET_THUMB1)
28330 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28331
28332 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28333 {
28334 /* VFPv3 registers are disabled when earlier VFP
28335 versions are selected due to the definition of
28336 LAST_VFP_REGNUM. */
28337 for (regno = FIRST_VFP_REGNUM;
28338 regno <= LAST_VFP_REGNUM; ++ regno)
28339 {
28340 fixed_regs[regno] = 0;
28341 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28342 || regno >= FIRST_VFP_REGNUM + 32;
28343 }
28344 }
28345
28346 if (TARGET_REALLY_IWMMXT)
28347 {
28348 regno = FIRST_IWMMXT_GR_REGNUM;
28349 /* The 2002/10/09 revision of the XScale ABI has wCG0
28350 and wCG1 as call-preserved registers. The 2002/11/21
28351 revision changed this so that all wCG registers are
28352 scratch registers. */
28353 for (regno = FIRST_IWMMXT_GR_REGNUM;
28354 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28355 fixed_regs[regno] = 0;
28356 /* The XScale ABI has wR0 - wR9 as scratch registers,
28357 the rest as call-preserved registers. */
28358 for (regno = FIRST_IWMMXT_REGNUM;
28359 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28360 {
28361 fixed_regs[regno] = 0;
28362 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28363 }
28364 }
28365
28366 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28367 {
28368 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28369 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28370 }
28371 else if (TARGET_APCS_STACK)
28372 {
28373 fixed_regs[10] = 1;
28374 call_used_regs[10] = 1;
28375 }
28376 /* -mcaller-super-interworking reserves r11 for calls to
28377 _interwork_r11_call_via_rN(). Making the register global
28378 is an easy way of ensuring that it remains valid for all
28379 calls. */
28380 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28381 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28382 {
28383 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28384 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28385 if (TARGET_CALLER_INTERWORKING)
28386 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28387 }
28388 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28389 }
28390
28391 static reg_class_t
28392 arm_preferred_rename_class (reg_class_t rclass)
28393 {
28394 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28395 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
28396 so code size can be reduced. */
28397 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28398 return LO_REGS;
28399 else
28400 return NO_REGS;
28401 }
28402
28403 /* Compute the attribute "length" of insn "*push_multi".
28404 So this function MUST be kept in sync with that insn pattern. */
28405 int
28406 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28407 {
28408 int i, regno, hi_reg;
28409 int num_saves = XVECLEN (parallel_op, 0);
28410
28411 /* ARM mode. */
28412 if (TARGET_ARM)
28413 return 4;
28414 /* Thumb1 mode. */
28415 if (TARGET_THUMB1)
28416 return 2;
28417
28418 /* Thumb2 mode. */
28419 regno = REGNO (first_op);
28420 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28421 list is 8-bit. Normally this means all registers in the list must be
28422 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
28423 encodings. The one exception is PUSH, where LR (a HI_REGS register) can
28424 still be used with the 16-bit encoding. */
28425 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28426 for (i = 1; i < num_saves && !hi_reg; i++)
28427 {
28428 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28429 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28430 }
28431
28432 if (!hi_reg)
28433 return 2;
28434 return 4;
28435 }
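/* For example, in Thumb-2 mode "push {r4-r7, lr}" uses the 16-bit encoding
   (length 2), whereas "push {r4, r8}" needs the 32-bit encoding (length 4)
   because r8 is in HI_REGS.  */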
28436
28437 /* Compute the attribute "length" of an insn. Currently, this function is used
28438 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28439 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28440 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28441 true if OPERANDS contains an insn that explicitly updates the base register. */
28442
28443 int
28444 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28445 {
28446 /* ARM mode. */
28447 if (TARGET_ARM)
28448 return 4;
28449 /* Thumb1 mode. */
28450 if (TARGET_THUMB1)
28451 return 2;
28452
28453 rtx parallel_op = operands[0];
28454 /* Index of the last element of the PARALLEL. */
28455 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28456 /* Start with the base register number. */
28457 unsigned regno = REGNO (operands[1]);
28458 /* Skip return and write back pattern.
28459 We only need register pop pattern for later analysis. */
28460 unsigned first_indx = 0;
28461 first_indx += return_pc ? 1 : 0;
28462 first_indx += write_back_p ? 1 : 0;
28463
28464 /* A pop operation can be done through LDM or POP. If the base register is SP
28465 with write back, then the LDM is an alias of POP. */
28466 bool pop_p = (regno == SP_REGNUM && write_back_p);
28467 bool ldm_p = !pop_p;
28468
28469 /* Check base register for LDM. */
28470 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28471 return 4;
28472
28473 /* Check each register in the list. */
28474 for (; indx >= first_indx; indx--)
28475 {
28476 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28477 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28478 comment in arm_attr_length_push_multi. */
28479 if (REGNO_REG_CLASS (regno) == HI_REGS
28480 && (regno != PC_REGNUM || ldm_p))
28481 return 4;
28482 }
28483
28484 return 2;
28485 }
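/* For example, in Thumb-2 mode "pop {r4-r7, pc}" uses the 16-bit encoding
   (length 2), since PC is the one HI_REGS register allowed there, whereas
   "ldmia r8!, {r0-r3}" needs the 32-bit encoding (length 4) because the
   base register is in HI_REGS.  */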
28486
28487 /* Compute the number of instructions emitted by output_move_double. */
28488 int
28489 arm_count_output_move_double_insns (rtx *operands)
28490 {
28491 int count;
28492 rtx ops[2];
28493 /* output_move_double may modify the operands array, so call it
28494 here on a copy of the array. */
28495 ops[0] = operands[0];
28496 ops[1] = operands[1];
28497 output_move_double (ops, false, &count);
28498 return count;
28499 }
28500
28501 int
28502 vfp3_const_double_for_fract_bits (rtx operand)
28503 {
28504 REAL_VALUE_TYPE r0;
28505
28506 if (!CONST_DOUBLE_P (operand))
28507 return 0;
28508
28509 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28510 if (exact_real_inverse (DFmode, &r0)
28511 && !REAL_VALUE_NEGATIVE (r0))
28512 {
28513 if (exact_real_truncate (DFmode, &r0))
28514 {
28515 HOST_WIDE_INT value = real_to_integer (&r0);
28516 value = value & 0xffffffff;
28517 if ((value != 0) && ( (value & (value - 1)) == 0))
28518 {
28519 int ret = exact_log2 (value);
28520 gcc_assert (IN_RANGE (ret, 0, 31));
28521 return ret;
28522 }
28523 }
28524 }
28525 return 0;
28526 }
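/* For example, the constant 0.125 has the exact inverse 8.0 = 2^3, so the
   function returns 3; a constant such as 0.3 has no exact power-of-two
   inverse and the function returns 0.  */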
28527
28528 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28529 log2 is in [1, 32], return that log2. Otherwise return -1.
28530 This is used in the patterns for vcvt.s32.f32 floating-point to
28531 fixed-point conversions. */
28532
28533 int
28534 vfp3_const_double_for_bits (rtx x)
28535 {
28536 const REAL_VALUE_TYPE *r;
28537
28538 if (!CONST_DOUBLE_P (x))
28539 return -1;
28540
28541 r = CONST_DOUBLE_REAL_VALUE (x);
28542
28543 if (REAL_VALUE_NEGATIVE (*r)
28544 || REAL_VALUE_ISNAN (*r)
28545 || REAL_VALUE_ISINF (*r)
28546 || !real_isinteger (r, SFmode))
28547 return -1;
28548
28549 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28550
28551 /* The exact_log2 above will have returned -1 if this is
28552 not an exact log2. */
28553 if (!IN_RANGE (hwint, 1, 32))
28554 return -1;
28555
28556 return hwint;
28557 }
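/* For example, the constant 4.0 yields exact_log2 (4) = 2, which is in
   [1, 32], so 2 is returned; 3.0 is an integer but not a power of 2, so
   the result is -1.  */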
28558
28559 \f
28560 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28561
28562 static void
28563 arm_pre_atomic_barrier (enum memmodel model)
28564 {
28565 if (need_atomic_barrier_p (model, true))
28566 emit_insn (gen_memory_barrier ());
28567 }
28568
28569 static void
28570 arm_post_atomic_barrier (enum memmodel model)
28571 {
28572 if (need_atomic_barrier_p (model, false))
28573 emit_insn (gen_memory_barrier ());
28574 }
28575
28576 /* Emit the load-exclusive and store-exclusive instructions.
28577 Use acquire and release versions if necessary. */
28578
28579 static void
28580 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28581 {
28582 rtx (*gen) (rtx, rtx);
28583
28584 if (acq)
28585 {
28586 switch (mode)
28587 {
28588 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28589 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28590 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28591 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28592 default:
28593 gcc_unreachable ();
28594 }
28595 }
28596 else
28597 {
28598 switch (mode)
28599 {
28600 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28601 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28602 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28603 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28604 default:
28605 gcc_unreachable ();
28606 }
28607 }
28608
28609 emit_insn (gen (rval, mem));
28610 }
28611
28612 static void
28613 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28614 rtx mem, bool rel)
28615 {
28616 rtx (*gen) (rtx, rtx, rtx);
28617
28618 if (rel)
28619 {
28620 switch (mode)
28621 {
28622 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28623 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28624 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28625 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28626 default:
28627 gcc_unreachable ();
28628 }
28629 }
28630 else
28631 {
28632 switch (mode)
28633 {
28634 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28635 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28636 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28637 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28638 default:
28639 gcc_unreachable ();
28640 }
28641 }
28642
28643 emit_insn (gen (bval, rval, mem));
28644 }
28645
28646 /* Mark the previous jump instruction as unlikely. */
28647
28648 static void
28649 emit_unlikely_jump (rtx insn)
28650 {
28651 rtx_insn *jump = emit_jump_insn (insn);
28652 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28653 }
28654
28655 /* Expand a compare and swap pattern. */
28656
28657 void
28658 arm_expand_compare_and_swap (rtx operands[])
28659 {
28660 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28661 machine_mode mode, cmp_mode;
28662
28663 bval = operands[0];
28664 rval = operands[1];
28665 mem = operands[2];
28666 oldval = operands[3];
28667 newval = operands[4];
28668 is_weak = operands[5];
28669 mod_s = operands[6];
28670 mod_f = operands[7];
28671 mode = GET_MODE (mem);
28672
28673 /* Normally the succ memory model must be stronger than fail, but in the
28674 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28675 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28676
28677 if (TARGET_HAVE_LDACQ
28678 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28679 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28680 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28681
28682 switch (mode)
28683 {
28684 case E_QImode:
28685 case E_HImode:
28686 /* For narrow modes, we're going to perform the comparison in SImode,
28687 so do the zero-extension now. */
28688 rval = gen_reg_rtx (SImode);
28689 oldval = convert_modes (SImode, mode, oldval, true);
28690 /* FALLTHRU */
28691
28692 case E_SImode:
28693 /* Force the value into a register if needed. We waited until after
28694 the zero-extension above to do this properly. */
28695 if (!arm_add_operand (oldval, SImode))
28696 oldval = force_reg (SImode, oldval);
28697 break;
28698
28699 case E_DImode:
28700 if (!cmpdi_operand (oldval, mode))
28701 oldval = force_reg (mode, oldval);
28702 break;
28703
28704 default:
28705 gcc_unreachable ();
28706 }
28707
28708 if (TARGET_THUMB1)
28709 cmp_mode = E_SImode;
28710 else
28711 cmp_mode = CC_Zmode;
28712
28713 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28714 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
28715 oldval, newval, is_weak, mod_s, mod_f));
28716
28717 if (mode == QImode || mode == HImode)
28718 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28719
28720 /* In all cases, we arrange for success to be signaled by Z being set.
28721 This arrangement allows the boolean result to be used directly in a
28722 subsequent branch, post optimization. For Thumb-1 targets, the boolean
28723 negation of the result is also stored in bval because the Thumb-1
28724 backend lacks dependency tracking for the CC flag, as flag-setting is
28725 not represented at the RTL level. */
28726 if (TARGET_THUMB1)
28727 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28728 else
28729 {
28730 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28731 emit_insn (gen_rtx_SET (bval, x));
28732 }
28733 }
28734
28735 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28736 another memory store between the load-exclusive and store-exclusive can
28737 reset the monitor from Exclusive to Open state. This means we must wait
28738 until after reload to split the pattern, lest we get a register spill in
28739 the middle of the atomic sequence. Success of the compare and swap is
28740 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28741 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by
28742 the atomic_compare_and_swapmode standard pattern in operand 0). */
28743
28744 void
28745 arm_split_compare_and_swap (rtx operands[])
28746 {
28747 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
28748 machine_mode mode;
28749 enum memmodel mod_s, mod_f;
28750 bool is_weak;
28751 rtx_code_label *label1, *label2;
28752 rtx x, cond;
28753
28754 rval = operands[1];
28755 mem = operands[2];
28756 oldval = operands[3];
28757 newval = operands[4];
28758 is_weak = (operands[5] != const0_rtx);
28759 mod_s_rtx = operands[6];
28760 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
28761 mod_f = memmodel_from_int (INTVAL (operands[7]));
28762 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28763 mode = GET_MODE (mem);
28764
28765 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28766
28767 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
28768 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
28769
28770 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28771 a full barrier is emitted after the store-release. */
28772 if (is_armv8_sync)
28773 use_acquire = false;
28774
28775 /* Checks whether a barrier is needed and emits one accordingly. */
28776 if (!(use_acquire || use_release))
28777 arm_pre_atomic_barrier (mod_s);
28778
28779 label1 = NULL;
28780 if (!is_weak)
28781 {
28782 label1 = gen_label_rtx ();
28783 emit_label (label1);
28784 }
28785 label2 = gen_label_rtx ();
28786
28787 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28788
28789 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1) if
28790 oldval != rval, as required to communicate with arm_expand_compare_and_swap. */
28791 if (TARGET_32BIT)
28792 {
28793 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28794 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28795 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28796 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28797 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28798 }
28799 else
28800 {
28801 emit_move_insn (neg_bval, const1_rtx);
28802 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28803 if (thumb1_cmpneg_operand (oldval, SImode))
28804 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28805 label2, cond));
28806 else
28807 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28808 }
28809
28810 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28811
28812 /* Weak or strong, we want EQ to be true for success, so that we
28813 match the flags that we got from the compare above. */
28814 if (TARGET_32BIT)
28815 {
28816 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28817 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28818 emit_insn (gen_rtx_SET (cond, x));
28819 }
28820
28821 if (!is_weak)
28822 {
28823 /* Z is set to boolean value of !neg_bval, as required to communicate
28824 with arm_expand_compare_and_swap. */
28825 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28826 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28827 }
28828
28829 if (!is_mm_relaxed (mod_f))
28830 emit_label (label2);
28831
28832 /* Checks whether a barrier is needed and emits one accordingly. */
28833 if (is_armv8_sync
28834 || !(use_acquire || use_release))
28835 arm_post_atomic_barrier (mod_s);
28836
28837 if (is_mm_relaxed (mod_f))
28838 emit_label (label2);
28839 }
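
/* As an illustrative sketch only (register names are arbitrary, and the exact
   sequence depends on the architecture, the memory model and is_weak), a
   strong SImode compare-and-swap without single-copy acquire/release
   instructions splits into roughly:

	dmb	ish		@ arm_pre_atomic_barrier
     1:	ldrex	Rr, [Rm]	@ current value -> rval
	cmp	Rr, Ro		@ compare with oldval
	bne	2f		@ unlikely failure path, Z clear
	strex	Rt, Rn, [Rm]	@ try to store newval
	cmp	Rt, #0		@ Z set on success
	bne	1b		@ strong CAS: retry on lost exclusivity
     2:	dmb	ish		@ arm_post_atomic_barrier

   With TARGET_HAVE_LDACQ the barriers are generally replaced by ldaex/stlex
   forms instead.  */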
28840
28841 /* Split an atomic operation pattern. Operation is given by CODE and is one
28842 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28843 operation). The operation is performed on the content at MEM and on VALUE
28844 following the memory model MODEL_RTX. The content at MEM before and after
28845 the operation is returned in OLD_OUT and NEW_OUT respectively, while the
28846 success of the operation is returned in COND. Using a scratch register or
28847 an operand register for these determines what result is returned for that
28848 pattern. */
28849
28850 void
28851 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28852 rtx value, rtx model_rtx, rtx cond)
28853 {
28854 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28855 machine_mode mode = GET_MODE (mem);
28856 machine_mode wmode = (mode == DImode ? DImode : SImode);
28857 rtx_code_label *label;
28858 bool all_low_regs, bind_old_new;
28859 rtx x;
28860
28861 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28862
28863 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
28864 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
28865
28866 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28867 a full barrier is emitted after the store-release. */
28868 if (is_armv8_sync)
28869 use_acquire = false;
28870
28871 /* Checks whether a barrier is needed and emits one accordingly. */
28872 if (!(use_acquire || use_release))
28873 arm_pre_atomic_barrier (model);
28874
28875 label = gen_label_rtx ();
28876 emit_label (label);
28877
28878 if (new_out)
28879 new_out = gen_lowpart (wmode, new_out);
28880 if (old_out)
28881 old_out = gen_lowpart (wmode, old_out);
28882 else
28883 old_out = new_out;
28884 value = simplify_gen_subreg (wmode, value, mode, 0);
28885
28886 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28887
28888 /* Does the operation require destination and first operand to use the same
28889 register? This is decided by register constraints of relevant insn
28890 patterns in thumb1.md. */
28891 gcc_assert (!new_out || REG_P (new_out));
28892 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28893 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28894 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28895 bind_old_new =
28896 (TARGET_THUMB1
28897 && code != SET
28898 && code != MINUS
28899 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28900
28901 /* We want to return the old value while putting the result of the operation
28902 in the same register as the old value so copy the old value over to the
28903 destination register and use that register for the operation. */
28904 if (old_out && bind_old_new)
28905 {
28906 emit_move_insn (new_out, old_out);
28907 old_out = new_out;
28908 }
28909
28910 switch (code)
28911 {
28912 case SET:
28913 new_out = value;
28914 break;
28915
28916 case NOT:
28917 x = gen_rtx_AND (wmode, old_out, value);
28918 emit_insn (gen_rtx_SET (new_out, x));
28919 x = gen_rtx_NOT (wmode, new_out);
28920 emit_insn (gen_rtx_SET (new_out, x));
28921 break;
28922
28923 case MINUS:
28924 if (CONST_INT_P (value))
28925 {
28926 value = GEN_INT (-INTVAL (value));
28927 code = PLUS;
28928 }
28929 /* FALLTHRU */
28930
28931 case PLUS:
28932 if (mode == DImode)
28933 {
28934 /* DImode plus/minus need to clobber flags. */
28935 /* The adddi3 and subdi3 patterns are incorrectly written so that
28936 they require matching operands, even when we could easily support
28937 three operands. Thankfully, this can be fixed up post-splitting,
28938 as the individual add+adc patterns do accept three operands and
28939 post-reload cprop can make these moves go away. */
28940 emit_move_insn (new_out, old_out);
28941 if (code == PLUS)
28942 x = gen_adddi3 (new_out, new_out, value);
28943 else
28944 x = gen_subdi3 (new_out, new_out, value);
28945 emit_insn (x);
28946 break;
28947 }
28948 /* FALLTHRU */
28949
28950 default:
28951 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28952 emit_insn (gen_rtx_SET (new_out, x));
28953 break;
28954 }
28955
28956 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28957 use_release);
28958
28959 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28960 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28961
28962 /* Checks whether a barrier is needed and emits one accordingly. */
28963 if (is_armv8_sync
28964 || !(use_acquire || use_release))
28965 arm_post_atomic_barrier (model);
28966 }
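
/* For illustration (a minimal sketch; register names are arbitrary and
   barrier placement depends on the memory model), an SImode atomic add split
   by the function above looks roughly like:

	dmb	ish		@ only without acquire/release forms
     1:	ldrex	Rold, [Rm]	@ old_out
	add	Rnew, Rold, Rv	@ new_out = old_out + value
	strex	Rt, Rnew, [Rm]
	cmp	Rt, #0
	bne	1b		@ retry until the exclusive store succeeds
	dmb	ish  */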
28967 \f
28968 #define MAX_VECT_LEN 16
28969
28970 struct expand_vec_perm_d
28971 {
28972 rtx target, op0, op1;
28973 vec_perm_indices perm;
28974 machine_mode vmode;
28975 bool one_vector_p;
28976 bool testing_p;
28977 };
28978
28979 /* Generate a variable permutation. */
28980
28981 static void
28982 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28983 {
28984 machine_mode vmode = GET_MODE (target);
28985 bool one_vector_p = rtx_equal_p (op0, op1);
28986
28987 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28988 gcc_checking_assert (GET_MODE (op0) == vmode);
28989 gcc_checking_assert (GET_MODE (op1) == vmode);
28990 gcc_checking_assert (GET_MODE (sel) == vmode);
28991 gcc_checking_assert (TARGET_NEON);
28992
28993 if (one_vector_p)
28994 {
28995 if (vmode == V8QImode)
28996 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28997 else
28998 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28999 }
29000 else
29001 {
29002 rtx pair;
29003
29004 if (vmode == V8QImode)
29005 {
29006 pair = gen_reg_rtx (V16QImode);
29007 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
29008 pair = gen_lowpart (TImode, pair);
29009 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
29010 }
29011 else
29012 {
29013 pair = gen_reg_rtx (OImode);
29014 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29015 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29016 }
29017 }
29018 }
29019
29020 void
29021 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29022 {
29023 machine_mode vmode = GET_MODE (target);
29024 unsigned int nelt = GET_MODE_NUNITS (vmode);
29025 bool one_vector_p = rtx_equal_p (op0, op1);
29026 rtx mask;
29027
29028 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29029 numbering of elements for big-endian, we must reverse the order. */
29030 gcc_checking_assert (!BYTES_BIG_ENDIAN);
29031
29032 /* The VTBL instruction does not use a modulo index, so we must take care
29033 of that ourselves. */
29034 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29035 mask = gen_const_vec_duplicate (vmode, mask);
29036 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29037
29038 arm_expand_vec_perm_1 (target, op0, op1, sel);
29039 }
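
/* For example (illustrative only): with a V8QImode permutation of a single
   vector (op0 == op1) the mask is 7, so a variable selector element of 11
   picks lane 11 & 7 == 3 of op0; with two distinct operands the mask is 15
   and the same element picks lane 3 of op1 from the 16-byte table formed by
   the VTBL expansion above.  */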
29040
29041 /* Map lane ordering between architectural lane order and GCC lane order,
29042 taking the ABI into account. See comment above output_move_neon for details. */
29043
29044 static int
29045 neon_endian_lane_map (machine_mode mode, int lane)
29046 {
29047 if (BYTES_BIG_ENDIAN)
29048 {
29049 int nelems = GET_MODE_NUNITS (mode);
29050 /* Reverse lane order. */
29051 lane = (nelems - 1 - lane);
29052 /* Reverse D register order, to match ABI. */
29053 if (GET_MODE_SIZE (mode) == 16)
29054 lane = lane ^ (nelems / 2);
29055 }
29056 return lane;
29057 }
29058
29059 /* Some permutations index into pairs of vectors; this is a helper function
29060 to map indexes into those pairs of vectors. */
29061
29062 static int
29063 neon_pair_endian_lane_map (machine_mode mode, int lane)
29064 {
29065 int nelem = GET_MODE_NUNITS (mode);
29066 if (BYTES_BIG_ENDIAN)
29067 lane =
29068 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
29069 return lane;
29070 }
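
/* A worked example (for illustration): for big-endian V4SImode, GCC lane 0
   first maps to architectural lane 3 (order reversal) and, because the mode
   occupies a 16-byte Q register, the two D-register halves are then swapped
   (3 ^ 2 == 1), so neon_endian_lane_map returns 1.  */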
29071
29072 /* Generate or test for an insn that supports a constant permutation. */
29073
29074 /* Recognize patterns for the VUZP insns. */
29075
29076 static bool
29077 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29078 {
29079 unsigned int i, odd, mask, nelt = d->perm.length ();
29080 rtx out0, out1, in0, in1;
29081 int first_elem;
29082 int swap_nelt;
29083
29084 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29085 return false;
29086
29087 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29088 big-endian pattern on 64-bit vectors, so we correct for that. */
29089 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29090 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29091
29092 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29093
29094 if (first_elem == neon_endian_lane_map (d->vmode, 0))
29095 odd = 0;
29096 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29097 odd = 1;
29098 else
29099 return false;
29100 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29101
29102 for (i = 0; i < nelt; i++)
29103 {
29104 unsigned elt =
29105 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29106 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29107 return false;
29108 }
29109
29110 /* Success! */
29111 if (d->testing_p)
29112 return true;
29113
29114 in0 = d->op0;
29115 in1 = d->op1;
29116 if (swap_nelt != 0)
29117 std::swap (in0, in1);
29118
29119 out0 = d->target;
29120 out1 = gen_reg_rtx (d->vmode);
29121 if (odd)
29122 std::swap (out0, out1);
29123
29124 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
29125 return true;
29126 }
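
/* For example (little-endian, illustrative only): with two V8QImode operands
   the selector { 0, 2, 4, 6, 8, 10, 12, 14 } picks the even-indexed bytes of
   the concatenated inputs; it is matched above with odd == 0 and expands to a
   VUZP.8 whose first result is kept in d->target.  */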
29127
29128 /* Recognize patterns for the VZIP insns. */
29129
29130 static bool
29131 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29132 {
29133 unsigned int i, high, mask, nelt = d->perm.length ();
29134 rtx out0, out1, in0, in1;
29135 int first_elem;
29136 bool is_swapped;
29137
29138 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29139 return false;
29140
29141 is_swapped = BYTES_BIG_ENDIAN;
29142
29143 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29144
29145 high = nelt / 2;
29146 if (first_elem == neon_endian_lane_map (d->vmode, high))
29147 ;
29148 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29149 high = 0;
29150 else
29151 return false;
29152 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29153
29154 for (i = 0; i < nelt / 2; i++)
29155 {
29156 unsigned elt =
29157 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29158 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29159 != elt)
29160 return false;
29161 elt =
29162 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29163 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29164 != elt)
29165 return false;
29166 }
29167
29168 /* Success! */
29169 if (d->testing_p)
29170 return true;
29171
29172 in0 = d->op0;
29173 in1 = d->op1;
29174 if (is_swapped)
29175 std::swap (in0, in1);
29176
29177 out0 = d->target;
29178 out1 = gen_reg_rtx (d->vmode);
29179 if (high)
29180 std::swap (out0, out1);
29181
29182 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
29183 return true;
29184 }
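
/* For example (little-endian, illustrative only): with two V8QImode operands
   the selector { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of the
   inputs; it is matched above with high == 0 and expands to a VZIP.8 whose
   first result is kept in d->target.  */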
29185
29186 /* Recognize patterns for the VREV insns. */
29187 static bool
29188 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29189 {
29190 unsigned int i, j, diff, nelt = d->perm.length ();
29191 rtx (*gen) (machine_mode, rtx, rtx);
29192
29193 if (!d->one_vector_p)
29194 return false;
29195
29196 diff = d->perm[0];
29197 switch (diff)
29198 {
29199 case 7:
29200 switch (d->vmode)
29201 {
29202 case E_V16QImode:
29203 case E_V8QImode:
29204 gen = gen_neon_vrev64;
29205 break;
29206 default:
29207 return false;
29208 }
29209 break;
29210 case 3:
29211 switch (d->vmode)
29212 {
29213 case E_V16QImode:
29214 case E_V8QImode:
29215 gen = gen_neon_vrev32;
29216 break;
29217 case E_V8HImode:
29218 case E_V4HImode:
29219 case E_V8HFmode:
29220 case E_V4HFmode:
29221 gen = gen_neon_vrev64;
29222 break;
29223 default:
29224 return false;
29225 }
29226 break;
29227 case 1:
29228 switch (d->vmode)
29229 {
29230 case E_V16QImode:
29231 case E_V8QImode:
29232 gen = gen_neon_vrev16;
29233 break;
29234 case E_V8HImode:
29235 case E_V4HImode:
29236 gen = gen_neon_vrev32;
29237 break;
29238 case E_V4SImode:
29239 case E_V2SImode:
29240 case E_V4SFmode:
29241 case E_V2SFmode:
29242 gen = gen_neon_vrev64;
29243 break;
29244 default:
29245 return false;
29246 }
29247 break;
29248 default:
29249 return false;
29250 }
29251
29252 for (i = 0; i < nelt ; i += diff + 1)
29253 for (j = 0; j <= diff; j += 1)
29254 {
29255 /* This is guaranteed to be true as the value of diff
29256 is 7, 3 or 1 and we should have enough elements in the
29257 queue to generate this. Getting a vector mask with a
29258 value of diff other than these implies that
29259 something is wrong by the time we get here. */
29260 gcc_assert (i + j < nelt);
29261 if (d->perm[i + j] != i + diff - j)
29262 return false;
29263 }
29264
29265 /* Success! */
29266 if (d->testing_p)
29267 return true;
29268
29269 emit_insn (gen (d->vmode, d->target, d->op0));
29270 return true;
29271 }
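
/* For example (illustrative only): on a single V8QImode operand the selector
   { 3, 2, 1, 0, 7, 6, 5, 4 } gives diff == 3 and is matched above as a
   VREV32.8, i.e. the bytes are reversed within each 32-bit group.  */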
29272
29273 /* Recognize patterns for the VTRN insns. */
29274
29275 static bool
29276 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29277 {
29278 unsigned int i, odd, mask, nelt = d->perm.length ();
29279 rtx out0, out1, in0, in1;
29280
29281 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29282 return false;
29283
29284 /* Note that these are little-endian tests. Adjust for big-endian later. */
29285 if (d->perm[0] == 0)
29286 odd = 0;
29287 else if (d->perm[0] == 1)
29288 odd = 1;
29289 else
29290 return false;
29291 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29292
29293 for (i = 0; i < nelt; i += 2)
29294 {
29295 if (d->perm[i] != i + odd)
29296 return false;
29297 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29298 return false;
29299 }
29300
29301 /* Success! */
29302 if (d->testing_p)
29303 return true;
29304
29305 in0 = d->op0;
29306 in1 = d->op1;
29307 if (BYTES_BIG_ENDIAN)
29308 {
29309 std::swap (in0, in1);
29310 odd = !odd;
29311 }
29312
29313 out0 = d->target;
29314 out1 = gen_reg_rtx (d->vmode);
29315 if (odd)
29316 std::swap (out0, out1);
29317
29318 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
29319 return true;
29320 }
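
/* For example (little-endian, illustrative only): with two V8QImode operands
   the selector { 0, 8, 2, 10, 4, 12, 6, 14 } is matched above with odd == 0
   and expands to a VTRN.8; d->target receives the first result, which
   interleaves the even lanes of op0 with the even lanes of op1.  */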
29321
29322 /* Recognize patterns for the VEXT insns. */
29323
29324 static bool
29325 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29326 {
29327 unsigned int i, nelt = d->perm.length ();
29328 rtx offset;
29329
29330 unsigned int location;
29331
29332 unsigned int next = d->perm[0] + 1;
29333
29334 /* TODO: Handle GCC's numbering of elements for big-endian. */
29335 if (BYTES_BIG_ENDIAN)
29336 return false;
29337
29338 /* Check if the extracted indexes are increasing by one. */
29339 for (i = 1; i < nelt; next++, i++)
29340 {
29341 /* If we hit the most significant element of the 2nd vector in
29342 the previous iteration, no need to test further. */
29343 if (next == 2 * nelt)
29344 return false;
29345
29346 /* If we are operating on only one vector: it could be a
29347 rotation. If there are only two elements of size < 64, let
29348 arm_evpc_neon_vrev catch it. */
29349 if (d->one_vector_p && (next == nelt))
29350 {
29351 if ((nelt == 2) && (d->vmode != V2DImode))
29352 return false;
29353 else
29354 next = 0;
29355 }
29356
29357 if (d->perm[i] != next)
29358 return false;
29359 }
29360
29361 location = d->perm[0];
29362
29363 /* Success! */
29364 if (d->testing_p)
29365 return true;
29366
29367 offset = GEN_INT (location);
29368
29369 if (d->vmode == E_DImode)
29370 return false;
29371
29372 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
29373 return true;
29374 }
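
/* For example (little-endian, illustrative only): with two V8QImode operands
   the selector { 3, 4, 5, 6, 7, 8, 9, 10 } extracts eight consecutive bytes
   starting at index 3 of the concatenated inputs and is matched above as
   VEXT.8 with an immediate of #3.  */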
29375
29376 /* The NEON VTBL instruction is a fully variable permutation that's even
29377 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29378 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29379 can do slightly better by expanding this as a constant where we don't
29380 have to apply a mask. */
29381
29382 static bool
29383 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29384 {
29385 rtx rperm[MAX_VECT_LEN], sel;
29386 machine_mode vmode = d->vmode;
29387 unsigned int i, nelt = d->perm.length ();
29388
29389 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29390 numbering of elements for big-endian, we must reverse the order. */
29391 if (BYTES_BIG_ENDIAN)
29392 return false;
29393
29394 if (d->testing_p)
29395 return true;
29396
29397 /* Generic code will try constant permutation twice: once with the
29398 original mode and again with the elements lowered to QImode.
29399 So wait and don't do the selector expansion ourselves. */
29400 if (vmode != V8QImode && vmode != V16QImode)
29401 return false;
29402
29403 for (i = 0; i < nelt; ++i)
29404 rperm[i] = GEN_INT (d->perm[i]);
29405 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29406 sel = force_reg (vmode, sel);
29407
29408 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29409 return true;
29410 }
29411
29412 static bool
29413 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29414 {
29415 /* Check if the input mask matches vext before reordering the
29416 operands. */
29417 if (TARGET_NEON)
29418 if (arm_evpc_neon_vext (d))
29419 return true;
29420
29421 /* The pattern matching functions above are written to look for a small
29422 number to begin the sequence (0, 1, N/2). If we begin with an index
29423 from the second operand, we can swap the operands. */
29424 unsigned int nelt = d->perm.length ();
29425 if (d->perm[0] >= nelt)
29426 {
29427 d->perm.rotate_inputs (1);
29428 std::swap (d->op0, d->op1);
29429 }
29430
29431 if (TARGET_NEON)
29432 {
29433 if (arm_evpc_neon_vuzp (d))
29434 return true;
29435 if (arm_evpc_neon_vzip (d))
29436 return true;
29437 if (arm_evpc_neon_vrev (d))
29438 return true;
29439 if (arm_evpc_neon_vtrn (d))
29440 return true;
29441 return arm_evpc_neon_vtbl (d);
29442 }
29443 return false;
29444 }
29445
29446 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29447
29448 static bool
29449 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29450 const vec_perm_indices &sel)
29451 {
29452 struct expand_vec_perm_d d;
29453 int i, nelt, which;
29454
29455 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29456 return false;
29457
29458 d.target = target;
29459 d.op0 = op0;
29460 d.op1 = op1;
29461
29462 d.vmode = vmode;
29463 gcc_assert (VECTOR_MODE_P (d.vmode));
29464 d.testing_p = !target;
29465
29466 nelt = GET_MODE_NUNITS (d.vmode);
29467 for (i = which = 0; i < nelt; ++i)
29468 {
29469 int ei = sel[i] & (2 * nelt - 1);
29470 which |= (ei < nelt ? 1 : 2);
29471 }
29472
29473 switch (which)
29474 {
29475 default:
29476 gcc_unreachable();
29477
29478 case 3:
29479 d.one_vector_p = false;
29480 if (d.testing_p || !rtx_equal_p (op0, op1))
29481 break;
29482
29483 /* The elements of PERM do not suggest that only the first operand
29484 is used, but both operands are identical. Allow easier matching
29485 of the permutation by folding the permutation into the single
29486 input vector. */
29487 /* FALLTHRU */
29488 case 2:
29489 d.op0 = op1;
29490 d.one_vector_p = true;
29491 break;
29492
29493 case 1:
29494 d.op1 = op0;
29495 d.one_vector_p = true;
29496 break;
29497 }
29498
29499 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29500
29501 if (!d.testing_p)
29502 return arm_expand_vec_perm_const_1 (&d);
29503
29504 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29505 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29506 if (!d.one_vector_p)
29507 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29508
29509 start_sequence ();
29510 bool ret = arm_expand_vec_perm_const_1 (&d);
29511 end_sequence ();
29512
29513 return ret;
29514 }
29515
29516 bool
29517 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29518 {
29519 /* If we are soft float and either we have ldrd or the access is no
29520 wider than a word, then all auto increment forms are ok. */
29521 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29522 return true;
29523
29524 switch (code)
29525 {
29526 /* Post increment and pre decrement are supported for all instruction
29527 forms, except that pre decrement is not available for vector modes. */
29528 case ARM_POST_INC:
29529 case ARM_PRE_DEC:
29530 if (VECTOR_MODE_P (mode))
29531 {
29532 if (code != ARM_PRE_DEC)
29533 return true;
29534 else
29535 return false;
29536 }
29537
29538 return true;
29539
29540 case ARM_POST_DEC:
29541 case ARM_PRE_INC:
29542 /* Without LDRD, if the mode size is greater than
29543 word size there is no point in auto-incrementing
29544 because ldm and stm will not have these forms. */
29545 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29546 return false;
29547
29548 /* Vector and floating point modes do not support
29549 these auto increment forms. */
29550 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29551 return false;
29552
29553 return true;
29554
29555 default:
29556 return false;
29557
29558 }
29559
29560 return false;
29561 }
29562
29563 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29564 on ARM, since we know that shifts by negative amounts are no-ops.
29565 Additionally, the default expansion code is not available or suitable
29566 for post-reload insn splits (this can occur when the register allocator
29567 chooses not to do a shift in NEON).
29568
29569 This function is used in both initial expand and post-reload splits, and
29570 handles all kinds of 64-bit shifts.
29571
29572 Input requirements:
29573 - It is safe for the input and output to be the same register, but
29574 early-clobber rules apply for the shift amount and scratch registers.
29575 - Shift by register requires both scratch registers. In all other cases
29576 the scratch registers may be NULL.
29577 - Ashiftrt by a register also clobbers the CC register. */
29578 void
29579 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29580 rtx amount, rtx scratch1, rtx scratch2)
29581 {
29582 rtx out_high = gen_highpart (SImode, out);
29583 rtx out_low = gen_lowpart (SImode, out);
29584 rtx in_high = gen_highpart (SImode, in);
29585 rtx in_low = gen_lowpart (SImode, in);
29586
29587 /* Terminology:
29588 in = the register pair containing the input value.
29589 out = the destination register pair.
29590 up = the high- or low-part of each pair.
29591 down = the opposite part to "up".
29592 In a shift, we can consider bits to shift from "up"-stream to
29593 "down"-stream, so in a left-shift "up" is the low-part and "down"
29594 is the high-part of each register pair. */
29595
29596 rtx out_up = code == ASHIFT ? out_low : out_high;
29597 rtx out_down = code == ASHIFT ? out_high : out_low;
29598 rtx in_up = code == ASHIFT ? in_low : in_high;
29599 rtx in_down = code == ASHIFT ? in_high : in_low;
29600
29601 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29602 gcc_assert (out
29603 && (REG_P (out) || GET_CODE (out) == SUBREG)
29604 && GET_MODE (out) == DImode);
29605 gcc_assert (in
29606 && (REG_P (in) || GET_CODE (in) == SUBREG)
29607 && GET_MODE (in) == DImode);
29608 gcc_assert (amount
29609 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29610 && GET_MODE (amount) == SImode)
29611 || CONST_INT_P (amount)));
29612 gcc_assert (scratch1 == NULL
29613 || (GET_CODE (scratch1) == SCRATCH)
29614 || (GET_MODE (scratch1) == SImode
29615 && REG_P (scratch1)));
29616 gcc_assert (scratch2 == NULL
29617 || (GET_CODE (scratch2) == SCRATCH)
29618 || (GET_MODE (scratch2) == SImode
29619 && REG_P (scratch2)));
29620 gcc_assert (!REG_P (out) || !REG_P (amount)
29621 || !HARD_REGISTER_P (out)
29622 || (REGNO (out) != REGNO (amount)
29623 && REGNO (out) + 1 != REGNO (amount)));
29624
29625 /* Macros to make following code more readable. */
29626 #define SUB_32(DEST,SRC) \
29627 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29628 #define RSB_32(DEST,SRC) \
29629 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29630 #define SUB_S_32(DEST,SRC) \
29631 gen_addsi3_compare0 ((DEST), (SRC), \
29632 GEN_INT (-32))
29633 #define SET(DEST,SRC) \
29634 gen_rtx_SET ((DEST), (SRC))
29635 #define SHIFT(CODE,SRC,AMOUNT) \
29636 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29637 #define LSHIFT(CODE,SRC,AMOUNT) \
29638 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29639 SImode, (SRC), (AMOUNT))
29640 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29641 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29642 SImode, (SRC), (AMOUNT))
29643 #define ORR(A,B) \
29644 gen_rtx_IOR (SImode, (A), (B))
29645 #define BRANCH(COND,LABEL) \
29646 gen_arm_cond_branch ((LABEL), \
29647 gen_rtx_ ## COND (CCmode, cc_reg, \
29648 const0_rtx), \
29649 cc_reg)
29650
29651 /* Shifts by register and shifts by constant are handled separately. */
29652 if (CONST_INT_P (amount))
29653 {
29654 /* We have a shift-by-constant. */
29655
29656 /* First, handle out-of-range shift amounts.
29657 In both cases we try to match the result that an ARM instruction in a
29658 shift-by-register would give. This helps reduce execution
29659 differences between optimization levels, but it won't stop other
29660 parts of the compiler doing different things. This is undefined
29661 behavior, in any case. */
29662 if (INTVAL (amount) <= 0)
29663 emit_insn (gen_movdi (out, in));
29664 else if (INTVAL (amount) >= 64)
29665 {
29666 if (code == ASHIFTRT)
29667 {
29668 rtx const31_rtx = GEN_INT (31);
29669 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29670 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29671 }
29672 else
29673 emit_insn (gen_movdi (out, const0_rtx));
29674 }
29675
29676 /* Now handle valid shifts. */
29677 else if (INTVAL (amount) < 32)
29678 {
29679 /* Shifts by a constant less than 32. */
29680 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29681
29682 /* Clearing the out register in DImode first avoids lots
29683 of spilling and results in less stack usage.
29684 Later this redundant insn is completely removed.
29685 Do that only if "in" and "out" are different registers. */
29686 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29687 emit_insn (SET (out, const0_rtx));
29688 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29689 emit_insn (SET (out_down,
29690 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29691 out_down)));
29692 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29693 }
29694 else
29695 {
29696 /* Shifts by a constant greater than 31. */
29697 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29698
29699 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29700 emit_insn (SET (out, const0_rtx));
29701 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29702 if (code == ASHIFTRT)
29703 emit_insn (gen_ashrsi3 (out_up, in_up,
29704 GEN_INT (31)));
29705 else
29706 emit_insn (SET (out_up, const0_rtx));
29707 }
29708 }
29709 else
29710 {
29711 /* We have a shift-by-register. */
29712 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29713
29714 /* This alternative requires the scratch registers. */
29715 gcc_assert (scratch1 && REG_P (scratch1));
29716 gcc_assert (scratch2 && REG_P (scratch2));
29717
29718 /* We will need the values "amount-32" and "32-amount" later.
29719 Swapping them around now allows the later code to be more general. */
29720 switch (code)
29721 {
29722 case ASHIFT:
29723 emit_insn (SUB_32 (scratch1, amount));
29724 emit_insn (RSB_32 (scratch2, amount));
29725 break;
29726 case ASHIFTRT:
29727 emit_insn (RSB_32 (scratch1, amount));
29728 /* Also set CC = amount > 32. */
29729 emit_insn (SUB_S_32 (scratch2, amount));
29730 break;
29731 case LSHIFTRT:
29732 emit_insn (RSB_32 (scratch1, amount));
29733 emit_insn (SUB_32 (scratch2, amount));
29734 break;
29735 default:
29736 gcc_unreachable ();
29737 }
29738
29739 /* Emit code like this:
29740
29741 arithmetic-left:
29742 out_down = in_down << amount;
29743 out_down = (in_up << (amount - 32)) | out_down;
29744 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29745 out_up = in_up << amount;
29746
29747 arithmetic-right:
29748 out_down = in_down >> amount;
29749 out_down = (in_up << (32 - amount)) | out_down;
29750 if (amount < 32)
29751 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29752 out_up = in_up << amount;
29753
29754 logical-right:
29755 out_down = in_down >> amount;
29756 out_down = (in_up << (32 - amount)) | out_down;
29757 if (amount < 32)
29758 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29759 out_up = in_up << amount;
29760
29761 The ARM and Thumb2 variants are the same but implemented slightly
29762 differently. If this were only called during expand we could just
29763 use the Thumb2 case and let combine do the right thing, but this
29764 can also be called from post-reload splitters. */
29765
29766 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29767
29768 if (!TARGET_THUMB2)
29769 {
29770 /* Emit code for ARM mode. */
29771 emit_insn (SET (out_down,
29772 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29773 if (code == ASHIFTRT)
29774 {
29775 rtx_code_label *done_label = gen_label_rtx ();
29776 emit_jump_insn (BRANCH (LT, done_label));
29777 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29778 out_down)));
29779 emit_label (done_label);
29780 }
29781 else
29782 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29783 out_down)));
29784 }
29785 else
29786 {
29787 /* Emit code for Thumb2 mode.
29788 Thumb2 can't do shift and or in one insn. */
29789 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29790 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29791
29792 if (code == ASHIFTRT)
29793 {
29794 rtx_code_label *done_label = gen_label_rtx ();
29795 emit_jump_insn (BRANCH (LT, done_label));
29796 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29797 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29798 emit_label (done_label);
29799 }
29800 else
29801 {
29802 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29803 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29804 }
29805 }
29806
29807 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29808 }
29809
29810 #undef SUB_32
29811 #undef RSB_32
29812 #undef SUB_S_32
29813 #undef SET
29814 #undef SHIFT
29815 #undef LSHIFT
29816 #undef REV_LSHIFT
29817 #undef ORR
29818 #undef BRANCH
29819 }
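
/* As a worked example of the constant case (illustrative only; register
   names are arbitrary and the initial clearing of OUT may also be emitted),
   a left shift of a DImode value by 5 expands to:

	lsl	out_hi, in_hi, #5
	orr	out_hi, out_hi, in_lo, lsr #27
	lsl	out_lo, in_lo, #5  */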
29820
29821 /* Returns true if the pattern is a valid symbolic address, which is either a
29822 symbol_ref or (symbol_ref + addend).
29823
29824 According to the ARM ELF ABI, the initial addend of REL-type relocations
29825 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29826 literal field of the instruction as a 16-bit signed value in the range
29827 -32768 <= A < 32768. */
29828
29829 bool
29830 arm_valid_symbolic_address_p (rtx addr)
29831 {
29832 rtx xop0, xop1 = NULL_RTX;
29833 rtx tmp = addr;
29834
29835 if (target_word_relocations)
29836 return false;
29837
29838 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29839 return true;
29840
29841 /* (const (plus: symbol_ref const_int)) */
29842 if (GET_CODE (addr) == CONST)
29843 tmp = XEXP (addr, 0);
29844
29845 if (GET_CODE (tmp) == PLUS)
29846 {
29847 xop0 = XEXP (tmp, 0);
29848 xop1 = XEXP (tmp, 1);
29849
29850 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29851 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29852 }
29853
29854 return false;
29855 }
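
/* For example (illustrative only):

     (symbol_ref "foo")                                    -> true
     (const (plus (symbol_ref "foo") (const_int 256)))     -> true
     (const (plus (symbol_ref "foo") (const_int 65536)))   -> false

   the last because the addend does not fit the signed 16-bit range required
   for REL-type MOVW/MOVT relocations.  */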
29856
29857 /* Return true if *COMPARISON is a valid comparison operation, and force
29858 the operands into a form that is valid for it. */
29859 bool
29860 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29861 {
29862 enum rtx_code code = GET_CODE (*comparison);
29863 int code_int;
29864 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29865 ? GET_MODE (*op2) : GET_MODE (*op1);
29866
29867 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29868
29869 if (code == UNEQ || code == LTGT)
29870 return false;
29871
29872 code_int = (int)code;
29873 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29874 PUT_CODE (*comparison, (enum rtx_code)code_int);
29875
29876 switch (mode)
29877 {
29878 case E_SImode:
29879 if (!arm_add_operand (*op1, mode))
29880 *op1 = force_reg (mode, *op1);
29881 if (!arm_add_operand (*op2, mode))
29882 *op2 = force_reg (mode, *op2);
29883 return true;
29884
29885 case E_DImode:
29886 if (!cmpdi_operand (*op1, mode))
29887 *op1 = force_reg (mode, *op1);
29888 if (!cmpdi_operand (*op2, mode))
29889 *op2 = force_reg (mode, *op2);
29890 return true;
29891
29892 case E_HFmode:
29893 if (!TARGET_VFP_FP16INST)
29894 break;
29895 /* FP16 comparisons are done in SF mode. */
29896 mode = SFmode;
29897 *op1 = convert_to_mode (mode, *op1, 1);
29898 *op2 = convert_to_mode (mode, *op2, 1);
29899 /* Fall through. */
29900 case E_SFmode:
29901 case E_DFmode:
29902 if (!vfp_compare_operand (*op1, mode))
29903 *op1 = force_reg (mode, *op1);
29904 if (!vfp_compare_operand (*op2, mode))
29905 *op2 = force_reg (mode, *op2);
29906 return true;
29907 default:
29908 break;
29909 }
29910
29911 return false;
29912
29913 }
29914
29915 /* Maximum number of instructions to set a block of memory. */
29916 static int
29917 arm_block_set_max_insns (void)
29918 {
29919 if (optimize_function_for_size_p (cfun))
29920 return 4;
29921 else
29922 return current_tune->max_insns_inline_memset;
29923 }
29924
29925 /* Return TRUE if it's profitable to set a block of memory for the
29926 non-vectorized case. VAL is the value to set the memory
29927 with. LENGTH is the number of bytes to set. ALIGN is the
29928 alignment of the destination memory in bytes. UNALIGNED_P
29929 is TRUE if we can only set the memory with instructions
29930 meeting alignment requirements. USE_STRD_P is TRUE if we
29931 can use strd to set the memory. */
29932 static bool
29933 arm_block_set_non_vect_profit_p (rtx val,
29934 unsigned HOST_WIDE_INT length,
29935 unsigned HOST_WIDE_INT align,
29936 bool unaligned_p, bool use_strd_p)
29937 {
29938 int num = 0;
29939 /* For a leftover of 0-7 bytes, we can set the memory block using
29940 strb/strh/str with the minimum number of instructions. */
29941 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29942
29943 if (unaligned_p)
29944 {
29945 num = arm_const_inline_cost (SET, val);
29946 num += length / align + length % align;
29947 }
29948 else if (use_strd_p)
29949 {
29950 num = arm_const_double_inline_cost (val);
29951 num += (length >> 3) + leftover[length & 7];
29952 }
29953 else
29954 {
29955 num = arm_const_inline_cost (SET, val);
29956 num += (length >> 2) + leftover[length & 3];
29957 }
29958
29959 /* We may be able to combine last pair STRH/STRB into a single STR
29960 by shifting one byte back. */
29961 if (unaligned_access && length > 3 && (length & 3) == 3)
29962 num--;
29963
29964 return (num <= arm_block_set_max_insns ());
29965 }
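
/* A worked example (illustrative; it assumes arm_const_inline_cost returns 1
   for the replicated value): for an aligned memset of 11 bytes without strd,
   num = 1 + (11 >> 2) + leftover[11 & 3] = 1 + 2 + 2 = 5; with unaligned
   access available the trailing STRH/STRB pair is counted as one STR, giving
   4, which fits even the size-optimized limit of 4 instructions.  */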
29966
29967 /* Return TRUE if it's profitable to set a block of memory for the
29968 vectorized case. LENGTH is the number of bytes to set.
29969 ALIGN is the alignment of destination memory in bytes.
29970 MODE is the vector mode used to set the memory. */
29971 static bool
29972 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29973 unsigned HOST_WIDE_INT align,
29974 machine_mode mode)
29975 {
29976 int num;
29977 bool unaligned_p = ((align & 3) != 0);
29978 unsigned int nelt = GET_MODE_NUNITS (mode);
29979
29980 /* Instruction loading constant value. */
29981 num = 1;
29982 /* Instructions storing the memory. */
29983 num += (length + nelt - 1) / nelt;
29984 /* Instructions adjusting the address expression. We only need to
29985 adjust the address expression if it's 4-byte aligned and the
29986 leftover bytes can only be stored by a misaligned store instruction. */
29987 if (!unaligned_p && (length & 3) != 0)
29988 num++;
29989
29990 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29991 if (!unaligned_p && mode == V16QImode)
29992 num--;
29993
29994 return (num <= arm_block_set_max_insns ());
29995 }
29996
29997 /* Set a block of memory using vectorization instructions for the
29998 unaligned case. We fill the first LENGTH bytes of the memory
29999 area starting from DSTBASE with byte constant VALUE. ALIGN is
30000 the alignment requirement of memory. Return TRUE if succeeded. */
30001 static bool
30002 arm_block_set_unaligned_vect (rtx dstbase,
30003 unsigned HOST_WIDE_INT length,
30004 unsigned HOST_WIDE_INT value,
30005 unsigned HOST_WIDE_INT align)
30006 {
30007 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
30008 rtx dst, mem;
30009 rtx val_vec, reg;
30010 rtx (*gen_func) (rtx, rtx);
30011 machine_mode mode;
30012 unsigned HOST_WIDE_INT v = value;
30013 unsigned int offset = 0;
30014 gcc_assert ((align & 0x3) != 0);
30015 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30016 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30017 if (length >= nelt_v16)
30018 {
30019 mode = V16QImode;
30020 gen_func = gen_movmisalignv16qi;
30021 }
30022 else
30023 {
30024 mode = V8QImode;
30025 gen_func = gen_movmisalignv8qi;
30026 }
30027 nelt_mode = GET_MODE_NUNITS (mode);
30028 gcc_assert (length >= nelt_mode);
30029 /* Skip if it isn't profitable. */
30030 if (!arm_block_set_vect_profit_p (length, align, mode))
30031 return false;
30032
30033 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30034 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30035
30036 v = sext_hwi (v, BITS_PER_WORD);
30037
30038 reg = gen_reg_rtx (mode);
30039 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30040 /* Emit instruction loading the constant value. */
30041 emit_move_insn (reg, val_vec);
30042
30043 /* Handle nelt_mode bytes in a vector. */
30044 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30045 {
30046 emit_insn ((*gen_func) (mem, reg));
30047 if (i + 2 * nelt_mode <= length)
30048 {
30049 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30050 offset += nelt_mode;
30051 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30052 }
30053 }
30054
30055 /* If at least nelt_v8 bytes are left over, we must be in
30056 V16QImode. */
30057 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30058
30059 /* Handle (8, 16) bytes leftover. */
30060 if (i + nelt_v8 < length)
30061 {
30062 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30063 offset += length - i;
30064 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30065
30066 /* We are shifting bytes back, set the alignment accordingly. */
30067 if ((length & 1) != 0 && align >= 2)
30068 set_mem_align (mem, BITS_PER_UNIT);
30069
30070 emit_insn (gen_movmisalignv16qi (mem, reg));
30071 }
30072 /* Handle (0, 8] bytes leftover. */
30073 else if (i < length && i + nelt_v8 >= length)
30074 {
30075 if (mode == V16QImode)
30076 reg = gen_lowpart (V8QImode, reg);
30077
30078 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30079 + (nelt_mode - nelt_v8))));
30080 offset += (length - i) + (nelt_mode - nelt_v8);
30081 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30082
30083 /* We are shifting bytes back, set the alignment accordingly. */
30084 if ((length & 1) != 0 && align >= 2)
30085 set_mem_align (mem, BITS_PER_UNIT);
30086
30087 emit_insn (gen_movmisalignv8qi (mem, reg));
30088 }
30089
30090 return true;
30091 }
30092
30093 /* Set a block of memory using vectorization instructions for the
30094 aligned case. We fill the first LENGTH bytes of the memory area
30095 starting from DSTBASE with byte constant VALUE. ALIGN is the
30096 alignment requirement of memory. Return TRUE if succeeded. */
30097 static bool
30098 arm_block_set_aligned_vect (rtx dstbase,
30099 unsigned HOST_WIDE_INT length,
30100 unsigned HOST_WIDE_INT value,
30101 unsigned HOST_WIDE_INT align)
30102 {
30103 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30104 rtx dst, addr, mem;
30105 rtx val_vec, reg;
30106 machine_mode mode;
30107 unsigned int offset = 0;
30108
30109 gcc_assert ((align & 0x3) == 0);
30110 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30111 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30112 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30113 mode = V16QImode;
30114 else
30115 mode = V8QImode;
30116
30117 nelt_mode = GET_MODE_NUNITS (mode);
30118 gcc_assert (length >= nelt_mode);
30119 /* Skip if it isn't profitable. */
30120 if (!arm_block_set_vect_profit_p (length, align, mode))
30121 return false;
30122
30123 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30124
30125 reg = gen_reg_rtx (mode);
30126 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30127 /* Emit instruction loading the constant value. */
30128 emit_move_insn (reg, val_vec);
30129
30130 i = 0;
30131 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30132 if (mode == V16QImode)
30133 {
30134 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30135 emit_insn (gen_movmisalignv16qi (mem, reg));
30136 i += nelt_mode;
30137 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30138 if (i + nelt_v8 < length && i + nelt_v16 > length)
30139 {
30140 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30141 offset += length - nelt_mode;
30142 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30143 /* We are shifting bytes back, set the alignment accordingly. */
30144 if ((length & 0x3) == 0)
30145 set_mem_align (mem, BITS_PER_UNIT * 4);
30146 else if ((length & 0x1) == 0)
30147 set_mem_align (mem, BITS_PER_UNIT * 2);
30148 else
30149 set_mem_align (mem, BITS_PER_UNIT);
30150
30151 emit_insn (gen_movmisalignv16qi (mem, reg));
30152 return true;
30153 }
30154 /* Fall through for bytes leftover. */
30155 mode = V8QImode;
30156 nelt_mode = GET_MODE_NUNITS (mode);
30157 reg = gen_lowpart (V8QImode, reg);
30158 }
30159
30160 /* Handle 8 bytes in a vector. */
30161 for (; (i + nelt_mode <= length); i += nelt_mode)
30162 {
30163 addr = plus_constant (Pmode, dst, i);
30164 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30165 emit_move_insn (mem, reg);
30166 }
30167
30168 /* Handle single word leftover by shifting 4 bytes back. We can
30169 use aligned access for this case. */
30170 if (i + UNITS_PER_WORD == length)
30171 {
30172 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30173 offset += i - UNITS_PER_WORD;
30174 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30175 /* We are shifting 4 bytes back, set the alignment accordingly. */
30176 if (align > UNITS_PER_WORD)
30177 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30178
30179 emit_move_insn (mem, reg);
30180 }
30181 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30182 We have to use unaligned access for this case. */
30183 else if (i < length)
30184 {
30185 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30186 offset += length - nelt_mode;
30187 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30188 /* We are shifting bytes back, set the alignment accordingly. */
30189 if ((length & 1) == 0)
30190 set_mem_align (mem, BITS_PER_UNIT * 2);
30191 else
30192 set_mem_align (mem, BITS_PER_UNIT);
30193
30194 emit_insn (gen_movmisalignv8qi (mem, reg));
30195 }
30196
30197 return true;
30198 }
30199
30200 /* Set a block of memory using plain strh/strb instructions, only
30201 using instructions allowed by ALIGN on the processor. We fill the
30202 first LENGTH bytes of the memory area starting from DSTBASE
30203 with byte constant VALUE. ALIGN is the alignment requirement
30204 of memory. */
30205 static bool
30206 arm_block_set_unaligned_non_vect (rtx dstbase,
30207 unsigned HOST_WIDE_INT length,
30208 unsigned HOST_WIDE_INT value,
30209 unsigned HOST_WIDE_INT align)
30210 {
30211 unsigned int i;
30212 rtx dst, addr, mem;
30213 rtx val_exp, val_reg, reg;
30214 machine_mode mode;
30215 HOST_WIDE_INT v = value;
30216
30217 gcc_assert (align == 1 || align == 2);
30218
30219 if (align == 2)
30220 v |= (value << BITS_PER_UNIT);
30221
30222 v = sext_hwi (v, BITS_PER_WORD);
30223 val_exp = GEN_INT (v);
30224 /* Skip if it isn't profitable. */
30225 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30226 align, true, false))
30227 return false;
30228
30229 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30230 mode = (align == 2 ? HImode : QImode);
30231 val_reg = force_reg (SImode, val_exp);
30232 reg = gen_lowpart (mode, val_reg);
30233
30234 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30235 {
30236 addr = plus_constant (Pmode, dst, i);
30237 mem = adjust_automodify_address (dstbase, mode, addr, i);
30238 emit_move_insn (mem, reg);
30239 }
30240
30241 /* Handle single byte leftover. */
30242 if (i + 1 == length)
30243 {
30244 reg = gen_lowpart (QImode, val_reg);
30245 addr = plus_constant (Pmode, dst, i);
30246 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30247 emit_move_insn (mem, reg);
30248 i++;
30249 }
30250
30251 gcc_assert (i == length);
30252 return true;
30253 }
30254
30255 /* Set a block of memory using plain strd/str/strh/strb instructions,
30256 to permit unaligned stores on processors which support unaligned
30257 semantics for those instructions. We fill the first LENGTH bytes
30258 of the memory area starting from DSTBASE with byte constant VALUE.
30259 ALIGN is the alignment requirement of memory. */
30260 static bool
30261 arm_block_set_aligned_non_vect (rtx dstbase,
30262 unsigned HOST_WIDE_INT length,
30263 unsigned HOST_WIDE_INT value,
30264 unsigned HOST_WIDE_INT align)
30265 {
30266 unsigned int i;
30267 rtx dst, addr, mem;
30268 rtx val_exp, val_reg, reg;
30269 unsigned HOST_WIDE_INT v;
30270 bool use_strd_p;
30271
30272 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30273 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30274
30275 v = (value | (value << 8) | (value << 16) | (value << 24));
30276 if (length < UNITS_PER_WORD)
30277 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30278
30279 if (use_strd_p)
30280 v |= (v << BITS_PER_WORD);
30281 else
30282 v = sext_hwi (v, BITS_PER_WORD);
30283
30284 val_exp = GEN_INT (v);
30285 /* Skip if it isn't profitable. */
30286 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30287 align, false, use_strd_p))
30288 {
30289 if (!use_strd_p)
30290 return false;
30291
30292 /* Try without strd. */
30293 v = (v >> BITS_PER_WORD);
30294 v = sext_hwi (v, BITS_PER_WORD);
30295 val_exp = GEN_INT (v);
30296 use_strd_p = false;
30297 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30298 align, false, use_strd_p))
30299 return false;
30300 }
30301
30302 i = 0;
30303 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30304 /* Handle double words using strd if possible. */
30305 if (use_strd_p)
30306 {
30307 val_reg = force_reg (DImode, val_exp);
30308 reg = val_reg;
30309 for (; (i + 8 <= length); i += 8)
30310 {
30311 addr = plus_constant (Pmode, dst, i);
30312 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30313 emit_move_insn (mem, reg);
30314 }
30315 }
30316 else
30317 val_reg = force_reg (SImode, val_exp);
30318
30319 /* Handle words. */
30320 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30321 for (; (i + 4 <= length); i += 4)
30322 {
30323 addr = plus_constant (Pmode, dst, i);
30324 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30325 if ((align & 3) == 0)
30326 emit_move_insn (mem, reg);
30327 else
30328 emit_insn (gen_unaligned_storesi (mem, reg));
30329 }
30330
30331 /* Merge last pair of STRH and STRB into a STR if possible. */
30332 if (unaligned_access && i > 0 && (i + 3) == length)
30333 {
30334 addr = plus_constant (Pmode, dst, i - 1);
30335 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30336 /* We are shifting one byte back, set the alignment accordingly. */
30337 if ((align & 1) == 0)
30338 set_mem_align (mem, BITS_PER_UNIT);
30339
30340 /* Most likely this is an unaligned access, and we can't tell at
30341 compilation time. */
30342 emit_insn (gen_unaligned_storesi (mem, reg));
30343 return true;
30344 }
30345
30346 /* Handle half word leftover. */
30347 if (i + 2 <= length)
30348 {
30349 reg = gen_lowpart (HImode, val_reg);
30350 addr = plus_constant (Pmode, dst, i);
30351 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30352 if ((align & 1) == 0)
30353 emit_move_insn (mem, reg);
30354 else
30355 emit_insn (gen_unaligned_storehi (mem, reg));
30356
30357 i += 2;
30358 }
30359
30360 /* Handle single byte leftover. */
30361 if (i + 1 == length)
30362 {
30363 reg = gen_lowpart (QImode, val_reg);
30364 addr = plus_constant (Pmode, dst, i);
30365 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30366 emit_move_insn (mem, reg);
30367 }
30368
30369 return true;
30370 }
30371
30372 /* Set a block of memory using vectorization instructions for both
30373 aligned and unaligned cases. We fill the first LENGTH bytes of
30374 the memory area starting from DSTBASE with byte constant VALUE.
30375 ALIGN is the alignment requirement of memory. */
30376 static bool
30377 arm_block_set_vect (rtx dstbase,
30378 unsigned HOST_WIDE_INT length,
30379 unsigned HOST_WIDE_INT value,
30380 unsigned HOST_WIDE_INT align)
30381 {
30382 /* Check whether we need to use unaligned store instruction. */
30383 if (((align & 3) != 0 || (length & 3) != 0)
30384 /* Check whether unaligned store instruction is available. */
30385 && (!unaligned_access || BYTES_BIG_ENDIAN))
30386 return false;
30387
30388 if ((align & 3) == 0)
30389 return arm_block_set_aligned_vect (dstbase, length, value, align);
30390 else
30391 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30392 }
30393
30394 /* Expand a string store (memset) operation. First we try to do it using
30395 vectorization instructions, then with ARM unaligned access and
30396 double-word stores if profitable. OPERANDS[0] is the destination,
30397 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30398 initialize the memory with, OPERANDS[3] is the known alignment of the
30399 destination. */
30400 bool
30401 arm_gen_setmem (rtx *operands)
30402 {
30403 rtx dstbase = operands[0];
30404 unsigned HOST_WIDE_INT length;
30405 unsigned HOST_WIDE_INT value;
30406 unsigned HOST_WIDE_INT align;
30407
30408 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30409 return false;
30410
30411 length = UINTVAL (operands[1]);
30412 if (length > 64)
30413 return false;
30414
30415 value = (UINTVAL (operands[2]) & 0xFF);
30416 align = UINTVAL (operands[3]);
30417 if (TARGET_NEON && length >= 8
30418 && current_tune->string_ops_prefer_neon
30419 && arm_block_set_vect (dstbase, length, value, align))
30420 return true;
30421
30422 if (!unaligned_access && (align & 3) != 0)
30423 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30424
30425 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30426 }
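/* For example, a call such as

     __builtin_memset (buf, 0xAB, 15);

   with a constant length (at most 64 bytes) and a constant value may be
   expanded inline by arm_gen_setmem above; anything else falls back to
   the generic expansion or a library call.  */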
30427
30428
30429 static bool
30430 arm_macro_fusion_p (void)
30431 {
30432 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30433 }
30434
30435 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30436 for MOVW / MOVT macro fusion. */
30437
30438 static bool
30439 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30440 {
30441 /* We are trying to fuse
30442 movw imm / movt imm
30443 instructions as a group that gets scheduled together. */
30444
30445 rtx set_dest = SET_DEST (curr_set);
30446
30447 if (GET_MODE (set_dest) != SImode)
30448 return false;
30449
30450 /* We are trying to match:
30451 prev (movw) == (set (reg r0) (const_int imm16))
30452 curr (movt) == (set (zero_extract (reg r0)
30453 (const_int 16)
30454 (const_int 16))
30455 (const_int imm16_1))
30456 or
30457 prev (movw) == (set (reg r1)
30458 (high (symbol_ref ("SYM"))))
30459 curr (movt) == (set (reg r0)
30460 (lo_sum (reg r1)
30461 (symbol_ref ("SYM")))) */
30462
30463 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30464 {
30465 if (CONST_INT_P (SET_SRC (curr_set))
30466 && CONST_INT_P (SET_SRC (prev_set))
30467 && REG_P (XEXP (set_dest, 0))
30468 && REG_P (SET_DEST (prev_set))
30469 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30470 return true;
30471
30472 }
30473 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30474 && REG_P (SET_DEST (curr_set))
30475 && REG_P (SET_DEST (prev_set))
30476 && GET_CODE (SET_SRC (prev_set)) == HIGH
30477 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30478 return true;
30479
30480 return false;
30481 }
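/* At the assembly level the pairs matched above typically look like

     movw    r0, #:lower16:some_symbol
     movt    r0, #:upper16:some_symbol

   (or movw/movt of a split 32-bit immediate, with some_symbol standing
   for whatever symbol is being addressed); keeping the two instructions
   adjacent lets cores that declare FUSE_MOVW_MOVT issue them as a fused
   pair.  */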
30482
30483 static bool
30484 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30485 {
30486 rtx prev_set = single_set (prev);
30487 rtx curr_set = single_set (curr);
30488
30489 if (!prev_set
30490 || !curr_set)
30491 return false;
30492
30493 if (any_condjump_p (curr))
30494 return false;
30495
30496 if (!arm_macro_fusion_p ())
30497 return false;
30498
30499 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30500 && aarch_crypto_can_dual_issue (prev, curr))
30501 return true;
30502
30503 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30504 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30505 return true;
30506
30507 return false;
30508 }
30509
30510 /* Return true iff the instruction fusion described by OP is enabled. */
30511 bool
30512 arm_fusion_enabled_p (tune_params::fuse_ops op)
30513 {
30514 return current_tune->fusible_ops & op;
30515 }
30516
30517 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30518 scheduled for speculative execution. Reject the long-running division
30519 and square-root instructions. */
30520
30521 static bool
30522 arm_sched_can_speculate_insn (rtx_insn *insn)
30523 {
30524 switch (get_attr_type (insn))
30525 {
30526 case TYPE_SDIV:
30527 case TYPE_UDIV:
30528 case TYPE_FDIVS:
30529 case TYPE_FDIVD:
30530 case TYPE_FSQRTS:
30531 case TYPE_FSQRTD:
30532 case TYPE_NEON_FP_SQRT_S:
30533 case TYPE_NEON_FP_SQRT_D:
30534 case TYPE_NEON_FP_SQRT_S_Q:
30535 case TYPE_NEON_FP_SQRT_D_Q:
30536 case TYPE_NEON_FP_DIV_S:
30537 case TYPE_NEON_FP_DIV_D:
30538 case TYPE_NEON_FP_DIV_S_Q:
30539 case TYPE_NEON_FP_DIV_D_Q:
30540 return false;
30541 default:
30542 return true;
30543 }
30544 }
30545
30546 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30547
30548 static unsigned HOST_WIDE_INT
30549 arm_asan_shadow_offset (void)
30550 {
30551 return HOST_WIDE_INT_1U << 29;
30552 }
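/* In other words, the sanitizer instrumentation computes shadow
   addresses roughly as

     shadow = (addr >> 3) + 0x20000000;

   on 32-bit ARM; the value returned above is that 0x20000000 offset.  */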
30553
30554
30555 /* This is a temporary fix for PR60655.  Ideally we should
30556 handle most of these cases in the generic part, but
30557 currently we reject minus (..) (sym_ref).  We try to
30558 ameliorate the case of minus (sym_ref1) (sym_ref2)
30559 where both symbols are in the same section.  */
30560
30561 static bool
30562 arm_const_not_ok_for_debug_p (rtx p)
30563 {
30564 tree decl_op0 = NULL;
30565 tree decl_op1 = NULL;
30566
30567 if (GET_CODE (p) == UNSPEC)
30568 return true;
30569 if (GET_CODE (p) == MINUS)
30570 {
30571 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30572 {
30573 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30574 if (decl_op1
30575 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30576 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30577 {
30578 if ((VAR_P (decl_op1)
30579 || TREE_CODE (decl_op1) == CONST_DECL)
30580 && (VAR_P (decl_op0)
30581 || TREE_CODE (decl_op0) == CONST_DECL))
30582 return (get_variable_section (decl_op1, false)
30583 != get_variable_section (decl_op0, false));
30584
30585 if (TREE_CODE (decl_op1) == LABEL_DECL
30586 && TREE_CODE (decl_op0) == LABEL_DECL)
30587 return (DECL_CONTEXT (decl_op1)
30588 != DECL_CONTEXT (decl_op0));
30589 }
30590
30591 return true;
30592 }
30593 }
30594
30595 return false;
30596 }
30597
30598 /* Return TRUE if X is a reference to a value in a constant pool.  */
30599 extern bool
30600 arm_is_constant_pool_ref (rtx x)
30601 {
30602 return (MEM_P (x)
30603 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30604 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30605 }
30606
30607 /* Remember the last target of arm_set_current_function. */
30608 static GTY(()) tree arm_previous_fndecl;
30609
30610 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30611
30612 void
30613 save_restore_target_globals (tree new_tree)
30614 {
30615 /* If we have a previous state, use it. */
30616 if (TREE_TARGET_GLOBALS (new_tree))
30617 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30618 else if (new_tree == target_option_default_node)
30619 restore_target_globals (&default_target_globals);
30620 else
30621 {
30622 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30623 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30624 }
30625
30626 arm_option_params_internal ();
30627 }
30628
30629 /* Invalidate arm_previous_fndecl. */
30630
30631 void
30632 arm_reset_previous_fndecl (void)
30633 {
30634 arm_previous_fndecl = NULL_TREE;
30635 }
30636
30637 /* Establish appropriate back-end context for processing the function
30638 FNDECL. The argument might be NULL to indicate processing at top
30639 level, outside of any function scope. */
30640
30641 static void
30642 arm_set_current_function (tree fndecl)
30643 {
30644 if (!fndecl || fndecl == arm_previous_fndecl)
30645 return;
30646
30647 tree old_tree = (arm_previous_fndecl
30648 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30649 : NULL_TREE);
30650
30651 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30652
30653 /* If current function has no attributes but previous one did,
30654 use the default node. */
30655 if (! new_tree && old_tree)
30656 new_tree = target_option_default_node;
30657
30658 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30659 the default have been handled by save_restore_target_globals from
30660 arm_pragma_target_parse. */
30661 if (old_tree == new_tree)
30662 return;
30663
30664 arm_previous_fndecl = fndecl;
30665
30666 /* First set the target options. */
30667 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30668
30669 save_restore_target_globals (new_tree);
30670 }
30671
30672 /* Implement TARGET_OPTION_PRINT. */
30673
30674 static void
30675 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30676 {
30677 int flags = ptr->x_target_flags;
30678 const char *fpu_name;
30679
30680 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30681 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30682
30683 fprintf (file, "%*sselected isa %s\n", indent, "",
30684 TARGET_THUMB2_P (flags) ? "thumb2" :
30685 TARGET_THUMB_P (flags) ? "thumb1" :
30686 "arm");
30687
30688 if (ptr->x_arm_arch_string)
30689 fprintf (file, "%*sselected architecture %s\n", indent, "",
30690 ptr->x_arm_arch_string);
30691
30692 if (ptr->x_arm_cpu_string)
30693 fprintf (file, "%*sselected CPU %s\n", indent, "",
30694 ptr->x_arm_cpu_string);
30695
30696 if (ptr->x_arm_tune_string)
30697 fprintf (file, "%*sselected tune %s\n", indent, "",
30698 ptr->x_arm_tune_string);
30699
30700 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30701 }
30702
30703 /* Hook to determine if one function can safely inline another. */
30704
30705 static bool
30706 arm_can_inline_p (tree caller, tree callee)
30707 {
30708 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30709 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30710 bool can_inline = true;
30711
30712 struct cl_target_option *caller_opts
30713 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30714 : target_option_default_node);
30715
30716 struct cl_target_option *callee_opts
30717 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30718 : target_option_default_node);
30719
30720 if (callee_opts == caller_opts)
30721 return true;
30722
30723 /* Callee's ISA features should be a subset of the caller's. */
30724 struct arm_build_target caller_target;
30725 struct arm_build_target callee_target;
30726 caller_target.isa = sbitmap_alloc (isa_num_bits);
30727 callee_target.isa = sbitmap_alloc (isa_num_bits);
30728
30729 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30730 false);
30731 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30732 false);
30733 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30734 can_inline = false;
30735
30736 sbitmap_free (caller_target.isa);
30737 sbitmap_free (callee_target.isa);
30738
30739 /* It is OK to inline across different modes; a function with
30740 mode-specific instructions, e.g. using inline asm, must be
30741 explicitly protected with noinline.  */
30742 return can_inline;
30743 }
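/* For instance, a callee defined with

     __attribute__ ((target ("fpu=neon")))

   is not inlined into a caller built with only -mfpu=vfpv3, because the
   callee's ISA feature bits are not a subset of the caller's, while
   inlining in the opposite direction remains possible.  */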
30744
30745 /* Hook to fix function's alignment affected by target attribute. */
30746
30747 static void
30748 arm_relayout_function (tree fndecl)
30749 {
30750 if (DECL_USER_ALIGN (fndecl))
30751 return;
30752
30753 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30754
30755 if (!callee_tree)
30756 callee_tree = target_option_default_node;
30757
30758 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30759 SET_DECL_ALIGN
30760 (fndecl,
30761 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30762 }
30763
30764 /* Inner function to process attribute ((target (...))): take an argument
30765 and set the current options from it.  If the argument is a list,
30766 recurse over its elements.  */
30767
30768 static bool
30769 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30770 {
30771 if (TREE_CODE (args) == TREE_LIST)
30772 {
30773 bool ret = true;
30774
30775 for (; args; args = TREE_CHAIN (args))
30776 if (TREE_VALUE (args)
30777 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30778 ret = false;
30779 return ret;
30780 }
30781
30782 else if (TREE_CODE (args) != STRING_CST)
30783 {
30784 error ("attribute %<target%> argument not a string");
30785 return false;
30786 }
30787
30788 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30789 char *q;
30790
30791 while ((q = strtok (argstr, ",")) != NULL)
30792 {
30793 while (ISSPACE (*q)) ++q;
30794
30795 argstr = NULL;
30796 if (!strncmp (q, "thumb", 5))
30797 opts->x_target_flags |= MASK_THUMB;
30798
30799 else if (!strncmp (q, "arm", 3))
30800 opts->x_target_flags &= ~MASK_THUMB;
30801
30802 else if (!strncmp (q, "fpu=", 4))
30803 {
30804 int fpu_index;
30805 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30806 &fpu_index, CL_TARGET))
30807 {
30808 error ("invalid fpu for target attribute or pragma %qs", q);
30809 return false;
30810 }
30811 if (fpu_index == TARGET_FPU_auto)
30812 {
30813 /* This doesn't really make sense until we support
30814 general dynamic selection of the architecture and all
30815 sub-features. */
30816 sorry ("auto fpu selection not currently permitted here");
30817 return false;
30818 }
30819 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30820 }
30821 else if (!strncmp (q, "arch=", 5))
30822 {
30823 char* arch = q+5;
30824 const arch_option *arm_selected_arch
30825 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30826
30827 if (!arm_selected_arch)
30828 {
30829 error ("invalid architecture for target attribute or pragma %qs",
30830 q);
30831 return false;
30832 }
30833
30834 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30835 }
30836 else if (q[0] == '+')
30837 {
30838 opts->x_arm_arch_string
30839 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30840 }
30841 else
30842 {
30843 error ("unknown target attribute or pragma %qs", q);
30844 return false;
30845 }
30846 }
30847
30848 return true;
30849 }
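/* The strings handled above are the comma-separated tokens of the
   target attribute or pragma, for example

     __attribute__ ((target ("thumb,fpu=neon")))
     int f (int x) { return x + 1; }

   where "thumb" sets MASK_THUMB and "fpu=neon" selects the named FPU;
   "arm", "arch=..." and "+<ext>" are handled similarly, and anything
   else is rejected with an error.  */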
30850
30851 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30852
30853 tree
30854 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30855 struct gcc_options *opts_set)
30856 {
30857 struct cl_target_option cl_opts;
30858
30859 if (!arm_valid_target_attribute_rec (args, opts))
30860 return NULL_TREE;
30861
30862 cl_target_option_save (&cl_opts, opts);
30863 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30864 arm_option_check_internal (opts);
30865 /* Do any overrides, such as global options arch=xxx.
30866 We do this since arm_active_target was overridden. */
30867 arm_option_reconfigure_globals ();
30868 arm_options_perform_arch_sanity_checks ();
30869 arm_option_override_internal (opts, opts_set);
30870
30871 return build_target_option_node (opts);
30872 }
30873
30874 static void
30875 add_attribute (const char * mode, tree *attributes)
30876 {
30877 size_t len = strlen (mode);
30878 tree value = build_string (len, mode);
30879
30880 TREE_TYPE (value) = build_array_type (char_type_node,
30881 build_index_type (size_int (len)));
30882
30883 *attributes = tree_cons (get_identifier ("target"),
30884 build_tree_list (NULL_TREE, value),
30885 *attributes);
30886 }
30887
30888 /* For testing only.  Alternately insert Thumb and ARM modes on functions. */
30889
30890 static void
30891 arm_insert_attributes (tree fndecl, tree * attributes)
30892 {
30893 const char *mode;
30894
30895 if (! TARGET_FLIP_THUMB)
30896 return;
30897
30898 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30899 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
30900 return;
30901
30902 /* Nested definitions must inherit mode. */
30903 if (current_function_decl)
30904 {
30905 mode = TARGET_THUMB ? "thumb" : "arm";
30906 add_attribute (mode, attributes);
30907 return;
30908 }
30909
30910 /* If there is already a setting don't change it. */
30911 if (lookup_attribute ("target", *attributes) != NULL)
30912 return;
30913
30914 mode = thumb_flipper ? "thumb" : "arm";
30915 add_attribute (mode, attributes);
30916
30917 thumb_flipper = !thumb_flipper;
30918 }
30919
30920 /* Hook to validate attribute((target("string"))). */
30921
30922 static bool
30923 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30924 tree args, int ARG_UNUSED (flags))
30925 {
30926 bool ret = true;
30927 struct gcc_options func_options;
30928 tree cur_tree, new_optimize;
30929 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30930
30931 /* Get the optimization options of the current function. */
30932 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30933
30934 /* If the function changed the optimization levels as well as setting target
30935 options, start with the optimizations specified. */
30936 if (!func_optimize)
30937 func_optimize = optimization_default_node;
30938
30939 /* Init func_options. */
30940 memset (&func_options, 0, sizeof (func_options));
30941 init_options_struct (&func_options, NULL);
30942 lang_hooks.init_options_struct (&func_options);
30943
30944 /* Initialize func_options to the defaults. */
30945 cl_optimization_restore (&func_options,
30946 TREE_OPTIMIZATION (func_optimize));
30947
30948 cl_target_option_restore (&func_options,
30949 TREE_TARGET_OPTION (target_option_default_node));
30950
30951 /* Set func_options flags with new target mode. */
30952 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30953 &global_options_set);
30954
30955 if (cur_tree == NULL_TREE)
30956 ret = false;
30957
30958 new_optimize = build_optimization_node (&func_options);
30959
30960 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30961
30962 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30963
30964 finalize_options_struct (&func_options);
30965
30966 return ret;
30967 }
30968
30969 /* Match an ISA feature bitmap to a named FPU. We always use the
30970 first entry that exactly matches the feature set, so that we
30971 effectively canonicalize the FPU name for the assembler. */
30972 static const char*
30973 arm_identify_fpu_from_isa (sbitmap isa)
30974 {
30975 auto_sbitmap fpubits (isa_num_bits);
30976 auto_sbitmap cand_fpubits (isa_num_bits);
30977
30978 bitmap_and (fpubits, isa, isa_all_fpubits);
30979
30980 /* If there are no ISA feature bits relating to the FPU, we must be
30981 doing soft-float. */
30982 if (bitmap_empty_p (fpubits))
30983 return "softvfp";
30984
30985 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30986 {
30987 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30988 if (bitmap_equal_p (fpubits, cand_fpubits))
30989 return all_fpus[i].name;
30990 }
30991 /* We must find an entry, or things have gone wrong. */
30992 gcc_unreachable ();
30993 }
30994
30995 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30996 by the function fndecl. */
30997 void
30998 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30999 {
31000 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
31001
31002 struct cl_target_option *targ_options;
31003 if (target_parts)
31004 targ_options = TREE_TARGET_OPTION (target_parts);
31005 else
31006 targ_options = TREE_TARGET_OPTION (target_option_current_node);
31007 gcc_assert (targ_options);
31008
31009 /* Only update the assembler .arch string if it is distinct from the last
31010 such string we printed. arch_to_print is set conditionally in case
31011 targ_options->x_arm_arch_string is NULL, which can happen
31012 when cc1 is invoked directly without a -march option. */
31013 std::string arch_to_print;
31014 if (targ_options->x_arm_arch_string)
31015 arch_to_print = targ_options->x_arm_arch_string;
31016
31017 if (arch_to_print != arm_last_printed_arch_string)
31018 {
31019 std::string arch_name
31020 = arch_to_print.substr (0, arch_to_print.find ("+"));
31021 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
31022 const arch_option *arch
31023 = arm_parse_arch_option_name (all_architectures, "-march",
31024 targ_options->x_arm_arch_string);
31025 auto_sbitmap opt_bits (isa_num_bits);
31026
31027 gcc_assert (arch);
31028 if (arch->common.extensions)
31029 {
31030 for (const struct cpu_arch_extension *opt = arch->common.extensions;
31031 opt->name != NULL;
31032 opt++)
31033 {
31034 if (!opt->remove)
31035 {
31036 arm_initialize_isa (opt_bits, opt->isa_bits);
31037 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31038 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31039 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31040 opt->name);
31041 }
31042 }
31043 }
31044
31045 arm_last_printed_arch_string = arch_to_print;
31046 }
31047
31048 fprintf (stream, "\t.syntax unified\n");
31049
31050 if (TARGET_THUMB)
31051 {
31052 if (is_called_in_ARM_mode (decl)
31053 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31054 && cfun->is_thunk))
31055 fprintf (stream, "\t.code 32\n");
31056 else if (TARGET_THUMB1)
31057 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31058 else
31059 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31060 }
31061 else
31062 fprintf (stream, "\t.arm\n");
31063
31064 std::string fpu_to_print
31065 = TARGET_SOFT_FLOAT
31066 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31067
31068 if (fpu_to_print != arm_last_printed_fpu_string)
31069 {
31070 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31071 arm_last_printed_fpu_string = fpu_to_print;
31072 }
31073
31074 if (TARGET_POKE_FUNCTION_NAME)
31075 arm_poke_function_name (stream, (const char *) name);
31076 }
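/* For a Thumb-2 function targeting, say, armv7-a with a NEON FPU, the
   directives emitted by this function typically look like

     .arch armv7-a
     .syntax unified
     .thumb
     .thumb_func
     .fpu neon

   with .arch_extension lines added only for extensions enabled beyond
   the base architecture.  */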
31077
31078 /* If MEM has an address of the form [base+offset], extract the two
31079 parts of the address and store them in BASE and OFFSET; otherwise
31080 return false after clearing BASE and OFFSET.  */
31081
31082 static bool
31083 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31084 {
31085 rtx addr;
31086
31087 gcc_assert (MEM_P (mem));
31088
31089 addr = XEXP (mem, 0);
31090
31091 /* Strip off const from addresses like (const (addr)). */
31092 if (GET_CODE (addr) == CONST)
31093 addr = XEXP (addr, 0);
31094
31095 if (GET_CODE (addr) == REG)
31096 {
31097 *base = addr;
31098 *offset = const0_rtx;
31099 return true;
31100 }
31101
31102 if (GET_CODE (addr) == PLUS
31103 && GET_CODE (XEXP (addr, 0)) == REG
31104 && CONST_INT_P (XEXP (addr, 1)))
31105 {
31106 *base = XEXP (addr, 0);
31107 *offset = XEXP (addr, 1);
31108 return true;
31109 }
31110
31111 *base = NULL_RTX;
31112 *offset = NULL_RTX;
31113
31114 return false;
31115 }
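/* The two shapes accepted above are, in RTL terms,

     (mem (reg r1))                        BASE = r1, OFFSET = 0
     (mem (plus (reg r1) (const_int 8)))   BASE = r1, OFFSET = 8

   (possibly wrapped in a CONST); anything else clears both outputs and
   returns false.  */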
31116
31117 /* If INSN is a load or store whose address has the form [base+offset],
31118 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is
31119 set to TRUE if it is a load.  Return TRUE if INSN is such an
31120 instruction, otherwise return FALSE.  */
31121
31122 static bool
31123 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31124 {
31125 rtx x, dest, src;
31126
31127 gcc_assert (INSN_P (insn));
31128 x = PATTERN (insn);
31129 if (GET_CODE (x) != SET)
31130 return false;
31131
31132 src = SET_SRC (x);
31133 dest = SET_DEST (x);
31134 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31135 {
31136 *is_load = false;
31137 extract_base_offset_in_addr (dest, base, offset);
31138 }
31139 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31140 {
31141 *is_load = true;
31142 extract_base_offset_in_addr (src, base, offset);
31143 }
31144 else
31145 return false;
31146
31147 return (*base != NULL_RTX && *offset != NULL_RTX);
31148 }
31149
31150 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31151
31152 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31153 and PRI are only calculated for these instructions.  For other instructions,
31154 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
31155 instruction fusion can be supported by returning different priorities.
31156
31157 It's important that irrelevant instructions get the largest FUSION_PRI. */
31158
31159 static void
31160 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31161 int *fusion_pri, int *pri)
31162 {
31163 int tmp, off_val;
31164 bool is_load;
31165 rtx base, offset;
31166
31167 gcc_assert (INSN_P (insn));
31168
31169 tmp = max_pri - 1;
31170 if (!fusion_load_store (insn, &base, &offset, &is_load))
31171 {
31172 *pri = tmp;
31173 *fusion_pri = tmp;
31174 return;
31175 }
31176
31177 /* Load goes first. */
31178 if (is_load)
31179 *fusion_pri = tmp - 1;
31180 else
31181 *fusion_pri = tmp - 2;
31182
31183 tmp /= 2;
31184
31185 /* INSN with smaller base register goes first. */
31186 tmp -= ((REGNO (base) & 0xff) << 20);
31187
31188 /* INSN with smaller offset goes first. */
31189 off_val = (int)(INTVAL (offset));
31190 if (off_val >= 0)
31191 tmp -= (off_val & 0xfffff);
31192 else
31193 tmp += ((- off_val) & 0xfffff);
31194
31195 *pri = tmp;
31196 return;
31197 }
31198
31199
31200 /* Construct and return a PARALLEL RTX vector with elements numbering the
31201 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31202 the vector - from the perspective of the architecture. This does not
31203 line up with GCC's perspective on lane numbers, so we end up with
31204 different masks depending on our target endian-ness. The diagram
31205 below may help. We must draw the distinction when building masks
31206 which select one half of the vector. An instruction selecting
31207 architectural low-lanes for a big-endian target must be described using
31208 a mask selecting GCC high-lanes.
31209
31210 Big-Endian Little-Endian
31211
31212 GCC 0 1 2 3 3 2 1 0
31213 | x | x | x | x | | x | x | x | x |
31214 Architecture 3 2 1 0 3 2 1 0
31215
31216 Low Mask: { 2, 3 } { 0, 1 }
31217 High Mask: { 0, 1 } { 2, 3 }
31218 */
31219
31220 rtx
31221 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31222 {
31223 int nunits = GET_MODE_NUNITS (mode);
31224 rtvec v = rtvec_alloc (nunits / 2);
31225 int high_base = nunits / 2;
31226 int low_base = 0;
31227 int base;
31228 rtx t1;
31229 int i;
31230
31231 if (BYTES_BIG_ENDIAN)
31232 base = high ? low_base : high_base;
31233 else
31234 base = high ? high_base : low_base;
31235
31236 for (i = 0; i < nunits / 2; i++)
31237 RTVEC_ELT (v, i) = GEN_INT (base + i);
31238
31239 t1 = gen_rtx_PARALLEL (mode, v);
31240 return t1;
31241 }
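/* For example, for V4SImode on a little-endian target HIGH == true
   gives

     (parallel [(const_int 2) (const_int 3)])

   whereas on a big-endian target the same request gives
   (parallel [(const_int 0) (const_int 1)]), as in the table above.  */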
31242
31243 /* Check OP for validity as a PARALLEL RTX vector with elements
31244 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31245 from the perspective of the architecture. See the diagram above
31246 arm_simd_vect_par_cnst_half for more details. */
31247
31248 bool
31249 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31250 bool high)
31251 {
31252 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31253 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31254 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31255 int i = 0;
31256
31257 if (!VECTOR_MODE_P (mode))
31258 return false;
31259
31260 if (count_op != count_ideal)
31261 return false;
31262
31263 for (i = 0; i < count_ideal; i++)
31264 {
31265 rtx elt_op = XVECEXP (op, 0, i);
31266 rtx elt_ideal = XVECEXP (ideal, 0, i);
31267
31268 if (!CONST_INT_P (elt_op)
31269 || INTVAL (elt_ideal) != INTVAL (elt_op))
31270 return false;
31271 }
31272 return true;
31273 }
31274
31275 /* We can output an mi_thunk for all cases except when VCALL_OFFSET
31276 is non-zero in Thumb-1. */
31277 static bool
31278 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31279 const_tree)
31280 {
31281 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31282 if (vcall_offset && TARGET_THUMB1)
31283 return false;
31284
31285 /* Otherwise ok. */
31286 return true;
31287 }
31288
31289 /* Generate RTL for a conditional branch with rtx comparison CODE in
31290 mode CC_MODE. The destination of the unlikely conditional branch
31291 is LABEL_REF. */
31292
31293 void
31294 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31295 rtx label_ref)
31296 {
31297 rtx x;
31298 x = gen_rtx_fmt_ee (code, VOIDmode,
31299 gen_rtx_REG (cc_mode, CC_REGNUM),
31300 const0_rtx);
31301
31302 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31303 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31304 pc_rtx);
31305 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31306 }
31307
31308 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31309
31310 For pure-code sections there is no letter code for this attribute, so
31311 output all the section flags numerically when this is needed. */
31312
31313 static bool
31314 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31315 {
31316
31317 if (flags & SECTION_ARM_PURECODE)
31318 {
31319 *num = 0x20000000;
31320
31321 if (!(flags & SECTION_DEBUG))
31322 *num |= 0x2;
31323 if (flags & SECTION_EXCLUDE)
31324 *num |= 0x80000000;
31325 if (flags & SECTION_WRITE)
31326 *num |= 0x1;
31327 if (flags & SECTION_CODE)
31328 *num |= 0x4;
31329 if (flags & SECTION_MERGE)
31330 *num |= 0x10;
31331 if (flags & SECTION_STRINGS)
31332 *num |= 0x20;
31333 if (flags & SECTION_TLS)
31334 *num |= 0x400;
31335 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31336 *num |= 0x200;
31337
31338 return true;
31339 }
31340
31341 return false;
31342 }
31343
31344 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31345
31346 If pure-code is passed as an option, make sure all functions are in
31347 sections that have the SHF_ARM_PURECODE attribute. */
31348
31349 static section *
31350 arm_function_section (tree decl, enum node_frequency freq,
31351 bool startup, bool exit)
31352 {
31353 const char * section_name;
31354 section * sec;
31355
31356 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31357 return default_function_section (decl, freq, startup, exit);
31358
31359 if (!target_pure_code)
31360 return default_function_section (decl, freq, startup, exit);
31361
31362
31363 section_name = DECL_SECTION_NAME (decl);
31364
31365 /* If a function is not in a named section then it falls under the 'default'
31366 text section, also known as '.text'. We can preserve previous behavior as
31367 the default text section already has the SHF_ARM_PURECODE section
31368 attribute. */
31369 if (!section_name)
31370 {
31371 section *default_sec = default_function_section (decl, freq, startup,
31372 exit);
31373
31374 /* If default_sec is not null, then it must be a special section like for
31375 example .text.startup. We set the pure-code attribute and return the
31376 same section to preserve existing behavior. */
31377 if (default_sec)
31378 default_sec->common.flags |= SECTION_ARM_PURECODE;
31379 return default_sec;
31380 }
31381
31382 /* Otherwise look whether a section has already been created with
31383 'section_name'. */
31384 sec = get_named_section (decl, section_name, 0);
31385 if (!sec)
31386 /* If that is not the case passing NULL as the section's name to
31387 'get_named_section' will create a section with the declaration's
31388 section name. */
31389 sec = get_named_section (decl, NULL, 0);
31390
31391 /* Set the SHF_ARM_PURECODE attribute. */
31392 sec->common.flags |= SECTION_ARM_PURECODE;
31393
31394 return sec;
31395 }
31396
31397 /* Implements the TARGET_SECTION_FLAGS hook.
31398
31399 If DECL is a function declaration and pure-code is passed as an option
31400 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31401 section's name and RELOC indicates whether the declaration's initializer may
31402 contain runtime relocations. */
31403
31404 static unsigned int
31405 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31406 {
31407 unsigned int flags = default_section_type_flags (decl, name, reloc);
31408
31409 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31410 flags |= SECTION_ARM_PURECODE;
31411
31412 return flags;
31413 }
31414
31415 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31416
31417 static void
31418 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31419 rtx op0, rtx op1,
31420 rtx *quot_p, rtx *rem_p)
31421 {
31422 if (mode == SImode)
31423 gcc_assert (!TARGET_IDIV);
31424
31425 scalar_int_mode libval_mode
31426 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31427
31428 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31429 libval_mode,
31430 op0, GET_MODE (op0),
31431 op1, GET_MODE (op1));
31432
31433 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31434 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31435 GET_MODE_SIZE (mode));
31436
31437 gcc_assert (quotient);
31438 gcc_assert (remainder);
31439
31440 *quot_p = quotient;
31441 *rem_p = remainder;
31442 }
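/* For SImode this typically ends up calling __aeabi_idivmod or
   __aeabi_uidivmod, which return the quotient in r0 and the remainder
   in r1; the two subregs taken at byte offsets 0 and
   GET_MODE_SIZE (mode) extract those halves from the combined
   double-width value.  */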
31443
31444 /* This function checks for the availability of the coprocessor builtin passed
31445 in BUILTIN for the current target. Returns true if it is available and
31446 false otherwise.  If a BUILTIN is passed for which this function has not
31447 been implemented, it will cause an internal compiler error (gcc_unreachable).  */
31448
31449 bool
31450 arm_coproc_builtin_available (enum unspecv builtin)
31451 {
31452 /* None of these builtins are available in Thumb mode if the target only
31453 supports Thumb-1. */
31454 if (TARGET_THUMB1)
31455 return false;
31456
31457 switch (builtin)
31458 {
31459 case VUNSPEC_CDP:
31460 case VUNSPEC_LDC:
31461 case VUNSPEC_LDCL:
31462 case VUNSPEC_STC:
31463 case VUNSPEC_STCL:
31464 case VUNSPEC_MCR:
31465 case VUNSPEC_MRC:
31466 if (arm_arch4)
31467 return true;
31468 break;
31469 case VUNSPEC_CDP2:
31470 case VUNSPEC_LDC2:
31471 case VUNSPEC_LDC2L:
31472 case VUNSPEC_STC2:
31473 case VUNSPEC_STC2L:
31474 case VUNSPEC_MCR2:
31475 case VUNSPEC_MRC2:
31476 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31477 ARMv8-{A,M}. */
31478 if (arm_arch5t)
31479 return true;
31480 break;
31481 case VUNSPEC_MCRR:
31482 case VUNSPEC_MRRC:
31483 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31484 ARMv8-{A,M}. */
31485 if (arm_arch6 || arm_arch5te)
31486 return true;
31487 break;
31488 case VUNSPEC_MCRR2:
31489 case VUNSPEC_MRRC2:
31490 if (arm_arch6)
31491 return true;
31492 break;
31493 default:
31494 gcc_unreachable ();
31495 }
31496 return false;
31497 }
31498
31499 /* This function returns true if OP is a valid memory operand for the ldc and
31500 stc coprocessor instructions and false otherwise. */
31501
31502 bool
31503 arm_coproc_ldc_stc_legitimate_address (rtx op)
31504 {
31505 HOST_WIDE_INT range;
31506 /* Has to be a memory operand. */
31507 if (!MEM_P (op))
31508 return false;
31509
31510 op = XEXP (op, 0);
31511
31512 /* We accept registers. */
31513 if (REG_P (op))
31514 return true;
31515
31516 switch (GET_CODE (op))
31517 {
31518 case PLUS:
31519 {
31520 /* Or registers with an offset. */
31521 if (!REG_P (XEXP (op, 0)))
31522 return false;
31523
31524 op = XEXP (op, 1);
31525
31526 /* The offset must be an immediate though. */
31527 if (!CONST_INT_P (op))
31528 return false;
31529
31530 range = INTVAL (op);
31531
31532 /* Within the range of [-1020,1020]. */
31533 if (!IN_RANGE (range, -1020, 1020))
31534 return false;
31535
31536 /* And a multiple of 4. */
31537 return (range % 4) == 0;
31538 }
31539 case PRE_INC:
31540 case POST_INC:
31541 case PRE_DEC:
31542 case POST_DEC:
31543 return REG_P (XEXP (op, 0));
31544 default:
31545 gcc_unreachable ();
31546 }
31547 return false;
31548 }
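/* The accepted forms correspond to the LDC/STC addressing modes, for
   example (with illustrative coprocessor and register numbers)

     (mem (reg r2))                         ldc p1, c0, [r2]
     (mem (plus (reg r2) (const_int 8)))    ldc p1, c0, [r2, #8]

   plus the pre/post increment and decrement forms, with any immediate
   offset restricted to multiples of 4 in [-1020, 1020].  */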
31549
31550 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31551
31552 In VFPv1, VFP registers could only be accessed in the mode they were
31553 set, so subregs would be invalid there. However, we don't support
31554 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31555
31556 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31557 VFP registers in little-endian order. We can't describe that accurately to
31558 GCC, so avoid taking subregs of such values.
31559
31560 The only exception is going from a 128-bit to a 64-bit type. In that
31561 case the data layout happens to be consistent for big-endian, so we
31562 explicitly allow that case. */
31563
31564 static bool
31565 arm_can_change_mode_class (machine_mode from, machine_mode to,
31566 reg_class_t rclass)
31567 {
31568 if (TARGET_BIG_END
31569 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31570 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31571 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31572 && reg_classes_intersect_p (VFP_REGS, rclass))
31573 return false;
31574 return true;
31575 }
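/* For example, on a big-endian target this rejects taking an SImode
   subreg of a DFmode value that might live in a VFP register (8 bytes,
   larger than UNITS_PER_WORD), while a 128-bit to 64-bit change such as
   V2DImode to DImode is explicitly allowed because the layout matches.  */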
31576
31577 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31578 strcpy from constants will be faster. */
31579
31580 static HOST_WIDE_INT
31581 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31582 {
31583 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31584 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31585 return MAX (align, BITS_PER_WORD * factor);
31586 return align;
31587 }
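/* For example, the string literal in

     strcpy (buf, "hello, world");

   gets at least word alignment (two words when tuning for XScale in ARM
   mode) unless optimizing for size, so the copy can use word accesses.  */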
31588
31589 /* Emit a speculation barrier on target architectures that do not have
31590 DSB/ISB directly.  Such systems probably don't need a barrier
31591 themselves, but if the code is ever run on a later architecture, the
31592 lack of a real barrier might become a problem.  */
31593 void
31594 arm_emit_speculation_barrier_function ()
31595 {
31596 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
31597 }
31598
31599 #if CHECKING_P
31600 namespace selftest {
31601
31602 /* Scan the static data tables generated by parsecpu.awk looking for
31603 potential issues with the data. We primarily check for
31604 inconsistencies in the option extensions at present (extensions
31605 that duplicate others but aren't marked as aliases). Furthermore,
31606 for correct canonicalization, later options must never be a subset
31607 of an earlier option. Any extension should also only specify other
31608 feature bits and never an architecture bit. The architecture is inferred
31609 from the declaration of the extension. */
31610 static void
31611 arm_test_cpu_arch_data (void)
31612 {
31613 const arch_option *arch;
31614 const cpu_option *cpu;
31615 auto_sbitmap target_isa (isa_num_bits);
31616 auto_sbitmap isa1 (isa_num_bits);
31617 auto_sbitmap isa2 (isa_num_bits);
31618
31619 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31620 {
31621 const cpu_arch_extension *ext1, *ext2;
31622
31623 if (arch->common.extensions == NULL)
31624 continue;
31625
31626 arm_initialize_isa (target_isa, arch->common.isa_bits);
31627
31628 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31629 {
31630 if (ext1->alias)
31631 continue;
31632
31633 arm_initialize_isa (isa1, ext1->isa_bits);
31634 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31635 {
31636 if (ext2->alias || ext1->remove != ext2->remove)
31637 continue;
31638
31639 arm_initialize_isa (isa2, ext2->isa_bits);
31640 /* If the option is a subset of the parent option, it doesn't
31641 add anything and so isn't useful. */
31642 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31643
31644 /* If the extension specifies any architectural bits then
31645 disallow it. Extensions should only specify feature bits. */
31646 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31647 }
31648 }
31649 }
31650
31651 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31652 {
31653 const cpu_arch_extension *ext1, *ext2;
31654
31655 if (cpu->common.extensions == NULL)
31656 continue;
31657
31658 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31659
31660 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31661 {
31662 if (ext1->alias)
31663 continue;
31664
31665 arm_initialize_isa (isa1, ext1->isa_bits);
31666 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31667 {
31668 if (ext2->alias || ext1->remove != ext2->remove)
31669 continue;
31670
31671 arm_initialize_isa (isa2, ext2->isa_bits);
31672 /* If the option is a subset of the parent option, it doesn't
31673 add anything and so isn't useful. */
31674 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31675
31676 /* If the extension specifies any architectural bits then
31677 disallow it. Extensions should only specify feature bits. */
31678 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31679 }
31680 }
31681 }
31682 }
31683
31684 /* Scan the static data tables generated by parsecpu.awk looking for
31685 potential issues with the data. Here we check for consistency between the
31686 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31687 a feature bit that is not defined by any FPU flag. */
31688 static void
31689 arm_test_fpu_data (void)
31690 {
31691 auto_sbitmap isa_all_fpubits (isa_num_bits);
31692 auto_sbitmap fpubits (isa_num_bits);
31693 auto_sbitmap tmpset (isa_num_bits);
31694
31695 static const enum isa_feature fpu_bitlist[]
31696 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31697 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31698
31699 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31700 {
31701 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31702 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31703 bitmap_clear (isa_all_fpubits);
31704 bitmap_copy (isa_all_fpubits, tmpset);
31705 }
31706
31707 if (!bitmap_empty_p (isa_all_fpubits))
31708 {
31709 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31710 " group that are not defined by any FPU.\n"
31711 " Check your arm-cpus.in.\n");
31712 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31713 }
31714 }
31715
31716 static void
31717 arm_run_selftests (void)
31718 {
31719 arm_test_cpu_arch_data ();
31720 arm_test_fpu_data ();
31721 }
31722 } /* Namespace selftest. */
31723
31724 #undef TARGET_RUN_TARGET_SELFTESTS
31725 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31726 #endif /* CHECKING_P */
31727
31728 struct gcc_target targetm = TARGET_INITIALIZER;
31729
31730 #include "gt-arm.h"