1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2018 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
     4    and Martin Simmons (martin@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
191 tree, bool);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 const_tree type,
286 int misalignment,
287 bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 const vec_perm_indices &);
299
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 \f
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
343 NULL },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
346 NULL },
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
356
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
360 */
361 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
362 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute, NULL },
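  /* Usage sketch (illustration only, not part of the original file):

       __declspec (dllimport) int imported_counter;
       __declspec (dllexport) void exported_fn (void);
       __declspec (dllexport) __declspec (interfacearm) void thunked (void);

     i.e. repeat __declspec for each attribute instead of space-separating
     several names inside a single __declspec, as noted above.  */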
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
367 NULL },
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute, NULL },
372 #endif
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry, NULL },
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call, NULL },
378 { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
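/* Usage sketch for the attributes above (illustration only, not part of the
   original file; all declarations are hypothetical user code):

     void far_helper (void) __attribute__ ((long_call));
     void near_helper (void) __attribute__ ((short_call));
     double vfp_fn (double) __attribute__ ((pcs ("aapcs-vfp")));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     int entry_fn (int) __attribute__ ((cmse_nonsecure_entry));
     int __attribute__ ((cmse_nonsecure_call)) (*ns_callback) (void);

   The last two require -mcmse (ARMv8-M Security Extensions).  "pcs" takes
   exactly one string argument and "isr"/"interrupt" take at most one,
   matching the min_len/max_len fields in the table.  */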
380 \f
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
481 #endif
482
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
525
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
605
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631
632 #endif /* ARM_UNWIND_INFO */
633
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659
660 /* The minimum is set such that the total size of the block
661 for a particular anchor is -4088 + 1 + 4095 bytes, which is
662 divisible by eight, ensuring natural spacing of anchors. */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
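/* Worked out (not part of the original file): offsets -4088 .. 4095 span
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, hence the divisibility
   by eight mentioned above.  */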
665
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
672
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
676
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
697
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
728
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
735
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
739
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
743
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
756
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
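/* Illustration (not part of the original file): without -mrestrict-it one IT
   instruction may cover up to four conditional Thumb-2 insns, e.g.

       itte  eq
       moveq r0, #1
       addeq r1, r1, #2
       movne r0, #0

   whereas with -mrestrict-it each block covers a single insn ("it eq"
   followed by one eq-conditional instruction).  */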
762
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
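/* Illustration (assumption drawn from the comment above, not part of the
   original file): the value 2 selects bit 1 as the marker, i.e. a pointer
   with bit 1 set is treated as a descriptor, leaving bit 0 free for ARM
   versus Thumb state selection.  */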
794
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 \f
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack;
814 static char * minipool_startobj;
815
816 /* The maximum number of insns skipped which
817 will be conditionalised if possible. */
818 static int max_insns_skipped = 5;
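/* Illustration (not part of the original file): a branch over at most this
   many instructions, e.g.

       cmp   r0, #0
       beq   .Lskip          @ skips two insns
       add   r1, r1, #1
       add   r2, r2, #1
     .Lskip:

   may instead be emitted as the conditional pair "addne r1, r1, #1" /
   "addne r2, r2, #1", removing the branch.  */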
819
820 extern FILE * asm_out_file;
821
822 /* True if we are currently building a constant table. */
823 int making_const_table;
824
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827
828 /* The current tuning set. */
829 const struct tune_params *current_tune;
830
831 /* Which floating point hardware to schedule for. */
832 int arm_fpu_attr;
833
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837
838 /* The bits in this mask specify which instruction scheduling options should
839 be used. */
840 unsigned int tune_flags = 0;
841
842 /* The highest ARM architecture version supported by the
843 target. */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845
846 /* Active target architecture and tuning. */
847
848 struct arm_build_target arm_active_target;
849
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
852
853 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
854 int arm_arch4 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
857 int arm_arch4t = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
860 int arm_arch5t = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
863 int arm_arch5te = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
866 int arm_arch6 = 0;
867
868 /* Nonzero if this chip supports the ARM 6K extensions. */
869 int arm_arch6k = 0;
870
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
872 int arm_arch6kz = 0;
873
874 /* Nonzero if instructions present in ARMv6-M can be used. */
875 int arm_arch6m = 0;
876
877 /* Nonzero if this chip supports the ARM 7 extensions. */
878 int arm_arch7 = 0;
879
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae = 0;
882
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm = 0;
885
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
887 int arm_arch7em = 0;
888
889 /* Nonzero if instructions present in ARMv8 can be used. */
890 int arm_arch8 = 0;
891
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
893 int arm_arch8_1 = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
896 int arm_arch8_2 = 0;
897
898 /* Nonzero if this chip supports the FP16 instructions extension of ARM
899 Architecture 8.2. */
900 int arm_fp16_inst = 0;
901
902 /* Nonzero if this chip can benefit from load scheduling. */
903 int arm_ld_sched = 0;
904
905 /* Nonzero if this chip is a StrongARM. */
906 int arm_tune_strongarm = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX technology. */
909 int arm_arch_iwmmxt = 0;
910
911 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
912 int arm_arch_iwmmxt2 = 0;
913
914 /* Nonzero if this chip is an XScale. */
915 int arm_arch_xscale = 0;
916
   917 /* Nonzero if tuning for XScale.  */
918 int arm_tune_xscale = 0;
919
920 /* Nonzero if we want to tune for stores that access the write-buffer.
921 This typically means an ARM6 or ARM7 with MMU or MPU. */
922 int arm_tune_wbuf = 0;
923
924 /* Nonzero if tuning for Cortex-A9. */
925 int arm_tune_cortex_a9 = 0;
926
927 /* Nonzero if we should define __THUMB_INTERWORK__ in the
928 preprocessor.
929 XXX This is a bit of a hack, it's intended to help work around
930 problems in GLD which doesn't understand that armv5t code is
931 interworking clean. */
932 int arm_cpp_interwork = 0;
933
934 /* Nonzero if chip supports Thumb 1. */
935 int arm_arch_thumb1;
936
937 /* Nonzero if chip supports Thumb 2. */
938 int arm_arch_thumb2;
939
940 /* Nonzero if chip supports integer division instruction. */
941 int arm_arch_arm_hwdiv;
942 int arm_arch_thumb_hwdiv;
943
944 /* Nonzero if chip disallows volatile memory access in IT block. */
945 int arm_arch_no_volatile_ce;
946
   947 /* Nonzero if we should use Neon to handle 64-bit operations rather
948 than core registers. */
949 int prefer_neon_for_64bits = 0;
950
951 /* Nonzero if we shouldn't use literal pools. */
952 bool arm_disable_literal_pool = false;
953
954 /* The register number to be used for the PIC offset register. */
955 unsigned arm_pic_register = INVALID_REGNUM;
956
957 enum arm_pcs arm_pcs_default;
958
959 /* For an explanation of these variables, see final_prescan_insn below. */
960 int arm_ccfsm_state;
961 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
962 enum arm_cond_code arm_current_cc;
963
964 rtx arm_target_insn;
965 int arm_target_label;
966 /* The number of conditionally executed insns, including the current insn. */
967 int arm_condexec_count = 0;
968 /* A bitmask specifying the patterns for the IT block.
969 Zero means do not output an IT block before this insn. */
970 int arm_condexec_mask = 0;
971 /* The number of bits used in arm_condexec_mask. */
972 int arm_condexec_masklen = 0;
973
974 /* Nonzero if chip supports the ARMv8 CRC instructions. */
975 int arm_arch_crc = 0;
976
977 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
978 int arm_arch_dotprod = 0;
979
980 /* Nonzero if chip supports the ARMv8-M security extensions. */
981 int arm_arch_cmse = 0;
982
983 /* Nonzero if the core has a very small, high-latency, multiply unit. */
984 int arm_m_profile_small_mul = 0;
985
986 /* The condition codes of the ARM, and the inverse function. */
987 static const char * const arm_condition_codes[] =
988 {
989 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
990 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
991 };
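/* Note (not part of the original file): the entries are ordered in inverse
   pairs ("eq"/"ne", "cs"/"cc", ..., "gt"/"le"), so flipping the low bit of
   an index yields the inverse condition code.  */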
992
993 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
994 int arm_regs_in_sequence[] =
995 {
996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
997 };
998
999 #define ARM_LSL_NAME "lsl"
1000 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1001
1002 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1003 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1004 | (1 << PIC_OFFSET_TABLE_REGNUM)))
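/* Reading of the mask above (not part of the original file): 0xff selects
   the low registers r0-r7, from which the Thumb hard frame pointer and, if
   it happens to be a low register, the PIC register are removed; SP and PC
   lie outside the low-register range, so clearing their bits is purely
   defensive.  */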
1005 \f
1006 /* Initialization code. */
1007
1008 struct cpu_tune
1009 {
1010 enum processor_type scheduler;
1011 unsigned int tune_flags;
1012 const struct tune_params *tune;
1013 };
1014
1015 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1016 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1017 { \
1018 num_slots, \
1019 l1_size, \
1020 l1_line_size \
1021 }
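/* Usage sketch (assumption, not part of the original file): tune_params
   structures later in the file initialise their prefetch fields with these
   macros, e.g. ARM_PREFETCH_NOT_BENEFICIAL for cores that do not profit
   from issuing prefetches, or ARM_PREFETCH_BENEFICIAL (4, 32768, 64) for a
   core with 4 outstanding prefetch slots, a 32kB L1 cache and 64-byte
   cache lines (illustrative numbers).  */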
1022
1023 /* arm generic vectorizer costs. */
1024 static const
1025 struct cpu_vec_costs arm_default_vec_cost = {
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 1, /* vec_unalign_load_cost. */
1034 1, /* vec_unalign_store_cost. */
1035 1, /* vec_store_cost. */
1036 3, /* cond_taken_branch_cost. */
1037 1, /* cond_not_taken_branch_cost. */
1038 };
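/* Worked example (not part of the original file): under these unit costs a
   vectorised block of four statements followed by a taken branch is costed
   at 4 * 1 + 3 = 7, while the same block ending in a not-taken branch costs
   4 * 1 + 1 = 5; all other entries weight vector and scalar work equally.  */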
1039
1040 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1041 #include "aarch-cost-tables.h"
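/* Note (not part of the original file): COSTS_N_INSNS (N) expresses a cost
   of N instruction equivalents, so e.g. COSTS_N_INSNS (3) below for a
   Cortex-A9 SImode multiply rates it as costly as three simple ALU insns.  */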
1042
1043
1044
1045 const struct cpu_cost_table cortexa9_extra_costs =
1046 {
1047 /* ALU */
1048 {
1049 0, /* arith. */
1050 0, /* logical. */
1051 0, /* shift. */
1052 COSTS_N_INSNS (1), /* shift_reg. */
1053 COSTS_N_INSNS (1), /* arith_shift. */
1054 COSTS_N_INSNS (2), /* arith_shift_reg. */
1055 0, /* log_shift. */
1056 COSTS_N_INSNS (1), /* log_shift_reg. */
1057 COSTS_N_INSNS (1), /* extend. */
1058 COSTS_N_INSNS (2), /* extend_arith. */
1059 COSTS_N_INSNS (1), /* bfi. */
1060 COSTS_N_INSNS (1), /* bfx. */
1061 0, /* clz. */
1062 0, /* rev. */
1063 0, /* non_exec. */
1064 true /* non_exec_costs_exec. */
1065 },
1066 {
1067 /* MULT SImode */
1068 {
1069 COSTS_N_INSNS (3), /* simple. */
1070 COSTS_N_INSNS (3), /* flag_setting. */
1071 COSTS_N_INSNS (2), /* extend. */
1072 COSTS_N_INSNS (3), /* add. */
1073 COSTS_N_INSNS (2), /* extend_add. */
1074 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1075 },
1076 /* MULT DImode */
1077 {
1078 0, /* simple (N/A). */
1079 0, /* flag_setting (N/A). */
1080 COSTS_N_INSNS (4), /* extend. */
1081 0, /* add (N/A). */
1082 COSTS_N_INSNS (4), /* extend_add. */
1083 0 /* idiv (N/A). */
1084 }
1085 },
1086 /* LD/ST */
1087 {
1088 COSTS_N_INSNS (2), /* load. */
1089 COSTS_N_INSNS (2), /* load_sign_extend. */
1090 COSTS_N_INSNS (2), /* ldrd. */
1091 COSTS_N_INSNS (2), /* ldm_1st. */
1092 1, /* ldm_regs_per_insn_1st. */
1093 2, /* ldm_regs_per_insn_subsequent. */
1094 COSTS_N_INSNS (5), /* loadf. */
1095 COSTS_N_INSNS (5), /* loadd. */
1096 COSTS_N_INSNS (1), /* load_unaligned. */
1097 COSTS_N_INSNS (2), /* store. */
1098 COSTS_N_INSNS (2), /* strd. */
1099 COSTS_N_INSNS (2), /* stm_1st. */
1100 1, /* stm_regs_per_insn_1st. */
1101 2, /* stm_regs_per_insn_subsequent. */
1102 COSTS_N_INSNS (1), /* storef. */
1103 COSTS_N_INSNS (1), /* stored. */
1104 COSTS_N_INSNS (1), /* store_unaligned. */
1105 COSTS_N_INSNS (1), /* loadv. */
1106 COSTS_N_INSNS (1) /* storev. */
1107 },
1108 {
1109 /* FP SFmode */
1110 {
1111 COSTS_N_INSNS (14), /* div. */
1112 COSTS_N_INSNS (4), /* mult. */
1113 COSTS_N_INSNS (7), /* mult_addsub. */
1114 COSTS_N_INSNS (30), /* fma. */
1115 COSTS_N_INSNS (3), /* addsub. */
1116 COSTS_N_INSNS (1), /* fpconst. */
1117 COSTS_N_INSNS (1), /* neg. */
1118 COSTS_N_INSNS (3), /* compare. */
1119 COSTS_N_INSNS (3), /* widen. */
1120 COSTS_N_INSNS (3), /* narrow. */
1121 COSTS_N_INSNS (3), /* toint. */
1122 COSTS_N_INSNS (3), /* fromint. */
1123 COSTS_N_INSNS (3) /* roundint. */
1124 },
1125 /* FP DFmode */
1126 {
1127 COSTS_N_INSNS (24), /* div. */
1128 COSTS_N_INSNS (5), /* mult. */
1129 COSTS_N_INSNS (8), /* mult_addsub. */
1130 COSTS_N_INSNS (30), /* fma. */
1131 COSTS_N_INSNS (3), /* addsub. */
1132 COSTS_N_INSNS (1), /* fpconst. */
1133 COSTS_N_INSNS (1), /* neg. */
1134 COSTS_N_INSNS (3), /* compare. */
1135 COSTS_N_INSNS (3), /* widen. */
1136 COSTS_N_INSNS (3), /* narrow. */
1137 COSTS_N_INSNS (3), /* toint. */
1138 COSTS_N_INSNS (3), /* fromint. */
1139 COSTS_N_INSNS (3) /* roundint. */
1140 }
1141 },
1142 /* Vector */
1143 {
1144 COSTS_N_INSNS (1) /* alu. */
1145 }
1146 };
1147
1148 const struct cpu_cost_table cortexa8_extra_costs =
1149 {
1150 /* ALU */
1151 {
1152 0, /* arith. */
1153 0, /* logical. */
1154 COSTS_N_INSNS (1), /* shift. */
1155 0, /* shift_reg. */
1156 COSTS_N_INSNS (1), /* arith_shift. */
1157 0, /* arith_shift_reg. */
1158 COSTS_N_INSNS (1), /* log_shift. */
1159 0, /* log_shift_reg. */
1160 0, /* extend. */
1161 0, /* extend_arith. */
1162 0, /* bfi. */
1163 0, /* bfx. */
1164 0, /* clz. */
1165 0, /* rev. */
1166 0, /* non_exec. */
1167 true /* non_exec_costs_exec. */
1168 },
1169 {
1170 /* MULT SImode */
1171 {
1172 COSTS_N_INSNS (1), /* simple. */
1173 COSTS_N_INSNS (1), /* flag_setting. */
1174 COSTS_N_INSNS (1), /* extend. */
1175 COSTS_N_INSNS (1), /* add. */
1176 COSTS_N_INSNS (1), /* extend_add. */
1177 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1178 },
1179 /* MULT DImode */
1180 {
1181 0, /* simple (N/A). */
1182 0, /* flag_setting (N/A). */
1183 COSTS_N_INSNS (2), /* extend. */
1184 0, /* add (N/A). */
1185 COSTS_N_INSNS (2), /* extend_add. */
1186 0 /* idiv (N/A). */
1187 }
1188 },
1189 /* LD/ST */
1190 {
1191 COSTS_N_INSNS (1), /* load. */
1192 COSTS_N_INSNS (1), /* load_sign_extend. */
1193 COSTS_N_INSNS (1), /* ldrd. */
1194 COSTS_N_INSNS (1), /* ldm_1st. */
1195 1, /* ldm_regs_per_insn_1st. */
1196 2, /* ldm_regs_per_insn_subsequent. */
1197 COSTS_N_INSNS (1), /* loadf. */
1198 COSTS_N_INSNS (1), /* loadd. */
1199 COSTS_N_INSNS (1), /* load_unaligned. */
1200 COSTS_N_INSNS (1), /* store. */
1201 COSTS_N_INSNS (1), /* strd. */
1202 COSTS_N_INSNS (1), /* stm_1st. */
1203 1, /* stm_regs_per_insn_1st. */
1204 2, /* stm_regs_per_insn_subsequent. */
1205 COSTS_N_INSNS (1), /* storef. */
1206 COSTS_N_INSNS (1), /* stored. */
1207 COSTS_N_INSNS (1), /* store_unaligned. */
1208 COSTS_N_INSNS (1), /* loadv. */
1209 COSTS_N_INSNS (1) /* storev. */
1210 },
1211 {
1212 /* FP SFmode */
1213 {
1214 COSTS_N_INSNS (36), /* div. */
1215 COSTS_N_INSNS (11), /* mult. */
1216 COSTS_N_INSNS (20), /* mult_addsub. */
1217 COSTS_N_INSNS (30), /* fma. */
1218 COSTS_N_INSNS (9), /* addsub. */
1219 COSTS_N_INSNS (3), /* fpconst. */
1220 COSTS_N_INSNS (3), /* neg. */
1221 COSTS_N_INSNS (6), /* compare. */
1222 COSTS_N_INSNS (4), /* widen. */
1223 COSTS_N_INSNS (4), /* narrow. */
1224 COSTS_N_INSNS (8), /* toint. */
1225 COSTS_N_INSNS (8), /* fromint. */
1226 COSTS_N_INSNS (8) /* roundint. */
1227 },
1228 /* FP DFmode */
1229 {
1230 COSTS_N_INSNS (64), /* div. */
1231 COSTS_N_INSNS (16), /* mult. */
1232 COSTS_N_INSNS (25), /* mult_addsub. */
1233 COSTS_N_INSNS (30), /* fma. */
1234 COSTS_N_INSNS (9), /* addsub. */
1235 COSTS_N_INSNS (3), /* fpconst. */
1236 COSTS_N_INSNS (3), /* neg. */
1237 COSTS_N_INSNS (6), /* compare. */
1238 COSTS_N_INSNS (6), /* widen. */
1239 COSTS_N_INSNS (6), /* narrow. */
1240 COSTS_N_INSNS (8), /* toint. */
1241 COSTS_N_INSNS (8), /* fromint. */
1242 COSTS_N_INSNS (8) /* roundint. */
1243 }
1244 },
1245 /* Vector */
1246 {
1247 COSTS_N_INSNS (1) /* alu. */
1248 }
1249 };
1250
1251 const struct cpu_cost_table cortexa5_extra_costs =
1252 {
1253 /* ALU */
1254 {
1255 0, /* arith. */
1256 0, /* logical. */
1257 COSTS_N_INSNS (1), /* shift. */
1258 COSTS_N_INSNS (1), /* shift_reg. */
1259 COSTS_N_INSNS (1), /* arith_shift. */
1260 COSTS_N_INSNS (1), /* arith_shift_reg. */
1261 COSTS_N_INSNS (1), /* log_shift. */
1262 COSTS_N_INSNS (1), /* log_shift_reg. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* extend_arith. */
1265 COSTS_N_INSNS (1), /* bfi. */
1266 COSTS_N_INSNS (1), /* bfx. */
1267 COSTS_N_INSNS (1), /* clz. */
1268 COSTS_N_INSNS (1), /* rev. */
1269 0, /* non_exec. */
1270 true /* non_exec_costs_exec. */
1271 },
1272
1273 {
1274 /* MULT SImode */
1275 {
1276 0, /* simple. */
1277 COSTS_N_INSNS (1), /* flag_setting. */
1278 COSTS_N_INSNS (1), /* extend. */
1279 COSTS_N_INSNS (1), /* add. */
1280 COSTS_N_INSNS (1), /* extend_add. */
1281 COSTS_N_INSNS (7) /* idiv. */
1282 },
1283 /* MULT DImode */
1284 {
1285 0, /* simple (N/A). */
1286 0, /* flag_setting (N/A). */
1287 COSTS_N_INSNS (1), /* extend. */
1288 0, /* add. */
1289 COSTS_N_INSNS (2), /* extend_add. */
1290 0 /* idiv (N/A). */
1291 }
1292 },
1293 /* LD/ST */
1294 {
1295 COSTS_N_INSNS (1), /* load. */
1296 COSTS_N_INSNS (1), /* load_sign_extend. */
1297 COSTS_N_INSNS (6), /* ldrd. */
1298 COSTS_N_INSNS (1), /* ldm_1st. */
1299 1, /* ldm_regs_per_insn_1st. */
1300 2, /* ldm_regs_per_insn_subsequent. */
1301 COSTS_N_INSNS (2), /* loadf. */
1302 COSTS_N_INSNS (4), /* loadd. */
1303 COSTS_N_INSNS (1), /* load_unaligned. */
1304 COSTS_N_INSNS (1), /* store. */
1305 COSTS_N_INSNS (3), /* strd. */
1306 COSTS_N_INSNS (1), /* stm_1st. */
1307 1, /* stm_regs_per_insn_1st. */
1308 2, /* stm_regs_per_insn_subsequent. */
1309 COSTS_N_INSNS (2), /* storef. */
1310 COSTS_N_INSNS (2), /* stored. */
1311 COSTS_N_INSNS (1), /* store_unaligned. */
1312 COSTS_N_INSNS (1), /* loadv. */
1313 COSTS_N_INSNS (1) /* storev. */
1314 },
1315 {
1316 /* FP SFmode */
1317 {
1318 COSTS_N_INSNS (15), /* div. */
1319 COSTS_N_INSNS (3), /* mult. */
1320 COSTS_N_INSNS (7), /* mult_addsub. */
1321 COSTS_N_INSNS (7), /* fma. */
1322 COSTS_N_INSNS (3), /* addsub. */
1323 COSTS_N_INSNS (3), /* fpconst. */
1324 COSTS_N_INSNS (3), /* neg. */
1325 COSTS_N_INSNS (3), /* compare. */
1326 COSTS_N_INSNS (3), /* widen. */
1327 COSTS_N_INSNS (3), /* narrow. */
1328 COSTS_N_INSNS (3), /* toint. */
1329 COSTS_N_INSNS (3), /* fromint. */
1330 COSTS_N_INSNS (3) /* roundint. */
1331 },
1332 /* FP DFmode */
1333 {
1334 COSTS_N_INSNS (30), /* div. */
1335 COSTS_N_INSNS (6), /* mult. */
1336 COSTS_N_INSNS (10), /* mult_addsub. */
1337 COSTS_N_INSNS (7), /* fma. */
1338 COSTS_N_INSNS (3), /* addsub. */
1339 COSTS_N_INSNS (3), /* fpconst. */
1340 COSTS_N_INSNS (3), /* neg. */
1341 COSTS_N_INSNS (3), /* compare. */
1342 COSTS_N_INSNS (3), /* widen. */
1343 COSTS_N_INSNS (3), /* narrow. */
1344 COSTS_N_INSNS (3), /* toint. */
1345 COSTS_N_INSNS (3), /* fromint. */
1346 COSTS_N_INSNS (3) /* roundint. */
1347 }
1348 },
1349 /* Vector */
1350 {
1351 COSTS_N_INSNS (1) /* alu. */
1352 }
1353 };
1354
1355
1356 const struct cpu_cost_table cortexa7_extra_costs =
1357 {
1358 /* ALU */
1359 {
1360 0, /* arith. */
1361 0, /* logical. */
1362 COSTS_N_INSNS (1), /* shift. */
1363 COSTS_N_INSNS (1), /* shift_reg. */
1364 COSTS_N_INSNS (1), /* arith_shift. */
1365 COSTS_N_INSNS (1), /* arith_shift_reg. */
1366 COSTS_N_INSNS (1), /* log_shift. */
1367 COSTS_N_INSNS (1), /* log_shift_reg. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* extend_arith. */
1370 COSTS_N_INSNS (1), /* bfi. */
1371 COSTS_N_INSNS (1), /* bfx. */
1372 COSTS_N_INSNS (1), /* clz. */
1373 COSTS_N_INSNS (1), /* rev. */
1374 0, /* non_exec. */
1375 true /* non_exec_costs_exec. */
1376 },
1377
1378 {
1379 /* MULT SImode */
1380 {
1381 0, /* simple. */
1382 COSTS_N_INSNS (1), /* flag_setting. */
1383 COSTS_N_INSNS (1), /* extend. */
1384 COSTS_N_INSNS (1), /* add. */
1385 COSTS_N_INSNS (1), /* extend_add. */
1386 COSTS_N_INSNS (7) /* idiv. */
1387 },
1388 /* MULT DImode */
1389 {
1390 0, /* simple (N/A). */
1391 0, /* flag_setting (N/A). */
1392 COSTS_N_INSNS (1), /* extend. */
1393 0, /* add. */
1394 COSTS_N_INSNS (2), /* extend_add. */
1395 0 /* idiv (N/A). */
1396 }
1397 },
1398 /* LD/ST */
1399 {
1400 COSTS_N_INSNS (1), /* load. */
1401 COSTS_N_INSNS (1), /* load_sign_extend. */
1402 COSTS_N_INSNS (3), /* ldrd. */
1403 COSTS_N_INSNS (1), /* ldm_1st. */
1404 1, /* ldm_regs_per_insn_1st. */
1405 2, /* ldm_regs_per_insn_subsequent. */
1406 COSTS_N_INSNS (2), /* loadf. */
1407 COSTS_N_INSNS (2), /* loadd. */
1408 COSTS_N_INSNS (1), /* load_unaligned. */
1409 COSTS_N_INSNS (1), /* store. */
1410 COSTS_N_INSNS (3), /* strd. */
1411 COSTS_N_INSNS (1), /* stm_1st. */
1412 1, /* stm_regs_per_insn_1st. */
1413 2, /* stm_regs_per_insn_subsequent. */
1414 COSTS_N_INSNS (2), /* storef. */
1415 COSTS_N_INSNS (2), /* stored. */
1416 COSTS_N_INSNS (1), /* store_unaligned. */
1417 COSTS_N_INSNS (1), /* loadv. */
1418 COSTS_N_INSNS (1) /* storev. */
1419 },
1420 {
1421 /* FP SFmode */
1422 {
1423 COSTS_N_INSNS (15), /* div. */
1424 COSTS_N_INSNS (3), /* mult. */
1425 COSTS_N_INSNS (7), /* mult_addsub. */
1426 COSTS_N_INSNS (7), /* fma. */
1427 COSTS_N_INSNS (3), /* addsub. */
1428 COSTS_N_INSNS (3), /* fpconst. */
1429 COSTS_N_INSNS (3), /* neg. */
1430 COSTS_N_INSNS (3), /* compare. */
1431 COSTS_N_INSNS (3), /* widen. */
1432 COSTS_N_INSNS (3), /* narrow. */
1433 COSTS_N_INSNS (3), /* toint. */
1434 COSTS_N_INSNS (3), /* fromint. */
1435 COSTS_N_INSNS (3) /* roundint. */
1436 },
1437 /* FP DFmode */
1438 {
1439 COSTS_N_INSNS (30), /* div. */
1440 COSTS_N_INSNS (6), /* mult. */
1441 COSTS_N_INSNS (10), /* mult_addsub. */
1442 COSTS_N_INSNS (7), /* fma. */
1443 COSTS_N_INSNS (3), /* addsub. */
1444 COSTS_N_INSNS (3), /* fpconst. */
1445 COSTS_N_INSNS (3), /* neg. */
1446 COSTS_N_INSNS (3), /* compare. */
1447 COSTS_N_INSNS (3), /* widen. */
1448 COSTS_N_INSNS (3), /* narrow. */
1449 COSTS_N_INSNS (3), /* toint. */
1450 COSTS_N_INSNS (3), /* fromint. */
1451 COSTS_N_INSNS (3) /* roundint. */
1452 }
1453 },
1454 /* Vector */
1455 {
1456 COSTS_N_INSNS (1) /* alu. */
1457 }
1458 };
1459
1460 const struct cpu_cost_table cortexa12_extra_costs =
1461 {
1462 /* ALU */
1463 {
1464 0, /* arith. */
1465 0, /* logical. */
1466 0, /* shift. */
1467 COSTS_N_INSNS (1), /* shift_reg. */
1468 COSTS_N_INSNS (1), /* arith_shift. */
1469 COSTS_N_INSNS (1), /* arith_shift_reg. */
1470 COSTS_N_INSNS (1), /* log_shift. */
1471 COSTS_N_INSNS (1), /* log_shift_reg. */
1472 0, /* extend. */
1473 COSTS_N_INSNS (1), /* extend_arith. */
1474 0, /* bfi. */
1475 COSTS_N_INSNS (1), /* bfx. */
1476 COSTS_N_INSNS (1), /* clz. */
1477 COSTS_N_INSNS (1), /* rev. */
1478 0, /* non_exec. */
1479 true /* non_exec_costs_exec. */
1480 },
1481 /* MULT SImode */
1482 {
1483 {
1484 COSTS_N_INSNS (2), /* simple. */
1485 COSTS_N_INSNS (3), /* flag_setting. */
1486 COSTS_N_INSNS (2), /* extend. */
1487 COSTS_N_INSNS (3), /* add. */
1488 COSTS_N_INSNS (2), /* extend_add. */
1489 COSTS_N_INSNS (18) /* idiv. */
1490 },
1491 /* MULT DImode */
1492 {
1493 0, /* simple (N/A). */
1494 0, /* flag_setting (N/A). */
1495 COSTS_N_INSNS (3), /* extend. */
1496 0, /* add (N/A). */
1497 COSTS_N_INSNS (3), /* extend_add. */
1498 0 /* idiv (N/A). */
1499 }
1500 },
1501 /* LD/ST */
1502 {
1503 COSTS_N_INSNS (3), /* load. */
1504 COSTS_N_INSNS (3), /* load_sign_extend. */
1505 COSTS_N_INSNS (3), /* ldrd. */
1506 COSTS_N_INSNS (3), /* ldm_1st. */
1507 1, /* ldm_regs_per_insn_1st. */
1508 2, /* ldm_regs_per_insn_subsequent. */
1509 COSTS_N_INSNS (3), /* loadf. */
1510 COSTS_N_INSNS (3), /* loadd. */
1511 0, /* load_unaligned. */
1512 0, /* store. */
1513 0, /* strd. */
1514 0, /* stm_1st. */
1515 1, /* stm_regs_per_insn_1st. */
1516 2, /* stm_regs_per_insn_subsequent. */
1517 COSTS_N_INSNS (2), /* storef. */
1518 COSTS_N_INSNS (2), /* stored. */
1519 0, /* store_unaligned. */
1520 COSTS_N_INSNS (1), /* loadv. */
1521 COSTS_N_INSNS (1) /* storev. */
1522 },
1523 {
1524 /* FP SFmode */
1525 {
1526 COSTS_N_INSNS (17), /* div. */
1527 COSTS_N_INSNS (4), /* mult. */
1528 COSTS_N_INSNS (8), /* mult_addsub. */
1529 COSTS_N_INSNS (8), /* fma. */
1530 COSTS_N_INSNS (4), /* addsub. */
1531 COSTS_N_INSNS (2), /* fpconst. */
1532 COSTS_N_INSNS (2), /* neg. */
1533 COSTS_N_INSNS (2), /* compare. */
1534 COSTS_N_INSNS (4), /* widen. */
1535 COSTS_N_INSNS (4), /* narrow. */
1536 COSTS_N_INSNS (4), /* toint. */
1537 COSTS_N_INSNS (4), /* fromint. */
1538 COSTS_N_INSNS (4) /* roundint. */
1539 },
1540 /* FP DFmode */
1541 {
1542 COSTS_N_INSNS (31), /* div. */
1543 COSTS_N_INSNS (4), /* mult. */
1544 COSTS_N_INSNS (8), /* mult_addsub. */
1545 COSTS_N_INSNS (8), /* fma. */
1546 COSTS_N_INSNS (4), /* addsub. */
1547 COSTS_N_INSNS (2), /* fpconst. */
1548 COSTS_N_INSNS (2), /* neg. */
1549 COSTS_N_INSNS (2), /* compare. */
1550 COSTS_N_INSNS (4), /* widen. */
1551 COSTS_N_INSNS (4), /* narrow. */
1552 COSTS_N_INSNS (4), /* toint. */
1553 COSTS_N_INSNS (4), /* fromint. */
1554 COSTS_N_INSNS (4) /* roundint. */
1555 }
1556 },
1557 /* Vector */
1558 {
1559 COSTS_N_INSNS (1) /* alu. */
1560 }
1561 };
1562
1563 const struct cpu_cost_table cortexa15_extra_costs =
1564 {
1565 /* ALU */
1566 {
1567 0, /* arith. */
1568 0, /* logical. */
1569 0, /* shift. */
1570 0, /* shift_reg. */
1571 COSTS_N_INSNS (1), /* arith_shift. */
1572 COSTS_N_INSNS (1), /* arith_shift_reg. */
1573 COSTS_N_INSNS (1), /* log_shift. */
1574 COSTS_N_INSNS (1), /* log_shift_reg. */
1575 0, /* extend. */
1576 COSTS_N_INSNS (1), /* extend_arith. */
1577 COSTS_N_INSNS (1), /* bfi. */
1578 0, /* bfx. */
1579 0, /* clz. */
1580 0, /* rev. */
1581 0, /* non_exec. */
1582 true /* non_exec_costs_exec. */
1583 },
1584 /* MULT SImode */
1585 {
1586 {
1587 COSTS_N_INSNS (2), /* simple. */
1588 COSTS_N_INSNS (3), /* flag_setting. */
1589 COSTS_N_INSNS (2), /* extend. */
1590 COSTS_N_INSNS (2), /* add. */
1591 COSTS_N_INSNS (2), /* extend_add. */
1592 COSTS_N_INSNS (18) /* idiv. */
1593 },
1594 /* MULT DImode */
1595 {
1596 0, /* simple (N/A). */
1597 0, /* flag_setting (N/A). */
1598 COSTS_N_INSNS (3), /* extend. */
1599 0, /* add (N/A). */
1600 COSTS_N_INSNS (3), /* extend_add. */
1601 0 /* idiv (N/A). */
1602 }
1603 },
1604 /* LD/ST */
1605 {
1606 COSTS_N_INSNS (3), /* load. */
1607 COSTS_N_INSNS (3), /* load_sign_extend. */
1608 COSTS_N_INSNS (3), /* ldrd. */
1609 COSTS_N_INSNS (4), /* ldm_1st. */
1610 1, /* ldm_regs_per_insn_1st. */
1611 2, /* ldm_regs_per_insn_subsequent. */
1612 COSTS_N_INSNS (4), /* loadf. */
1613 COSTS_N_INSNS (4), /* loadd. */
1614 0, /* load_unaligned. */
1615 0, /* store. */
1616 0, /* strd. */
1617 COSTS_N_INSNS (1), /* stm_1st. */
1618 1, /* stm_regs_per_insn_1st. */
1619 2, /* stm_regs_per_insn_subsequent. */
1620 0, /* storef. */
1621 0, /* stored. */
1622 0, /* store_unaligned. */
1623 COSTS_N_INSNS (1), /* loadv. */
1624 COSTS_N_INSNS (1) /* storev. */
1625 },
1626 {
1627 /* FP SFmode */
1628 {
1629 COSTS_N_INSNS (17), /* div. */
1630 COSTS_N_INSNS (4), /* mult. */
1631 COSTS_N_INSNS (8), /* mult_addsub. */
1632 COSTS_N_INSNS (8), /* fma. */
1633 COSTS_N_INSNS (4), /* addsub. */
1634 COSTS_N_INSNS (2), /* fpconst. */
1635 COSTS_N_INSNS (2), /* neg. */
1636 COSTS_N_INSNS (5), /* compare. */
1637 COSTS_N_INSNS (4), /* widen. */
1638 COSTS_N_INSNS (4), /* narrow. */
1639 COSTS_N_INSNS (4), /* toint. */
1640 COSTS_N_INSNS (4), /* fromint. */
1641 COSTS_N_INSNS (4) /* roundint. */
1642 },
1643 /* FP DFmode */
1644 {
1645 COSTS_N_INSNS (31), /* div. */
1646 COSTS_N_INSNS (4), /* mult. */
1647 COSTS_N_INSNS (8), /* mult_addsub. */
1648 COSTS_N_INSNS (8), /* fma. */
1649 COSTS_N_INSNS (4), /* addsub. */
1650 COSTS_N_INSNS (2), /* fpconst. */
1651 COSTS_N_INSNS (2), /* neg. */
1652 COSTS_N_INSNS (2), /* compare. */
1653 COSTS_N_INSNS (4), /* widen. */
1654 COSTS_N_INSNS (4), /* narrow. */
1655 COSTS_N_INSNS (4), /* toint. */
1656 COSTS_N_INSNS (4), /* fromint. */
1657 COSTS_N_INSNS (4) /* roundint. */
1658 }
1659 },
1660 /* Vector */
1661 {
1662 COSTS_N_INSNS (1) /* alu. */
1663 }
1664 };
1665
1666 const struct cpu_cost_table v7m_extra_costs =
1667 {
1668 /* ALU */
1669 {
1670 0, /* arith. */
1671 0, /* logical. */
1672 0, /* shift. */
1673 0, /* shift_reg. */
1674 0, /* arith_shift. */
1675 COSTS_N_INSNS (1), /* arith_shift_reg. */
1676 0, /* log_shift. */
1677 COSTS_N_INSNS (1), /* log_shift_reg. */
1678 0, /* extend. */
1679 COSTS_N_INSNS (1), /* extend_arith. */
1680 0, /* bfi. */
1681 0, /* bfx. */
1682 0, /* clz. */
1683 0, /* rev. */
1684 COSTS_N_INSNS (1), /* non_exec. */
1685 false /* non_exec_costs_exec. */
1686 },
1687 {
1688 /* MULT SImode */
1689 {
1690 COSTS_N_INSNS (1), /* simple. */
1691 COSTS_N_INSNS (1), /* flag_setting. */
1692 COSTS_N_INSNS (2), /* extend. */
1693 COSTS_N_INSNS (1), /* add. */
1694 COSTS_N_INSNS (3), /* extend_add. */
1695 COSTS_N_INSNS (8) /* idiv. */
1696 },
1697 /* MULT DImode */
1698 {
1699 0, /* simple (N/A). */
1700 0, /* flag_setting (N/A). */
1701 COSTS_N_INSNS (2), /* extend. */
1702 0, /* add (N/A). */
1703 COSTS_N_INSNS (3), /* extend_add. */
1704 0 /* idiv (N/A). */
1705 }
1706 },
1707 /* LD/ST */
1708 {
1709 COSTS_N_INSNS (2), /* load. */
1710 0, /* load_sign_extend. */
1711 COSTS_N_INSNS (3), /* ldrd. */
1712 COSTS_N_INSNS (2), /* ldm_1st. */
1713 1, /* ldm_regs_per_insn_1st. */
1714 1, /* ldm_regs_per_insn_subsequent. */
1715 COSTS_N_INSNS (2), /* loadf. */
1716 COSTS_N_INSNS (3), /* loadd. */
1717 COSTS_N_INSNS (1), /* load_unaligned. */
1718 COSTS_N_INSNS (2), /* store. */
1719 COSTS_N_INSNS (3), /* strd. */
1720 COSTS_N_INSNS (2), /* stm_1st. */
1721 1, /* stm_regs_per_insn_1st. */
1722 1, /* stm_regs_per_insn_subsequent. */
1723 COSTS_N_INSNS (2), /* storef. */
1724 COSTS_N_INSNS (3), /* stored. */
1725 COSTS_N_INSNS (1), /* store_unaligned. */
1726 COSTS_N_INSNS (1), /* loadv. */
1727 COSTS_N_INSNS (1) /* storev. */
1728 },
1729 {
1730 /* FP SFmode */
1731 {
1732 COSTS_N_INSNS (7), /* div. */
1733 COSTS_N_INSNS (2), /* mult. */
1734 COSTS_N_INSNS (5), /* mult_addsub. */
1735 COSTS_N_INSNS (3), /* fma. */
1736 COSTS_N_INSNS (1), /* addsub. */
1737 0, /* fpconst. */
1738 0, /* neg. */
1739 0, /* compare. */
1740 0, /* widen. */
1741 0, /* narrow. */
1742 0, /* toint. */
1743 0, /* fromint. */
1744 0 /* roundint. */
1745 },
1746 /* FP DFmode */
1747 {
1748 COSTS_N_INSNS (15), /* div. */
1749 COSTS_N_INSNS (5), /* mult. */
1750 COSTS_N_INSNS (7), /* mult_addsub. */
1751 COSTS_N_INSNS (7), /* fma. */
1752 COSTS_N_INSNS (3), /* addsub. */
1753 0, /* fpconst. */
1754 0, /* neg. */
1755 0, /* compare. */
1756 0, /* widen. */
1757 0, /* narrow. */
1758 0, /* toint. */
1759 0, /* fromint. */
1760 0 /* roundint. */
1761 }
1762 },
1763 /* Vector */
1764 {
1765 COSTS_N_INSNS (1) /* alu. */
1766 }
1767 };
1768
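/* Illustrative sketch only: how one entry of a cpu_cost_table like those
   above might be consulted when costing an RTX.  The member names used here
   (mult, simple) are assumed from the initializer comments above rather than
   quoted from the real structure definition; COSTS_N_INSNS (1) is the
   baseline cost of a single instruction.  */
static int ATTRIBUTE_UNUSED
example_simode_mult_cost (const struct cpu_cost_table *extra)
{
  /* The MULT sub-tables are laid out SImode first, then DImode, so index 0
     selects the SImode entry.  */
  return COSTS_N_INSNS (1) + extra->mult[0].simple;
}
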
1769 const struct addr_mode_cost_table generic_addr_mode_costs =
1770 {
1771 /* int. */
1772 {
1773 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1774 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1775 COSTS_N_INSNS (0) /* AMO_WB. */
1776 },
1777 /* float. */
1778 {
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1782 },
1783 /* vector. */
1784 {
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1788 }
1789 };
1790
1791 const struct tune_params arm_slowmul_tune =
1792 {
1793 &generic_extra_costs, /* Insn extra costs. */
1794 &generic_addr_mode_costs, /* Addressing mode costs. */
1795 NULL, /* Sched adj cost. */
1796 arm_default_branch_cost,
1797 &arm_default_vec_cost,
1798 3, /* Constant limit. */
1799 5, /* Max cond insns. */
1800 8, /* Memset max inline. */
1801 1, /* Issue rate. */
1802 ARM_PREFETCH_NOT_BENEFICIAL,
1803 tune_params::PREF_CONST_POOL_TRUE,
1804 tune_params::PREF_LDRD_FALSE,
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1806 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1807 tune_params::DISPARAGE_FLAGS_NEITHER,
1808 tune_params::PREF_NEON_64_FALSE,
1809 tune_params::PREF_NEON_STRINGOPS_FALSE,
1810 tune_params::FUSE_NOTHING,
1811 tune_params::SCHED_AUTOPREF_OFF
1812 };
1813
1814 const struct tune_params arm_fastmul_tune =
1815 {
1816 &generic_extra_costs, /* Insn extra costs. */
1817 &generic_addr_mode_costs, /* Addressing mode costs. */
1818 NULL, /* Sched adj cost. */
1819 arm_default_branch_cost,
1820 &arm_default_vec_cost,
1821 1, /* Constant limit. */
1822 5, /* Max cond insns. */
1823 8, /* Memset max inline. */
1824 1, /* Issue rate. */
1825 ARM_PREFETCH_NOT_BENEFICIAL,
1826 tune_params::PREF_CONST_POOL_TRUE,
1827 tune_params::PREF_LDRD_FALSE,
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1830 tune_params::DISPARAGE_FLAGS_NEITHER,
1831 tune_params::PREF_NEON_64_FALSE,
1832 tune_params::PREF_NEON_STRINGOPS_FALSE,
1833 tune_params::FUSE_NOTHING,
1834 tune_params::SCHED_AUTOPREF_OFF
1835 };
1836
1837 /* StrongARM has early execution of branches, so a sequence that is worth
1838 skipping is shorter. Set max_insns_skipped to a lower value. */
1839
1840 const struct tune_params arm_strongarm_tune =
1841 {
1842 &generic_extra_costs, /* Insn extra costs. */
1843 &generic_addr_mode_costs, /* Addressing mode costs. */
1844 NULL, /* Sched adj cost. */
1845 arm_default_branch_cost,
1846 &arm_default_vec_cost,
1847 1, /* Constant limit. */
1848 3, /* Max cond insns. */
1849 8, /* Memset max inline. */
1850 1, /* Issue rate. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 tune_params::PREF_CONST_POOL_TRUE,
1853 tune_params::PREF_LDRD_FALSE,
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1855 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1856 tune_params::DISPARAGE_FLAGS_NEITHER,
1857 tune_params::PREF_NEON_64_FALSE,
1858 tune_params::PREF_NEON_STRINGOPS_FALSE,
1859 tune_params::FUSE_NOTHING,
1860 tune_params::SCHED_AUTOPREF_OFF
1861 };
1862
1863 const struct tune_params arm_xscale_tune =
1864 {
1865 &generic_extra_costs, /* Insn extra costs. */
1866 &generic_addr_mode_costs, /* Addressing mode costs. */
1867 xscale_sched_adjust_cost,
1868 arm_default_branch_cost,
1869 &arm_default_vec_cost,
1870 2, /* Constant limit. */
1871 3, /* Max cond insns. */
1872 8, /* Memset max inline. */
1873 1, /* Issue rate. */
1874 ARM_PREFETCH_NOT_BENEFICIAL,
1875 tune_params::PREF_CONST_POOL_TRUE,
1876 tune_params::PREF_LDRD_FALSE,
1877 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1878 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1879 tune_params::DISPARAGE_FLAGS_NEITHER,
1880 tune_params::PREF_NEON_64_FALSE,
1881 tune_params::PREF_NEON_STRINGOPS_FALSE,
1882 tune_params::FUSE_NOTHING,
1883 tune_params::SCHED_AUTOPREF_OFF
1884 };
1885
1886 const struct tune_params arm_9e_tune =
1887 {
1888 &generic_extra_costs, /* Insn extra costs. */
1889 &generic_addr_mode_costs, /* Addressing mode costs. */
1890 NULL, /* Sched adj cost. */
1891 arm_default_branch_cost,
1892 &arm_default_vec_cost,
1893 1, /* Constant limit. */
1894 5, /* Max cond insns. */
1895 8, /* Memset max inline. */
1896 1, /* Issue rate. */
1897 ARM_PREFETCH_NOT_BENEFICIAL,
1898 tune_params::PREF_CONST_POOL_TRUE,
1899 tune_params::PREF_LDRD_FALSE,
1900 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1901 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1902 tune_params::DISPARAGE_FLAGS_NEITHER,
1903 tune_params::PREF_NEON_64_FALSE,
1904 tune_params::PREF_NEON_STRINGOPS_FALSE,
1905 tune_params::FUSE_NOTHING,
1906 tune_params::SCHED_AUTOPREF_OFF
1907 };
1908
1909 const struct tune_params arm_marvell_pj4_tune =
1910 {
1911 &generic_extra_costs, /* Insn extra costs. */
1912 &generic_addr_mode_costs, /* Addressing mode costs. */
1913 NULL, /* Sched adj cost. */
1914 arm_default_branch_cost,
1915 &arm_default_vec_cost,
1916 1, /* Constant limit. */
1917 5, /* Max cond insns. */
1918 8, /* Memset max inline. */
1919 2, /* Issue rate. */
1920 ARM_PREFETCH_NOT_BENEFICIAL,
1921 tune_params::PREF_CONST_POOL_TRUE,
1922 tune_params::PREF_LDRD_FALSE,
1923 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1925 tune_params::DISPARAGE_FLAGS_NEITHER,
1926 tune_params::PREF_NEON_64_FALSE,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 const struct tune_params arm_v6t2_tune =
1933 {
1934 &generic_extra_costs, /* Insn extra costs. */
1935 &generic_addr_mode_costs, /* Addressing mode costs. */
1936 NULL, /* Sched adj cost. */
1937 arm_default_branch_cost,
1938 &arm_default_vec_cost,
1939 1, /* Constant limit. */
1940 5, /* Max cond insns. */
1941 8, /* Memset max inline. */
1942 1, /* Issue rate. */
1943 ARM_PREFETCH_NOT_BENEFICIAL,
1944 tune_params::PREF_CONST_POOL_FALSE,
1945 tune_params::PREF_LDRD_FALSE,
1946 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1947 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1948 tune_params::DISPARAGE_FLAGS_NEITHER,
1949 tune_params::PREF_NEON_64_FALSE,
1950 tune_params::PREF_NEON_STRINGOPS_FALSE,
1951 tune_params::FUSE_NOTHING,
1952 tune_params::SCHED_AUTOPREF_OFF
1953 };
1954
1955
1956 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1957 const struct tune_params arm_cortex_tune =
1958 {
1959 &generic_extra_costs,
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 NULL, /* Sched adj cost. */
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 1, /* Constant limit. */
1965 5, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 2, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_FALSE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_64_FALSE,
1975 tune_params::PREF_NEON_STRINGOPS_FALSE,
1976 tune_params::FUSE_NOTHING,
1977 tune_params::SCHED_AUTOPREF_OFF
1978 };
1979
1980 const struct tune_params arm_cortex_a8_tune =
1981 {
1982 &cortexa8_extra_costs,
1983 &generic_addr_mode_costs, /* Addressing mode costs. */
1984 NULL, /* Sched adj cost. */
1985 arm_default_branch_cost,
1986 &arm_default_vec_cost,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL,
1992 tune_params::PREF_CONST_POOL_FALSE,
1993 tune_params::PREF_LDRD_FALSE,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER,
1997 tune_params::PREF_NEON_64_FALSE,
1998 tune_params::PREF_NEON_STRINGOPS_TRUE,
1999 tune_params::FUSE_NOTHING,
2000 tune_params::SCHED_AUTOPREF_OFF
2001 };
2002
2003 const struct tune_params arm_cortex_a7_tune =
2004 {
2005 &cortexa7_extra_costs,
2006 &generic_addr_mode_costs, /* Addressing mode costs. */
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 tune_params::FUSE_NOTHING,
2023 tune_params::SCHED_AUTOPREF_OFF
2024 };
2025
2026 const struct tune_params arm_cortex_a15_tune =
2027 {
2028 &cortexa15_extra_costs,
2029 &generic_addr_mode_costs, /* Addressing mode costs. */
2030 NULL, /* Sched adj cost. */
2031 arm_default_branch_cost,
2032 &arm_default_vec_cost,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL,
2038 tune_params::PREF_CONST_POOL_FALSE,
2039 tune_params::PREF_LDRD_TRUE,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL,
2043 tune_params::PREF_NEON_64_FALSE,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_FULL
2047 };
2048
2049 const struct tune_params arm_cortex_a35_tune =
2050 {
2051 &cortexa53_extra_costs,
2052 &generic_addr_mode_costs, /* Addressing mode costs. */
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 1, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_FALSE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER,
2066 tune_params::PREF_NEON_64_FALSE,
2067 tune_params::PREF_NEON_STRINGOPS_TRUE,
2068 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2069 tune_params::SCHED_AUTOPREF_OFF
2070 };
2071
2072 const struct tune_params arm_cortex_a53_tune =
2073 {
2074 &cortexa53_extra_costs,
2075 &generic_addr_mode_costs, /* Addressing mode costs. */
2076 NULL, /* Sched adj cost. */
2077 arm_default_branch_cost,
2078 &arm_default_vec_cost,
2079 1, /* Constant limit. */
2080 5, /* Max cond insns. */
2081 8, /* Memset max inline. */
2082 2, /* Issue rate. */
2083 ARM_PREFETCH_NOT_BENEFICIAL,
2084 tune_params::PREF_CONST_POOL_FALSE,
2085 tune_params::PREF_LDRD_FALSE,
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2087 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2088 tune_params::DISPARAGE_FLAGS_NEITHER,
2089 tune_params::PREF_NEON_64_FALSE,
2090 tune_params::PREF_NEON_STRINGOPS_TRUE,
2091 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2092 tune_params::SCHED_AUTOPREF_OFF
2093 };
2094
2095 const struct tune_params arm_cortex_a57_tune =
2096 {
2097 &cortexa57_extra_costs,
2098 &generic_addr_mode_costs, /* Addressing mode costs. */
2099 NULL, /* Sched adj cost. */
2100 arm_default_branch_cost,
2101 &arm_default_vec_cost,
2102 1, /* Constant limit. */
2103 2, /* Max cond insns. */
2104 8, /* Memset max inline. */
2105 3, /* Issue rate. */
2106 ARM_PREFETCH_NOT_BENEFICIAL,
2107 tune_params::PREF_CONST_POOL_FALSE,
2108 tune_params::PREF_LDRD_TRUE,
2109 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2111 tune_params::DISPARAGE_FLAGS_ALL,
2112 tune_params::PREF_NEON_64_FALSE,
2113 tune_params::PREF_NEON_STRINGOPS_TRUE,
2114 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2115 tune_params::SCHED_AUTOPREF_FULL
2116 };
2117
2118 const struct tune_params arm_exynosm1_tune =
2119 {
2120 &exynosm1_extra_costs,
2121 &generic_addr_mode_costs, /* Addressing mode costs. */
2122 NULL, /* Sched adj cost. */
2123 arm_default_branch_cost,
2124 &arm_default_vec_cost,
2125 1, /* Constant limit. */
2126 2, /* Max cond insns. */
2127 8, /* Memset max inline. */
2128 3, /* Issue rate. */
2129 ARM_PREFETCH_NOT_BENEFICIAL,
2130 tune_params::PREF_CONST_POOL_FALSE,
2131 tune_params::PREF_LDRD_TRUE,
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2134 tune_params::DISPARAGE_FLAGS_ALL,
2135 tune_params::PREF_NEON_64_FALSE,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE,
2137 tune_params::FUSE_NOTHING,
2138 tune_params::SCHED_AUTOPREF_OFF
2139 };
2140
2141 const struct tune_params arm_xgene1_tune =
2142 {
2143 &xgene1_extra_costs,
2144 &generic_addr_mode_costs, /* Addressing mode costs. */
2145 NULL, /* Sched adj cost. */
2146 arm_default_branch_cost,
2147 &arm_default_vec_cost,
2148 1, /* Constant limit. */
2149 2, /* Max cond insns. */
2150 32, /* Memset max inline. */
2151 4, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL,
2153 tune_params::PREF_CONST_POOL_FALSE,
2154 tune_params::PREF_LDRD_TRUE,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_ALL,
2158 tune_params::PREF_NEON_64_FALSE,
2159 tune_params::PREF_NEON_STRINGOPS_FALSE,
2160 tune_params::FUSE_NOTHING,
2161 tune_params::SCHED_AUTOPREF_OFF
2162 };
2163
2164 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2165 less appealing. Set max_insns_skipped to a low value. */
2166
2167 const struct tune_params arm_cortex_a5_tune =
2168 {
2169 &cortexa5_extra_costs,
2170 &generic_addr_mode_costs, /* Addressing mode costs. */
2171 NULL, /* Sched adj cost. */
2172 arm_cortex_a5_branch_cost,
2173 &arm_default_vec_cost,
2174 1, /* Constant limit. */
2175 1, /* Max cond insns. */
2176 8, /* Memset max inline. */
2177 2, /* Issue rate. */
2178 ARM_PREFETCH_NOT_BENEFICIAL,
2179 tune_params::PREF_CONST_POOL_FALSE,
2180 tune_params::PREF_LDRD_FALSE,
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2182 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2183 tune_params::DISPARAGE_FLAGS_NEITHER,
2184 tune_params::PREF_NEON_64_FALSE,
2185 tune_params::PREF_NEON_STRINGOPS_TRUE,
2186 tune_params::FUSE_NOTHING,
2187 tune_params::SCHED_AUTOPREF_OFF
2188 };
2189
2190 const struct tune_params arm_cortex_a9_tune =
2191 {
2192 &cortexa9_extra_costs,
2193 &generic_addr_mode_costs, /* Addressing mode costs. */
2194 cortex_a9_sched_adjust_cost,
2195 arm_default_branch_cost,
2196 &arm_default_vec_cost,
2197 1, /* Constant limit. */
2198 5, /* Max cond insns. */
2199 8, /* Memset max inline. */
2200 2, /* Issue rate. */
2201 ARM_PREFETCH_BENEFICIAL(4,32,32),
2202 tune_params::PREF_CONST_POOL_FALSE,
2203 tune_params::PREF_LDRD_FALSE,
2204 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2206 tune_params::DISPARAGE_FLAGS_NEITHER,
2207 tune_params::PREF_NEON_64_FALSE,
2208 tune_params::PREF_NEON_STRINGOPS_FALSE,
2209 tune_params::FUSE_NOTHING,
2210 tune_params::SCHED_AUTOPREF_OFF
2211 };
2212
2213 const struct tune_params arm_cortex_a12_tune =
2214 {
2215 &cortexa12_extra_costs,
2216 &generic_addr_mode_costs, /* Addressing mode costs. */
2217 NULL, /* Sched adj cost. */
2218 arm_default_branch_cost,
2219 &arm_default_vec_cost, /* Vectorizer costs. */
2220 1, /* Constant limit. */
2221 2, /* Max cond insns. */
2222 8, /* Memset max inline. */
2223 2, /* Issue rate. */
2224 ARM_PREFETCH_NOT_BENEFICIAL,
2225 tune_params::PREF_CONST_POOL_FALSE,
2226 tune_params::PREF_LDRD_TRUE,
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2228 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2229 tune_params::DISPARAGE_FLAGS_ALL,
2230 tune_params::PREF_NEON_64_FALSE,
2231 tune_params::PREF_NEON_STRINGOPS_TRUE,
2232 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2233 tune_params::SCHED_AUTOPREF_OFF
2234 };
2235
2236 const struct tune_params arm_cortex_a73_tune =
2237 {
2238 &cortexa57_extra_costs,
2239 &generic_addr_mode_costs, /* Addressing mode costs. */
2240 NULL, /* Sched adj cost. */
2241 arm_default_branch_cost,
2242 &arm_default_vec_cost, /* Vectorizer costs. */
2243 1, /* Constant limit. */
2244 2, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL,
2248 tune_params::PREF_CONST_POOL_FALSE,
2249 tune_params::PREF_LDRD_TRUE,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_ALL,
2253 tune_params::PREF_NEON_64_FALSE,
2254 tune_params::PREF_NEON_STRINGOPS_TRUE,
2255 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2256 tune_params::SCHED_AUTOPREF_FULL
2257 };
2258
2259 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT each take a
2260 single cycle, so materialising a 32-bit constant that way costs two cycles.
2261 An LDR from the constant pool likewise takes two cycles to execute, but
2262 mildly increases pipelining opportunity (consecutive loads/stores can be
2263 pipelined together, saving one cycle), and may also improve icache
2264 utilisation. Hence we prefer the constant pool for such processors. */
2265
2266 const struct tune_params arm_v7m_tune =
2267 {
2268 &v7m_extra_costs,
2269 &generic_addr_mode_costs, /* Addressing mode costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_cortex_m_branch_cost,
2272 &arm_default_vec_cost,
2273 1, /* Constant limit. */
2274 2, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_TRUE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 /* Cortex-M7 tuning. */
2290
2291 const struct tune_params arm_cortex_m7_tune =
2292 {
2293 &v7m_extra_costs,
2294 &generic_addr_mode_costs, /* Addressing mode costs. */
2295 NULL, /* Sched adj cost. */
2296 arm_cortex_m7_branch_cost,
2297 &arm_default_vec_cost,
2298 0, /* Constant limit. */
2299 1, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2315 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2316 cortex-m23. */
2317 const struct tune_params arm_v6m_tune =
2318 {
2319 &generic_extra_costs, /* Insn extra costs. */
2320 &generic_addr_mode_costs, /* Addressing mode costs. */
2321 NULL, /* Sched adj cost. */
2322 arm_default_branch_cost,
2323 &arm_default_vec_cost, /* Vectorizer costs. */
2324 1, /* Constant limit. */
2325 5, /* Max cond insns. */
2326 8, /* Memset max inline. */
2327 1, /* Issue rate. */
2328 ARM_PREFETCH_NOT_BENEFICIAL,
2329 tune_params::PREF_CONST_POOL_FALSE,
2330 tune_params::PREF_LDRD_FALSE,
2331 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2332 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2333 tune_params::DISPARAGE_FLAGS_NEITHER,
2334 tune_params::PREF_NEON_64_FALSE,
2335 tune_params::PREF_NEON_STRINGOPS_FALSE,
2336 tune_params::FUSE_NOTHING,
2337 tune_params::SCHED_AUTOPREF_OFF
2338 };
2339
2340 const struct tune_params arm_fa726te_tune =
2341 {
2342 &generic_extra_costs, /* Insn extra costs. */
2343 &generic_addr_mode_costs, /* Addressing mode costs. */
2344 fa726te_sched_adjust_cost,
2345 arm_default_branch_cost,
2346 &arm_default_vec_cost,
2347 1, /* Constant limit. */
2348 5, /* Max cond insns. */
2349 8, /* Memset max inline. */
2350 2, /* Issue rate. */
2351 ARM_PREFETCH_NOT_BENEFICIAL,
2352 tune_params::PREF_CONST_POOL_TRUE,
2353 tune_params::PREF_LDRD_FALSE,
2354 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2355 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2356 tune_params::DISPARAGE_FLAGS_NEITHER,
2357 tune_params::PREF_NEON_64_FALSE,
2358 tune_params::PREF_NEON_STRINGOPS_FALSE,
2359 tune_params::FUSE_NOTHING,
2360 tune_params::SCHED_AUTOPREF_OFF
2361 };
2362
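/* Illustrative sketch only: the tuning structures above are consulted at run
   time through a pointer to the selected tune_params (see arm_constant_limit
   and arm_option_params_internal further down, which read constant_limit and
   max_insns_skipped).  A typical query might look like this:  */
static bool ATTRIBUTE_UNUSED
example_can_conditionalise (const struct tune_params *tune, int ninsns)
{
  /* Allow conditional execution of a block only if it is no longer than the
     per-CPU maximum number of insns worth skipping.  */
  return ninsns <= tune->max_insns_skipped;
}
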
2363 /* Auto-generated CPU, FPU and architecture tables. */
2364 #include "arm-cpu-data.h"
2365
2366 /* The name of the preprocessor macro to define for this architecture. PROFILE
2367 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2368 is thus chosen to be big enough to hold the longest architecture name. */
2369
2370 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2371
2372 /* Supported TLS relocations. */
2373
2374 enum tls_reloc {
2375 TLS_GD32,
2376 TLS_LDM32,
2377 TLS_LDO32,
2378 TLS_IE32,
2379 TLS_LE32,
2380 TLS_DESCSEQ /* GNU scheme */
2381 };
2382
2383 /* The maximum number of insns to be used when loading a constant. */
2384 inline static int
2385 arm_constant_limit (bool size_p)
2386 {
2387 return size_p ? 1 : current_tune->constant_limit;
2388 }
2389
2390 /* Emit an insn that's a simple single-set. Both the operands must be known
2391 to be valid. */
2392 inline static rtx_insn *
2393 emit_set_insn (rtx x, rtx y)
2394 {
2395 return emit_insn (gen_rtx_SET (x, y));
2396 }
2397
2398 /* Return the number of bits set in VALUE. */
2399 static unsigned
2400 bit_count (unsigned long value)
2401 {
2402 unsigned long count = 0;
2403
2404 while (value)
2405 {
2406 count++;
2407 value &= value - 1; /* Clear the least-significant set bit. */
2408 }
2409
2410 return count;
2411 }
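
/* Worked example of the loop above: for VALUE == 0x29 (binary 101001) the
   three iterations clear bits 0, 3 and 5 in turn, so bit_count returns 3.  */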
2412
2413 /* Return the number of bits set in BMAP. */
2414 static unsigned
2415 bitmap_popcount (const sbitmap bmap)
2416 {
2417 unsigned int count = 0;
2418 unsigned int n = 0;
2419 sbitmap_iterator sbi;
2420
2421 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2422 count++;
2423 return count;
2424 }
2425
2426 typedef struct
2427 {
2428 machine_mode mode;
2429 const char *name;
2430 } arm_fixed_mode_set;
2431
2432 /* A small helper for setting fixed-point library functions (libfuncs). */
2433
2434 static void
2435 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2436 const char *funcname, const char *modename,
2437 int num_suffix)
2438 {
2439 char buffer[50];
2440
2441 if (num_suffix == 0)
2442 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2443 else
2444 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2445
2446 set_optab_libfunc (optable, mode, buffer);
2447 }
2448
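/* Worked example (illustrative only): the loop further down calls
   arm_set_fixed_optab_libfunc (ssadd_optab, E_SAmode, "ssadd", "sa", 3),
   and the sprintf above turns that into the libfunc name "__gnu_ssaddsa3".
   Passing 0 for NUM_SUFFIX simply omits the trailing digit.  */
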
2449 static void
2450 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2451 machine_mode from, const char *funcname,
2452 const char *toname, const char *fromname)
2453 {
2454 char buffer[50];
2455 const char *maybe_suffix_2 = "";
2456
2457 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2458 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2459 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2460 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2461 maybe_suffix_2 = "2";
2462
2463 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2464 maybe_suffix_2);
2465
2466 set_conv_libfunc (optable, to, from, buffer);
2467 }
2468
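/* Worked example (illustrative only): a conversion between two signed fract
   modes, say from SQmode ("sq") to DQmode ("dq"), satisfies the test above
   and is therefore named "__gnu_fractsqdq2", whereas a conversion from
   SFmode ("sf") to DQmode involves a non-fixed-point mode and is named
   "__gnu_fractsfdq", with no "2" suffix.  */
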
2469 /* Set up library functions unique to ARM. */
2470
2471 static void
2472 arm_init_libfuncs (void)
2473 {
2474 /* For Linux, we have access to kernel support for atomic operations. */
2475 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2476 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2477
2478 /* There are no special library functions unless we are using the
2479 ARM BPABI. */
2480 if (!TARGET_BPABI)
2481 return;
2482
2483 /* The functions below are described in Section 4 of the "Run-Time
2484 ABI for the ARM architecture", Version 1.0. */
2485
2486 /* Double-precision floating-point arithmetic. Table 2. */
2487 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2488 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2489 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2490 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2491 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2492
2493 /* Double-precision comparisons. Table 3. */
2494 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2495 set_optab_libfunc (ne_optab, DFmode, NULL);
2496 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2497 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2498 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2499 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2500 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2501
2502 /* Single-precision floating-point arithmetic. Table 4. */
2503 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2504 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2505 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2506 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2507 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2508
2509 /* Single-precision comparisons. Table 5. */
2510 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2511 set_optab_libfunc (ne_optab, SFmode, NULL);
2512 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2513 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2514 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2515 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2516 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2517
2518 /* Floating-point to integer conversions. Table 6. */
2519 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2520 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2521 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2522 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2523 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2524 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2525 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2526 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2527
2528 /* Conversions between floating types. Table 7. */
2529 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2530 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2531
2532 /* Integer to floating-point conversions. Table 8. */
2533 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2534 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2535 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2536 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2537 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2538 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2539 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2540 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2541
2542 /* Long long. Table 9. */
2543 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2544 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2545 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2546 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2547 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2548 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2549 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2550 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2551
2552 /* Integer (32/32->32) division. \S 4.3.1. */
2553 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2554 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2555
2556 /* The divmod functions are designed so that they can be used for
2557 plain division, even though they return both the quotient and the
2558 remainder. The quotient is returned in the usual location (i.e.,
2559 r0 for SImode, {r0, r1} for DImode), just as would be expected
2560 for an ordinary division routine. Because the AAPCS calling
2561 conventions specify that all of { r0, r1, r2, r3 } are
2562 call-clobbered (caller-saved) registers, there is no need to tell the compiler
2563 explicitly that those registers are clobbered by these
2564 routines. */
2565 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2566 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2567
2568 /* For SImode division the ABI provides div-without-mod routines,
2569 which are faster. */
2570 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2571 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
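
/* For illustration (assumed from the Run-time ABI document rather than from
   this file): the routines registered above behave roughly like

       int __aeabi_idiv (int numerator, int denominator);
       typedef struct { int quot; int rem; } idiv_return;
       idiv_return __aeabi_idivmod (int numerator, int denominator);

   with the quotient coming back in r0 and, for the divmod variants, the
   remainder in the register(s) that follow, as described in the comment on
   the DImode divmod routines above.  */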
2572
2573 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2574 divmod libcalls instead. */
2575 set_optab_libfunc (smod_optab, DImode, NULL);
2576 set_optab_libfunc (umod_optab, DImode, NULL);
2577 set_optab_libfunc (smod_optab, SImode, NULL);
2578 set_optab_libfunc (umod_optab, SImode, NULL);
2579
2580 /* Half-precision float operations. The compiler handles all operations
2581 with NULL libfuncs by converting to SFmode. */
2582 switch (arm_fp16_format)
2583 {
2584 case ARM_FP16_FORMAT_IEEE:
2585 case ARM_FP16_FORMAT_ALTERNATIVE:
2586
2587 /* Conversions. */
2588 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2589 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2590 ? "__gnu_f2h_ieee"
2591 : "__gnu_f2h_alternative"));
2592 set_conv_libfunc (sext_optab, SFmode, HFmode,
2593 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2594 ? "__gnu_h2f_ieee"
2595 : "__gnu_h2f_alternative"));
2596
2597 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2598 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2599 ? "__gnu_d2h_ieee"
2600 : "__gnu_d2h_alternative"));
2601
2602 /* Arithmetic. */
2603 set_optab_libfunc (add_optab, HFmode, NULL);
2604 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2605 set_optab_libfunc (smul_optab, HFmode, NULL);
2606 set_optab_libfunc (neg_optab, HFmode, NULL);
2607 set_optab_libfunc (sub_optab, HFmode, NULL);
2608
2609 /* Comparisons. */
2610 set_optab_libfunc (eq_optab, HFmode, NULL);
2611 set_optab_libfunc (ne_optab, HFmode, NULL);
2612 set_optab_libfunc (lt_optab, HFmode, NULL);
2613 set_optab_libfunc (le_optab, HFmode, NULL);
2614 set_optab_libfunc (ge_optab, HFmode, NULL);
2615 set_optab_libfunc (gt_optab, HFmode, NULL);
2616 set_optab_libfunc (unord_optab, HFmode, NULL);
2617 break;
2618
2619 default:
2620 break;
2621 }
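
/* Illustration of the effect of the NULL entries above: with no HFmode
   libfuncs registered, a half-precision operation such as an addition is
   carried out by widening both operands to SFmode (using the __gnu_h2f_*
   helper registered above when no hardware conversion is available), doing
   the arithmetic in SFmode, and truncating the result back via
   __gnu_f2h_*.  */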
2622
2623 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2624 {
2625 const arm_fixed_mode_set fixed_arith_modes[] =
2626 {
2627 { E_QQmode, "qq" },
2628 { E_UQQmode, "uqq" },
2629 { E_HQmode, "hq" },
2630 { E_UHQmode, "uhq" },
2631 { E_SQmode, "sq" },
2632 { E_USQmode, "usq" },
2633 { E_DQmode, "dq" },
2634 { E_UDQmode, "udq" },
2635 { E_TQmode, "tq" },
2636 { E_UTQmode, "utq" },
2637 { E_HAmode, "ha" },
2638 { E_UHAmode, "uha" },
2639 { E_SAmode, "sa" },
2640 { E_USAmode, "usa" },
2641 { E_DAmode, "da" },
2642 { E_UDAmode, "uda" },
2643 { E_TAmode, "ta" },
2644 { E_UTAmode, "uta" }
2645 };
2646 const arm_fixed_mode_set fixed_conv_modes[] =
2647 {
2648 { E_QQmode, "qq" },
2649 { E_UQQmode, "uqq" },
2650 { E_HQmode, "hq" },
2651 { E_UHQmode, "uhq" },
2652 { E_SQmode, "sq" },
2653 { E_USQmode, "usq" },
2654 { E_DQmode, "dq" },
2655 { E_UDQmode, "udq" },
2656 { E_TQmode, "tq" },
2657 { E_UTQmode, "utq" },
2658 { E_HAmode, "ha" },
2659 { E_UHAmode, "uha" },
2660 { E_SAmode, "sa" },
2661 { E_USAmode, "usa" },
2662 { E_DAmode, "da" },
2663 { E_UDAmode, "uda" },
2664 { E_TAmode, "ta" },
2665 { E_UTAmode, "uta" },
2666 { E_QImode, "qi" },
2667 { E_HImode, "hi" },
2668 { E_SImode, "si" },
2669 { E_DImode, "di" },
2670 { E_TImode, "ti" },
2671 { E_SFmode, "sf" },
2672 { E_DFmode, "df" }
2673 };
2674 unsigned int i, j;
2675
2676 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2677 {
2678 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2679 "add", fixed_arith_modes[i].name, 3);
2680 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2681 "ssadd", fixed_arith_modes[i].name, 3);
2682 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2683 "usadd", fixed_arith_modes[i].name, 3);
2684 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2685 "sub", fixed_arith_modes[i].name, 3);
2686 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2687 "sssub", fixed_arith_modes[i].name, 3);
2688 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2689 "ussub", fixed_arith_modes[i].name, 3);
2690 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2691 "mul", fixed_arith_modes[i].name, 3);
2692 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2693 "ssmul", fixed_arith_modes[i].name, 3);
2694 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2695 "usmul", fixed_arith_modes[i].name, 3);
2696 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2697 "div", fixed_arith_modes[i].name, 3);
2698 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2699 "udiv", fixed_arith_modes[i].name, 3);
2700 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2701 "ssdiv", fixed_arith_modes[i].name, 3);
2702 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2703 "usdiv", fixed_arith_modes[i].name, 3);
2704 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2705 "neg", fixed_arith_modes[i].name, 2);
2706 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2707 "ssneg", fixed_arith_modes[i].name, 2);
2708 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2709 "usneg", fixed_arith_modes[i].name, 2);
2710 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2711 "ashl", fixed_arith_modes[i].name, 3);
2712 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2713 "ashr", fixed_arith_modes[i].name, 3);
2714 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2715 "lshr", fixed_arith_modes[i].name, 3);
2716 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2717 "ssashl", fixed_arith_modes[i].name, 3);
2718 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2719 "usashl", fixed_arith_modes[i].name, 3);
2720 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2721 "cmp", fixed_arith_modes[i].name, 2);
2722 }
2723
2724 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2725 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2726 {
2727 if (i == j
2728 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2729 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2730 continue;
2731
2732 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2733 fixed_conv_modes[j].mode, "fract",
2734 fixed_conv_modes[i].name,
2735 fixed_conv_modes[j].name);
2736 arm_set_fixed_conv_libfunc (satfract_optab,
2737 fixed_conv_modes[i].mode,
2738 fixed_conv_modes[j].mode, "satfract",
2739 fixed_conv_modes[i].name,
2740 fixed_conv_modes[j].name);
2741 arm_set_fixed_conv_libfunc (fractuns_optab,
2742 fixed_conv_modes[i].mode,
2743 fixed_conv_modes[j].mode, "fractuns",
2744 fixed_conv_modes[i].name,
2745 fixed_conv_modes[j].name);
2746 arm_set_fixed_conv_libfunc (satfractuns_optab,
2747 fixed_conv_modes[i].mode,
2748 fixed_conv_modes[j].mode, "satfractuns",
2749 fixed_conv_modes[i].name,
2750 fixed_conv_modes[j].name);
2751 }
2752 }
2753
2754 if (TARGET_AAPCS_BASED)
2755 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2756 }
2757
2758 /* On AAPCS systems, this is the "struct __va_list". */
2759 static GTY(()) tree va_list_type;
2760
2761 /* Return the type to use as __builtin_va_list. */
2762 static tree
2763 arm_build_builtin_va_list (void)
2764 {
2765 tree va_list_name;
2766 tree ap_field;
2767
2768 if (!TARGET_AAPCS_BASED)
2769 return std_build_builtin_va_list ();
2770
2771 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2772 defined as:
2773
2774 struct __va_list
2775 {
2776 void *__ap;
2777 };
2778
2779 The C Library ABI further reinforces this definition in \S
2780 4.1.
2781
2782 We must follow this definition exactly. The structure tag
2783 name is visible in C++ mangled names, and thus forms a part
2784 of the ABI. The field name may be used by people who
2785 #include <stdarg.h>. */
2786 /* Create the type. */
2787 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2788 /* Give it the required name. */
2789 va_list_name = build_decl (BUILTINS_LOCATION,
2790 TYPE_DECL,
2791 get_identifier ("__va_list"),
2792 va_list_type);
2793 DECL_ARTIFICIAL (va_list_name) = 1;
2794 TYPE_NAME (va_list_type) = va_list_name;
2795 TYPE_STUB_DECL (va_list_type) = va_list_name;
2796 /* Create the __ap field. */
2797 ap_field = build_decl (BUILTINS_LOCATION,
2798 FIELD_DECL,
2799 get_identifier ("__ap"),
2800 ptr_type_node);
2801 DECL_ARTIFICIAL (ap_field) = 1;
2802 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2803 TYPE_FIELDS (va_list_type) = ap_field;
2804 /* Compute its layout. */
2805 layout_type (va_list_type);
2806
2807 return va_list_type;
2808 }
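
/* For illustration: user code on an AAPCS target that does

       #include <stdarg.h>
       void f (int last, ...)
       {
         va_list ap;
         va_start (ap, last);
         int x = va_arg (ap, int);
         (void) x;
         va_end (ap);
       }

   is therefore manipulating a one-word structure wrapping a single void *
   cursor, which is what the __ap field built above represents.  */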
2809
2810 /* Return an expression of type "void *" pointing to the next
2811 available argument in a variable-argument list. VALIST is the
2812 user-level va_list object, of type __builtin_va_list. */
2813 static tree
2814 arm_extract_valist_ptr (tree valist)
2815 {
2816 if (TREE_TYPE (valist) == error_mark_node)
2817 return error_mark_node;
2818
2819 /* On an AAPCS target, the pointer is stored within "struct
2820 va_list". */
2821 if (TARGET_AAPCS_BASED)
2822 {
2823 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2824 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2825 valist, ap_field, NULL_TREE);
2826 }
2827
2828 return valist;
2829 }
2830
2831 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2832 static void
2833 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2834 {
2835 valist = arm_extract_valist_ptr (valist);
2836 std_expand_builtin_va_start (valist, nextarg);
2837 }
2838
2839 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2840 static tree
2841 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2842 gimple_seq *post_p)
2843 {
2844 valist = arm_extract_valist_ptr (valist);
2845 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2846 }
2847
2848 /* Check any incompatible options that the user has specified. */
2849 static void
2850 arm_option_check_internal (struct gcc_options *opts)
2851 {
2852 int flags = opts->x_target_flags;
2853
2854 /* iWMMXt and NEON are incompatible. */
2855 if (TARGET_IWMMXT
2856 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2857 error ("iWMMXt and NEON are incompatible");
2858
2859 /* Make sure that the processor choice does not conflict with any of the
2860 other command line choices. */
2861 if (TARGET_ARM_P (flags)
2862 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2863 error ("target CPU does not support ARM mode");
2864
2865 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2866 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2867 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2868
2869 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2870 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2871
2872 /* If this target is normally configured to use APCS frames, warn if they
2873 are turned off and debugging is turned on. */
2874 if (TARGET_ARM_P (flags)
2875 && write_symbols != NO_DEBUG
2876 && !TARGET_APCS_FRAME
2877 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2878 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2879
2880 /* iWMMXt unsupported under Thumb mode. */
2881 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2882 error ("iWMMXt unsupported under Thumb mode");
2883
2884 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2885 error ("can not use -mtp=cp15 with 16-bit Thumb");
2886
2887 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2888 {
2889 error ("RTP PIC is incompatible with Thumb");
2890 flag_pic = 0;
2891 }
2892
2893 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2894 with MOVT. */
2895 if ((target_pure_code || target_slow_flash_data)
2896 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2897 {
2898 const char *flag = (target_pure_code ? "-mpure-code" :
2899 "-mslow-flash-data");
2900 error ("%s only supports non-pic code on M-profile targets with the "
2901 "MOVT instruction", flag);
2902 }
2903
2904 }
2905
2906 /* Recompute the global settings depending on target attribute options. */
2907
2908 static void
2909 arm_option_params_internal (void)
2910 {
2911 /* If we are not using the default (ARM mode) section anchor offset
2912 ranges, then set the correct ranges now. */
2913 if (TARGET_THUMB1)
2914 {
2915 /* Thumb-1 LDR instructions cannot have negative offsets.
2916 Permissible positive offset ranges are 5-bit (for byte loads),
2917 6-bit (for halfword loads), or 7-bit (for word loads).
2918 Empirical results suggest a 7-bit anchor range gives the best
2919 overall code size. */
2920 targetm.min_anchor_offset = 0;
2921 targetm.max_anchor_offset = 127;
2922 }
2923 else if (TARGET_THUMB2)
2924 {
2925 /* The minimum is set such that the total size of the block
2926 for a particular anchor is 248 + 1 + 4095 bytes, which is
2927 divisible by eight, ensuring natural spacing of anchors. */
2928 targetm.min_anchor_offset = -248;
2929 targetm.max_anchor_offset = 4095;
2930 }
2931 else
2932 {
2933 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2934 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2935 }
2936
2937 /* Increase the number of conditional instructions with -Os. */
2938 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2939
2940 /* For THUMB2, we limit the conditional sequence to one IT block. */
2941 if (TARGET_THUMB2)
2942 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2943 }
2944
2945 /* True if -mflip-thumb should next add an attribute for the default
2946 mode, false if it should next add an attribute for the opposite mode. */
2947 static GTY(()) bool thumb_flipper;
2948
2949 /* Options after initial target override. */
2950 static GTY(()) tree init_optimize;
2951
2952 static void
2953 arm_override_options_after_change_1 (struct gcc_options *opts)
2954 {
2955 /* -falign-functions without argument: supply one. */
2956 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
2957 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2958 && opts->x_optimize_size ? "2" : "4";
2959 }
2960
2961 /* Implement targetm.override_options_after_change. */
2962
2963 static void
2964 arm_override_options_after_change (void)
2965 {
2966 arm_configure_build_target (&arm_active_target,
2967 TREE_TARGET_OPTION (target_option_default_node),
2968 &global_options_set, false);
2969
2970 arm_override_options_after_change_1 (&global_options);
2971 }
2972
2973 /* Implement TARGET_OPTION_SAVE. */
2974 static void
2975 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2976 {
2977 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2978 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2979 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2980 }
2981
2982 /* Implement TARGET_OPTION_RESTORE. */
2983 static void
2984 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2985 {
2986 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2987 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2988 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2989 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2990 false);
2991 }
2992
2993 /* Reset options between modes that the user has specified. */
2994 static void
2995 arm_option_override_internal (struct gcc_options *opts,
2996 struct gcc_options *opts_set)
2997 {
2998 arm_override_options_after_change_1 (opts);
2999
3000 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3001 {
3002 /* The default is to enable interworking, so this warning message would
3003 be confusing to users who have just compiled with
3004 e.g., -march=armv4. */
3005 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3006 opts->x_target_flags &= ~MASK_INTERWORK;
3007 }
3008
3009 if (TARGET_THUMB_P (opts->x_target_flags)
3010 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3011 {
3012 warning (0, "target CPU does not support THUMB instructions");
3013 opts->x_target_flags &= ~MASK_THUMB;
3014 }
3015
3016 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3017 {
3018 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3019 opts->x_target_flags &= ~MASK_APCS_FRAME;
3020 }
3021
3022 /* Callee super interworking implies thumb interworking. Adding
3023 this to the flags here simplifies the logic elsewhere. */
3024 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3025 opts->x_target_flags |= MASK_INTERWORK;
3026
3027 /* Need to remember initial values so combinations of options like
3028 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3029 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3030
3031 if (! opts_set->x_arm_restrict_it)
3032 opts->x_arm_restrict_it = arm_arch8;
3033
3034 /* ARM execution state and M profile don't have [restrict] IT. */
3035 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3036 opts->x_arm_restrict_it = 0;
3037
3038 /* Enable -munaligned-access by default for
3039 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3040 i.e. Thumb2 and ARM state only.
3041 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3042 - ARMv8 architecture-based processors.
3043
3044 Disable -munaligned-access by default for
3045 - all pre-ARMv6 architecture-based processors
3046 - ARMv6-M architecture-based processors
3047 - ARMv8-M Baseline processors. */
3048
3049 if (! opts_set->x_unaligned_access)
3050 {
3051 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3052 && arm_arch6 && (arm_arch_notm || arm_arch7));
3053 }
3054 else if (opts->x_unaligned_access == 1
3055 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3056 {
3057 warning (0, "target CPU does not support unaligned accesses");
3058 opts->x_unaligned_access = 0;
3059 }
3060
3061 /* Don't warn since it's on by default in -O2. */
3062 if (TARGET_THUMB1_P (opts->x_target_flags))
3063 opts->x_flag_schedule_insns = 0;
3064 else
3065 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3066
3067 /* Disable shrink-wrap when optimizing function for size, since it tends to
3068 generate additional returns. */
3069 if (optimize_function_for_size_p (cfun)
3070 && TARGET_THUMB2_P (opts->x_target_flags))
3071 opts->x_flag_shrink_wrap = false;
3072 else
3073 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3074
3075 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3076 - epilogue_insns - does not accurately model the corresponding insns
3077 emitted in the asm file. In particular, see the comment in thumb_exit
3078 'Find out how many of the (return) argument registers we can corrupt'.
3079 As a consequence, the epilogue may clobber registers without fipa-ra
3080 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3081 TODO: Accurately model clobbers for epilogue_insns and reenable
3082 fipa-ra. */
3083 if (TARGET_THUMB1_P (opts->x_target_flags))
3084 opts->x_flag_ipa_ra = 0;
3085 else
3086 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3087
3088 /* Thumb2 inline assembly code should always use unified syntax.
3089 This will apply to ARM and Thumb1 eventually. */
3090 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3091
3092 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3093 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3094 #endif
3095 }
3096
3097 static sbitmap isa_all_fpubits;
3098 static sbitmap isa_quirkbits;
3099
3100 /* Configure a build target TARGET from the user-specified options OPTS and
3101 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3102 architecture have been specified, but the two are not identical. */
3103 void
3104 arm_configure_build_target (struct arm_build_target *target,
3105 struct cl_target_option *opts,
3106 struct gcc_options *opts_set,
3107 bool warn_compatible)
3108 {
3109 const cpu_option *arm_selected_tune = NULL;
3110 const arch_option *arm_selected_arch = NULL;
3111 const cpu_option *arm_selected_cpu = NULL;
3112 const arm_fpu_desc *arm_selected_fpu = NULL;
3113 const char *tune_opts = NULL;
3114 const char *arch_opts = NULL;
3115 const char *cpu_opts = NULL;
3116
3117 bitmap_clear (target->isa);
3118 target->core_name = NULL;
3119 target->arch_name = NULL;
3120
3121 if (opts_set->x_arm_arch_string)
3122 {
3123 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3124 "-march",
3125 opts->x_arm_arch_string);
3126 arch_opts = strchr (opts->x_arm_arch_string, '+');
3127 }
3128
3129 if (opts_set->x_arm_cpu_string)
3130 {
3131 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3132 opts->x_arm_cpu_string);
3133 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3134 arm_selected_tune = arm_selected_cpu;
3135 /* If taking the tuning from -mcpu, we don't need to rescan the
3136 options for tuning. */
3137 }
3138
3139 if (opts_set->x_arm_tune_string)
3140 {
3141 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3142 opts->x_arm_tune_string);
3143 tune_opts = strchr (opts->x_arm_tune_string, '+');
3144 }
3145
3146 if (arm_selected_arch)
3147 {
3148 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3149 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3150 arch_opts);
3151
3152 if (arm_selected_cpu)
3153 {
3154 auto_sbitmap cpu_isa (isa_num_bits);
3155 auto_sbitmap isa_delta (isa_num_bits);
3156
3157 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3158 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3159 cpu_opts);
3160 bitmap_xor (isa_delta, cpu_isa, target->isa);
3161 /* Ignore any bits that are quirk bits. */
3162 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3163 /* Ignore (for now) any bits that might be set by -mfpu. */
3164 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3165
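/* At this point ISA_DELTA holds every architectural capability on which the
   named CPU and the named -march value disagree, ignoring quirk and FPU
   bits.  As an illustrative example, combining something like
   -mcpu=cortex-a8 with -march=armv7-m would leave a non-empty delta and so
   take the warning path below.  */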
3166 if (!bitmap_empty_p (isa_delta))
3167 {
3168 if (warn_compatible)
3169 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3170 arm_selected_cpu->common.name,
3171 arm_selected_arch->common.name);
3172 /* -march wins for code generation.
3173 -mcpu wins for default tuning. */
3174 if (!arm_selected_tune)
3175 arm_selected_tune = arm_selected_cpu;
3176
3177 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3178 target->arch_name = arm_selected_arch->common.name;
3179 }
3180 else
3181 {
3182 /* Architecture and CPU are essentially the same.
3183 Prefer the CPU setting. */
3184 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3185 target->core_name = arm_selected_cpu->common.name;
3186 /* Copy the CPU's capabilities, so that we inherit the
3187 appropriate extensions and quirks. */
3188 bitmap_copy (target->isa, cpu_isa);
3189 }
3190 }
3191 else
3192 {
3193 /* Pick a CPU based on the architecture. */
3194 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3195 target->arch_name = arm_selected_arch->common.name;
3196 /* Note: target->core_name is left unset in this path. */
3197 }
3198 }
3199 else if (arm_selected_cpu)
3200 {
3201 target->core_name = arm_selected_cpu->common.name;
3202 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3203 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3204 cpu_opts);
3205 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3206 }
3207 /* If the user did not specify a processor or architecture, choose
3208 one for them. */
3209 else
3210 {
3211 const cpu_option *sel;
3212 auto_sbitmap sought_isa (isa_num_bits);
3213 bitmap_clear (sought_isa);
3214 auto_sbitmap default_isa (isa_num_bits);
3215
3216 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3217 TARGET_CPU_DEFAULT);
3218 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3219 gcc_assert (arm_selected_cpu->common.name);
3220
3221 /* RWE: All of the selection logic below (to the end of this
3222 'if' clause) looks somewhat suspect. It appears to be mostly
3223 there to support forcing thumb support when the default CPU
3224 does not have thumb (somewhat dubious in terms of what the
3225 user might be expecting). I think it should be removed once
3226 support for the pre-thumb era cores is removed. */
3227 sel = arm_selected_cpu;
3228 arm_initialize_isa (default_isa, sel->common.isa_bits);
3229 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3230 cpu_opts);
3231
3232 /* Now check to see if the user has specified any command line
3233 switches that require certain abilities from the cpu. */
3234
3235 if (TARGET_INTERWORK || TARGET_THUMB)
3236 bitmap_set_bit (sought_isa, isa_bit_thumb);
3237
3238 /* If there are such requirements and the default CPU does not
3239 satisfy them, we need to run over the complete list of
3240 cores looking for one that is satisfactory. */
3241 if (!bitmap_empty_p (sought_isa)
3242 && !bitmap_subset_p (sought_isa, default_isa))
3243 {
3244 auto_sbitmap candidate_isa (isa_num_bits);
3245 /* We're only interested in a CPU with at least the
3246 capabilities of the default CPU and the required
3247 additional features. */
3248 bitmap_ior (default_isa, default_isa, sought_isa);
3249
3250 /* Try to locate a CPU type that supports all of the abilities
3251 of the default CPU, plus the extra abilities requested by
3252 the user. */
3253 for (sel = all_cores; sel->common.name != NULL; sel++)
3254 {
3255 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3256 /* An exact match? */
3257 if (bitmap_equal_p (default_isa, candidate_isa))
3258 break;
3259 }
3260
3261 if (sel->common.name == NULL)
3262 {
3263 unsigned current_bit_count = isa_num_bits;
3264 const cpu_option *best_fit = NULL;
3265
3266 /* Ideally we would like to issue an error message here
3267 saying that it was not possible to find a CPU compatible
3268 with the default CPU, but which also supports the command
3269 line options specified by the programmer, and so they
3270 ought to use the -mcpu=<name> command line option to
3271 override the default CPU type.
3272
3273 If we cannot find a CPU that has exactly the
3274 characteristics of the default CPU and the given
3275 command line options we scan the array again looking
3276 for a best match. The best match must have at least
3277 the capabilities of the perfect match. */
3278 for (sel = all_cores; sel->common.name != NULL; sel++)
3279 {
3280 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3281
3282 if (bitmap_subset_p (default_isa, candidate_isa))
3283 {
3284 unsigned count;
3285
3286 bitmap_and_compl (candidate_isa, candidate_isa,
3287 default_isa);
3288 count = bitmap_popcount (candidate_isa);
3289
3290 if (count < current_bit_count)
3291 {
3292 best_fit = sel;
3293 current_bit_count = count;
3294 }
3295 }
3296
3297 gcc_assert (best_fit);
3298 sel = best_fit;
3299 }
3300 }
3301 arm_selected_cpu = sel;
3302 }
3303
3304 /* Now we know the CPU, we can finally initialize the target
3305 structure. */
3306 target->core_name = arm_selected_cpu->common.name;
3307 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3308 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3309 cpu_opts);
3310 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3311 }
3312
3313 gcc_assert (arm_selected_cpu);
3314 gcc_assert (arm_selected_arch);
3315
3316 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3317 {
3318 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3319 auto_sbitmap fpu_bits (isa_num_bits);
3320
3321 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3322 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3323 bitmap_ior (target->isa, target->isa, fpu_bits);
3324 }
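/* Illustrative sketch (not exhaustive): with -march=armv7-a+vfpv3 -mfpu=neon,
the vfpv3 bits implied by the architecture extension are cleared by the
bitmap_and_compl above and replaced by neon's bits, so an explicit -mfpu
always wins over FPU capabilities implied by -march/-mcpu. */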
3325
3326 if (!arm_selected_tune)
3327 arm_selected_tune = arm_selected_cpu;
3328 else /* Validate the features passed to -mtune. */
3329 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3330
3331 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3332
3333 /* Finish initializing the target structure. */
3334 target->arch_pp_name = arm_selected_arch->arch;
3335 target->base_arch = arm_selected_arch->base_arch;
3336 target->profile = arm_selected_arch->profile;
3337
3338 target->tune_flags = tune_data->tune_flags;
3339 target->tune = tune_data->tune;
3340 target->tune_core = tune_data->scheduler;
3341 arm_option_reconfigure_globals ();
3342 }
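
/* A worked example of the resolution above (a sketch, not from this file):
for "-march=armv7-a -mcpu=cortex-a53" the cortex-a53 ISA contains ARMv8-A
bits that armv7-a lacks, so isa_delta is non-empty, a -mcpu/-march conflict
warning is issued, code is generated for armv7-a, and cortex-a53 is used
only to pick the default tuning. */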
3343
3344 /* Fix up any incompatible options that the user has specified. */
3345 static void
3346 arm_option_override (void)
3347 {
3348 static const enum isa_feature fpu_bitlist[]
3349 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3350 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3351 cl_target_option opts;
3352
3353 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3354 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3355
3356 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3357 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3358
3359 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3360
3361 if (!global_options_set.x_arm_fpu_index)
3362 {
3363 bool ok;
3364 int fpu_index;
3365
3366 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3367 CL_TARGET);
3368 gcc_assert (ok);
3369 arm_fpu_index = (enum fpu_type) fpu_index;
3370 }
3371
3372 cl_target_option_save (&opts, &global_options);
3373 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3374 true);
3375
3376 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3377 SUBTARGET_OVERRIDE_OPTIONS;
3378 #endif
3379
3380 /* Initialize boolean versions of the architectural flags, for use
3381 in the arm.md file and for enabling feature flags. */
3382 arm_option_reconfigure_globals ();
3383
3384 arm_tune = arm_active_target.tune_core;
3385 tune_flags = arm_active_target.tune_flags;
3386 current_tune = arm_active_target.tune;
3387
3388 /* TBD: Dwarf info for apcs frame is not handled yet. */
3389 if (TARGET_APCS_FRAME)
3390 flag_shrink_wrap = false;
3391
3392 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3393 {
3394 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3395 target_flags |= MASK_APCS_FRAME;
3396 }
3397
3398 if (TARGET_POKE_FUNCTION_NAME)
3399 target_flags |= MASK_APCS_FRAME;
3400
3401 if (TARGET_APCS_REENT && flag_pic)
3402 error ("-fpic and -mapcs-reent are incompatible");
3403
3404 if (TARGET_APCS_REENT)
3405 warning (0, "APCS reentrant code not supported. Ignored");
3406
3407 /* Set up some tuning parameters. */
3408 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3409 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3410 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3411 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3412 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3413 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3414
3415 /* For arm2/3 there is no need to do any scheduling if we are doing
3416 software floating-point. */
3417 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3418 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3419
3420 /* Override the default structure alignment for AAPCS ABI. */
3421 if (!global_options_set.x_arm_structure_size_boundary)
3422 {
3423 if (TARGET_AAPCS_BASED)
3424 arm_structure_size_boundary = 8;
3425 }
3426 else
3427 {
3428 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3429
3430 if (arm_structure_size_boundary != 8
3431 && arm_structure_size_boundary != 32
3432 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3433 {
3434 if (ARM_DOUBLEWORD_ALIGN)
3435 warning (0,
3436 "structure size boundary can only be set to 8, 32 or 64");
3437 else
3438 warning (0, "structure size boundary can only be set to 8 or 32");
3439 arm_structure_size_boundary
3440 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3441 }
3442 }
3443
3444 if (TARGET_VXWORKS_RTP)
3445 {
3446 if (!global_options_set.x_arm_pic_data_is_text_relative)
3447 arm_pic_data_is_text_relative = 0;
3448 }
3449 else if (flag_pic
3450 && !arm_pic_data_is_text_relative
3451 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3452 /* When text & data segments don't have a fixed displacement, the
3453 intended use is with a single, read-only PIC base register.
3454 Unless the user explicitly requested not to do that, set
3455 it. */
3456 target_flags |= MASK_SINGLE_PIC_BASE;
3457
3458 /* If stack checking is disabled, we can use r10 as the PIC register,
3459 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3460 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3461 {
3462 if (TARGET_VXWORKS_RTP)
3463 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3464 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3465 }
3466
3467 if (flag_pic && TARGET_VXWORKS_RTP)
3468 arm_pic_register = 9;
3469
3470 if (arm_pic_register_string != NULL)
3471 {
3472 int pic_register = decode_reg_name (arm_pic_register_string);
3473
3474 if (!flag_pic)
3475 warning (0, "-mpic-register= is useless without -fpic");
3476
3477 /* Prevent the user from choosing an obviously stupid PIC register. */
3478 else if (pic_register < 0 || call_used_regs[pic_register]
3479 || pic_register == HARD_FRAME_POINTER_REGNUM
3480 || pic_register == STACK_POINTER_REGNUM
3481 || pic_register >= PC_REGNUM
3482 || (TARGET_VXWORKS_RTP
3483 && (unsigned int) pic_register != arm_pic_register))
3484 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3485 else
3486 arm_pic_register = pic_register;
3487 }
3488
3489 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3490 if (fix_cm3_ldrd == 2)
3491 {
3492 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3493 fix_cm3_ldrd = 1;
3494 else
3495 fix_cm3_ldrd = 0;
3496 }
3497
3498 /* Hot/Cold partitioning is not currently supported, since we can't
3499 handle literal pool placement in that case. */
3500 if (flag_reorder_blocks_and_partition)
3501 {
3502 inform (input_location,
3503 "-freorder-blocks-and-partition not supported on this architecture");
3504 flag_reorder_blocks_and_partition = 0;
3505 flag_reorder_blocks = 1;
3506 }
3507
3508 if (flag_pic)
3509 /* Hoisting PIC address calculations more aggressively provides a small,
3510 but measurable, size reduction for PIC code. Therefore, we decrease
3511 the bar for unrestricted expression hoisting to the cost of PIC address
3512 calculation, which is 2 instructions. */
3513 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3514 global_options.x_param_values,
3515 global_options_set.x_param_values);
3516
3517 /* ARM EABI defaults to strict volatile bitfields. */
3518 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3519 && abi_version_at_least(2))
3520 flag_strict_volatile_bitfields = 1;
3521
3522 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3523 have deemed it beneficial (signified by setting
3524 prefetch.num_slots to 1 or more). */
3525 if (flag_prefetch_loop_arrays < 0
3526 && HAVE_prefetch
3527 && optimize >= 3
3528 && current_tune->prefetch.num_slots > 0)
3529 flag_prefetch_loop_arrays = 1;
3530
3531 /* Set up parameters to be used in the prefetching algorithm. Do not
3532 override the defaults unless we are tuning for a core we have
3533 researched values for. */
3534 if (current_tune->prefetch.num_slots > 0)
3535 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3536 current_tune->prefetch.num_slots,
3537 global_options.x_param_values,
3538 global_options_set.x_param_values);
3539 if (current_tune->prefetch.l1_cache_line_size >= 0)
3540 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3541 current_tune->prefetch.l1_cache_line_size,
3542 global_options.x_param_values,
3543 global_options_set.x_param_values);
3544 if (current_tune->prefetch.l1_cache_size >= 0)
3545 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3546 current_tune->prefetch.l1_cache_size,
3547 global_options.x_param_values,
3548 global_options_set.x_param_values);
3549
3550 /* Use Neon to perform 64-bit operations rather than core
3551 registers. */
3552 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3553 if (use_neon_for_64bits == 1)
3554 prefer_neon_for_64bits = true;
3555
3556 /* Use the alternative scheduling-pressure algorithm by default. */
3557 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3558 global_options.x_param_values,
3559 global_options_set.x_param_values);
3560
3561 /* Look through the ready list and all of the queue for instructions
3562 relevant to the L2 auto-prefetcher. */
3563 int param_sched_autopref_queue_depth;
3564
3565 switch (current_tune->sched_autopref)
3566 {
3567 case tune_params::SCHED_AUTOPREF_OFF:
3568 param_sched_autopref_queue_depth = -1;
3569 break;
3570
3571 case tune_params::SCHED_AUTOPREF_RANK:
3572 param_sched_autopref_queue_depth = 0;
3573 break;
3574
3575 case tune_params::SCHED_AUTOPREF_FULL:
3576 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3577 break;
3578
3579 default:
3580 gcc_unreachable ();
3581 }
3582
3583 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3584 param_sched_autopref_queue_depth,
3585 global_options.x_param_values,
3586 global_options_set.x_param_values);
3587
3588 /* Currently, for slow flash data, we just disable literal pools. We also
3589 disable them for pure-code. */
3590 if (target_slow_flash_data || target_pure_code)
3591 arm_disable_literal_pool = true;
3592
3593 /* Disable scheduling fusion by default if this is not an armv7 processor
3594 or it doesn't prefer ldrd/strd. */
3595 if (flag_schedule_fusion == 2
3596 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3597 flag_schedule_fusion = 0;
3598
3599 /* Need to remember initial options before they are overridden. */
3600 init_optimize = build_optimization_node (&global_options);
3601
3602 arm_options_perform_arch_sanity_checks ();
3603 arm_option_override_internal (&global_options, &global_options_set);
3604 arm_option_check_internal (&global_options);
3605 arm_option_params_internal ();
3606
3607 /* Create the default target_options structure. */
3608 target_option_default_node = target_option_current_node
3609 = build_target_option_node (&global_options);
3610
3611 /* Register global variables with the garbage collector. */
3612 arm_add_gc_roots ();
3613
3614 /* Init initial mode for testing. */
3615 thumb_flipper = TARGET_THUMB;
3616 }
3617
3618
3619 /* Reconfigure global status flags from the active_target.isa. */
3620 void
3621 arm_option_reconfigure_globals (void)
3622 {
3623 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
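/* For example (illustrative): arch_pp_name "7A" yields "__ARM_ARCH_7A__". */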
3624 arm_base_arch = arm_active_target.base_arch;
3625
3626 /* Initialize boolean versions of the architectural flags, for use
3627 in the arm.md file. */
3628 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3629 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3630 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3631 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3632 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3633 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3634 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3635 arm_arch6m = arm_arch6 && !arm_arch_notm;
3636 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3637 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3638 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3639 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3640 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3641 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3642 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3643 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3644 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3645 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3646 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3647 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3648 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3649 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3650 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3651 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3652 if (arm_fp16_inst)
3653 {
3654 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3655 error ("selected fp16 options are incompatible");
3656 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3657 }
3658
3659 /* And finally, set up some quirks. */
3660 arm_arch_no_volatile_ce
3661 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3662 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3663 isa_bit_quirk_armv6kz);
3664
3665 /* Use the cp15 method if it is available. */
3666 if (target_thread_pointer == TP_AUTO)
3667 {
3668 if (arm_arch6k && !TARGET_THUMB1)
3669 target_thread_pointer = TP_CP15;
3670 else
3671 target_thread_pointer = TP_SOFT;
3672 }
3673 }
3674
3675 /* Perform some validation of the desired architecture against the rest of
3676 the options. */
3677 void
3678 arm_options_perform_arch_sanity_checks (void)
3679 {
3680 /* V5T code we generate is completely interworking capable, so we turn off
3681 TARGET_INTERWORK here to avoid many tests later on. */
3682
3683 /* XXX However, we must pass the right pre-processor defines to CPP
3684 or GLD can get confused. This is a hack. */
3685 if (TARGET_INTERWORK)
3686 arm_cpp_interwork = 1;
3687
3688 if (arm_arch5t)
3689 target_flags &= ~MASK_INTERWORK;
3690
3691 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3692 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3693
3694 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3695 error ("iwmmxt abi requires an iwmmxt capable cpu");
3696
3697 /* BPABI targets use linker tricks to allow interworking on cores
3698 without thumb support. */
3699 if (TARGET_INTERWORK
3700 && !TARGET_BPABI
3701 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3702 {
3703 warning (0, "target CPU does not support interworking" );
3704 target_flags &= ~MASK_INTERWORK;
3705 }
3706
3707 /* If soft-float is specified then don't use FPU. */
3708 if (TARGET_SOFT_FLOAT)
3709 arm_fpu_attr = FPU_NONE;
3710 else
3711 arm_fpu_attr = FPU_VFP;
3712
3713 if (TARGET_AAPCS_BASED)
3714 {
3715 if (TARGET_CALLER_INTERWORKING)
3716 error ("AAPCS does not support -mcaller-super-interworking");
3717 else
3718 if (TARGET_CALLEE_INTERWORKING)
3719 error ("AAPCS does not support -mcallee-super-interworking");
3720 }
3721
3722 /* __fp16 support currently assumes the core has ldrh. */
3723 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3724 sorry ("__fp16 and no ldrh");
3725
3726 if (use_cmse && !arm_arch_cmse)
3727 error ("target CPU does not support ARMv8-M Security Extensions");
3728
3729 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3730 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3731 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3732 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3733
3734
3735 if (TARGET_AAPCS_BASED)
3736 {
3737 if (arm_abi == ARM_ABI_IWMMXT)
3738 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3739 else if (TARGET_HARD_FLOAT_ABI)
3740 {
3741 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3742 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3743 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3744 }
3745 else
3746 arm_pcs_default = ARM_PCS_AAPCS;
3747 }
3748 else
3749 {
3750 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3751 sorry ("-mfloat-abi=hard and VFP");
3752
3753 if (arm_abi == ARM_ABI_APCS)
3754 arm_pcs_default = ARM_PCS_APCS;
3755 else
3756 arm_pcs_default = ARM_PCS_ATPCS;
3757 }
3758 }
3759
3760 static void
3761 arm_add_gc_roots (void)
3762 {
3763 gcc_obstack_init(&minipool_obstack);
3764 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3765 }
3766 \f
3767 /* A table of known ARM exception types.
3768 For use with the interrupt function attribute. */
3769
3770 typedef struct
3771 {
3772 const char *const arg;
3773 const unsigned long return_value;
3774 }
3775 isr_attribute_arg;
3776
3777 static const isr_attribute_arg isr_attribute_args [] =
3778 {
3779 { "IRQ", ARM_FT_ISR },
3780 { "irq", ARM_FT_ISR },
3781 { "FIQ", ARM_FT_FIQ },
3782 { "fiq", ARM_FT_FIQ },
3783 { "ABORT", ARM_FT_ISR },
3784 { "abort", ARM_FT_ISR },
3785 { "ABORT", ARM_FT_ISR },
3786 { "abort", ARM_FT_ISR },
3787 { "UNDEF", ARM_FT_EXCEPTION },
3788 { "undef", ARM_FT_EXCEPTION },
3789 { "SWI", ARM_FT_EXCEPTION },
3790 { "swi", ARM_FT_EXCEPTION },
3791 { NULL, ARM_FT_NORMAL }
3792 };
3793
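/* For illustration (a user-level sketch, not code from this file): a handler
is tagged with one of the names above via the function attribute, e.g.

void __attribute__ ((interrupt ("IRQ"))) my_irq_handler (void);

and arm_isr_value below maps the string "IRQ" to ARM_FT_ISR. */
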
3794 /* Returns the (interrupt) function type of the current
3795 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3796
3797 static unsigned long
3798 arm_isr_value (tree argument)
3799 {
3800 const isr_attribute_arg * ptr;
3801 const char * arg;
3802
3803 if (!arm_arch_notm)
3804 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3805
3806 /* No argument - default to IRQ. */
3807 if (argument == NULL_TREE)
3808 return ARM_FT_ISR;
3809
3810 /* Get the value of the argument. */
3811 if (TREE_VALUE (argument) == NULL_TREE
3812 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3813 return ARM_FT_UNKNOWN;
3814
3815 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3816
3817 /* Check it against the list of known arguments. */
3818 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3819 if (streq (arg, ptr->arg))
3820 return ptr->return_value;
3821
3822 /* An unrecognized interrupt type. */
3823 return ARM_FT_UNKNOWN;
3824 }
3825
3826 /* Computes the type of the current function. */
3827
3828 static unsigned long
3829 arm_compute_func_type (void)
3830 {
3831 unsigned long type = ARM_FT_UNKNOWN;
3832 tree a;
3833 tree attr;
3834
3835 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3836
3837 /* Decide if the current function is volatile. Such functions
3838 never return, and many memory cycles can be saved by not storing
3839 register values that will never be needed again. This optimization
3840 was added to speed up context switching in a kernel application. */
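/* For example (illustrative): a function declared __attribute__ ((noreturn))
that cannot throw never returns, so its prologue need not save call-saved
registers that would only be restored on return. */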
3841 if (optimize > 0
3842 && (TREE_NOTHROW (current_function_decl)
3843 || !(flag_unwind_tables
3844 || (flag_exceptions
3845 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3846 && TREE_THIS_VOLATILE (current_function_decl))
3847 type |= ARM_FT_VOLATILE;
3848
3849 if (cfun->static_chain_decl != NULL)
3850 type |= ARM_FT_NESTED;
3851
3852 attr = DECL_ATTRIBUTES (current_function_decl);
3853
3854 a = lookup_attribute ("naked", attr);
3855 if (a != NULL_TREE)
3856 type |= ARM_FT_NAKED;
3857
3858 a = lookup_attribute ("isr", attr);
3859 if (a == NULL_TREE)
3860 a = lookup_attribute ("interrupt", attr);
3861
3862 if (a == NULL_TREE)
3863 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3864 else
3865 type |= arm_isr_value (TREE_VALUE (a));
3866
3867 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3868 type |= ARM_FT_CMSE_ENTRY;
3869
3870 return type;
3871 }
3872
3873 /* Returns the type of the current function. */
3874
3875 unsigned long
3876 arm_current_func_type (void)
3877 {
3878 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3879 cfun->machine->func_type = arm_compute_func_type ();
3880
3881 return cfun->machine->func_type;
3882 }
3883
3884 bool
3885 arm_allocate_stack_slots_for_args (void)
3886 {
3887 /* Naked functions should not allocate stack slots for arguments. */
3888 return !IS_NAKED (arm_current_func_type ());
3889 }
3890
3891 static bool
3892 arm_warn_func_return (tree decl)
3893 {
3894 /* Naked functions are implemented entirely in assembly, including the
3895 return sequence, so suppress warnings about this. */
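/* Illustrative sketch (not from this file): for
__attribute__ ((naked)) int f (void) { __asm__ ("mov r0, #0\n\tbx lr"); }
the return sequence lives in the asm, so warning about the missing C-level
return statement would be spurious. */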
3896 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3897 }
3898
3899 \f
3900 /* Output assembler code for a block containing the constant parts
3901 of a trampoline, leaving space for the variable parts.
3902
3903 On the ARM, (if r8 is the static chain regnum, and remembering that
3904 referencing pc adds an offset of 8) the trampoline looks like:
3905 ldr r8, [pc, #0]
3906 ldr pc, [pc]
3907 .word static chain value
3908 .word function's address
3909 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3910
3911 static void
3912 arm_asm_trampoline_template (FILE *f)
3913 {
3914 fprintf (f, "\t.syntax unified\n");
3915
3916 if (TARGET_ARM)
3917 {
3918 fprintf (f, "\t.arm\n");
3919 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3920 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3921 }
3922 else if (TARGET_THUMB2)
3923 {
3924 fprintf (f, "\t.thumb\n");
3925 /* The Thumb-2 trampoline is similar to the arm implementation.
3926 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3927 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3928 STATIC_CHAIN_REGNUM, PC_REGNUM);
3929 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3930 }
3931 else
3932 {
3933 ASM_OUTPUT_ALIGN (f, 2);
3934 fprintf (f, "\t.code\t16\n");
3935 fprintf (f, ".Ltrampoline_start:\n");
3936 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3937 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3938 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3939 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3940 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3941 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3942 }
3943 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3944 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3945 }
3946
3947 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3948
3949 static void
3950 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3951 {
3952 rtx fnaddr, mem, a_tramp;
3953
3954 emit_block_move (m_tramp, assemble_trampoline_template (),
3955 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3956
3957 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3958 emit_move_insn (mem, chain_value);
3959
3960 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3961 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3962 emit_move_insn (mem, fnaddr);
3963
3964 a_tramp = XEXP (m_tramp, 0);
3965 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3966 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3967 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3968 }
3969
3970 /* Thumb trampolines should be entered in thumb mode, so set
3971 the bottom bit of the address. */
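/* For example (illustrative), a trampoline at 0x20001000 is returned as
0x20001001, so an indirect BX/BLX to it enters Thumb state. */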
3972
3973 static rtx
3974 arm_trampoline_adjust_address (rtx addr)
3975 {
3976 if (TARGET_THUMB)
3977 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3978 NULL, 0, OPTAB_LIB_WIDEN);
3979 return addr;
3980 }
3981 \f
3982 /* Return 1 if it is possible to return using a single instruction.
3983 If SIBLING is non-null, this is a test for a return before a sibling
3984 call. SIBLING is the call insn, so we can examine its register usage. */
3985
3986 int
3987 use_return_insn (int iscond, rtx sibling)
3988 {
3989 int regno;
3990 unsigned int func_type;
3991 unsigned long saved_int_regs;
3992 unsigned HOST_WIDE_INT stack_adjust;
3993 arm_stack_offsets *offsets;
3994
3995 /* Never use a return instruction before reload has run. */
3996 if (!reload_completed)
3997 return 0;
3998
3999 func_type = arm_current_func_type ();
4000
4001 /* Naked, volatile and stack alignment functions need special
4002 consideration. */
4003 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4004 return 0;
4005
4006 /* So do interrupt functions that use the frame pointer and Thumb
4007 interrupt functions. */
4008 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4009 return 0;
4010
4011 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4012 && !optimize_function_for_size_p (cfun))
4013 return 0;
4014
4015 offsets = arm_get_frame_offsets ();
4016 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4017
4018 /* As do variadic functions. */
4019 if (crtl->args.pretend_args_size
4020 || cfun->machine->uses_anonymous_args
4021 /* Or if the function calls __builtin_eh_return () */
4022 || crtl->calls_eh_return
4023 /* Or if the function calls alloca */
4024 || cfun->calls_alloca
4025 /* Or if there is a stack adjustment. However, if the stack pointer
4026 is saved on the stack, we can use a pre-incrementing stack load. */
4027 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4028 && stack_adjust == 4))
4029 /* Or if the static chain register was saved above the frame, under the
4030 assumption that the stack pointer isn't saved on the stack. */
4031 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4032 && arm_compute_static_chain_stack_bytes() != 0))
4033 return 0;
4034
4035 saved_int_regs = offsets->saved_regs_mask;
4036
4037 /* Unfortunately, the insn
4038
4039 ldmib sp, {..., sp, ...}
4040
4041 triggers a bug on most SA-110 based devices, such that the stack
4042 pointer won't be correctly restored if the instruction takes a
4043 page fault. We work around this problem by popping r3 along with
4044 the other registers, since that is never slower than executing
4045 another instruction.
4046
4047 We test for !arm_arch5t here, because code for any architecture
4048 less than this could potentially be run on one of the buggy
4049 chips. */
4050 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4051 {
4052 /* Validate that r3 is a call-clobbered register (always true in
4053 the default abi) ... */
4054 if (!call_used_regs[3])
4055 return 0;
4056
4057 /* ... that it isn't being used for a return value ... */
4058 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4059 return 0;
4060
4061 /* ... or for a tail-call argument ... */
4062 if (sibling)
4063 {
4064 gcc_assert (CALL_P (sibling));
4065
4066 if (find_regno_fusage (sibling, USE, 3))
4067 return 0;
4068 }
4069
4070 /* ... and that there are no call-saved registers in r0-r2
4071 (always true in the default ABI). */
4072 if (saved_int_regs & 0x7)
4073 return 0;
4074 }
4075
4076 /* Can't be done if interworking with Thumb, and any registers have been
4077 stacked. */
4078 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4079 return 0;
4080
4081 /* On StrongARM, conditional returns are expensive if they aren't
4082 taken and multiple registers have been stacked. */
4083 if (iscond && arm_tune_strongarm)
4084 {
4085 /* Conditional return when just the LR is stored is a simple
4086 conditional-load instruction, that's not expensive. */
4087 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4088 return 0;
4089
4090 if (flag_pic
4091 && arm_pic_register != INVALID_REGNUM
4092 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4093 return 0;
4094 }
4095
4096 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4097 several instructions if anything needs to be popped. */
4098 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4099 return 0;
4100
4101 /* If there are saved registers but the LR isn't saved, then we need
4102 two instructions for the return. */
4103 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4104 return 0;
4105
4106 /* Can't be done if any of the VFP regs are pushed,
4107 since this also requires an insn. */
4108 if (TARGET_HARD_FLOAT)
4109 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4110 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4111 return 0;
4112
4113 if (TARGET_REALLY_IWMMXT)
4114 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4115 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4116 return 0;
4117
4118 return 1;
4119 }
4120
4121 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4122 shrink-wrapping if possible. This is the case if we need to emit a
4123 prologue, which we can test by looking at the offsets. */
4124 bool
4125 use_simple_return_p (void)
4126 {
4127 arm_stack_offsets *offsets;
4128
4129 /* Note this function can be called before or after reload. */
4130 if (!reload_completed)
4131 arm_compute_frame_layout ();
4132
4133 offsets = arm_get_frame_offsets ();
4134 return offsets->outgoing_args != 0;
4135 }
4136
4137 /* Return TRUE if int I is a valid immediate ARM constant. */
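/* Some illustrative cases (not exhaustive): in ARM mode 0x000000ff,
0x0003fc00 and 0xff000000 are representable (an 8-bit value rotated right
by an even amount), while 0x00000fff and 0x01010101 are not and must be
synthesized. Thumb-2 additionally accepts the replicated patterns handled
below, e.g. 0x01010101 and 0xab00ab00. */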
4138
4139 int
4140 const_ok_for_arm (HOST_WIDE_INT i)
4141 {
4142 int lowbit;
4143
4144 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4145 be all zero, or all one. */
4146 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4147 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4148 != ((~(unsigned HOST_WIDE_INT) 0)
4149 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4150 return FALSE;
4151
4152 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4153
4154 /* Fast return for 0 and small values. We must do this for zero, since
4155 the code below can't handle that one case. */
4156 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4157 return TRUE;
4158
4159 /* Get the number of trailing zeros. */
4160 lowbit = ffs((int) i) - 1;
4161
4162 /* Only even shifts are allowed in ARM mode so round down to the
4163 nearest even number. */
4164 if (TARGET_ARM)
4165 lowbit &= ~1;
4166
4167 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4168 return TRUE;
4169
4170 if (TARGET_ARM)
4171 {
4172 /* Allow rotated constants in ARM mode. */
4173 if (lowbit <= 4
4174 && ((i & ~0xc000003f) == 0
4175 || (i & ~0xf000000f) == 0
4176 || (i & ~0xfc000003) == 0))
4177 return TRUE;
4178 }
4179 else if (TARGET_THUMB2)
4180 {
4181 HOST_WIDE_INT v;
4182
4183 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4184 v = i & 0xff;
4185 v |= v << 16;
4186 if (i == v || i == (v | (v << 8)))
4187 return TRUE;
4188
4189 /* Allow repeated pattern 0xXY00XY00. */
4190 v = i & 0xff00;
4191 v |= v << 16;
4192 if (i == v)
4193 return TRUE;
4194 }
4195 else if (TARGET_HAVE_MOVT)
4196 {
4197 /* Thumb-1 targets with MOVT. */
4198 if (i > 0xffff)
4199 return FALSE;
4200 else
4201 return TRUE;
4202 }
4203
4204 return FALSE;
4205 }
4206
4207 /* Return true if I is a valid constant for the operation CODE. */
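/* Illustrative examples (not exhaustive): for AND, 0xffffff00 is accepted
because ~0xffffff00 == 0xff is a valid immediate (realized as BIC); for
PLUS, -5 is accepted because 5 is (realized as SUB); for SET on targets
with MOVT, 0x1234 is accepted via MOVW. */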
4208 int
4209 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4210 {
4211 if (const_ok_for_arm (i))
4212 return 1;
4213
4214 switch (code)
4215 {
4216 case SET:
4217 /* See if we can use movw. */
4218 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4219 return 1;
4220 else
4221 /* Otherwise, try mvn. */
4222 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4223
4224 case PLUS:
4225 /* See if we can use addw or subw. */
4226 if (TARGET_THUMB2
4227 && ((i & 0xfffff000) == 0
4228 || ((-i) & 0xfffff000) == 0))
4229 return 1;
4230 /* Fall through. */
4231 case COMPARE:
4232 case EQ:
4233 case NE:
4234 case GT:
4235 case LE:
4236 case LT:
4237 case GE:
4238 case GEU:
4239 case LTU:
4240 case GTU:
4241 case LEU:
4242 case UNORDERED:
4243 case ORDERED:
4244 case UNEQ:
4245 case UNGE:
4246 case UNLT:
4247 case UNGT:
4248 case UNLE:
4249 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4250
4251 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4252 case XOR:
4253 return 0;
4254
4255 case IOR:
4256 if (TARGET_THUMB2)
4257 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4258 return 0;
4259
4260 case AND:
4261 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4262
4263 default:
4264 gcc_unreachable ();
4265 }
4266 }
4267
4268 /* Return true if I is a valid di mode constant for the operation CODE. */
4269 int
4270 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4271 {
4272 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4273 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4274 rtx hi = GEN_INT (hi_val);
4275 rtx lo = GEN_INT (lo_val);
4276
4277 if (TARGET_THUMB1)
4278 return 0;
4279
4280 switch (code)
4281 {
4282 case AND:
4283 case IOR:
4284 case XOR:
4285 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4286 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4287 case PLUS:
4288 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4289
4290 default:
4291 return 0;
4292 }
4293 }
4294
4295 /* Emit a sequence of insns to handle a large constant.
4296 CODE is the code of the operation required, it can be any of SET, PLUS,
4297 IOR, AND, XOR, MINUS;
4298 MODE is the mode in which the operation is being performed;
4299 VAL is the integer to operate on;
4300 SOURCE is the other operand (a register, or a null-pointer for SET);
4301 SUBTARGETS means it is safe to create scratch registers if that will
4302 either produce a simpler sequence, or we will want to cse the values.
4303 Return value is the number of insns emitted. */
4304
4305 /* ??? Tweak this for thumb2. */
4306 int
4307 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4308 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4309 {
4310 rtx cond;
4311
4312 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4313 cond = COND_EXEC_TEST (PATTERN (insn));
4314 else
4315 cond = NULL_RTX;
4316
4317 if (subtargets || code == SET
4318 || (REG_P (target) && REG_P (source)
4319 && REGNO (target) != REGNO (source)))
4320 {
4321 /* After arm_reorg has been called, we can't fix up expensive
4322 constants by pushing them into memory so we must synthesize
4323 them in-line, regardless of the cost. This is only likely to
4324 be more costly on chips that have load delay slots and we are
4325 compiling without running the scheduler (so no splitting
4326 occurred before the final instruction emission).
4327
4328 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4329 */
4330 if (!cfun->machine->after_arm_reorg
4331 && !cond
4332 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4333 1, 0)
4334 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4335 + (code != SET))))
4336 {
4337 if (code == SET)
4338 {
4339 /* Currently SET is the only monadic value for CODE, all
4340 the rest are dyadic. */
4341 if (TARGET_USE_MOVT)
4342 arm_emit_movpair (target, GEN_INT (val));
4343 else
4344 emit_set_insn (target, GEN_INT (val));
4345
4346 return 1;
4347 }
4348 else
4349 {
4350 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4351
4352 if (TARGET_USE_MOVT)
4353 arm_emit_movpair (temp, GEN_INT (val));
4354 else
4355 emit_set_insn (temp, GEN_INT (val));
4356
4357 /* For MINUS, the value is what we subtract from, since we never
4358 have subtraction of a constant. */
4359 if (code == MINUS)
4360 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4361 else
4362 emit_set_insn (target,
4363 gen_rtx_fmt_ee (code, mode, source, temp));
4364 return 2;
4365 }
4366 }
4367 }
4368
4369 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4370 1);
4371 }
4372
4373 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4374 ARM/THUMB2 immediates and add up to VAL.
4375 The function return value gives the number of insns required. */
4376 static int
4377 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4378 struct four_ints *return_sequence)
4379 {
4380 int best_consecutive_zeros = 0;
4381 int i;
4382 int best_start = 0;
4383 int insns1, insns2;
4384 struct four_ints tmp_sequence;
4385
4386 /* If we aren't targeting ARM, the best place to start is always at
4387 the bottom, otherwise look more closely. */
4388 if (TARGET_ARM)
4389 {
4390 for (i = 0; i < 32; i += 2)
4391 {
4392 int consecutive_zeros = 0;
4393
4394 if (!(val & (3 << i)))
4395 {
4396 while ((i < 32) && !(val & (3 << i)))
4397 {
4398 consecutive_zeros += 2;
4399 i += 2;
4400 }
4401 if (consecutive_zeros > best_consecutive_zeros)
4402 {
4403 best_consecutive_zeros = consecutive_zeros;
4404 best_start = i - consecutive_zeros;
4405 }
4406 i -= 2;
4407 }
4408 }
4409 }
4410
4411 /* So long as it won't require any more insns to do so, it's
4412 desirable to emit a small constant (in bits 0...9) in the last
4413 insn. This way there is more chance that it can be combined with
4414 a later addressing insn to form a pre-indexed load or store
4415 operation. Consider:
4416
4417 *((volatile int *)0xe0000100) = 1;
4418 *((volatile int *)0xe0000110) = 2;
4419
4420 We want this to wind up as:
4421
4422 mov rA, #0xe0000000
4423 mov rB, #1
4424 str rB, [rA, #0x100]
4425 mov rB, #2
4426 str rB, [rA, #0x110]
4427
4428 rather than having to synthesize both large constants from scratch.
4429
4430 Therefore, we calculate how many insns would be required to emit
4431 the constant starting from `best_start', and also starting from
4432 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4433 yield a shorter sequence, we may as well use zero. */
4434 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4435 if (best_start != 0
4436 && ((HOST_WIDE_INT_1U << best_start) < val))
4437 {
4438 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4439 if (insns2 <= insns1)
4440 {
4441 *return_sequence = tmp_sequence;
4442 insns1 = insns2;
4443 }
4444 }
4445
4446 return insns1;
4447 }
4448
4449 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4450 static int
4451 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4452 struct four_ints *return_sequence, int i)
4453 {
4454 int remainder = val & 0xffffffff;
4455 int insns = 0;
4456
4457 /* Try and find a way of doing the job in either two or three
4458 instructions.
4459
4460 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4461 location. We start at position I. This may be the MSB, or
4462 optimal_immediate_sequence may have positioned it at the largest block
4463 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4464 wrapping around to the top of the word when we drop off the bottom.
4465 In the worst case this code should produce no more than four insns.
4466
4467 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4468 constants, shifted to any arbitrary location. We should always start
4469 at the MSB. */
4470 do
4471 {
4472 int end;
4473 unsigned int b1, b2, b3, b4;
4474 unsigned HOST_WIDE_INT result;
4475 int loc;
4476
4477 gcc_assert (insns < 4);
4478
4479 if (i <= 0)
4480 i += 32;
4481
4482 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4483 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4484 {
4485 loc = i;
4486 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4487 /* We can use addw/subw for the last 12 bits. */
4488 result = remainder;
4489 else
4490 {
4491 /* Use an 8-bit shifted/rotated immediate. */
4492 end = i - 8;
4493 if (end < 0)
4494 end += 32;
4495 result = remainder & ((0x0ff << end)
4496 | ((i < end) ? (0xff >> (32 - end))
4497 : 0));
4498 i -= 8;
4499 }
4500 }
4501 else
4502 {
4503 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4504 arbitrary shifts. */
4505 i -= TARGET_ARM ? 2 : 1;
4506 continue;
4507 }
4508
4509 /* Next, see if we can do a better job with a thumb2 replicated
4510 constant.
4511
4512 We do it this way around to catch the cases like 0x01F001E0 where
4513 two 8-bit immediates would work, but a replicated constant would
4514 make it worse.
4515
4516 TODO: 16-bit constants that don't clear all the bits, but still win.
4517 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4518 if (TARGET_THUMB2)
4519 {
4520 b1 = (remainder & 0xff000000) >> 24;
4521 b2 = (remainder & 0x00ff0000) >> 16;
4522 b3 = (remainder & 0x0000ff00) >> 8;
4523 b4 = remainder & 0xff;
4524
4525 if (loc > 24)
4526 {
4527 /* The 8-bit immediate already found clears b1 (and maybe b2),
4528 but must leave b3 and b4 alone. */
4529
4530 /* First try to find a 32-bit replicated constant that clears
4531 almost everything. We can assume that we can't do it in one,
4532 or else we wouldn't be here. */
4533 unsigned int tmp = b1 & b2 & b3 & b4;
4534 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4535 + (tmp << 24);
4536 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4537 + (tmp == b3) + (tmp == b4);
4538 if (tmp
4539 && (matching_bytes >= 3
4540 || (matching_bytes == 2
4541 && const_ok_for_op (remainder & ~tmp2, code))))
4542 {
4543 /* At least 3 of the bytes match, and the fourth has at
4544 least as many bits set, or two of the bytes match
4545 and it will only require one more insn to finish. */
4546 result = tmp2;
4547 i = tmp != b1 ? 32
4548 : tmp != b2 ? 24
4549 : tmp != b3 ? 16
4550 : 8;
4551 }
4552
4553 /* Second, try to find a 16-bit replicated constant that can
4554 leave three of the bytes clear. If b2 or b4 is already
4555 zero, then we can. If the 8-bit from above would not
4556 clear b2 anyway, then we still win. */
4557 else if (b1 == b3 && (!b2 || !b4
4558 || (remainder & 0x00ff0000 & ~result)))
4559 {
4560 result = remainder & 0xff00ff00;
4561 i = 24;
4562 }
4563 }
4564 else if (loc > 16)
4565 {
4566 /* The 8-bit immediate already found clears b2 (and maybe b3)
4567 and we don't get here unless b1 is already clear, but it will
4568 leave b4 unchanged. */
4569
4570 /* If we can clear b2 and b4 at once, then we win, since the
4571 8-bits couldn't possibly reach that far. */
4572 if (b2 == b4)
4573 {
4574 result = remainder & 0x00ff00ff;
4575 i = 16;
4576 }
4577 }
4578 }
4579
4580 return_sequence->i[insns++] = result;
4581 remainder &= ~result;
4582
4583 if (code == SET || code == MINUS)
4584 code = PLUS;
4585 }
4586 while (remainder);
4587
4588 return insns;
4589 }
4590
4591 /* Emit an instruction with the indicated PATTERN. If COND is
4592 non-NULL, conditionalize the execution of the instruction on COND
4593 being true. */
4594
4595 static void
4596 emit_constant_insn (rtx cond, rtx pattern)
4597 {
4598 if (cond)
4599 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4600 emit_insn (pattern);
4601 }
4602
4603 /* As above, but extra parameter GENERATE which, if clear, suppresses
4604 RTL generation. */
4605
4606 static int
4607 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4608 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4609 int subtargets, int generate)
4610 {
4611 int can_invert = 0;
4612 int can_negate = 0;
4613 int final_invert = 0;
4614 int i;
4615 int set_sign_bit_copies = 0;
4616 int clear_sign_bit_copies = 0;
4617 int clear_zero_bit_copies = 0;
4618 int set_zero_bit_copies = 0;
4619 int insns = 0, neg_insns, inv_insns;
4620 unsigned HOST_WIDE_INT temp1, temp2;
4621 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4622 struct four_ints *immediates;
4623 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4624
4625 /* Find out which operations are safe for a given CODE. Also do a quick
4626 check for degenerate cases; these can occur when DImode operations
4627 are split. */
4628 switch (code)
4629 {
4630 case SET:
4631 can_invert = 1;
4632 break;
4633
4634 case PLUS:
4635 can_negate = 1;
4636 break;
4637
4638 case IOR:
4639 if (remainder == 0xffffffff)
4640 {
4641 if (generate)
4642 emit_constant_insn (cond,
4643 gen_rtx_SET (target,
4644 GEN_INT (ARM_SIGN_EXTEND (val))));
4645 return 1;
4646 }
4647
4648 if (remainder == 0)
4649 {
4650 if (reload_completed && rtx_equal_p (target, source))
4651 return 0;
4652
4653 if (generate)
4654 emit_constant_insn (cond, gen_rtx_SET (target, source));
4655 return 1;
4656 }
4657 break;
4658
4659 case AND:
4660 if (remainder == 0)
4661 {
4662 if (generate)
4663 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4664 return 1;
4665 }
4666 if (remainder == 0xffffffff)
4667 {
4668 if (reload_completed && rtx_equal_p (target, source))
4669 return 0;
4670 if (generate)
4671 emit_constant_insn (cond, gen_rtx_SET (target, source));
4672 return 1;
4673 }
4674 can_invert = 1;
4675 break;
4676
4677 case XOR:
4678 if (remainder == 0)
4679 {
4680 if (reload_completed && rtx_equal_p (target, source))
4681 return 0;
4682 if (generate)
4683 emit_constant_insn (cond, gen_rtx_SET (target, source));
4684 return 1;
4685 }
4686
4687 if (remainder == 0xffffffff)
4688 {
4689 if (generate)
4690 emit_constant_insn (cond,
4691 gen_rtx_SET (target,
4692 gen_rtx_NOT (mode, source)));
4693 return 1;
4694 }
4695 final_invert = 1;
4696 break;
4697
4698 case MINUS:
4699 /* We treat MINUS as (val - source), since (source - val) is always
4700 passed as (source + (-val)). */
4701 if (remainder == 0)
4702 {
4703 if (generate)
4704 emit_constant_insn (cond,
4705 gen_rtx_SET (target,
4706 gen_rtx_NEG (mode, source)));
4707 return 1;
4708 }
4709 if (const_ok_for_arm (val))
4710 {
4711 if (generate)
4712 emit_constant_insn (cond,
4713 gen_rtx_SET (target,
4714 gen_rtx_MINUS (mode, GEN_INT (val),
4715 source)));
4716 return 1;
4717 }
4718
4719 break;
4720
4721 default:
4722 gcc_unreachable ();
4723 }
4724
4725 /* If we can do it in one insn get out quickly. */
4726 if (const_ok_for_op (val, code))
4727 {
4728 if (generate)
4729 emit_constant_insn (cond,
4730 gen_rtx_SET (target,
4731 (source
4732 ? gen_rtx_fmt_ee (code, mode, source,
4733 GEN_INT (val))
4734 : GEN_INT (val))));
4735 return 1;
4736 }
4737
4738 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4739 insn. */
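/* E.g. (illustrative) x &= 0xffff becomes a single UXTH, and x &= 0x3ff
becomes a single "ubfx rD, rS, #0, #10" on Thumb-2 capable cores. */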
4740 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4741 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4742 {
4743 if (generate)
4744 {
4745 if (mode == SImode && i == 16)
4746 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4747 smaller insn. */
4748 emit_constant_insn (cond,
4749 gen_zero_extendhisi2
4750 (target, gen_lowpart (HImode, source)));
4751 else
4752 /* Extz only supports SImode, but we can coerce the operands
4753 into that mode. */
4754 emit_constant_insn (cond,
4755 gen_extzv_t2 (gen_lowpart (SImode, target),
4756 gen_lowpart (SImode, source),
4757 GEN_INT (i), const0_rtx));
4758 }
4759
4760 return 1;
4761 }
4762
4763 /* Calculate a few attributes that may be useful for specific
4764 optimizations. */
4765 /* Count number of leading zeros. */
4766 for (i = 31; i >= 0; i--)
4767 {
4768 if ((remainder & (1 << i)) == 0)
4769 clear_sign_bit_copies++;
4770 else
4771 break;
4772 }
4773
4774 /* Count number of leading 1's. */
4775 for (i = 31; i >= 0; i--)
4776 {
4777 if ((remainder & (1 << i)) != 0)
4778 set_sign_bit_copies++;
4779 else
4780 break;
4781 }
4782
4783 /* Count number of trailing zeros. */
4784 for (i = 0; i <= 31; i++)
4785 {
4786 if ((remainder & (1 << i)) == 0)
4787 clear_zero_bit_copies++;
4788 else
4789 break;
4790 }
4791
4792 /* Count number of trailing 1's. */
4793 for (i = 0; i <= 31; i++)
4794 {
4795 if ((remainder & (1 << i)) != 0)
4796 set_zero_bit_copies++;
4797 else
4798 break;
4799 }
4800
4801 switch (code)
4802 {
4803 case SET:
4804 /* See if we can do this by sign_extending a constant that is known
4805 to be negative. This is a good way of doing it, since the shift
4806 may well merge into a subsequent insn. */
4807 if (set_sign_bit_copies > 1)
4808 {
4809 if (const_ok_for_arm
4810 (temp1 = ARM_SIGN_EXTEND (remainder
4811 << (set_sign_bit_copies - 1))))
4812 {
4813 if (generate)
4814 {
4815 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4816 emit_constant_insn (cond,
4817 gen_rtx_SET (new_src, GEN_INT (temp1)));
4818 emit_constant_insn (cond,
4819 gen_ashrsi3 (target, new_src,
4820 GEN_INT (set_sign_bit_copies - 1)));
4821 }
4822 return 2;
4823 }
4824 /* For an inverted constant, we will need to set the low bits,
4825 these will be shifted out of harm's way. */
4826 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4827 if (const_ok_for_arm (~temp1))
4828 {
4829 if (generate)
4830 {
4831 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4832 emit_constant_insn (cond,
4833 gen_rtx_SET (new_src, GEN_INT (temp1)));
4834 emit_constant_insn (cond,
4835 gen_ashrsi3 (target, new_src,
4836 GEN_INT (set_sign_bit_copies - 1)));
4837 }
4838 return 2;
4839 }
4840 }
4841
4842 /* See if we can calculate the value as the difference between two
4843 valid immediates. */
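/* E.g. (illustrative) 0xfff is not a valid immediate but can be built in
two insns as 0x1000 - 1:
mov rD, #4096
sub rD, rD, #1 */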
4844 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4845 {
4846 int topshift = clear_sign_bit_copies & ~1;
4847
4848 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4849 & (0xff000000 >> topshift));
4850
4851 /* If temp1 is zero, then that means the 9 most significant
4852 bits of remainder were 1 and we've caused it to overflow.
4853 When topshift is 0 we don't need to do anything since we
4854 can borrow from 'bit 32'. */
4855 if (temp1 == 0 && topshift != 0)
4856 temp1 = 0x80000000 >> (topshift - 1);
4857
4858 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4859
4860 if (const_ok_for_arm (temp2))
4861 {
4862 if (generate)
4863 {
4864 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4865 emit_constant_insn (cond,
4866 gen_rtx_SET (new_src, GEN_INT (temp1)));
4867 emit_constant_insn (cond,
4868 gen_addsi3 (target, new_src,
4869 GEN_INT (-temp2)));
4870 }
4871
4872 return 2;
4873 }
4874 }
4875
4876 /* See if we can generate this by setting the bottom (or the top)
4877 16 bits, and then shifting these into the other half of the
4878 word. We only look for the simplest cases, to do more would cost
4879 too much. Be careful, however, not to generate this when the
4880 alternative would take fewer insns. */
4881 if (val & 0xffff0000)
4882 {
4883 temp1 = remainder & 0xffff0000;
4884 temp2 = remainder & 0x0000ffff;
4885
4886 /* Overlaps outside this range are best done using other methods. */
4887 for (i = 9; i < 24; i++)
4888 {
4889 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4890 && !const_ok_for_arm (temp2))
4891 {
4892 rtx new_src = (subtargets
4893 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4894 : target);
4895 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4896 source, subtargets, generate);
4897 source = new_src;
4898 if (generate)
4899 emit_constant_insn
4900 (cond,
4901 gen_rtx_SET
4902 (target,
4903 gen_rtx_IOR (mode,
4904 gen_rtx_ASHIFT (mode, source,
4905 GEN_INT (i)),
4906 source)));
4907 return insns + 1;
4908 }
4909 }
4910
4911 /* Don't duplicate cases already considered. */
4912 for (i = 17; i < 24; i++)
4913 {
4914 if (((temp1 | (temp1 >> i)) == remainder)
4915 && !const_ok_for_arm (temp1))
4916 {
4917 rtx new_src = (subtargets
4918 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4919 : target);
4920 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4921 source, subtargets, generate);
4922 source = new_src;
4923 if (generate)
4924 emit_constant_insn
4925 (cond,
4926 gen_rtx_SET (target,
4927 gen_rtx_IOR
4928 (mode,
4929 gen_rtx_LSHIFTRT (mode, source,
4930 GEN_INT (i)),
4931 source)));
4932 return insns + 1;
4933 }
4934 }
4935 }
4936 break;
4937
4938 case IOR:
4939 case XOR:
4940 /* If we have IOR or XOR, and the constant can be loaded in a
4941 single instruction, and we can find a temporary to put it in,
4942 then this can be done in two instructions instead of 3-4. */
4943 if (subtargets
4944 /* TARGET can't be NULL if SUBTARGETS is 0 */
4945 || (reload_completed && !reg_mentioned_p (target, source)))
4946 {
4947 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4948 {
4949 if (generate)
4950 {
4951 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4952
4953 emit_constant_insn (cond,
4954 gen_rtx_SET (sub, GEN_INT (val)));
4955 emit_constant_insn (cond,
4956 gen_rtx_SET (target,
4957 gen_rtx_fmt_ee (code, mode,
4958 source, sub)));
4959 }
4960 return 2;
4961 }
4962 }
4963
4964 if (code == XOR)
4965 break;
4966
4967 /* Convert
4968 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4969 followed by 0s, e.g. 0xfff00000) into
4970 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4971
4972 This can be done in 2 instructions by using shifts with mov or mvn.
4973 e.g. for
4974 x = x | 0xfff00000;
4975 we generate:
4976 mvn r0, r0, asl #12
4977 mvn r0, r0, lsr #12 */
4978 if (set_sign_bit_copies > 8
4979 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4980 {
4981 if (generate)
4982 {
4983 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4984 rtx shift = GEN_INT (set_sign_bit_copies);
4985
4986 emit_constant_insn
4987 (cond,
4988 gen_rtx_SET (sub,
4989 gen_rtx_NOT (mode,
4990 gen_rtx_ASHIFT (mode,
4991 source,
4992 shift))));
4993 emit_constant_insn
4994 (cond,
4995 gen_rtx_SET (target,
4996 gen_rtx_NOT (mode,
4997 gen_rtx_LSHIFTRT (mode, sub,
4998 shift))));
4999 }
5000 return 2;
5001 }
5002
5003 /* Convert
5004 x = y | constant (which has set_zero_bit_copies trailing 1s)
5005 to
5006 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5007
5008 E.g. for r0 = r0 | 0xfff
5009 we generate:
5010 mvn r0, r0, lsr #12
5011 mvn r0, r0, asl #12
5012 */
5013 if (set_zero_bit_copies > 8
5014 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5015 {
5016 if (generate)
5017 {
5018 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5019 rtx shift = GEN_INT (set_zero_bit_copies);
5020
5021 emit_constant_insn
5022 (cond,
5023 gen_rtx_SET (sub,
5024 gen_rtx_NOT (mode,
5025 gen_rtx_LSHIFTRT (mode,
5026 source,
5027 shift))));
5028 emit_constant_insn
5029 (cond,
5030 gen_rtx_SET (target,
5031 gen_rtx_NOT (mode,
5032 gen_rtx_ASHIFT (mode, sub,
5033 shift))));
5034 }
5035 return 2;
5036 }
5037
5038 /* This will never be reached for Thumb2 because orn is a valid
5039 instruction. This is for Thumb1 and the ARM 32 bit cases.
5040
5041 x = y | constant (such that ~constant is a valid constant)
5042 Transform this to
5043 x = ~(~y & ~constant).
5044 */
5045 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5046 {
5047 if (generate)
5048 {
5049 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5050 emit_constant_insn (cond,
5051 gen_rtx_SET (sub,
5052 gen_rtx_NOT (mode, source)));
5053 source = sub;
5054 if (subtargets)
5055 sub = gen_reg_rtx (mode);
5056 emit_constant_insn (cond,
5057 gen_rtx_SET (sub,
5058 gen_rtx_AND (mode, source,
5059 GEN_INT (temp1))));
5060 emit_constant_insn (cond,
5061 gen_rtx_SET (target,
5062 gen_rtx_NOT (mode, sub)));
5063 }
5064 return 3;
5065 }
5066 break;
5067
5068 case AND:
5069 /* See if two shifts will do 2 or more insn's worth of work. */
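/* Illustrative example: for x &= 0x0000ffff neither the constant nor its
   complement is a valid immediate, but clear_sign_bit_copies == 16, so
   mov rT, source, lsl #16
   mov target, rT, lsr #16
   clears the upper halfword in two instructions. */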
5070 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5071 {
5072 HOST_WIDE_INT shift_mask = ((0xffffffff
5073 << (32 - clear_sign_bit_copies))
5074 & 0xffffffff);
5075
5076 if ((remainder | shift_mask) != 0xffffffff)
5077 {
5078 HOST_WIDE_INT new_val
5079 = ARM_SIGN_EXTEND (remainder | shift_mask);
5080
5081 if (generate)
5082 {
5083 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5084 insns = arm_gen_constant (AND, SImode, cond, new_val,
5085 new_src, source, subtargets, 1);
5086 source = new_src;
5087 }
5088 else
5089 {
5090 rtx targ = subtargets ? NULL_RTX : target;
5091 insns = arm_gen_constant (AND, mode, cond, new_val,
5092 targ, source, subtargets, 0);
5093 }
5094 }
5095
5096 if (generate)
5097 {
5098 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5099 rtx shift = GEN_INT (clear_sign_bit_copies);
5100
5101 emit_insn (gen_ashlsi3 (new_src, source, shift));
5102 emit_insn (gen_lshrsi3 (target, new_src, shift));
5103 }
5104
5105 return insns + 2;
5106 }
5107
5108 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5109 {
5110 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5111
5112 if ((remainder | shift_mask) != 0xffffffff)
5113 {
5114 HOST_WIDE_INT new_val
5115 = ARM_SIGN_EXTEND (remainder | shift_mask);
5116 if (generate)
5117 {
5118 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5119
5120 insns = arm_gen_constant (AND, mode, cond, new_val,
5121 new_src, source, subtargets, 1);
5122 source = new_src;
5123 }
5124 else
5125 {
5126 rtx targ = subtargets ? NULL_RTX : target;
5127
5128 insns = arm_gen_constant (AND, mode, cond, new_val,
5129 targ, source, subtargets, 0);
5130 }
5131 }
5132
5133 if (generate)
5134 {
5135 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5136 rtx shift = GEN_INT (clear_zero_bit_copies);
5137
5138 emit_insn (gen_lshrsi3 (new_src, source, shift));
5139 emit_insn (gen_ashlsi3 (target, new_src, shift));
5140 }
5141
5142 return insns + 2;
5143 }
5144
5145 break;
5146
5147 default:
5148 break;
5149 }
5150
5151 /* Calculate what the instruction sequences would be if we generated it
5152 normally, negated, or inverted. */
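/* Illustrative example: for x &= 0xfffff0f0 the positive form is unusable
   (insns == 99), but the inverted value 0x00000f0f splits into the valid
   immediates 0x0f and 0xf00, so the loop below ends up emitting two BIC
   instructions. */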
5153 if (code == AND)
5154 /* AND cannot be split into multiple insns, so invert and use BIC. */
5155 insns = 99;
5156 else
5157 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5158
5159 if (can_negate)
5160 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5161 &neg_immediates);
5162 else
5163 neg_insns = 99;
5164
5165 if (can_invert || final_invert)
5166 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5167 &inv_immediates);
5168 else
5169 inv_insns = 99;
5170
5171 immediates = &pos_immediates;
5172
5173 /* Is the negated immediate sequence more efficient? */
5174 if (neg_insns < insns && neg_insns <= inv_insns)
5175 {
5176 insns = neg_insns;
5177 immediates = &neg_immediates;
5178 }
5179 else
5180 can_negate = 0;
5181
5182 /* Is the inverted immediate sequence more efficient?
5183 We must allow for an extra NOT instruction for XOR operations, although
5184 there is some chance that the final 'mvn' will get optimized later. */
5185 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5186 {
5187 insns = inv_insns;
5188 immediates = &inv_immediates;
5189 }
5190 else
5191 {
5192 can_invert = 0;
5193 final_invert = 0;
5194 }
5195
5196 /* Now output the chosen sequence as instructions. */
5197 if (generate)
5198 {
5199 for (i = 0; i < insns; i++)
5200 {
5201 rtx new_src, temp1_rtx;
5202
5203 temp1 = immediates->i[i];
5204
5205 if (code == SET || code == MINUS)
5206 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5207 else if ((final_invert || i < (insns - 1)) && subtargets)
5208 new_src = gen_reg_rtx (mode);
5209 else
5210 new_src = target;
5211
5212 if (can_invert)
5213 temp1 = ~temp1;
5214 else if (can_negate)
5215 temp1 = -temp1;
5216
5217 temp1 = trunc_int_for_mode (temp1, mode);
5218 temp1_rtx = GEN_INT (temp1);
5219
5220 if (code == SET)
5221 ;
5222 else if (code == MINUS)
5223 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5224 else
5225 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5226
5227 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5228 source = new_src;
5229
5230 if (code == SET)
5231 {
5232 can_negate = can_invert;
5233 can_invert = 0;
5234 code = PLUS;
5235 }
5236 else if (code == MINUS)
5237 code = PLUS;
5238 }
5239 }
5240
5241 if (final_invert)
5242 {
5243 if (generate)
5244 emit_constant_insn (cond, gen_rtx_SET (target,
5245 gen_rtx_NOT (mode, source)));
5246 insns++;
5247 }
5248
5249 return insns;
5250 }
5251
5252 /* Canonicalize a comparison so that we are more likely to recognize it.
5253 This can be done for a few constant compares, where we can make the
5254 immediate value easier to load. */
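/* Illustrative example: for (x > 0x1fff) neither 0x1fff nor -0x1fff is a
   valid immediate, but 0x2000 is, so the comparison is rewritten as
   (x >= 0x2000) and needs only
   cmp x, #8192
   to set the condition codes. */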
5255
5256 static void
5257 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5258 bool op0_preserve_value)
5259 {
5260 machine_mode mode;
5261 unsigned HOST_WIDE_INT i, maxval;
5262
5263 mode = GET_MODE (*op0);
5264 if (mode == VOIDmode)
5265 mode = GET_MODE (*op1);
5266
5267 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5268
5269 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5270 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5271 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5272 for GTU/LEU in Thumb mode. */
5273 if (mode == DImode)
5274 {
5275
5276 if (*code == GT || *code == LE
5277 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5278 {
5279 /* Missing comparison. First try to use an available
5280 comparison. */
5281 if (CONST_INT_P (*op1))
5282 {
5283 i = INTVAL (*op1);
5284 switch (*code)
5285 {
5286 case GT:
5287 case LE:
5288 if (i != maxval
5289 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5290 {
5291 *op1 = GEN_INT (i + 1);
5292 *code = *code == GT ? GE : LT;
5293 return;
5294 }
5295 break;
5296 case GTU:
5297 case LEU:
5298 if (i != ~((unsigned HOST_WIDE_INT) 0)
5299 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5300 {
5301 *op1 = GEN_INT (i + 1);
5302 *code = *code == GTU ? GEU : LTU;
5303 return;
5304 }
5305 break;
5306 default:
5307 gcc_unreachable ();
5308 }
5309 }
5310
5311 /* If that did not work, reverse the condition. */
5312 if (!op0_preserve_value)
5313 {
5314 std::swap (*op0, *op1);
5315 *code = (int)swap_condition ((enum rtx_code)*code);
5316 }
5317 }
5318 return;
5319 }
5320
5321 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5322 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5323 to facilitate possible combining with a cmp into 'ands'. */
5324 if (mode == SImode
5325 && GET_CODE (*op0) == ZERO_EXTEND
5326 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5327 && GET_MODE (XEXP (*op0, 0)) == QImode
5328 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5329 && subreg_lowpart_p (XEXP (*op0, 0))
5330 && *op1 == const0_rtx)
5331 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5332 GEN_INT (255));
5333
5334 /* Comparisons smaller than DImode. Only adjust comparisons against
5335 an out-of-range constant. */
5336 if (!CONST_INT_P (*op1)
5337 || const_ok_for_arm (INTVAL (*op1))
5338 || const_ok_for_arm (- INTVAL (*op1)))
5339 return;
5340
5341 i = INTVAL (*op1);
5342
5343 switch (*code)
5344 {
5345 case EQ:
5346 case NE:
5347 return;
5348
5349 case GT:
5350 case LE:
5351 if (i != maxval
5352 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5353 {
5354 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5355 *code = *code == GT ? GE : LT;
5356 return;
5357 }
5358 break;
5359
5360 case GE:
5361 case LT:
5362 if (i != ~maxval
5363 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5364 {
5365 *op1 = GEN_INT (i - 1);
5366 *code = *code == GE ? GT : LE;
5367 return;
5368 }
5369 break;
5370
5371 case GTU:
5372 case LEU:
5373 if (i != ~((unsigned HOST_WIDE_INT) 0)
5374 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5375 {
5376 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5377 *code = *code == GTU ? GEU : LTU;
5378 return;
5379 }
5380 break;
5381
5382 case GEU:
5383 case LTU:
5384 if (i != 0
5385 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5386 {
5387 *op1 = GEN_INT (i - 1);
5388 *code = *code == GEU ? GTU : LEU;
5389 return;
5390 }
5391 break;
5392
5393 default:
5394 gcc_unreachable ();
5395 }
5396 }
5397
5398
5399 /* Define how to find the value returned by a function. */
5400
5401 static rtx
5402 arm_function_value(const_tree type, const_tree func,
5403 bool outgoing ATTRIBUTE_UNUSED)
5404 {
5405 machine_mode mode;
5406 int unsignedp ATTRIBUTE_UNUSED;
5407 rtx r ATTRIBUTE_UNUSED;
5408
5409 mode = TYPE_MODE (type);
5410
5411 if (TARGET_AAPCS_BASED)
5412 return aapcs_allocate_return_reg (mode, type, func);
5413
5414 /* Promote integer types. */
5415 if (INTEGRAL_TYPE_P (type))
5416 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5417
5418 /* Promotes small structs returned in a register to full-word size
5419 for big-endian AAPCS. */
5420 if (arm_return_in_msb (type))
5421 {
5422 HOST_WIDE_INT size = int_size_in_bytes (type);
5423 if (size % UNITS_PER_WORD != 0)
5424 {
5425 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5426 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5427 }
5428 }
5429
5430 return arm_libcall_value_1 (mode);
5431 }
5432
5433 /* libcall hashtable helpers. */
5434
5435 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5436 {
5437 static inline hashval_t hash (const rtx_def *);
5438 static inline bool equal (const rtx_def *, const rtx_def *);
5439 static inline void remove (rtx_def *);
5440 };
5441
5442 inline bool
5443 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5444 {
5445 return rtx_equal_p (p1, p2);
5446 }
5447
5448 inline hashval_t
5449 libcall_hasher::hash (const rtx_def *p1)
5450 {
5451 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5452 }
5453
5454 typedef hash_table<libcall_hasher> libcall_table_type;
5455
5456 static void
5457 add_libcall (libcall_table_type *htab, rtx libcall)
5458 {
5459 *htab->find_slot (libcall, INSERT) = libcall;
5460 }
5461
5462 static bool
5463 arm_libcall_uses_aapcs_base (const_rtx libcall)
5464 {
5465 static bool init_done = false;
5466 static libcall_table_type *libcall_htab = NULL;
5467
5468 if (!init_done)
5469 {
5470 init_done = true;
5471
5472 libcall_htab = new libcall_table_type (31);
5473 add_libcall (libcall_htab,
5474 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5475 add_libcall (libcall_htab,
5476 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5477 add_libcall (libcall_htab,
5478 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5479 add_libcall (libcall_htab,
5480 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5481
5482 add_libcall (libcall_htab,
5483 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5484 add_libcall (libcall_htab,
5485 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5486 add_libcall (libcall_htab,
5487 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5488 add_libcall (libcall_htab,
5489 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5490
5491 add_libcall (libcall_htab,
5492 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5493 add_libcall (libcall_htab,
5494 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5495 add_libcall (libcall_htab,
5496 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5497 add_libcall (libcall_htab,
5498 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5501 add_libcall (libcall_htab,
5502 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5503 add_libcall (libcall_htab,
5504 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5505 add_libcall (libcall_htab,
5506 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5507
5508 /* Values from double-precision helper functions are returned in core
5509 registers if the selected core only supports single-precision
5510 arithmetic, even if we are using the hard-float ABI. The same is
5511 true for single-precision helpers, but we will never be using the
5512 hard-float ABI on a CPU which doesn't support single-precision
5513 operations in hardware. */
5514 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5515 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5516 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5517 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5518 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5519 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5520 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5521 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5522 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5523 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5524 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5525 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5526 SFmode));
5527 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5528 DFmode));
5529 add_libcall (libcall_htab,
5530 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5531 }
5532
5533 return libcall && libcall_htab->find (libcall) != NULL;
5534 }
5535
5536 static rtx
5537 arm_libcall_value_1 (machine_mode mode)
5538 {
5539 if (TARGET_AAPCS_BASED)
5540 return aapcs_libcall_value (mode);
5541 else if (TARGET_IWMMXT_ABI
5542 && arm_vector_mode_supported_p (mode))
5543 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5544 else
5545 return gen_rtx_REG (mode, ARG_REGISTER (1));
5546 }
5547
5548 /* Define how to find the value returned by a library function
5549 assuming the value has mode MODE. */
5550
5551 static rtx
5552 arm_libcall_value (machine_mode mode, const_rtx libcall)
5553 {
5554 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5555 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5556 {
5557 /* The following libcalls return their result in integer registers,
5558 even though they return a floating point value. */
5559 if (arm_libcall_uses_aapcs_base (libcall))
5560 return gen_rtx_REG (mode, ARG_REGISTER(1));
5561
5562 }
5563
5564 return arm_libcall_value_1 (mode);
5565 }
5566
5567 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5568
5569 static bool
5570 arm_function_value_regno_p (const unsigned int regno)
5571 {
5572 if (regno == ARG_REGISTER (1)
5573 || (TARGET_32BIT
5574 && TARGET_AAPCS_BASED
5575 && TARGET_HARD_FLOAT
5576 && regno == FIRST_VFP_REGNUM)
5577 || (TARGET_IWMMXT_ABI
5578 && regno == FIRST_IWMMXT_REGNUM))
5579 return true;
5580
5581 return false;
5582 }
5583
5584 /* Determine the amount of memory needed to store the possible return
5585 registers of an untyped call. */
5586 int
5587 arm_apply_result_size (void)
5588 {
5589 int size = 16;
5590
5591 if (TARGET_32BIT)
5592 {
5593 if (TARGET_HARD_FLOAT_ABI)
5594 size += 32;
5595 if (TARGET_IWMMXT_ABI)
5596 size += 8;
5597 }
5598
5599 return size;
5600 }
5601
5602 /* Decide whether TYPE should be returned in memory (true)
5603 or in a register (false). FNTYPE is the type of the function making
5604 the call. */
5605 static bool
5606 arm_return_in_memory (const_tree type, const_tree fntype)
5607 {
5608 HOST_WIDE_INT size;
5609
5610 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5611
5612 if (TARGET_AAPCS_BASED)
5613 {
5614 /* Simple, non-aggregate types (i.e. not including vectors and
5615 complex) are always returned in a register (or registers).
5616 We don't care about which register here, so we can short-cut
5617 some of the detail. */
5618 if (!AGGREGATE_TYPE_P (type)
5619 && TREE_CODE (type) != VECTOR_TYPE
5620 && TREE_CODE (type) != COMPLEX_TYPE)
5621 return false;
5622
5623 /* Any return value that is no larger than one word can be
5624 returned in r0. */
5625 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5626 return false;
5627
5628 /* Check any available co-processors to see if they accept the
5629 type as a register candidate (VFP, for example, can return
5630 some aggregates in consecutive registers). These aren't
5631 available if the call is variadic. */
5632 if (aapcs_select_return_coproc (type, fntype) >= 0)
5633 return false;
5634
5635 /* Vector values should be returned using ARM registers, not
5636 memory (unless they're over 16 bytes, which will break since
5637 we only have four call-clobbered registers to play with). */
5638 if (TREE_CODE (type) == VECTOR_TYPE)
5639 return (size < 0 || size > (4 * UNITS_PER_WORD));
5640
5641 /* The rest go in memory. */
5642 return true;
5643 }
5644
5645 if (TREE_CODE (type) == VECTOR_TYPE)
5646 return (size < 0 || size > (4 * UNITS_PER_WORD));
5647
5648 if (!AGGREGATE_TYPE_P (type)
5649 && TREE_CODE (type) != VECTOR_TYPE)
5650 /* All simple types are returned in registers. */
5651 return false;
5652
5653 if (arm_abi != ARM_ABI_APCS)
5654 {
5655 /* ATPCS and later return aggregate types in memory only if they are
5656 larger than a word (or are variable size). */
5657 return (size < 0 || size > UNITS_PER_WORD);
5658 }
5659
5660 /* For the arm-wince targets we choose to be compatible with Microsoft's
5661 ARM and Thumb compilers, which always return aggregates in memory. */
5662 #ifndef ARM_WINCE
5663 /* All structures/unions bigger than one word are returned in memory.
5664 Also catch the case where int_size_in_bytes returns -1. In this case
5665 the aggregate is either huge or of variable size, and in either case
5666 we will want to return it via memory and not in a register. */
5667 if (size < 0 || size > UNITS_PER_WORD)
5668 return true;
5669
5670 if (TREE_CODE (type) == RECORD_TYPE)
5671 {
5672 tree field;
5673
5674 /* For a struct the APCS says that we only return in a register
5675 if the type is 'integer like' and every addressable element
5676 has an offset of zero. For practical purposes this means
5677 that the structure can have at most one non bit-field element
5678 and that this element must be the first one in the structure. */
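/* Illustrative APCS examples (assuming arm_abi == ARM_ABI_APCS):
   struct { int i; } and struct { short s; int bf : 8; } are returned in
   r0, because only their first member is addressable; but
   struct { char a; char b; } goes in memory, since the second member is
   not a bit-field, and struct { float f; } goes in memory because its
   first member is a float. */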
5679
5680 /* Find the first field, ignoring non FIELD_DECL things which will
5681 have been created by C++. */
5682 for (field = TYPE_FIELDS (type);
5683 field && TREE_CODE (field) != FIELD_DECL;
5684 field = DECL_CHAIN (field))
5685 continue;
5686
5687 if (field == NULL)
5688 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5689
5690 /* Check that the first field is valid for returning in a register. */
5691
5692 /* ... Floats are not allowed */
5693 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5694 return true;
5695
5696 /* ... Aggregates that are not themselves valid for returning in
5697 a register are not allowed. */
5698 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5699 return true;
5700
5701 /* Now check the remaining fields, if any. Only bitfields are allowed,
5702 since they are not addressable. */
5703 for (field = DECL_CHAIN (field);
5704 field;
5705 field = DECL_CHAIN (field))
5706 {
5707 if (TREE_CODE (field) != FIELD_DECL)
5708 continue;
5709
5710 if (!DECL_BIT_FIELD_TYPE (field))
5711 return true;
5712 }
5713
5714 return false;
5715 }
5716
5717 if (TREE_CODE (type) == UNION_TYPE)
5718 {
5719 tree field;
5720
5721 /* Unions can be returned in registers if every element is
5722 integral, or can be returned in an integer register. */
5723 for (field = TYPE_FIELDS (type);
5724 field;
5725 field = DECL_CHAIN (field))
5726 {
5727 if (TREE_CODE (field) != FIELD_DECL)
5728 continue;
5729
5730 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5731 return true;
5732
5733 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5734 return true;
5735 }
5736
5737 return false;
5738 }
5739 #endif /* not ARM_WINCE */
5740
5741 /* Return all other types in memory. */
5742 return true;
5743 }
5744
5745 const struct pcs_attribute_arg
5746 {
5747 const char *arg;
5748 enum arm_pcs value;
5749 } pcs_attribute_args[] =
5750 {
5751 {"aapcs", ARM_PCS_AAPCS},
5752 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5753 #if 0
5754 /* We could recognize these, but changes would be needed elsewhere
5755 * to implement them. */
5756 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5757 {"atpcs", ARM_PCS_ATPCS},
5758 {"apcs", ARM_PCS_APCS},
5759 #endif
5760 {NULL, ARM_PCS_UNKNOWN}
5761 };
5762
5763 static enum arm_pcs
5764 arm_pcs_from_attribute (tree attr)
5765 {
5766 const struct pcs_attribute_arg *ptr;
5767 const char *arg;
5768
5769 /* Get the value of the argument. */
5770 if (TREE_VALUE (attr) == NULL_TREE
5771 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5772 return ARM_PCS_UNKNOWN;
5773
5774 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5775
5776 /* Check it against the list of known arguments. */
5777 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5778 if (streq (arg, ptr->arg))
5779 return ptr->value;
5780
5781 /* An unrecognized PCS variant. */
5782 return ARM_PCS_UNKNOWN;
5783 }
5784
5785 /* Get the PCS variant to use for this call. TYPE is the function's type
5786 specification, DECL is the specific declaration. DECL may be null if
5787 the call could be indirect or if this is a library call. */
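/* Illustrative use of the attribute handled here (hypothetical
   declaration):
   double f (double) __attribute__((pcs("aapcs")));
   On a hard-float target this forces F onto the base AAPCS variant, so
   its argument and return value travel in core registers rather than in
   VFP registers. */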
5788 static enum arm_pcs
5789 arm_get_pcs_model (const_tree type, const_tree decl)
5790 {
5791 bool user_convention = false;
5792 enum arm_pcs user_pcs = arm_pcs_default;
5793 tree attr;
5794
5795 gcc_assert (type);
5796
5797 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5798 if (attr)
5799 {
5800 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5801 user_convention = true;
5802 }
5803
5804 if (TARGET_AAPCS_BASED)
5805 {
5806 /* Detect varargs functions. These always use the base rules
5807 (no argument is ever a candidate for a co-processor
5808 register). */
5809 bool base_rules = stdarg_p (type);
5810
5811 if (user_convention)
5812 {
5813 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5814 sorry ("non-AAPCS derived PCS variant");
5815 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5816 error ("variadic functions must use the base AAPCS variant");
5817 }
5818
5819 if (base_rules)
5820 return ARM_PCS_AAPCS;
5821 else if (user_convention)
5822 return user_pcs;
5823 else if (decl && flag_unit_at_a_time)
5824 {
5825 /* Local functions never leak outside this compilation unit,
5826 so we are free to use whatever conventions are
5827 appropriate. */
5828 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5829 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5830 if (i && i->local)
5831 return ARM_PCS_AAPCS_LOCAL;
5832 }
5833 }
5834 else if (user_convention && user_pcs != arm_pcs_default)
5835 sorry ("PCS variant");
5836
5837 /* For everything else we use the target's default. */
5838 return arm_pcs_default;
5839 }
5840
5841
5842 static void
5843 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
5844 const_tree fntype ATTRIBUTE_UNUSED,
5845 rtx libcall ATTRIBUTE_UNUSED,
5846 const_tree fndecl ATTRIBUTE_UNUSED)
5847 {
5848 /* Record the unallocated VFP registers. */
5849 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5850 pcum->aapcs_vfp_reg_alloc = 0;
5851 }
5852
5853 /* Walk down the type tree of TYPE counting consecutive base elements.
5854 If *MODEP is VOIDmode, then set it to the first valid floating point
5855 type. If a non-floating point type is found, or if a floating point
5856 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5857 otherwise return the count in the sub-tree. */
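/* Illustrative results: struct { float x, y, z; } sets *MODEP to SFmode
   and returns 3 (a homogeneous aggregate of three floats); _Complex double
   sets *MODEP to DFmode and returns 2; struct { float f; double d; } mixes
   base types and returns -1. */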
5858 static int
5859 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5860 {
5861 machine_mode mode;
5862 HOST_WIDE_INT size;
5863
5864 switch (TREE_CODE (type))
5865 {
5866 case REAL_TYPE:
5867 mode = TYPE_MODE (type);
5868 if (mode != DFmode && mode != SFmode && mode != HFmode)
5869 return -1;
5870
5871 if (*modep == VOIDmode)
5872 *modep = mode;
5873
5874 if (*modep == mode)
5875 return 1;
5876
5877 break;
5878
5879 case COMPLEX_TYPE:
5880 mode = TYPE_MODE (TREE_TYPE (type));
5881 if (mode != DFmode && mode != SFmode)
5882 return -1;
5883
5884 if (*modep == VOIDmode)
5885 *modep = mode;
5886
5887 if (*modep == mode)
5888 return 2;
5889
5890 break;
5891
5892 case VECTOR_TYPE:
5893 /* Use V2SImode and V4SImode as representatives of all 64-bit
5894 and 128-bit vector types, whether or not those modes are
5895 supported with the present options. */
5896 size = int_size_in_bytes (type);
5897 switch (size)
5898 {
5899 case 8:
5900 mode = V2SImode;
5901 break;
5902 case 16:
5903 mode = V4SImode;
5904 break;
5905 default:
5906 return -1;
5907 }
5908
5909 if (*modep == VOIDmode)
5910 *modep = mode;
5911
5912 /* Vector modes are considered to be opaque: two vectors are
5913 equivalent for the purposes of being homogeneous aggregates
5914 if they are the same size. */
5915 if (*modep == mode)
5916 return 1;
5917
5918 break;
5919
5920 case ARRAY_TYPE:
5921 {
5922 int count;
5923 tree index = TYPE_DOMAIN (type);
5924
5925 /* Can't handle incomplete types nor sizes that are not
5926 fixed. */
5927 if (!COMPLETE_TYPE_P (type)
5928 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5929 return -1;
5930
5931 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5932 if (count == -1
5933 || !index
5934 || !TYPE_MAX_VALUE (index)
5935 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5936 || !TYPE_MIN_VALUE (index)
5937 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5938 || count < 0)
5939 return -1;
5940
5941 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5942 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5943
5944 /* There must be no padding. */
5945 if (wi::to_wide (TYPE_SIZE (type))
5946 != count * GET_MODE_BITSIZE (*modep))
5947 return -1;
5948
5949 return count;
5950 }
5951
5952 case RECORD_TYPE:
5953 {
5954 int count = 0;
5955 int sub_count;
5956 tree field;
5957
5958 /* Can't handle incomplete types nor sizes that are not
5959 fixed. */
5960 if (!COMPLETE_TYPE_P (type)
5961 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5962 return -1;
5963
5964 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5965 {
5966 if (TREE_CODE (field) != FIELD_DECL)
5967 continue;
5968
5969 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5970 if (sub_count < 0)
5971 return -1;
5972 count += sub_count;
5973 }
5974
5975 /* There must be no padding. */
5976 if (wi::to_wide (TYPE_SIZE (type))
5977 != count * GET_MODE_BITSIZE (*modep))
5978 return -1;
5979
5980 return count;
5981 }
5982
5983 case UNION_TYPE:
5984 case QUAL_UNION_TYPE:
5985 {
5986 /* These aren't very interesting except in a degenerate case. */
5987 int count = 0;
5988 int sub_count;
5989 tree field;
5990
5991 /* Can't handle incomplete types nor sizes that are not
5992 fixed. */
5993 if (!COMPLETE_TYPE_P (type)
5994 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5995 return -1;
5996
5997 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5998 {
5999 if (TREE_CODE (field) != FIELD_DECL)
6000 continue;
6001
6002 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6003 if (sub_count < 0)
6004 return -1;
6005 count = count > sub_count ? count : sub_count;
6006 }
6007
6008 /* There must be no padding. */
6009 if (wi::to_wide (TYPE_SIZE (type))
6010 != count * GET_MODE_BITSIZE (*modep))
6011 return -1;
6012
6013 return count;
6014 }
6015
6016 default:
6017 break;
6018 }
6019
6020 return -1;
6021 }
6022
6023 /* Return true if PCS_VARIANT should use VFP registers. */
6024 static bool
6025 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6026 {
6027 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6028 {
6029 static bool seen_thumb1_vfp = false;
6030
6031 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6032 {
6033 sorry ("Thumb-1 hard-float VFP ABI");
6034 /* sorry() is not immediately fatal, so only display this once. */
6035 seen_thumb1_vfp = true;
6036 }
6037
6038 return true;
6039 }
6040
6041 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6042 return false;
6043
6044 return (TARGET_32BIT && TARGET_HARD_FLOAT
6045 && (TARGET_VFP_DOUBLE || !is_double));
6046 }
6047
6048 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6049 suitable for passing or returning in VFP registers for the PCS
6050 variant selected. If it is, then *BASE_MODE is updated to contain
6051 a machine mode describing each element of the argument's type and
6052 *COUNT to hold the number of such elements. */
6053 static bool
6054 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6055 machine_mode mode, const_tree type,
6056 machine_mode *base_mode, int *count)
6057 {
6058 machine_mode new_mode = VOIDmode;
6059
6060 /* If we have the type information, prefer that to working things
6061 out from the mode. */
6062 if (type)
6063 {
6064 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6065
6066 if (ag_count > 0 && ag_count <= 4)
6067 *count = ag_count;
6068 else
6069 return false;
6070 }
6071 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6072 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6073 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6074 {
6075 *count = 1;
6076 new_mode = mode;
6077 }
6078 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6079 {
6080 *count = 2;
6081 new_mode = (mode == DCmode ? DFmode : SFmode);
6082 }
6083 else
6084 return false;
6085
6086
6087 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6088 return false;
6089
6090 *base_mode = new_mode;
6091 return true;
6092 }
6093
6094 static bool
6095 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6096 machine_mode mode, const_tree type)
6097 {
6098 int count ATTRIBUTE_UNUSED;
6099 machine_mode ag_mode ATTRIBUTE_UNUSED;
6100
6101 if (!use_vfp_abi (pcs_variant, false))
6102 return false;
6103 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6104 &ag_mode, &count);
6105 }
6106
6107 static bool
6108 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6109 const_tree type)
6110 {
6111 if (!use_vfp_abi (pcum->pcs_variant, false))
6112 return false;
6113
6114 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6115 &pcum->aapcs_vfp_rmode,
6116 &pcum->aapcs_vfp_rcount);
6117 }
6118
6119 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6120 for the behaviour of this function. */
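/* Illustrative allocation: a DFmode argument gives shift == 2 and
   mask == 0x3, so the loop below scans s0, s2, s4, ... for an even-aligned
   pair of free single-precision registers (i.e. a free D register); a
   homogeneous aggregate of three floats gives shift == 1 and mask == 0x7,
   requiring three consecutive free S registers. */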
6121
6122 static bool
6123 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6124 const_tree type ATTRIBUTE_UNUSED)
6125 {
6126 int rmode_size
6127 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6128 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6129 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6130 int regno;
6131
6132 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6133 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6134 {
6135 pcum->aapcs_vfp_reg_alloc = mask << regno;
6136 if (mode == BLKmode
6137 || (mode == TImode && ! TARGET_NEON)
6138 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6139 {
6140 int i;
6141 int rcount = pcum->aapcs_vfp_rcount;
6142 int rshift = shift;
6143 machine_mode rmode = pcum->aapcs_vfp_rmode;
6144 rtx par;
6145 if (!TARGET_NEON)
6146 {
6147 /* Avoid using unsupported vector modes. */
6148 if (rmode == V2SImode)
6149 rmode = DImode;
6150 else if (rmode == V4SImode)
6151 {
6152 rmode = DImode;
6153 rcount *= 2;
6154 rshift /= 2;
6155 }
6156 }
6157 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6158 for (i = 0; i < rcount; i++)
6159 {
6160 rtx tmp = gen_rtx_REG (rmode,
6161 FIRST_VFP_REGNUM + regno + i * rshift);
6162 tmp = gen_rtx_EXPR_LIST
6163 (VOIDmode, tmp,
6164 GEN_INT (i * GET_MODE_SIZE (rmode)));
6165 XVECEXP (par, 0, i) = tmp;
6166 }
6167
6168 pcum->aapcs_reg = par;
6169 }
6170 else
6171 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6172 return true;
6173 }
6174 return false;
6175 }
6176
6177 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6178 comment there for the behaviour of this function. */
6179
6180 static rtx
6181 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6182 machine_mode mode,
6183 const_tree type ATTRIBUTE_UNUSED)
6184 {
6185 if (!use_vfp_abi (pcs_variant, false))
6186 return NULL;
6187
6188 if (mode == BLKmode
6189 || (GET_MODE_CLASS (mode) == MODE_INT
6190 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6191 && !TARGET_NEON))
6192 {
6193 int count;
6194 machine_mode ag_mode;
6195 int i;
6196 rtx par;
6197 int shift;
6198
6199 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6200 &ag_mode, &count);
6201
6202 if (!TARGET_NEON)
6203 {
6204 if (ag_mode == V2SImode)
6205 ag_mode = DImode;
6206 else if (ag_mode == V4SImode)
6207 {
6208 ag_mode = DImode;
6209 count *= 2;
6210 }
6211 }
6212 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6213 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6214 for (i = 0; i < count; i++)
6215 {
6216 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6217 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6218 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6219 XVECEXP (par, 0, i) = tmp;
6220 }
6221
6222 return par;
6223 }
6224
6225 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6226 }
6227
6228 static void
6229 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
6230 machine_mode mode ATTRIBUTE_UNUSED,
6231 const_tree type ATTRIBUTE_UNUSED)
6232 {
6233 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6234 pcum->aapcs_vfp_reg_alloc = 0;
6235 return;
6236 }
6237
6238 #define AAPCS_CP(X) \
6239 { \
6240 aapcs_ ## X ## _cum_init, \
6241 aapcs_ ## X ## _is_call_candidate, \
6242 aapcs_ ## X ## _allocate, \
6243 aapcs_ ## X ## _is_return_candidate, \
6244 aapcs_ ## X ## _allocate_return_reg, \
6245 aapcs_ ## X ## _advance \
6246 }
6247
6248 /* Table of co-processors that can be used to pass arguments in
6249 registers. Ideally no argument should be a candidate for more than
6250 one co-processor table entry, but the table is processed in order
6251 and stops after the first match. If that entry then fails to put
6252 the argument into a co-processor register, the argument will go on
6253 the stack. */
6254 static struct
6255 {
6256 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6257 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6258
6259 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6260 BLKmode) is a candidate for this co-processor's registers; this
6261 function should ignore any position-dependent state in
6262 CUMULATIVE_ARGS and only use call-type dependent information. */
6263 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6264
6265 /* Return true if the argument does get a co-processor register; it
6266 should set aapcs_reg to an RTX of the register allocated as is
6267 required for a return from FUNCTION_ARG. */
6268 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6269
6270 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6271 be returned in this co-processor's registers. */
6272 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6273
6274 /* Allocate and return an RTX element to hold the return type of a call. This
6275 routine must not fail and will only be called if is_return_candidate
6276 returned true with the same parameters. */
6277 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6278
6279 /* Finish processing this argument and prepare to start processing
6280 the next one. */
6281 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6282 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6283 {
6284 AAPCS_CP(vfp)
6285 };
6286
6287 #undef AAPCS_CP
6288
6289 static int
6290 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6291 const_tree type)
6292 {
6293 int i;
6294
6295 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6296 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6297 return i;
6298
6299 return -1;
6300 }
6301
6302 static int
6303 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6304 {
6305 /* We aren't passed a decl, so we can't check that a call is local.
6306 However, it isn't clear that that would be a win anyway, since it
6307 might limit some tail-calling opportunities. */
6308 enum arm_pcs pcs_variant;
6309
6310 if (fntype)
6311 {
6312 const_tree fndecl = NULL_TREE;
6313
6314 if (TREE_CODE (fntype) == FUNCTION_DECL)
6315 {
6316 fndecl = fntype;
6317 fntype = TREE_TYPE (fntype);
6318 }
6319
6320 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6321 }
6322 else
6323 pcs_variant = arm_pcs_default;
6324
6325 if (pcs_variant != ARM_PCS_AAPCS)
6326 {
6327 int i;
6328
6329 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6330 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6331 TYPE_MODE (type),
6332 type))
6333 return i;
6334 }
6335 return -1;
6336 }
6337
6338 static rtx
6339 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6340 const_tree fntype)
6341 {
6342 /* We aren't passed a decl, so we can't check that a call is local.
6343 However, it isn't clear that that would be a win anyway, since it
6344 might limit some tail-calling opportunities. */
6345 enum arm_pcs pcs_variant;
6346 int unsignedp ATTRIBUTE_UNUSED;
6347
6348 if (fntype)
6349 {
6350 const_tree fndecl = NULL_TREE;
6351
6352 if (TREE_CODE (fntype) == FUNCTION_DECL)
6353 {
6354 fndecl = fntype;
6355 fntype = TREE_TYPE (fntype);
6356 }
6357
6358 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6359 }
6360 else
6361 pcs_variant = arm_pcs_default;
6362
6363 /* Promote integer types. */
6364 if (type && INTEGRAL_TYPE_P (type))
6365 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6366
6367 if (pcs_variant != ARM_PCS_AAPCS)
6368 {
6369 int i;
6370
6371 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6372 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6373 type))
6374 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6375 mode, type);
6376 }
6377
6378 /* Promotes small structs returned in a register to full-word size
6379 for big-endian AAPCS. */
6380 if (type && arm_return_in_msb (type))
6381 {
6382 HOST_WIDE_INT size = int_size_in_bytes (type);
6383 if (size % UNITS_PER_WORD != 0)
6384 {
6385 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6386 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6387 }
6388 }
6389
6390 return gen_rtx_REG (mode, R0_REGNUM);
6391 }
6392
6393 static rtx
6394 aapcs_libcall_value (machine_mode mode)
6395 {
6396 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6397 && GET_MODE_SIZE (mode) <= 4)
6398 mode = SImode;
6399
6400 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6401 }
6402
6403 /* Lay out a function argument using the AAPCS rules. The rule
6404 numbers referred to here are those in the AAPCS. */
6405 static void
6406 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6407 const_tree type, bool named)
6408 {
6409 int nregs, nregs2;
6410 int ncrn;
6411
6412 /* We only need to do this once per argument. */
6413 if (pcum->aapcs_arg_processed)
6414 return;
6415
6416 pcum->aapcs_arg_processed = true;
6417
6418 /* Special case: if named is false then we are handling an incoming
6419 anonymous argument which is on the stack. */
6420 if (!named)
6421 return;
6422
6423 /* Is this a potential co-processor register candidate? */
6424 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6425 {
6426 int slot = aapcs_select_call_coproc (pcum, mode, type);
6427 pcum->aapcs_cprc_slot = slot;
6428
6429 /* We don't have to apply any of the rules from part B of the
6430 preparation phase, these are handled elsewhere in the
6431 compiler. */
6432
6433 if (slot >= 0)
6434 {
6435 /* A Co-processor register candidate goes either in its own
6436 class of registers or on the stack. */
6437 if (!pcum->aapcs_cprc_failed[slot])
6438 {
6439 /* C1.cp - Try to allocate the argument to co-processor
6440 registers. */
6441 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6442 return;
6443
6444 /* C2.cp - Put the argument on the stack and note that we
6445 can't assign any more candidates in this slot. We also
6446 need to note that we have allocated stack space, so that
6447 we won't later try to split a non-cprc candidate between
6448 core registers and the stack. */
6449 pcum->aapcs_cprc_failed[slot] = true;
6450 pcum->can_split = false;
6451 }
6452
6453 /* We didn't get a register, so this argument goes on the
6454 stack. */
6455 gcc_assert (pcum->can_split == false);
6456 return;
6457 }
6458 }
6459
6460 /* C3 - For double-word aligned arguments, round the NCRN up to the
6461 next even number. */
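/* Illustrative example: for f (int a, long long b), A takes r0 and leaves
   NCRN == 1; B needs doubleword alignment, so NCRN is rounded up to 2 and
   B occupies r2 and r3. */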
6462 ncrn = pcum->aapcs_ncrn;
6463 if (ncrn & 1)
6464 {
6465 int res = arm_needs_doubleword_align (mode, type);
6466 /* Only warn during RTL expansion of call stmts, otherwise we would
6467 warn e.g. during gimplification even on functions that will be
6468 always inlined, and we'd warn multiple times. Don't warn when
6469 called in expand_function_start either, as we warn instead in
6470 arm_function_arg_boundary in that case. */
6471 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6472 inform (input_location, "parameter passing for argument of type "
6473 "%qT changed in GCC 7.1", type);
6474 else if (res > 0)
6475 ncrn++;
6476 }
6477
6478 nregs = ARM_NUM_REGS2(mode, type);
6479
6480 /* Sigh, this test should really assert that nregs > 0, but a GCC
6481 extension allows empty structs and then gives them empty size; it
6482 then allows such a structure to be passed by value. For some of
6483 the code below we have to pretend that such an argument has
6484 non-zero size so that we 'locate' it correctly either in
6485 registers or on the stack. */
6486 gcc_assert (nregs >= 0);
6487
6488 nregs2 = nregs ? nregs : 1;
6489
6490 /* C4 - Argument fits entirely in core registers. */
6491 if (ncrn + nregs2 <= NUM_ARG_REGS)
6492 {
6493 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6494 pcum->aapcs_next_ncrn = ncrn + nregs;
6495 return;
6496 }
6497
6498 /* C5 - Some core registers left and there are no arguments already
6499 on the stack: split this argument between the remaining core
6500 registers and the stack. */
6501 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6502 {
6503 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6504 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6505 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6506 return;
6507 }
6508
6509 /* C6 - NCRN is set to 4. */
6510 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6511
6512 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6513 return;
6514 }
6515
6516 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6517 for a call to a function whose data type is FNTYPE.
6518 For a library call, FNTYPE is NULL. */
6519 void
6520 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6521 rtx libname,
6522 tree fndecl ATTRIBUTE_UNUSED)
6523 {
6524 /* Determine the calling convention for this call. */
6525 if (fntype)
6526 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6527 else
6528 pcum->pcs_variant = arm_pcs_default;
6529
6530 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6531 {
6532 if (arm_libcall_uses_aapcs_base (libname))
6533 pcum->pcs_variant = ARM_PCS_AAPCS;
6534
6535 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6536 pcum->aapcs_reg = NULL_RTX;
6537 pcum->aapcs_partial = 0;
6538 pcum->aapcs_arg_processed = false;
6539 pcum->aapcs_cprc_slot = -1;
6540 pcum->can_split = true;
6541
6542 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6543 {
6544 int i;
6545
6546 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6547 {
6548 pcum->aapcs_cprc_failed[i] = false;
6549 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6550 }
6551 }
6552 return;
6553 }
6554
6555 /* Legacy ABIs */
6556
6557 /* On the ARM, the offset starts at 0. */
6558 pcum->nregs = 0;
6559 pcum->iwmmxt_nregs = 0;
6560 pcum->can_split = true;
6561
6562 /* Varargs vectors are treated the same as long long.
6563 named_count avoids having to change the way arm handles 'named'. */
6564 pcum->named_count = 0;
6565 pcum->nargs = 0;
6566
6567 if (TARGET_REALLY_IWMMXT && fntype)
6568 {
6569 tree fn_arg;
6570
6571 for (fn_arg = TYPE_ARG_TYPES (fntype);
6572 fn_arg;
6573 fn_arg = TREE_CHAIN (fn_arg))
6574 pcum->named_count += 1;
6575
6576 if (! pcum->named_count)
6577 pcum->named_count = INT_MAX;
6578 }
6579 }
6580
6581 /* Return 1 if double word alignment is required for argument passing.
6582 Return -1 if double word alignment used to be required for argument
6583 passing before PR77728 ABI fix, but is not required anymore.
6584 Return 0 if double word alignment is not required and wasn't required
6585 before either. */
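/* Illustrative results: DImode and DFmode arguments return 1, since their
   64-bit alignment exceeds the 32-bit PARM_BOUNDARY; plain int returns 0;
   a struct with a long long FIELD_DECL returns 1, while one whose members
   all have word alignment or less returns 0. */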
6586 static int
6587 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6588 {
6589 if (!type)
6590 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6591
6592 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6593 if (!AGGREGATE_TYPE_P (type))
6594 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6595
6596 /* Array types: Use member alignment of element type. */
6597 if (TREE_CODE (type) == ARRAY_TYPE)
6598 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6599
6600 int ret = 0;
6601 /* Record/aggregate types: Use greatest member alignment of any member. */
6602 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6603 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6604 {
6605 if (TREE_CODE (field) == FIELD_DECL)
6606 return 1;
6607 else
6608 /* Before PR77728 fix, we were incorrectly considering also
6609 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6610 Make sure we can warn about that with -Wpsabi. */
6611 ret = -1;
6612 }
6613
6614 return ret;
6615 }
6616
6617
6618 /* Determine where to put an argument to a function.
6619 Value is zero to push the argument on the stack,
6620 or a hard register in which to store the argument.
6621
6622 MODE is the argument's machine mode.
6623 TYPE is the data type of the argument (as a tree).
6624 This is null for libcalls where that information may
6625 not be available.
6626 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6627 the preceding args and about the function being called.
6628 NAMED is nonzero if this argument is a named parameter
6629 (otherwise it is an extra parameter matching an ellipsis).
6630
6631 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6632 other arguments are passed on the stack. If (NAMED == 0) (which happens
6633 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6634 defined), say it is passed on the stack (function_prologue will
6635 arrange for it to be passed on the stack if necessary). */
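/* Illustrative example: for f (int a, int b, int c, int d, int e) the
   first four arguments are passed in r0-r3 and E is passed on the
   stack. */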
6636
6637 static rtx
6638 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6639 const_tree type, bool named)
6640 {
6641 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6642 int nregs;
6643
6644 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6645 a call insn (op3 of a call_value insn). */
6646 if (mode == VOIDmode)
6647 return const0_rtx;
6648
6649 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6650 {
6651 aapcs_layout_arg (pcum, mode, type, named);
6652 return pcum->aapcs_reg;
6653 }
6654
6655 /* Varargs vectors are treated the same as long long.
6656 named_count avoids having to change the way arm handles 'named'. */
6657 if (TARGET_IWMMXT_ABI
6658 && arm_vector_mode_supported_p (mode)
6659 && pcum->named_count > pcum->nargs + 1)
6660 {
6661 if (pcum->iwmmxt_nregs <= 9)
6662 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6663 else
6664 {
6665 pcum->can_split = false;
6666 return NULL_RTX;
6667 }
6668 }
6669
6670 /* Put doubleword aligned quantities in even register pairs. */
6671 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6672 {
6673 int res = arm_needs_doubleword_align (mode, type);
6674 if (res < 0 && warn_psabi)
6675 inform (input_location, "parameter passing for argument of type "
6676 "%qT changed in GCC 7.1", type);
6677 else if (res > 0)
6678 pcum->nregs++;
6679 }
6680
6681 /* Only allow splitting an arg between regs and memory if all preceding
6682 args were allocated to regs. For args passed by reference we only count
6683 the reference pointer. */
6684 if (pcum->can_split)
6685 nregs = 1;
6686 else
6687 nregs = ARM_NUM_REGS2 (mode, type);
6688
6689 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6690 return NULL_RTX;
6691
6692 return gen_rtx_REG (mode, pcum->nregs);
6693 }
6694
6695 static unsigned int
6696 arm_function_arg_boundary (machine_mode mode, const_tree type)
6697 {
6698 if (!ARM_DOUBLEWORD_ALIGN)
6699 return PARM_BOUNDARY;
6700
6701 int res = arm_needs_doubleword_align (mode, type);
6702 if (res < 0 && warn_psabi)
6703 inform (input_location, "parameter passing for argument of type %qT "
6704 "changed in GCC 7.1", type);
6705
6706 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6707 }
6708
6709 static int
6710 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6711 tree type, bool named)
6712 {
6713 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6714 int nregs = pcum->nregs;
6715
6716 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6717 {
6718 aapcs_layout_arg (pcum, mode, type, named);
6719 return pcum->aapcs_partial;
6720 }
6721
6722 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6723 return 0;
6724
6725 if (NUM_ARG_REGS > nregs
6726 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6727 && pcum->can_split)
6728 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6729
6730 return 0;
6731 }
6732
6733 /* Update the data in PCUM to advance over an argument
6734 of mode MODE and data type TYPE.
6735 (TYPE is null for libcalls where that information may not be available.) */
6736
6737 static void
6738 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6739 const_tree type, bool named)
6740 {
6741 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6742
6743 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6744 {
6745 aapcs_layout_arg (pcum, mode, type, named);
6746
6747 if (pcum->aapcs_cprc_slot >= 0)
6748 {
6749 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6750 type);
6751 pcum->aapcs_cprc_slot = -1;
6752 }
6753
6754 /* Generic stuff. */
6755 pcum->aapcs_arg_processed = false;
6756 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6757 pcum->aapcs_reg = NULL_RTX;
6758 pcum->aapcs_partial = 0;
6759 }
6760 else
6761 {
6762 pcum->nargs += 1;
6763 if (arm_vector_mode_supported_p (mode)
6764 && pcum->named_count > pcum->nargs
6765 && TARGET_IWMMXT_ABI)
6766 pcum->iwmmxt_nregs += 1;
6767 else
6768 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6769 }
6770 }
6771
6772 /* Variable sized types are passed by reference. This is a GCC
6773 extension to the ARM ABI. */
6774
6775 static bool
6776 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6777 machine_mode mode ATTRIBUTE_UNUSED,
6778 const_tree type, bool named ATTRIBUTE_UNUSED)
6779 {
6780 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6781 }
6782 \f
6783 /* Encode the current state of the #pragma [no_]long_calls. */
6784 typedef enum
6785 {
6786 OFF, /* No #pragma [no_]long_calls is in effect. */
6787 LONG, /* #pragma long_calls is in effect. */
6788 SHORT /* #pragma no_long_calls is in effect. */
6789 } arm_pragma_enum;
6790
6791 static arm_pragma_enum arm_pragma_long_calls = OFF;
6792
6793 void
6794 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6795 {
6796 arm_pragma_long_calls = LONG;
6797 }
6798
6799 void
6800 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6801 {
6802 arm_pragma_long_calls = SHORT;
6803 }
6804
6805 void
6806 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6807 {
6808 arm_pragma_long_calls = OFF;
6809 }
6810 \f
6811 /* Handle an attribute requiring a FUNCTION_DECL;
6812 arguments as in struct attribute_spec.handler. */
6813 static tree
6814 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6815 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6816 {
6817 if (TREE_CODE (*node) != FUNCTION_DECL)
6818 {
6819 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6820 name);
6821 *no_add_attrs = true;
6822 }
6823
6824 return NULL_TREE;
6825 }
6826
6827 /* Handle an "interrupt" or "isr" attribute;
6828 arguments as in struct attribute_spec.handler. */
6829 static tree
6830 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6831 bool *no_add_attrs)
6832 {
6833 if (DECL_P (*node))
6834 {
6835 if (TREE_CODE (*node) != FUNCTION_DECL)
6836 {
6837 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6838 name);
6839 *no_add_attrs = true;
6840 }
6841 /* FIXME: the argument, if any, is checked for type attributes;
6842 should it be checked for decl ones? */
6843 }
6844 else
6845 {
6846 if (TREE_CODE (*node) == FUNCTION_TYPE
6847 || TREE_CODE (*node) == METHOD_TYPE)
6848 {
6849 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6850 {
6851 warning (OPT_Wattributes, "%qE attribute ignored",
6852 name);
6853 *no_add_attrs = true;
6854 }
6855 }
6856 else if (TREE_CODE (*node) == POINTER_TYPE
6857 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6858 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6859 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6860 {
6861 *node = build_variant_type_copy (*node);
6862 TREE_TYPE (*node) = build_type_attribute_variant
6863 (TREE_TYPE (*node),
6864 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6865 *no_add_attrs = true;
6866 }
6867 else
6868 {
6869 /* Possibly pass this attribute on from the type to a decl. */
6870 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6871 | (int) ATTR_FLAG_FUNCTION_NEXT
6872 | (int) ATTR_FLAG_ARRAY_NEXT))
6873 {
6874 *no_add_attrs = true;
6875 return tree_cons (name, args, NULL_TREE);
6876 }
6877 else
6878 {
6879 warning (OPT_Wattributes, "%qE attribute ignored",
6880 name);
6881 }
6882 }
6883 }
6884
6885 return NULL_TREE;
6886 }
6887
6888 /* Handle a "pcs" attribute; arguments as in struct
6889 attribute_spec.handler. */
6890 static tree
6891 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6892 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6893 {
6894 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6895 {
6896 warning (OPT_Wattributes, "%qE attribute ignored", name);
6897 *no_add_attrs = true;
6898 }
6899 return NULL_TREE;
6900 }
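/* Illustrative only: the "pcs" attribute selects the procedure call
   standard for a function type, e.g.

       void (*fp) (int) __attribute__ ((pcs ("aapcs")));

   arm_pcs_from_attribute recognises the documented variant strings
   ("aapcs" and "aapcs-vfp"); anything else is rejected with the
   warning above.  */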
6901
6902 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6903 /* Handle the "notshared" attribute. This attribute is another way of
6904 requesting hidden visibility. ARM's compiler supports
6905 "__declspec(notshared)"; we support the same thing via an
6906 attribute. */
6907
6908 static tree
6909 arm_handle_notshared_attribute (tree *node,
6910 tree name ATTRIBUTE_UNUSED,
6911 tree args ATTRIBUTE_UNUSED,
6912 int flags ATTRIBUTE_UNUSED,
6913 bool *no_add_attrs)
6914 {
6915 tree decl = TYPE_NAME (*node);
6916
6917 if (decl)
6918 {
6919 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6920 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6921 *no_add_attrs = false;
6922 }
6923 return NULL_TREE;
6924 }
6925 #endif
6926
6927 /* This function returns true if a function with declaration FNDECL and type
6928 FNTYPE uses the stack to pass arguments or to return its value, and false
6929 otherwise. This is used for functions with the attributes
6930 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6931 diagnostic messages if the stack is used. NAME is the name of the attribute
6932 used. */
6933
6934 static bool
6935 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6936 {
6937 function_args_iterator args_iter;
6938 CUMULATIVE_ARGS args_so_far_v;
6939 cumulative_args_t args_so_far;
6940 bool first_param = true;
6941 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6942
6943 /* Error out if any argument is passed on the stack. */
6944 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6945 args_so_far = pack_cumulative_args (&args_so_far_v);
6946 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6947 {
6948 rtx arg_rtx;
6949 machine_mode arg_mode = TYPE_MODE (arg_type);
6950
6951 prev_arg_type = arg_type;
6952 if (VOID_TYPE_P (arg_type))
6953 continue;
6954
6955 if (!first_param)
6956 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6957 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6958 if (!arg_rtx
6959 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6960 {
6961 error ("%qE attribute not available to functions with arguments "
6962 "passed on the stack", name);
6963 return true;
6964 }
6965 first_param = false;
6966 }
6967
6968 /* Error out for variadic functions since we cannot control how many
6969 arguments will be passed and thus the stack could be used. stdarg_p () is
6970 not used for this check to avoid walking the argument list twice. */
6971 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6972 {
6973 error ("%qE attribute not available to functions with variable number "
6974 "of arguments", name);
6975 return true;
6976 }
6977
6978 /* Error out if return value is passed on the stack. */
6979 ret_type = TREE_TYPE (fntype);
6980 if (arm_return_in_memory (ret_type, fntype))
6981 {
6982 error ("%qE attribute not available to functions that return value on "
6983 "the stack", name);
6984 return true;
6985 }
6986 return false;
6987 }
6988
6989 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6990 function will check whether the attribute is allowed here and will add the
6991 attribute to the function declaration tree or otherwise issue a warning. */
6992
6993 static tree
6994 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6995 tree /* args */,
6996 int /* flags */,
6997 bool *no_add_attrs)
6998 {
6999 tree fndecl;
7000
7001 if (!use_cmse)
7002 {
7003 *no_add_attrs = true;
7004 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7005 name);
7006 return NULL_TREE;
7007 }
7008
7009 /* Ignore attribute for function types. */
7010 if (TREE_CODE (*node) != FUNCTION_DECL)
7011 {
7012 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7013 name);
7014 *no_add_attrs = true;
7015 return NULL_TREE;
7016 }
7017
7018 fndecl = *node;
7019
7020 /* Warn for static linkage functions. */
7021 if (!TREE_PUBLIC (fndecl))
7022 {
7023 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7024 "with static linkage", name);
7025 *no_add_attrs = true;
7026 return NULL_TREE;
7027 }
7028
7029 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7030 TREE_TYPE (fndecl));
7031 return NULL_TREE;
7032 }
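/* Illustrative only: with -mcmse, a secure-world entry point visible to
   non-secure code might look like

       int __attribute__ ((cmse_nonsecure_entry)) get_key (int slot);

   All arguments and the return value must fit in registers; otherwise
   cmse_func_args_or_return_in_stack rejects the attribute above.  */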
7033
7034
7035 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7036 function will check whether the attribute is allowed here and will add the
7037 attribute to the function type tree or otherwise issue a diagnostic. The
7038 reason we check this at declaration time is to only allow the use of the
7039 attribute with declarations of function pointers and not function
7040 declarations. This function checks NODE is of the expected type and issues
7041 diagnostics otherwise using NAME. If it is not of the expected type
7042 *NO_ADD_ATTRS will be set to true. */
7043
7044 static tree
7045 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7046 tree /* args */,
7047 int /* flags */,
7048 bool *no_add_attrs)
7049 {
7050 tree decl = NULL_TREE, fntype = NULL_TREE;
7051 tree type;
7052
7053 if (!use_cmse)
7054 {
7055 *no_add_attrs = true;
7056 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7057 name);
7058 return NULL_TREE;
7059 }
7060
7061 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7062 {
7063 decl = *node;
7064 fntype = TREE_TYPE (decl);
7065 }
7066
7067 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7068 fntype = TREE_TYPE (fntype);
7069
7070 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7071 {
7072 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7073 "function pointer", name);
7074 *no_add_attrs = true;
7075 return NULL_TREE;
7076 }
7077
7078 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7079
7080 if (*no_add_attrs)
7081 return NULL_TREE;
7082
7083 /* Prevent trees being shared among function types with and without
7084 cmse_nonsecure_call attribute. */
7085 type = TREE_TYPE (decl);
7086
7087 type = build_distinct_type_copy (type);
7088 TREE_TYPE (decl) = type;
7089 fntype = type;
7090
7091 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7092 {
7093 type = fntype;
7094 fntype = TREE_TYPE (fntype);
7095 fntype = build_distinct_type_copy (fntype);
7096 TREE_TYPE (type) = fntype;
7097 }
7098
7099 /* Construct a type attribute and add it to the function type. */
7100 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7101 TYPE_ATTRIBUTES (fntype));
7102 TYPE_ATTRIBUTES (fntype) = attrs;
7103 return NULL_TREE;
7104 }
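/* Illustrative only: the attribute applies to the function type behind a
   pointer, typically written as

       void __attribute__ ((cmse_nonsecure_call)) (*ns_callback) (int);

   Calls through ns_callback are then emitted as non-secure calls.  The
   distinct type copies made above keep this marker from leaking onto
   unrelated function types that merely share tree nodes.  */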
7105
7106 /* Return 0 if the attributes for two types are incompatible, 1 if they
7107 are compatible, and 2 if they are nearly compatible (which causes a
7108 warning to be generated). */
7109 static int
7110 arm_comp_type_attributes (const_tree type1, const_tree type2)
7111 {
7112 int l1, l2, s1, s2;
7113
7114 /* Check for mismatch of non-default calling convention. */
7115 if (TREE_CODE (type1) != FUNCTION_TYPE)
7116 return 1;
7117
7118 /* Check for mismatched call attributes. */
7119 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7120 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7121 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7122 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7123
7124 /* Only bother to check if an attribute is defined. */
7125 if (l1 | l2 | s1 | s2)
7126 {
7127 /* If one type has an attribute, the other must have the same attribute. */
7128 if ((l1 != l2) || (s1 != s2))
7129 return 0;
7130
7131 /* Disallow mixed attributes. */
7132 if ((l1 & s2) || (l2 & s1))
7133 return 0;
7134 }
7135
7136 /* Check for mismatched ISR attribute. */
7137 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7138 if (! l1)
7139 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7140 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7141 if (! l2)
7142 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7143 if (l1 != l2)
7144 return 0;
7145
7146 l1 = lookup_attribute ("cmse_nonsecure_call",
7147 TYPE_ATTRIBUTES (type1)) != NULL;
7148 l2 = lookup_attribute ("cmse_nonsecure_call",
7149 TYPE_ATTRIBUTES (type2)) != NULL;
7150
7151 if (l1 != l2)
7152 return 0;
7153
7154 return 1;
7155 }
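/* Illustrative only: two function types that differ in these attributes
   are treated as incompatible by the hook above, so code such as

       void f (void) __attribute__ ((long_call));
       void (*p) (void) = f;       p's type carries no long_call attribute

   is expected to draw an incompatible-pointer-type diagnostic; the same
   holds for mismatches of isr/interrupt or cmse_nonsecure_call.  */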
7156
7157 /* Assigns default attributes to newly defined type. This is used to
7158 set short_call/long_call attributes for function types of
7159 functions defined inside corresponding #pragma scopes. */
7160 static void
7161 arm_set_default_type_attributes (tree type)
7162 {
7163 /* Add __attribute__ ((long_call)) to all functions when
7164 inside #pragma long_calls, or __attribute__ ((short_call))
7165 when inside #pragma no_long_calls. */
7166 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7167 {
7168 tree type_attr_list, attr_name;
7169 type_attr_list = TYPE_ATTRIBUTES (type);
7170
7171 if (arm_pragma_long_calls == LONG)
7172 attr_name = get_identifier ("long_call");
7173 else if (arm_pragma_long_calls == SHORT)
7174 attr_name = get_identifier ("short_call");
7175 else
7176 return;
7177
7178 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7179 TYPE_ATTRIBUTES (type) = type_attr_list;
7180 }
7181 }
7182 \f
7183 /* Return true if DECL is known to be linked into section SECTION. */
7184
7185 static bool
7186 arm_function_in_section_p (tree decl, section *section)
7187 {
7188 /* We can only be certain about the prevailing symbol definition. */
7189 if (!decl_binds_to_current_def_p (decl))
7190 return false;
7191
7192 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7193 if (!DECL_SECTION_NAME (decl))
7194 {
7195 /* Make sure that we will not create a unique section for DECL. */
7196 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7197 return false;
7198 }
7199
7200 return function_section (decl) == section;
7201 }
7202
7203 /* Return nonzero if a 32-bit "long_call" should be generated for
7204 a call from the current function to DECL. We generate a long_call
7205 if the function:
7206
7207 a. has an __attribute__ ((long_call))
7208 or b. is within the scope of a #pragma long_calls
7209 or c. the -mlong-calls command line switch has been specified
7210
7211 However we do not generate a long call if the function:
7212
7213 d. has an __attribute__ ((short_call))
7214 or e. is inside the scope of a #pragma no_long_calls
7215 or f. is defined in the same section as the current function. */
7216
7217 bool
7218 arm_is_long_call_p (tree decl)
7219 {
7220 tree attrs;
7221
7222 if (!decl)
7223 return TARGET_LONG_CALLS;
7224
7225 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7226 if (lookup_attribute ("short_call", attrs))
7227 return false;
7228
7229 /* For "f", be conservative, and only cater for cases in which the
7230 whole of the current function is placed in the same section. */
7231 if (!flag_reorder_blocks_and_partition
7232 && TREE_CODE (decl) == FUNCTION_DECL
7233 && arm_function_in_section_p (decl, current_function_section ()))
7234 return false;
7235
7236 if (lookup_attribute ("long_call", attrs))
7237 return true;
7238
7239 return TARGET_LONG_CALLS;
7240 }
7241
7242 /* Return nonzero if it is ok to make a tail-call to DECL. */
7243 static bool
7244 arm_function_ok_for_sibcall (tree decl, tree exp)
7245 {
7246 unsigned long func_type;
7247
7248 if (cfun->machine->sibcall_blocked)
7249 return false;
7250
7251 /* Never tailcall something if we are generating code for Thumb-1. */
7252 if (TARGET_THUMB1)
7253 return false;
7254
7255 /* The PIC register is live on entry to VxWorks PLT entries, so we
7256 must make the call before restoring the PIC register. */
7257 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7258 return false;
7259
7260 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7261 may be used both as the target of the call and as the base register for
7262 restoring the VFP registers.  */
7263 if (TARGET_APCS_FRAME && TARGET_ARM
7264 && TARGET_HARD_FLOAT
7265 && decl && arm_is_long_call_p (decl))
7266 return false;
7267
7268 /* If we are interworking and the function is not declared static
7269 then we can't tail-call it unless we know that it exists in this
7270 compilation unit (since it might be a Thumb routine). */
7271 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7272 && !TREE_ASM_WRITTEN (decl))
7273 return false;
7274
7275 func_type = arm_current_func_type ();
7276 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7277 if (IS_INTERRUPT (func_type))
7278 return false;
7279
7280 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7281 generated for entry functions themselves. */
7282 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7283 return false;
7284
7285 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7286 this would complicate matters for later code generation. */
7287 if (TREE_CODE (exp) == CALL_EXPR)
7288 {
7289 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7290 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7291 return false;
7292 }
7293
7294 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7295 {
7296 /* Check that the return value locations are the same. For
7297 example that we aren't returning a value from the sibling in
7298 a VFP register but then need to transfer it to a core
7299 register. */
7300 rtx a, b;
7301 tree decl_or_type = decl;
7302
7303 /* If it is an indirect function pointer, get the function type. */
7304 if (!decl)
7305 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7306
7307 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7308 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7309 cfun->decl, false);
7310 if (!rtx_equal_p (a, b))
7311 return false;
7312 }
7313
7314 /* Never tailcall if function may be called with a misaligned SP. */
7315 if (IS_STACKALIGN (func_type))
7316 return false;
7317
7318 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7319 references should become a NOP. Don't convert such calls into
7320 sibling calls. */
7321 if (TARGET_AAPCS_BASED
7322 && arm_abi == ARM_ABI_AAPCS
7323 && decl
7324 && DECL_WEAK (decl))
7325 return false;
7326
7327 /* We cannot do a tailcall for an indirect call by descriptor if all the
7328 argument registers are used because the only register left to load the
7329 address is IP and it will already contain the static chain. */
7330 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7331 {
7332 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7333 CUMULATIVE_ARGS cum;
7334 cumulative_args_t cum_v;
7335
7336 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7337 cum_v = pack_cumulative_args (&cum);
7338
7339 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7340 {
7341 tree type = TREE_VALUE (t);
7342 if (!VOID_TYPE_P (type))
7343 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7344 }
7345
7346 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7347 return false;
7348 }
7349
7350 /* Everything else is ok. */
7351 return true;
7352 }
7353
7354 \f
7355 /* Addressing mode support functions. */
7356
7357 /* Return nonzero if X is a legitimate immediate operand when compiling
7358 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7359 int
7360 legitimate_pic_operand_p (rtx x)
7361 {
7362 if (GET_CODE (x) == SYMBOL_REF
7363 || (GET_CODE (x) == CONST
7364 && GET_CODE (XEXP (x, 0)) == PLUS
7365 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7366 return 0;
7367
7368 return 1;
7369 }
7370
7371 /* Record that the current function needs a PIC register. Initialize
7372 cfun->machine->pic_reg if we have not already done so. */
7373
7374 static void
7375 require_pic_register (void)
7376 {
7377 /* A lot of the logic here is made obscure by the fact that this
7378 routine gets called as part of the rtx cost estimation process.
7379 We don't want those calls to affect any assumptions about the real
7380 function; and further, we can't call entry_of_function() until we
7381 start the real expansion process. */
7382 if (!crtl->uses_pic_offset_table)
7383 {
7384 gcc_assert (can_create_pseudo_p ());
7385 if (arm_pic_register != INVALID_REGNUM
7386 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7387 {
7388 if (!cfun->machine->pic_reg)
7389 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7390
7391 /* Play games to avoid marking the function as needing pic
7392 if we are being called as part of the cost-estimation
7393 process. */
7394 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7395 crtl->uses_pic_offset_table = 1;
7396 }
7397 else
7398 {
7399 rtx_insn *seq, *insn;
7400
7401 if (!cfun->machine->pic_reg)
7402 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7403
7404 /* Play games to avoid marking the function as needing pic
7405 if we are being called as part of the cost-estimation
7406 process. */
7407 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7408 {
7409 crtl->uses_pic_offset_table = 1;
7410 start_sequence ();
7411
7412 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7413 && arm_pic_register > LAST_LO_REGNUM)
7414 emit_move_insn (cfun->machine->pic_reg,
7415 gen_rtx_REG (Pmode, arm_pic_register));
7416 else
7417 arm_load_pic_register (0UL);
7418
7419 seq = get_insns ();
7420 end_sequence ();
7421
7422 for (insn = seq; insn; insn = NEXT_INSN (insn))
7423 if (INSN_P (insn))
7424 INSN_LOCATION (insn) = prologue_location;
7425
7426 /* We can be called during expansion of PHI nodes, where
7427 we can't yet emit instructions directly in the final
7428 insn stream. Queue the insns on the entry edge; they will
7429 be committed after everything else is expanded. */
7430 insert_insn_on_edge (seq,
7431 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7432 }
7433 }
7434 }
7435 }
7436
7437 rtx
7438 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7439 {
7440 if (GET_CODE (orig) == SYMBOL_REF
7441 || GET_CODE (orig) == LABEL_REF)
7442 {
7443 if (reg == 0)
7444 {
7445 gcc_assert (can_create_pseudo_p ());
7446 reg = gen_reg_rtx (Pmode);
7447 }
7448
7449 /* VxWorks does not impose a fixed gap between segments; the run-time
7450 gap can be different from the object-file gap. We therefore can't
7451 use GOTOFF unless we are absolutely sure that the symbol is in the
7452 same segment as the GOT. Unfortunately, the flexibility of linker
7453 scripts means that we can't be sure of that in general, so assume
7454 that GOTOFF is never valid on VxWorks. */
7455 /* References to weak symbols cannot be resolved locally: they
7456 may be overridden by a non-weak definition at link time. */
7457 rtx_insn *insn;
7458 if ((GET_CODE (orig) == LABEL_REF
7459 || (GET_CODE (orig) == SYMBOL_REF
7460 && SYMBOL_REF_LOCAL_P (orig)
7461 && (SYMBOL_REF_DECL (orig)
7462 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7463 && NEED_GOT_RELOC
7464 && arm_pic_data_is_text_relative)
7465 insn = arm_pic_static_addr (orig, reg);
7466 else
7467 {
7468 rtx pat;
7469 rtx mem;
7470
7471 /* If this function doesn't have a pic register, create one now. */
7472 require_pic_register ();
7473
7474 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7475
7476 /* Make the MEM as close to a constant as possible. */
7477 mem = SET_SRC (pat);
7478 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7479 MEM_READONLY_P (mem) = 1;
7480 MEM_NOTRAP_P (mem) = 1;
7481
7482 insn = emit_insn (pat);
7483 }
7484
7485 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7486 by the loop pass. */
7487 set_unique_reg_note (insn, REG_EQUAL, orig);
7488
7489 return reg;
7490 }
7491 else if (GET_CODE (orig) == CONST)
7492 {
7493 rtx base, offset;
7494
7495 if (GET_CODE (XEXP (orig, 0)) == PLUS
7496 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7497 return orig;
7498
7499 /* Handle the case where we have: const (UNSPEC_TLS). */
7500 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7501 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7502 return orig;
7503
7504 /* Handle the case where we have:
7505 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7506 CONST_INT. */
7507 if (GET_CODE (XEXP (orig, 0)) == PLUS
7508 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7509 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7510 {
7511 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7512 return orig;
7513 }
7514
7515 if (reg == 0)
7516 {
7517 gcc_assert (can_create_pseudo_p ());
7518 reg = gen_reg_rtx (Pmode);
7519 }
7520
7521 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7522
7523 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7524 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7525 base == reg ? 0 : reg);
7526
7527 if (CONST_INT_P (offset))
7528 {
7529 /* The base register doesn't really matter; we only want to
7530 test the index for the appropriate mode. */
7531 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7532 {
7533 gcc_assert (can_create_pseudo_p ());
7534 offset = force_reg (Pmode, offset);
7535 }
7536
7537 if (CONST_INT_P (offset))
7538 return plus_constant (Pmode, base, INTVAL (offset));
7539 }
7540
7541 if (GET_MODE_SIZE (mode) > 4
7542 && (GET_MODE_CLASS (mode) == MODE_INT
7543 || TARGET_SOFT_FLOAT))
7544 {
7545 emit_insn (gen_addsi3 (reg, base, offset));
7546 return reg;
7547 }
7548
7549 return gen_rtx_PLUS (Pmode, base, offset);
7550 }
7551
7552 return orig;
7553 }
7554
7555
7556 /* Find a spare register to use during the prolog of a function. */
7557
7558 static int
7559 thumb_find_work_register (unsigned long pushed_regs_mask)
7560 {
7561 int reg;
7562
7563 /* Check the argument registers first as these are call-used. The
7564 register allocation order means that sometimes r3 might be used
7565 but earlier argument registers might not, so check them all. */
7566 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7567 if (!df_regs_ever_live_p (reg))
7568 return reg;
7569
7570 /* Before going on to check the call-saved registers we can try a couple
7571 more ways of deducing that r3 is available. The first is when we are
7572 pushing anonymous arguments onto the stack and we have less than 4
7573 registers worth of fixed arguments(*). In this case r3 will be part of
7574 the variable argument list and so we can be sure that it will be
7575 pushed right at the start of the function. Hence it will be available
7576 for the rest of the prologue.
7577 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7578 if (cfun->machine->uses_anonymous_args
7579 && crtl->args.pretend_args_size > 0)
7580 return LAST_ARG_REGNUM;
7581
7582 /* The other case is when we have fixed arguments but fewer than 4 registers'
7583 worth. In this case r3 might be used in the body of the function, but
7584 it is not being used to convey an argument into the function. In theory
7585 we could just check crtl->args.size to see how many bytes are
7586 being passed in argument registers, but it seems that it is unreliable.
7587 Sometimes it will have the value 0 when in fact arguments are being
7588 passed. (See testcase execute/20021111-1.c for an example). So we also
7589 check the args_info.nregs field as well. The problem with this field is
7590 that it makes no allowances for arguments that are passed to the
7591 function but which are not used. Hence we could miss an opportunity
7592 when a function has an unused argument in r3. But it is better to be
7593 safe than to be sorry. */
7594 if (! cfun->machine->uses_anonymous_args
7595 && crtl->args.size >= 0
7596 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7597 && (TARGET_AAPCS_BASED
7598 ? crtl->args.info.aapcs_ncrn < 4
7599 : crtl->args.info.nregs < 4))
7600 return LAST_ARG_REGNUM;
7601
7602 /* Otherwise look for a call-saved register that is going to be pushed. */
7603 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7604 if (pushed_regs_mask & (1 << reg))
7605 return reg;
7606
7607 if (TARGET_THUMB2)
7608 {
7609 /* Thumb-2 can use high regs. */
7610 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7611 if (pushed_regs_mask & (1 << reg))
7612 return reg;
7613 }
7614 /* Something went wrong - thumb_compute_save_reg_mask()
7615 should have arranged for a suitable register to be pushed. */
7616 gcc_unreachable ();
7617 }
7618
7619 static GTY(()) int pic_labelno;
7620
7621 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7622 low register. */
7623
7624 void
7625 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7626 {
7627 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7628
7629 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7630 return;
7631
7632 gcc_assert (flag_pic);
7633
7634 pic_reg = cfun->machine->pic_reg;
7635 if (TARGET_VXWORKS_RTP)
7636 {
7637 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7638 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7639 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7640
7641 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7642
7643 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7644 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7645 }
7646 else
7647 {
7648 /* We use an UNSPEC rather than a LABEL_REF because this label
7649 never appears in the code stream. */
7650
7651 labelno = GEN_INT (pic_labelno++);
7652 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7653 l1 = gen_rtx_CONST (VOIDmode, l1);
7654
7655 /* On the ARM the PC register contains 'dot + 8' at the time of the
7656 addition; on the Thumb it is 'dot + 4'. */
7657 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7658 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7659 UNSPEC_GOTSYM_OFF);
7660 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7661
7662 if (TARGET_32BIT)
7663 {
7664 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7665 }
7666 else /* TARGET_THUMB1 */
7667 {
7668 if (arm_pic_register != INVALID_REGNUM
7669 && REGNO (pic_reg) > LAST_LO_REGNUM)
7670 {
7671 /* We will have pushed the pic register, so we should always be
7672 able to find a work register. */
7673 pic_tmp = gen_rtx_REG (SImode,
7674 thumb_find_work_register (saved_regs));
7675 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7676 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7677 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7678 }
7679 else if (arm_pic_register != INVALID_REGNUM
7680 && arm_pic_register > LAST_LO_REGNUM
7681 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7682 {
7683 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7684 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7685 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7686 }
7687 else
7688 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7689 }
7690 }
7691
7692 /* Need to emit this whether or not we obey regdecls,
7693 since setjmp/longjmp can cause life info to screw up. */
7694 emit_use (pic_reg);
7695 }
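/* For orientation only, the non-VxWorks sequence emitted above has
   roughly this shape in ARM state (labels and register are schematic):

       ldr     rPIC, .Loffset
   .LPICn:
       add     rPIC, pc, rPIC            pc reads as .LPICn + 8 here
       ...
   .Loffset:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)

   which matches the 'dot + 8' (or 'dot + 4' for Thumb) adjustment applied
   when pic_rtx is built.  */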
7696
7697 /* Generate code to load the address of a static var when flag_pic is set. */
7698 static rtx_insn *
7699 arm_pic_static_addr (rtx orig, rtx reg)
7700 {
7701 rtx l1, labelno, offset_rtx;
7702
7703 gcc_assert (flag_pic);
7704
7705 /* We use an UNSPEC rather than a LABEL_REF because this label
7706 never appears in the code stream. */
7707 labelno = GEN_INT (pic_labelno++);
7708 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7709 l1 = gen_rtx_CONST (VOIDmode, l1);
7710
7711 /* On the ARM the PC register contains 'dot + 8' at the time of the
7712 addition; on the Thumb it is 'dot + 4'. */
7713 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7714 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7715 UNSPEC_SYMBOL_OFFSET);
7716 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7717
7718 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7719 }
7720
7721 /* Return nonzero if X is valid as an ARM state addressing register. */
7722 static int
7723 arm_address_register_rtx_p (rtx x, int strict_p)
7724 {
7725 int regno;
7726
7727 if (!REG_P (x))
7728 return 0;
7729
7730 regno = REGNO (x);
7731
7732 if (strict_p)
7733 return ARM_REGNO_OK_FOR_BASE_P (regno);
7734
7735 return (regno <= LAST_ARM_REGNUM
7736 || regno >= FIRST_PSEUDO_REGISTER
7737 || regno == FRAME_POINTER_REGNUM
7738 || regno == ARG_POINTER_REGNUM);
7739 }
7740
7741 /* Return TRUE if this rtx is the difference of a symbol and a label,
7742 and will reduce to a PC-relative relocation in the object file.
7743 Expressions like this can be left alone when generating PIC, rather
7744 than forced through the GOT. */
7745 static int
7746 pcrel_constant_p (rtx x)
7747 {
7748 if (GET_CODE (x) == MINUS)
7749 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7750
7751 return FALSE;
7752 }
7753
7754 /* Return true if X will surely end up in an index register after next
7755 splitting pass. */
7756 static bool
7757 will_be_in_index_register (const_rtx x)
7758 {
7759 /* arm.md: calculate_pic_address will split this into a register. */
7760 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7761 }
7762
7763 /* Return nonzero if X is a valid ARM state address operand. */
7764 int
7765 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7766 int strict_p)
7767 {
7768 bool use_ldrd;
7769 enum rtx_code code = GET_CODE (x);
7770
7771 if (arm_address_register_rtx_p (x, strict_p))
7772 return 1;
7773
7774 use_ldrd = (TARGET_LDRD
7775 && (mode == DImode || mode == DFmode));
7776
7777 if (code == POST_INC || code == PRE_DEC
7778 || ((code == PRE_INC || code == POST_DEC)
7779 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7780 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7781
7782 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7783 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7784 && GET_CODE (XEXP (x, 1)) == PLUS
7785 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7786 {
7787 rtx addend = XEXP (XEXP (x, 1), 1);
7788
7789 /* Don't allow ldrd post increment by register because it's hard
7790 to fix up invalid register choices. */
7791 if (use_ldrd
7792 && GET_CODE (x) == POST_MODIFY
7793 && REG_P (addend))
7794 return 0;
7795
7796 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7797 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7798 }
7799
7800 /* After reload constants split into minipools will have addresses
7801 from a LABEL_REF. */
7802 else if (reload_completed
7803 && (code == LABEL_REF
7804 || (code == CONST
7805 && GET_CODE (XEXP (x, 0)) == PLUS
7806 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7807 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7808 return 1;
7809
7810 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7811 return 0;
7812
7813 else if (code == PLUS)
7814 {
7815 rtx xop0 = XEXP (x, 0);
7816 rtx xop1 = XEXP (x, 1);
7817
7818 return ((arm_address_register_rtx_p (xop0, strict_p)
7819 && ((CONST_INT_P (xop1)
7820 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7821 || (!strict_p && will_be_in_index_register (xop1))))
7822 || (arm_address_register_rtx_p (xop1, strict_p)
7823 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7824 }
7825
7826 #if 0
7827 /* Reload currently can't handle MINUS, so disable this for now */
7828 else if (GET_CODE (x) == MINUS)
7829 {
7830 rtx xop0 = XEXP (x, 0);
7831 rtx xop1 = XEXP (x, 1);
7832
7833 return (arm_address_register_rtx_p (xop0, strict_p)
7834 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7835 }
7836 #endif
7837
7838 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7839 && code == SYMBOL_REF
7840 && CONSTANT_POOL_ADDRESS_P (x)
7841 && ! (flag_pic
7842 && symbol_mentioned_p (get_pool_constant (x))
7843 && ! pcrel_constant_p (get_pool_constant (x))))
7844 return 1;
7845
7846 return 0;
7847 }
7848
7849 /* Return true if we can avoid creating a constant pool entry for x. */
7850 static bool
7851 can_avoid_literal_pool_for_label_p (rtx x)
7852 {
7853 /* Normally we can assign constant values to target registers without
7854 the help of the constant pool. But there are cases where we have to
7855 use the constant pool, for example:
7856 1) assigning a label to a register;
7857 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7858 
7859 A constant pool access of the form:
7860 (set (reg r0) (mem (symbol_ref (".LC0"))))
7861 will cause the use of the literal pool (later, in arm_reorg).
7862 So here we mark such a form as invalid; the compiler will then
7863 adjust it into:
7864 (set (reg r0) (symbol_ref (".LC0")))
7865 (set (reg r0) (mem (reg r0))).
7866 No extra register is required, and (mem (reg r0)) won't cause the use
7867 of literal pools. */
7868 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7869 && CONSTANT_POOL_ADDRESS_P (x))
7870 return 1;
7871 return 0;
7872 }
7873
7874
7875 /* Return nonzero if X is a valid Thumb-2 address operand. */
7876 static int
7877 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7878 {
7879 bool use_ldrd;
7880 enum rtx_code code = GET_CODE (x);
7881
7882 if (arm_address_register_rtx_p (x, strict_p))
7883 return 1;
7884
7885 use_ldrd = (TARGET_LDRD
7886 && (mode == DImode || mode == DFmode));
7887
7888 if (code == POST_INC || code == PRE_DEC
7889 || ((code == PRE_INC || code == POST_DEC)
7890 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7891 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7892
7893 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7894 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7895 && GET_CODE (XEXP (x, 1)) == PLUS
7896 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7897 {
7898 /* Thumb-2 only has autoincrement by constant. */
7899 rtx addend = XEXP (XEXP (x, 1), 1);
7900 HOST_WIDE_INT offset;
7901
7902 if (!CONST_INT_P (addend))
7903 return 0;
7904
7905 offset = INTVAL (addend);
7906 if (GET_MODE_SIZE (mode) <= 4)
7907 return (offset > -256 && offset < 256);
7908
7909 return (use_ldrd && offset > -1024 && offset < 1024
7910 && (offset & 3) == 0);
7911 }
7912
7913 /* After reload constants split into minipools will have addresses
7914 from a LABEL_REF. */
7915 else if (reload_completed
7916 && (code == LABEL_REF
7917 || (code == CONST
7918 && GET_CODE (XEXP (x, 0)) == PLUS
7919 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7920 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7921 return 1;
7922
7923 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7924 return 0;
7925
7926 else if (code == PLUS)
7927 {
7928 rtx xop0 = XEXP (x, 0);
7929 rtx xop1 = XEXP (x, 1);
7930
7931 return ((arm_address_register_rtx_p (xop0, strict_p)
7932 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7933 || (!strict_p && will_be_in_index_register (xop1))))
7934 || (arm_address_register_rtx_p (xop1, strict_p)
7935 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7936 }
7937
7938 else if (can_avoid_literal_pool_for_label_p (x))
7939 return 0;
7940
7941 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7942 && code == SYMBOL_REF
7943 && CONSTANT_POOL_ADDRESS_P (x)
7944 && ! (flag_pic
7945 && symbol_mentioned_p (get_pool_constant (x))
7946 && ! pcrel_constant_p (get_pool_constant (x))))
7947 return 1;
7948
7949 return 0;
7950 }
7951
7952 /* Return nonzero if INDEX is valid for an address index operand in
7953 ARM state. */
7954 static int
7955 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7956 int strict_p)
7957 {
7958 HOST_WIDE_INT range;
7959 enum rtx_code code = GET_CODE (index);
7960
7961 /* Standard coprocessor addressing modes. */
7962 if (TARGET_HARD_FLOAT
7963 && (mode == SFmode || mode == DFmode))
7964 return (code == CONST_INT && INTVAL (index) < 1024
7965 && INTVAL (index) > -1024
7966 && (INTVAL (index) & 3) == 0);
7967
7968 /* For quad modes, we restrict the constant offset to be slightly less
7969 than what the instruction format permits. We do this because for
7970 quad mode moves, we will actually decompose them into two separate
7971 double-mode reads or writes. INDEX must therefore be a valid
7972 (double-mode) offset and so should INDEX+8. */
7973 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7974 return (code == CONST_INT
7975 && INTVAL (index) < 1016
7976 && INTVAL (index) > -1024
7977 && (INTVAL (index) & 3) == 0);
7978
7979 /* We have no such constraint on double mode offsets, so we permit the
7980 full range of the instruction format. */
7981 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7982 return (code == CONST_INT
7983 && INTVAL (index) < 1024
7984 && INTVAL (index) > -1024
7985 && (INTVAL (index) & 3) == 0);
7986
7987 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7988 return (code == CONST_INT
7989 && INTVAL (index) < 1024
7990 && INTVAL (index) > -1024
7991 && (INTVAL (index) & 3) == 0);
7992
7993 if (arm_address_register_rtx_p (index, strict_p)
7994 && (GET_MODE_SIZE (mode) <= 4))
7995 return 1;
7996
7997 if (mode == DImode || mode == DFmode)
7998 {
7999 if (code == CONST_INT)
8000 {
8001 HOST_WIDE_INT val = INTVAL (index);
8002
8003 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8004 If vldr is selected it uses arm_coproc_mem_operand. */
8005 if (TARGET_LDRD)
8006 return val > -256 && val < 256;
8007 else
8008 return val > -4096 && val < 4092;
8009 }
8010
8011 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8012 }
8013
8014 if (GET_MODE_SIZE (mode) <= 4
8015 && ! (arm_arch4
8016 && (mode == HImode
8017 || mode == HFmode
8018 || (mode == QImode && outer == SIGN_EXTEND))))
8019 {
8020 if (code == MULT)
8021 {
8022 rtx xiop0 = XEXP (index, 0);
8023 rtx xiop1 = XEXP (index, 1);
8024
8025 return ((arm_address_register_rtx_p (xiop0, strict_p)
8026 && power_of_two_operand (xiop1, SImode))
8027 || (arm_address_register_rtx_p (xiop1, strict_p)
8028 && power_of_two_operand (xiop0, SImode)));
8029 }
8030 else if (code == LSHIFTRT || code == ASHIFTRT
8031 || code == ASHIFT || code == ROTATERT)
8032 {
8033 rtx op = XEXP (index, 1);
8034
8035 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8036 && CONST_INT_P (op)
8037 && INTVAL (op) > 0
8038 && INTVAL (op) <= 31);
8039 }
8040 }
8041
8042 /* For ARM v4 we may be doing a sign-extend operation during the
8043 load. */
8044 if (arm_arch4)
8045 {
8046 if (mode == HImode
8047 || mode == HFmode
8048 || (outer == SIGN_EXTEND && mode == QImode))
8049 range = 256;
8050 else
8051 range = 4096;
8052 }
8053 else
8054 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8055
8056 return (code == CONST_INT
8057 && INTVAL (index) < range
8058 && INTVAL (index) > -range);
8059 }
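/* Illustrative only: addresses accepted by the checks above include, for
   example (assuming suitable base and index registers):

       ldr  r0, [r1, #4095]          SImode immediate in (-4096, 4096)
       ldr  r0, [r1, r2]             register index
       ldr  r0, [r1, r2, lsl #2]     index scaled by a power of two
       ldrh r0, [r1, #255]           HImode on ARMv4+, range (-256, 256)

   DImode/DFmode accesses are limited to the ldrd-style (-256, 256)
   immediate range when TARGET_LDRD, as handled above.  */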
8060
8061 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8062 index operand. i.e. 1, 2, 4 or 8. */
8063 static bool
8064 thumb2_index_mul_operand (rtx op)
8065 {
8066 HOST_WIDE_INT val;
8067
8068 if (!CONST_INT_P (op))
8069 return false;
8070
8071 val = INTVAL (op);
8072 return (val == 1 || val == 2 || val == 4 || val == 8);
8073 }
8074
8075 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8076 static int
8077 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8078 {
8079 enum rtx_code code = GET_CODE (index);
8080
8081 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8082 /* Standard coprocessor addressing modes. */
8083 if (TARGET_HARD_FLOAT
8084 && (mode == SFmode || mode == DFmode))
8085 return (code == CONST_INT && INTVAL (index) < 1024
8086 /* Thumb-2 allows only > -256 index range for its core register
8087 load/stores. Since we allow SF/DF in core registers, we have
8088 to use the intersection between -256~4096 (core) and -1024~1024
8089 (coprocessor). */
8090 && INTVAL (index) > -256
8091 && (INTVAL (index) & 3) == 0);
8092
8093 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8094 {
8095 /* For DImode assume values will usually live in core regs
8096 and only allow LDRD addressing modes. */
8097 if (!TARGET_LDRD || mode != DImode)
8098 return (code == CONST_INT
8099 && INTVAL (index) < 1024
8100 && INTVAL (index) > -1024
8101 && (INTVAL (index) & 3) == 0);
8102 }
8103
8104 /* For quad modes, we restrict the constant offset to be slightly less
8105 than what the instruction format permits. We do this because for
8106 quad mode moves, we will actually decompose them into two separate
8107 double-mode reads or writes. INDEX must therefore be a valid
8108 (double-mode) offset and so should INDEX+8. */
8109 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8110 return (code == CONST_INT
8111 && INTVAL (index) < 1016
8112 && INTVAL (index) > -1024
8113 && (INTVAL (index) & 3) == 0);
8114
8115 /* We have no such constraint on double mode offsets, so we permit the
8116 full range of the instruction format. */
8117 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8118 return (code == CONST_INT
8119 && INTVAL (index) < 1024
8120 && INTVAL (index) > -1024
8121 && (INTVAL (index) & 3) == 0);
8122
8123 if (arm_address_register_rtx_p (index, strict_p)
8124 && (GET_MODE_SIZE (mode) <= 4))
8125 return 1;
8126
8127 if (mode == DImode || mode == DFmode)
8128 {
8129 if (code == CONST_INT)
8130 {
8131 HOST_WIDE_INT val = INTVAL (index);
8132 /* Thumb-2 ldrd only has reg+const addressing modes.
8133 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8134 If vldr is selected it uses arm_coproc_mem_operand. */
8135 if (TARGET_LDRD)
8136 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8137 else
8138 return IN_RANGE (val, -255, 4095 - 4);
8139 }
8140 else
8141 return 0;
8142 }
8143
8144 if (code == MULT)
8145 {
8146 rtx xiop0 = XEXP (index, 0);
8147 rtx xiop1 = XEXP (index, 1);
8148
8149 return ((arm_address_register_rtx_p (xiop0, strict_p)
8150 && thumb2_index_mul_operand (xiop1))
8151 || (arm_address_register_rtx_p (xiop1, strict_p)
8152 && thumb2_index_mul_operand (xiop0)));
8153 }
8154 else if (code == ASHIFT)
8155 {
8156 rtx op = XEXP (index, 1);
8157
8158 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8159 && CONST_INT_P (op)
8160 && INTVAL (op) > 0
8161 && INTVAL (op) <= 3);
8162 }
8163
8164 return (code == CONST_INT
8165 && INTVAL (index) < 4096
8166 && INTVAL (index) > -256);
8167 }
8168
8169 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8170 static int
8171 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8172 {
8173 int regno;
8174
8175 if (!REG_P (x))
8176 return 0;
8177
8178 regno = REGNO (x);
8179
8180 if (strict_p)
8181 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8182
8183 return (regno <= LAST_LO_REGNUM
8184 || regno > LAST_VIRTUAL_REGISTER
8185 || regno == FRAME_POINTER_REGNUM
8186 || (GET_MODE_SIZE (mode) >= 4
8187 && (regno == STACK_POINTER_REGNUM
8188 || regno >= FIRST_PSEUDO_REGISTER
8189 || x == hard_frame_pointer_rtx
8190 || x == arg_pointer_rtx)));
8191 }
8192
8193 /* Return nonzero if x is a legitimate index register. This is the case
8194 for any base register that can access a QImode object. */
8195 inline static int
8196 thumb1_index_register_rtx_p (rtx x, int strict_p)
8197 {
8198 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8199 }
8200
8201 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8202
8203 The AP may be eliminated to either the SP or the FP, so we use the
8204 least common denominator, e.g. SImode, and offsets from 0 to 64.
8205
8206 ??? Verify whether the above is the right approach.
8207
8208 ??? Also, the FP may be eliminated to the SP, so perhaps that
8209 needs special handling also.
8210
8211 ??? Look at how the mips16 port solves this problem. It probably uses
8212 better ways to solve some of these problems.
8213
8214 Although it is not incorrect, we don't accept QImode and HImode
8215 addresses based on the frame pointer or arg pointer until the
8216 reload pass starts. This is so that eliminating such addresses
8217 into stack based ones won't produce impossible code. */
8218 int
8219 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8220 {
8221 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8222 return 0;
8223
8224 /* ??? Not clear if this is right. Experiment. */
8225 if (GET_MODE_SIZE (mode) < 4
8226 && !(reload_in_progress || reload_completed)
8227 && (reg_mentioned_p (frame_pointer_rtx, x)
8228 || reg_mentioned_p (arg_pointer_rtx, x)
8229 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8230 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8231 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8232 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8233 return 0;
8234
8235 /* Accept any base register. SP only in SImode or larger. */
8236 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8237 return 1;
8238
8239 /* This is PC relative data before arm_reorg runs. */
8240 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8241 && GET_CODE (x) == SYMBOL_REF
8242 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8243 return 1;
8244
8245 /* This is PC relative data after arm_reorg runs. */
8246 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8247 && reload_completed
8248 && (GET_CODE (x) == LABEL_REF
8249 || (GET_CODE (x) == CONST
8250 && GET_CODE (XEXP (x, 0)) == PLUS
8251 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8252 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8253 return 1;
8254
8255 /* Post-inc indexing only supported for SImode and larger. */
8256 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8257 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8258 return 1;
8259
8260 else if (GET_CODE (x) == PLUS)
8261 {
8262 /* REG+REG address can be any two index registers. */
8263 /* We disallow FRAME+REG addressing since we know that FRAME
8264 will be replaced with STACK, and SP relative addressing only
8265 permits SP+OFFSET. */
8266 if (GET_MODE_SIZE (mode) <= 4
8267 && XEXP (x, 0) != frame_pointer_rtx
8268 && XEXP (x, 1) != frame_pointer_rtx
8269 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8270 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8271 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8272 return 1;
8273
8274 /* REG+const has 5-7 bit offset for non-SP registers. */
8275 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8276 || XEXP (x, 0) == arg_pointer_rtx)
8277 && CONST_INT_P (XEXP (x, 1))
8278 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8279 return 1;
8280
8281 /* REG+const has 10-bit offset for SP, but only SImode and
8282 larger is supported. */
8283 /* ??? Should probably check for DI/DFmode overflow here
8284 just like GO_IF_LEGITIMATE_OFFSET does. */
8285 else if (REG_P (XEXP (x, 0))
8286 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8287 && GET_MODE_SIZE (mode) >= 4
8288 && CONST_INT_P (XEXP (x, 1))
8289 && INTVAL (XEXP (x, 1)) >= 0
8290 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8291 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8292 return 1;
8293
8294 else if (REG_P (XEXP (x, 0))
8295 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8296 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8297 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8298 && REGNO (XEXP (x, 0))
8299 <= LAST_VIRTUAL_POINTER_REGISTER))
8300 && GET_MODE_SIZE (mode) >= 4
8301 && CONST_INT_P (XEXP (x, 1))
8302 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8303 return 1;
8304 }
8305
8306 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8307 && GET_MODE_SIZE (mode) == 4
8308 && GET_CODE (x) == SYMBOL_REF
8309 && CONSTANT_POOL_ADDRESS_P (x)
8310 && ! (flag_pic
8311 && symbol_mentioned_p (get_pool_constant (x))
8312 && ! pcrel_constant_p (get_pool_constant (x))))
8313 return 1;
8314
8315 return 0;
8316 }
8317
8318 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8319 instruction of mode MODE. */
8320 int
8321 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8322 {
8323 switch (GET_MODE_SIZE (mode))
8324 {
8325 case 1:
8326 return val >= 0 && val < 32;
8327
8328 case 2:
8329 return val >= 0 && val < 64 && (val & 1) == 0;
8330
8331 default:
8332 return (val >= 0
8333 && (val + GET_MODE_SIZE (mode)) <= 128
8334 && (val & 3) == 0);
8335 }
8336 }
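/* A worked example of the ranges above: a QImode access accepts offsets
   0..31, an HImode access 0..62 in steps of 2, and a 4-byte (SImode)
   access 0..124 in steps of 4; an 8-byte access correspondingly tops out
   at 120, since offset + size must not exceed 128.  */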
8337
8338 bool
8339 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8340 {
8341 if (TARGET_ARM)
8342 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8343 else if (TARGET_THUMB2)
8344 return thumb2_legitimate_address_p (mode, x, strict_p);
8345 else /* if (TARGET_THUMB1) */
8346 return thumb1_legitimate_address_p (mode, x, strict_p);
8347 }
8348
8349 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8350
8351 Given an rtx X being reloaded into a reg required to be
8352 in class CLASS, return the class of reg to actually use.
8353 In general this is just CLASS, but for the Thumb core registers and
8354 immediate constants we prefer a LO_REGS class or a subset. */
8355
8356 static reg_class_t
8357 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8358 {
8359 if (TARGET_32BIT)
8360 return rclass;
8361 else
8362 {
8363 if (rclass == GENERAL_REGS)
8364 return LO_REGS;
8365 else
8366 return rclass;
8367 }
8368 }
8369
8370 /* Build the SYMBOL_REF for __tls_get_addr. */
8371
8372 static GTY(()) rtx tls_get_addr_libfunc;
8373
8374 static rtx
8375 get_tls_get_addr (void)
8376 {
8377 if (!tls_get_addr_libfunc)
8378 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8379 return tls_get_addr_libfunc;
8380 }
8381
8382 rtx
8383 arm_load_tp (rtx target)
8384 {
8385 if (!target)
8386 target = gen_reg_rtx (SImode);
8387
8388 if (TARGET_HARD_TP)
8389 {
8390 /* Can return in any reg. */
8391 emit_insn (gen_load_tp_hard (target));
8392 }
8393 else
8394 {
8395 /* Always returned in r0. Immediately copy the result into a pseudo,
8396 otherwise other uses of r0 (e.g. setting up function arguments) may
8397 clobber the value. */
8398
8399 rtx tmp;
8400
8401 emit_insn (gen_load_tp_soft ());
8402
8403 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8404 emit_move_insn (target, tmp);
8405 }
8406 return target;
8407 }
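/* Illustrative only: with a hardware thread register the load above is a
   single coprocessor read of TPIDRURO (mrc p15, 0, <reg>, c13, c0, 3),
   while the soft variant calls the __aeabi_read_tp helper, which returns
   the thread pointer in r0 -- hence the copy out of r0 above.  */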
8408
8409 static rtx
8410 load_tls_operand (rtx x, rtx reg)
8411 {
8412 rtx tmp;
8413
8414 if (reg == NULL_RTX)
8415 reg = gen_reg_rtx (SImode);
8416
8417 tmp = gen_rtx_CONST (SImode, x);
8418
8419 emit_move_insn (reg, tmp);
8420
8421 return reg;
8422 }
8423
8424 static rtx_insn *
8425 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8426 {
8427 rtx label, labelno, sum;
8428
8429 gcc_assert (reloc != TLS_DESCSEQ);
8430 start_sequence ();
8431
8432 labelno = GEN_INT (pic_labelno++);
8433 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8434 label = gen_rtx_CONST (VOIDmode, label);
8435
8436 sum = gen_rtx_UNSPEC (Pmode,
8437 gen_rtvec (4, x, GEN_INT (reloc), label,
8438 GEN_INT (TARGET_ARM ? 8 : 4)),
8439 UNSPEC_TLS);
8440 reg = load_tls_operand (sum, reg);
8441
8442 if (TARGET_ARM)
8443 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8444 else
8445 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8446
8447 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8448 LCT_PURE, /* LCT_CONST? */
8449 Pmode, reg, Pmode);
8450
8451 rtx_insn *insns = get_insns ();
8452 end_sequence ();
8453
8454 return insns;
8455 }
8456
8457 static rtx
8458 arm_tls_descseq_addr (rtx x, rtx reg)
8459 {
8460 rtx labelno = GEN_INT (pic_labelno++);
8461 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8462 rtx sum = gen_rtx_UNSPEC (Pmode,
8463 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8464 gen_rtx_CONST (VOIDmode, label),
8465 GEN_INT (!TARGET_ARM)),
8466 UNSPEC_TLS);
8467 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8468
8469 emit_insn (gen_tlscall (x, labelno));
8470 if (!reg)
8471 reg = gen_reg_rtx (SImode);
8472 else
8473 gcc_assert (REGNO (reg) != R0_REGNUM);
8474
8475 emit_move_insn (reg, reg0);
8476
8477 return reg;
8478 }
8479
8480 rtx
8481 legitimize_tls_address (rtx x, rtx reg)
8482 {
8483 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8484 rtx_insn *insns;
8485 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8486
8487 switch (model)
8488 {
8489 case TLS_MODEL_GLOBAL_DYNAMIC:
8490 if (TARGET_GNU2_TLS)
8491 {
8492 reg = arm_tls_descseq_addr (x, reg);
8493
8494 tp = arm_load_tp (NULL_RTX);
8495
8496 dest = gen_rtx_PLUS (Pmode, tp, reg);
8497 }
8498 else
8499 {
8500 /* Original scheme */
8501 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8502 dest = gen_reg_rtx (Pmode);
8503 emit_libcall_block (insns, dest, ret, x);
8504 }
8505 return dest;
8506
8507 case TLS_MODEL_LOCAL_DYNAMIC:
8508 if (TARGET_GNU2_TLS)
8509 {
8510 reg = arm_tls_descseq_addr (x, reg);
8511
8512 tp = arm_load_tp (NULL_RTX);
8513
8514 dest = gen_rtx_PLUS (Pmode, tp, reg);
8515 }
8516 else
8517 {
8518 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8519
8520 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8521 share the LDM result with other LD model accesses. */
8522 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8523 UNSPEC_TLS);
8524 dest = gen_reg_rtx (Pmode);
8525 emit_libcall_block (insns, dest, ret, eqv);
8526
8527 /* Load the addend. */
8528 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8529 GEN_INT (TLS_LDO32)),
8530 UNSPEC_TLS);
8531 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8532 dest = gen_rtx_PLUS (Pmode, dest, addend);
8533 }
8534 return dest;
8535
8536 case TLS_MODEL_INITIAL_EXEC:
8537 labelno = GEN_INT (pic_labelno++);
8538 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8539 label = gen_rtx_CONST (VOIDmode, label);
8540 sum = gen_rtx_UNSPEC (Pmode,
8541 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8542 GEN_INT (TARGET_ARM ? 8 : 4)),
8543 UNSPEC_TLS);
8544 reg = load_tls_operand (sum, reg);
8545
8546 if (TARGET_ARM)
8547 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8548 else if (TARGET_THUMB2)
8549 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8550 else
8551 {
8552 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8553 emit_move_insn (reg, gen_const_mem (SImode, reg));
8554 }
8555
8556 tp = arm_load_tp (NULL_RTX);
8557
8558 return gen_rtx_PLUS (Pmode, tp, reg);
8559
8560 case TLS_MODEL_LOCAL_EXEC:
8561 tp = arm_load_tp (NULL_RTX);
8562
8563 reg = gen_rtx_UNSPEC (Pmode,
8564 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8565 UNSPEC_TLS);
8566 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8567
8568 return gen_rtx_PLUS (Pmode, tp, reg);
8569
8570 default:
8571 abort ();
8572 }
8573 }
8574
8575 /* Try machine-dependent ways of modifying an illegitimate address
8576 to be legitimate. If we find one, return the new, valid address. */
8577 rtx
8578 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8579 {
8580 if (arm_tls_referenced_p (x))
8581 {
8582 rtx addend = NULL;
8583
8584 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8585 {
8586 addend = XEXP (XEXP (x, 0), 1);
8587 x = XEXP (XEXP (x, 0), 0);
8588 }
8589
8590 if (GET_CODE (x) != SYMBOL_REF)
8591 return x;
8592
8593 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8594
8595 x = legitimize_tls_address (x, NULL_RTX);
8596
8597 if (addend)
8598 {
8599 x = gen_rtx_PLUS (SImode, x, addend);
8600 orig_x = x;
8601 }
8602 else
8603 return x;
8604 }
8605
8606 if (!TARGET_ARM)
8607 {
8608 /* TODO: legitimize_address for Thumb2. */
8609 if (TARGET_THUMB2)
8610 return x;
8611 return thumb_legitimize_address (x, orig_x, mode);
8612 }
8613
8614 if (GET_CODE (x) == PLUS)
8615 {
8616 rtx xop0 = XEXP (x, 0);
8617 rtx xop1 = XEXP (x, 1);
8618
8619 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8620 xop0 = force_reg (SImode, xop0);
8621
8622 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8623 && !symbol_mentioned_p (xop1))
8624 xop1 = force_reg (SImode, xop1);
8625
8626 if (ARM_BASE_REGISTER_RTX_P (xop0)
8627 && CONST_INT_P (xop1))
8628 {
8629 HOST_WIDE_INT n, low_n;
8630 rtx base_reg, val;
8631 n = INTVAL (xop1);
8632
8633 /* VFP addressing modes actually allow greater offsets, but for
8634 now we just stick with the lowest common denominator. */
8635 if (mode == DImode || mode == DFmode)
8636 {
8637 low_n = n & 0x0f;
8638 n &= ~0x0f;
8639 if (low_n > 4)
8640 {
8641 n += 16;
8642 low_n -= 16;
8643 }
8644 }
8645 else
8646 {
8647 low_n = ((mode) == TImode ? 0
8648 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8649 n -= low_n;
8650 }
8651
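/* For example, for an SImode access the offset 4100 is split into a
base adjustment of 4096 and a residual offset of 4 that fits the
12-bit immediate range of the load.  */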
8652 base_reg = gen_reg_rtx (SImode);
8653 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8654 emit_move_insn (base_reg, val);
8655 x = plus_constant (Pmode, base_reg, low_n);
8656 }
8657 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8658 x = gen_rtx_PLUS (SImode, xop0, xop1);
8659 }
8660
8661 /* XXX We don't allow MINUS any more -- see comment in
8662 arm_legitimate_address_outer_p (). */
8663 else if (GET_CODE (x) == MINUS)
8664 {
8665 rtx xop0 = XEXP (x, 0);
8666 rtx xop1 = XEXP (x, 1);
8667
8668 if (CONSTANT_P (xop0))
8669 xop0 = force_reg (SImode, xop0);
8670
8671 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8672 xop1 = force_reg (SImode, xop1);
8673
8674 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8675 x = gen_rtx_MINUS (SImode, xop0, xop1);
8676 }
8677
8678 /* Make sure to take full advantage of the pre-indexed addressing mode
8679 with absolute addresses which often allows for the base register to
8680 be factorized for multiple adjacent memory references, and it might
8681 even allow the minipool to be avoided entirely. */
8682 else if (CONST_INT_P (x) && optimize > 0)
8683 {
8684 unsigned int bits;
8685 HOST_WIDE_INT mask, base, index;
8686 rtx base_reg;
8687
8688 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8689 use an 8-bit index. So let's use a 12-bit index for SImode only and
8690 hope that arm_gen_constant will enable ldrb to use more bits. */
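/* For example, with a 12-bit index an SImode load from the absolute
address 0x12345678 is rebased as 0x12345000 + 0x678, letting nearby
accesses share the 0x12345000 base register.  */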
8691 bits = (mode == SImode) ? 12 : 8;
8692 mask = (1 << bits) - 1;
8693 base = INTVAL (x) & ~mask;
8694 index = INTVAL (x) & mask;
8695 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8696 {
8697 /* It'll most probably be more efficient to generate the base
8698 with more bits set and use a negative index instead. */
8699 base |= mask;
8700 index -= mask;
8701 }
8702 base_reg = force_reg (SImode, GEN_INT (base));
8703 x = plus_constant (Pmode, base_reg, index);
8704 }
8705
8706 if (flag_pic)
8707 {
8708 /* We need to find and carefully transform any SYMBOL and LABEL
8709 references; so go back to the original address expression. */
8710 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8711
8712 if (new_x != orig_x)
8713 x = new_x;
8714 }
8715
8716 return x;
8717 }
8718
8719
8720 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8721 to be legitimate. If we find one, return the new, valid address. */
8722 rtx
8723 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8724 {
8725 if (GET_CODE (x) == PLUS
8726 && CONST_INT_P (XEXP (x, 1))
8727 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8728 || INTVAL (XEXP (x, 1)) < 0))
8729 {
8730 rtx xop0 = XEXP (x, 0);
8731 rtx xop1 = XEXP (x, 1);
8732 HOST_WIDE_INT offset = INTVAL (xop1);
8733
8734 /* Try to fold the offset into a biasing of the base register and
8735 then offsetting that. Don't do this when optimizing for space
8736 since it can cause too many CSEs. */
8737 if (optimize_size && offset >= 0
8738 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8739 {
8740 HOST_WIDE_INT delta;
8741
8742 if (offset >= 256)
8743 delta = offset - (256 - GET_MODE_SIZE (mode));
8744 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8745 delta = 31 * GET_MODE_SIZE (mode);
8746 else
8747 delta = offset & (~31 * GET_MODE_SIZE (mode));
8748
8749 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8750 NULL_RTX);
8751 x = plus_constant (Pmode, xop0, delta);
8752 }
8753 else if (offset < 0 && offset > -256)
8754 /* Small negative offsets are best done with a subtract before the
8755 dereference; forcing these into a register normally takes two
8756 instructions. */
8757 x = force_operand (x, NULL_RTX);
8758 else
8759 {
8760 /* For the remaining cases, force the constant into a register. */
8761 xop1 = force_reg (SImode, xop1);
8762 x = gen_rtx_PLUS (SImode, xop0, xop1);
8763 }
8764 }
8765 else if (GET_CODE (x) == PLUS
8766 && s_register_operand (XEXP (x, 1), SImode)
8767 && !s_register_operand (XEXP (x, 0), SImode))
8768 {
8769 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8770
8771 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8772 }
8773
8774 if (flag_pic)
8775 {
8776 /* We need to find and carefully transform any SYMBOL and LABEL
8777 references; so go back to the original address expression. */
8778 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8779
8780 if (new_x != orig_x)
8781 x = new_x;
8782 }
8783
8784 return x;
8785 }
8786
8787 /* Return TRUE if X contains any TLS symbol references. */
8788
8789 bool
8790 arm_tls_referenced_p (rtx x)
8791 {
8792 if (! TARGET_HAVE_TLS)
8793 return false;
8794
8795 subrtx_iterator::array_type array;
8796 FOR_EACH_SUBRTX (iter, array, x, ALL)
8797 {
8798 const_rtx x = *iter;
8799 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8800 {
8801 /* ARM currently does not provide relocations to encode TLS variables
8802 into AArch32 instructions, only data, so there is currently no way
8803 to implement these if a literal pool is disabled. */
8804 if (arm_disable_literal_pool)
8805 sorry ("accessing thread-local storage is not currently supported "
8806 "with -mpure-code or -mslow-flash-data");
8807
8808 return true;
8809 }
8810
8811 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8812 TLS offsets, not real symbol references. */
8813 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8814 iter.skip_subrtxes ();
8815 }
8816 return false;
8817 }
8818
8819 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8820
8821 On the ARM, allow any integer (invalid ones are removed later by insn
8822 patterns), nice doubles and symbol_refs which refer to the function's
8823 constant pool XXX.
8824
8825 When generating pic allow anything. */
8826
8827 static bool
8828 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8829 {
8830 return flag_pic || !label_mentioned_p (x);
8831 }
8832
8833 static bool
8834 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8835 {
8836 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8837 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8838 for ARMv8-M Baseline or later the result is valid. */
8839 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8840 x = XEXP (x, 0);
8841
8842 return (CONST_INT_P (x)
8843 || CONST_DOUBLE_P (x)
8844 || CONSTANT_ADDRESS_P (x)
8845 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8846 || flag_pic);
8847 }
8848
8849 static bool
8850 arm_legitimate_constant_p (machine_mode mode, rtx x)
8851 {
8852 return (!arm_cannot_force_const_mem (mode, x)
8853 && (TARGET_32BIT
8854 ? arm_legitimate_constant_p_1 (mode, x)
8855 : thumb_legitimate_constant_p (mode, x)));
8856 }
8857
8858 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8859
8860 static bool
8861 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8862 {
8863 rtx base, offset;
8864
8865 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8866 {
8867 split_const (x, &base, &offset);
8868 if (GET_CODE (base) == SYMBOL_REF
8869 && !offset_within_block_p (base, INTVAL (offset)))
8870 return true;
8871 }
8872 return arm_tls_referenced_p (x);
8873 }
8874 \f
8875 #define REG_OR_SUBREG_REG(X) \
8876 (REG_P (X) \
8877 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8878
8879 #define REG_OR_SUBREG_RTX(X) \
8880 (REG_P (X) ? (X) : SUBREG_REG (X))
8881
8882 static inline int
8883 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8884 {
8885 machine_mode mode = GET_MODE (x);
8886 int total, words;
8887
8888 switch (code)
8889 {
8890 case ASHIFT:
8891 case ASHIFTRT:
8892 case LSHIFTRT:
8893 case ROTATERT:
8894 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8895
8896 case PLUS:
8897 case MINUS:
8898 case COMPARE:
8899 case NEG:
8900 case NOT:
8901 return COSTS_N_INSNS (1);
8902
8903 case MULT:
8904 if (arm_arch6m && arm_m_profile_small_mul)
8905 return COSTS_N_INSNS (32);
8906
8907 if (CONST_INT_P (XEXP (x, 1)))
8908 {
8909 int cycles = 0;
8910 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8911
8912 while (i)
8913 {
8914 i >>= 2;
8915 cycles++;
8916 }
8917 return COSTS_N_INSNS (2) + cycles;
8918 }
8919 return COSTS_N_INSNS (1) + 16;
8920
8921 case SET:
8922 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8923 the mode. */
8924 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8925 return (COSTS_N_INSNS (words)
8926 + 4 * ((MEM_P (SET_SRC (x)))
8927 + MEM_P (SET_DEST (x))));
8928
8929 case CONST_INT:
8930 if (outer == SET)
8931 {
8932 if (UINTVAL (x) < 256
8933 /* 16-bit constant. */
8934 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8935 return 0;
8936 if (thumb_shiftable_const (INTVAL (x)))
8937 return COSTS_N_INSNS (2);
8938 return COSTS_N_INSNS (3);
8939 }
8940 else if ((outer == PLUS || outer == COMPARE)
8941 && INTVAL (x) < 256 && INTVAL (x) > -256)
8942 return 0;
8943 else if ((outer == IOR || outer == XOR || outer == AND)
8944 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8945 return COSTS_N_INSNS (1);
8946 else if (outer == AND)
8947 {
8948 int i;
8949 /* This duplicates the tests in the andsi3 expander. */
8950 for (i = 9; i <= 31; i++)
8951 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8952 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8953 return COSTS_N_INSNS (2);
8954 }
8955 else if (outer == ASHIFT || outer == ASHIFTRT
8956 || outer == LSHIFTRT)
8957 return 0;
8958 return COSTS_N_INSNS (2);
8959
8960 case CONST:
8961 case CONST_DOUBLE:
8962 case LABEL_REF:
8963 case SYMBOL_REF:
8964 return COSTS_N_INSNS (3);
8965
8966 case UDIV:
8967 case UMOD:
8968 case DIV:
8969 case MOD:
8970 return 100;
8971
8972 case TRUNCATE:
8973 return 99;
8974
8975 case AND:
8976 case XOR:
8977 case IOR:
8978 /* XXX guess. */
8979 return 8;
8980
8981 case MEM:
8982 /* XXX another guess. */
8983 /* Memory costs quite a lot for the first word, but subsequent words
8984 load at the equivalent of a single insn each. */
8985 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8986 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8987 ? 4 : 0));
8988
8989 case IF_THEN_ELSE:
8990 /* XXX a guess. */
8991 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8992 return 14;
8993 return 2;
8994
8995 case SIGN_EXTEND:
8996 case ZERO_EXTEND:
8997 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8998 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8999
9000 if (mode == SImode)
9001 return total;
9002
9003 if (arm_arch6)
9004 return total + COSTS_N_INSNS (1);
9005
9006 /* Assume a two-shift sequence. Increase the cost slightly so
9007 we prefer actual shifts over an extend operation. */
9008 return total + 1 + COSTS_N_INSNS (2);
9009
9010 default:
9011 return 99;
9012 }
9013 }
9014
9015 /* Estimates the size cost of thumb1 instructions.
9016 For now most of the code is copied from thumb1_rtx_costs. We need more
9017 fine-grained tuning when we have more related test cases. */
9018 static inline int
9019 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9020 {
9021 machine_mode mode = GET_MODE (x);
9022 int words, cost;
9023
9024 switch (code)
9025 {
9026 case ASHIFT:
9027 case ASHIFTRT:
9028 case LSHIFTRT:
9029 case ROTATERT:
9030 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9031
9032 case PLUS:
9033 case MINUS:
9034 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9035 patterns generated by RTL expansion, especially for the expansion of
9036 multiplication. */
9037 if ((GET_CODE (XEXP (x, 0)) == MULT
9038 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9039 || (GET_CODE (XEXP (x, 1)) == MULT
9040 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9041 return COSTS_N_INSNS (2);
9042 /* Fall through. */
9043 case COMPARE:
9044 case NEG:
9045 case NOT:
9046 return COSTS_N_INSNS (1);
9047
9048 case MULT:
9049 if (CONST_INT_P (XEXP (x, 1)))
9050 {
9051 /* The Thumb-1 mul instruction can't operate on a constant; we must
9052 load it into a register first. */
9053 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9054 /* For targets that have a very small, high-latency multiply
9055 unit, we prefer to synthesize the mult with up to 5 instructions,
9056 giving a good balance between size and performance. */
9057 if (arm_arch6m && arm_m_profile_small_mul)
9058 return COSTS_N_INSNS (5);
9059 else
9060 return COSTS_N_INSNS (1) + const_size;
9061 }
9062 return COSTS_N_INSNS (1);
9063
9064 case SET:
9065 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9066 the mode. */
9067 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9068 cost = COSTS_N_INSNS (words);
9069 if (satisfies_constraint_J (SET_SRC (x))
9070 || satisfies_constraint_K (SET_SRC (x))
9071 /* Too big an immediate for a 2-byte mov, using MOVT. */
9072 || (CONST_INT_P (SET_SRC (x))
9073 && UINTVAL (SET_SRC (x)) >= 256
9074 && TARGET_HAVE_MOVT
9075 && satisfies_constraint_j (SET_SRC (x)))
9076 /* thumb1_movdi_insn. */
9077 || ((words > 1) && MEM_P (SET_SRC (x))))
9078 cost += COSTS_N_INSNS (1);
9079 return cost;
9080
9081 case CONST_INT:
9082 if (outer == SET)
9083 {
9084 if (UINTVAL (x) < 256)
9085 return COSTS_N_INSNS (1);
9086 /* movw is 4 bytes long. */
9087 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9088 return COSTS_N_INSNS (2);
9089 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9090 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9091 return COSTS_N_INSNS (2);
9092 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9093 if (thumb_shiftable_const (INTVAL (x)))
9094 return COSTS_N_INSNS (2);
9095 return COSTS_N_INSNS (3);
9096 }
9097 else if ((outer == PLUS || outer == COMPARE)
9098 && INTVAL (x) < 256 && INTVAL (x) > -256)
9099 return 0;
9100 else if ((outer == IOR || outer == XOR || outer == AND)
9101 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9102 return COSTS_N_INSNS (1);
9103 else if (outer == AND)
9104 {
9105 int i;
9106 /* This duplicates the tests in the andsi3 expander. */
9107 for (i = 9; i <= 31; i++)
9108 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9109 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9110 return COSTS_N_INSNS (2);
9111 }
9112 else if (outer == ASHIFT || outer == ASHIFTRT
9113 || outer == LSHIFTRT)
9114 return 0;
9115 return COSTS_N_INSNS (2);
9116
9117 case CONST:
9118 case CONST_DOUBLE:
9119 case LABEL_REF:
9120 case SYMBOL_REF:
9121 return COSTS_N_INSNS (3);
9122
9123 case UDIV:
9124 case UMOD:
9125 case DIV:
9126 case MOD:
9127 return 100;
9128
9129 case TRUNCATE:
9130 return 99;
9131
9132 case AND:
9133 case XOR:
9134 case IOR:
9135 return COSTS_N_INSNS (1);
9136
9137 case MEM:
9138 return (COSTS_N_INSNS (1)
9139 + COSTS_N_INSNS (1)
9140 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9141 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9142 ? COSTS_N_INSNS (1) : 0));
9143
9144 case IF_THEN_ELSE:
9145 /* XXX a guess. */
9146 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9147 return 14;
9148 return 2;
9149
9150 case ZERO_EXTEND:
9151 /* XXX still guessing. */
9152 switch (GET_MODE (XEXP (x, 0)))
9153 {
9154 case E_QImode:
9155 return (1 + (mode == DImode ? 4 : 0)
9156 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9157
9158 case E_HImode:
9159 return (4 + (mode == DImode ? 4 : 0)
9160 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9161
9162 case E_SImode:
9163 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9164
9165 default:
9166 return 99;
9167 }
9168
9169 default:
9170 return 99;
9171 }
9172 }
9173
9174 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9175 operand, then return the operand that is being shifted. If the shift
9176 is not by a constant, then set SHIFT_REG to point to the operand.
9177 Return NULL if OP is not a shifter operand. */
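/* For example, (mult X (const_int 4)) is a shift left by 2 and returns X,
while (ashift X Y) with Y a register returns X and sets *SHIFT_REG to Y.  */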
9178 static rtx
9179 shifter_op_p (rtx op, rtx *shift_reg)
9180 {
9181 enum rtx_code code = GET_CODE (op);
9182
9183 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9184 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9185 return XEXP (op, 0);
9186 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9187 return XEXP (op, 0);
9188 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9189 || code == ASHIFTRT)
9190 {
9191 if (!CONST_INT_P (XEXP (op, 1)))
9192 *shift_reg = XEXP (op, 1);
9193 return XEXP (op, 0);
9194 }
9195
9196 return NULL;
9197 }
9198
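/* Helper for arm_rtx_costs_internal.  Compute the cost of the UNSPEC or
UNSPEC_VOLATILE expression X and store it in *COST.  The return value is
true when the cost is final and the caller need not recurse into the
operands of X.  */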
9199 static bool
9200 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9201 {
9202 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9203 rtx_code code = GET_CODE (x);
9204 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9205
9206 switch (XINT (x, 1))
9207 {
9208 case UNSPEC_UNALIGNED_LOAD:
9209 /* We can only do unaligned loads into the integer unit, and we can't
9210 use LDM or LDRD. */
9211 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9212 if (speed_p)
9213 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9214 + extra_cost->ldst.load_unaligned);
9215
9216 #ifdef NOT_YET
9217 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9218 ADDR_SPACE_GENERIC, speed_p);
9219 #endif
9220 return true;
9221
9222 case UNSPEC_UNALIGNED_STORE:
9223 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9224 if (speed_p)
9225 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9226 + extra_cost->ldst.store_unaligned);
9227
9228 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9229 #ifdef NOT_YET
9230 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9231 ADDR_SPACE_GENERIC, speed_p);
9232 #endif
9233 return true;
9234
9235 case UNSPEC_VRINTZ:
9236 case UNSPEC_VRINTP:
9237 case UNSPEC_VRINTM:
9238 case UNSPEC_VRINTR:
9239 case UNSPEC_VRINTX:
9240 case UNSPEC_VRINTA:
9241 if (speed_p)
9242 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9243
9244 return true;
9245 default:
9246 *cost = COSTS_N_INSNS (2);
9247 break;
9248 }
9249 return true;
9250 }
9251
9252 /* Cost of a libcall. We assume one insn per argument, an amount for the
9253 call (one insn for -Os) and then one for processing the result. */
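/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing for
speed and COSTS_N_INSNS (4) when optimizing for size.  */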
9254 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9255
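/* Helper macro for narrow-mode PLUS and MINUS: if operand IDX of X is a
left shift (the only shift form usable in the narrow modes), add the
appropriate arith-shift cost, plus the shift-register cost when the
shift amount is in a register, plus the costs of the shifted operand
and of the other operand, and return true.  */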
9256 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9257 do \
9258 { \
9259 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9260 if (shift_op != NULL \
9261 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9262 { \
9263 if (shift_reg) \
9264 { \
9265 if (speed_p) \
9266 *cost += extra_cost->alu.arith_shift_reg; \
9267 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9268 ASHIFT, 1, speed_p); \
9269 } \
9270 else if (speed_p) \
9271 *cost += extra_cost->alu.arith_shift; \
9272 \
9273 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9274 ASHIFT, 0, speed_p) \
9275 + rtx_cost (XEXP (x, 1 - IDX), \
9276 GET_MODE (shift_op), \
9277 OP, 1, speed_p)); \
9278 return true; \
9279 } \
9280 } \
9281 while (0)
9282
9283 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9284 considering the costs of the addressing mode and memory access
9285 separately. */
9286 static bool
9287 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9288 int *cost, bool speed_p)
9289 {
9290 machine_mode mode = GET_MODE (x);
9291
9292 *cost = COSTS_N_INSNS (1);
9293
9294 if (flag_pic
9295 && GET_CODE (XEXP (x, 0)) == PLUS
9296 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9297 /* This will be split into two instructions. Add the cost of the
9298 additional instruction here. The cost of the memory access is computed
9299 below. See arm.md:calculate_pic_address. */
9300 *cost += COSTS_N_INSNS (1);
9301
9302 /* Calculate cost of the addressing mode. */
9303 if (speed_p)
9304 {
9305 arm_addr_mode_op op_type;
9306 switch (GET_CODE (XEXP (x, 0)))
9307 {
9308 default:
9309 case REG:
9310 op_type = AMO_DEFAULT;
9311 break;
9312 case MINUS:
9313 /* MINUS does not appear in RTL, but the architecture supports it,
9314 so handle this case defensively. */
9315 /* fall through */
9316 case PLUS:
9317 op_type = AMO_NO_WB;
9318 break;
9319 case PRE_INC:
9320 case PRE_DEC:
9321 case POST_INC:
9322 case POST_DEC:
9323 case PRE_MODIFY:
9324 case POST_MODIFY:
9325 op_type = AMO_WB;
9326 break;
9327 }
9328
9329 if (VECTOR_MODE_P (mode))
9330 *cost += current_tune->addr_mode_costs->vector[op_type];
9331 else if (FLOAT_MODE_P (mode))
9332 *cost += current_tune->addr_mode_costs->fp[op_type];
9333 else
9334 *cost += current_tune->addr_mode_costs->integer[op_type];
9335 }
9336
9337 /* Calculate cost of memory access. */
9338 if (speed_p)
9339 {
9340 if (FLOAT_MODE_P (mode))
9341 {
9342 if (GET_MODE_SIZE (mode) == 8)
9343 *cost += extra_cost->ldst.loadd;
9344 else
9345 *cost += extra_cost->ldst.loadf;
9346 }
9347 else if (VECTOR_MODE_P (mode))
9348 *cost += extra_cost->ldst.loadv;
9349 else
9350 {
9351 /* Integer modes */
9352 if (GET_MODE_SIZE (mode) == 8)
9353 *cost += extra_cost->ldst.ldrd;
9354 else
9355 *cost += extra_cost->ldst.load;
9356 }
9357 }
9358
9359 return true;
9360 }
9361
9362 /* RTX costs. Make an estimate of the cost of executing the operation
9363 X, which is contained within an operation with code OUTER_CODE.
9364 SPEED_P indicates whether the cost desired is the performance cost,
9365 or the size cost. The estimate is stored in COST and the return
9366 value is TRUE if the cost calculation is final, or FALSE if the
9367 caller should recurse through the operands of X to add additional
9368 costs.
9369
9370 We currently make no attempt to model the size savings of Thumb-2
9371 16-bit instructions. At the normal points in compilation where
9372 this code is called we have no measure of whether the condition
9373 flags are live or not, and thus no realistic way to determine what
9374 the size will eventually be. */
9375 static bool
9376 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9377 const struct cpu_cost_table *extra_cost,
9378 int *cost, bool speed_p)
9379 {
9380 machine_mode mode = GET_MODE (x);
9381
9382 *cost = COSTS_N_INSNS (1);
9383
9384 if (TARGET_THUMB1)
9385 {
9386 if (speed_p)
9387 *cost = thumb1_rtx_costs (x, code, outer_code);
9388 else
9389 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9390 return true;
9391 }
9392
9393 switch (code)
9394 {
9395 case SET:
9396 *cost = 0;
9397 /* SET RTXs don't have a mode so we get it from the destination. */
9398 mode = GET_MODE (SET_DEST (x));
9399
9400 if (REG_P (SET_SRC (x))
9401 && REG_P (SET_DEST (x)))
9402 {
9403 /* Assume that most copies can be done with a single insn,
9404 unless we don't have HW FP, in which case everything
9405 larger than word mode will require two insns. */
9406 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9407 && GET_MODE_SIZE (mode) > 4)
9408 || mode == DImode)
9409 ? 2 : 1);
9410 /* Conditional register moves can be encoded
9411 in 16 bits in Thumb mode. */
9412 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9413 *cost >>= 1;
9414
9415 return true;
9416 }
9417
9418 if (CONST_INT_P (SET_SRC (x)))
9419 {
9420 /* Handle CONST_INT here, since the value doesn't have a mode
9421 and we would otherwise be unable to work out the true cost. */
9422 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9423 0, speed_p);
9424 outer_code = SET;
9425 /* Slightly lower the cost of setting a core reg to a constant.
9426 This helps break up chains and allows for better scheduling. */
9427 if (REG_P (SET_DEST (x))
9428 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9429 *cost -= 1;
9430 x = SET_SRC (x);
9431 /* Moves of an immediate in the range [0, 255] can be
9432 encoded in 16 bits in Thumb mode. */
9433 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9434 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9435 *cost >>= 1;
9436 goto const_int_cost;
9437 }
9438
9439 return false;
9440
9441 case MEM:
9442 return arm_mem_costs (x, extra_cost, cost, speed_p);
9443
9444 case PARALLEL:
9445 {
9446 /* Calculations of LDM costs are complex. We assume an initial cost
9447 (ldm_1st) which will load the number of registers mentioned in
9448 ldm_regs_per_insn_1st registers; then each additional
9449 ldm_regs_per_insn_subsequent registers cost one more insn. The
9450 formula for N regs is thus:
9451
9452 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9453 + ldm_regs_per_insn_subsequent - 1)
9454 / ldm_regs_per_insn_subsequent).
9455
9456 Additional costs may also be added for addressing. A similar
9457 formula is used for STM. */
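/* For example, if ldm_regs_per_insn_1st and ldm_regs_per_insn_subsequent
were both 2, a 5-register LDM would cost
ldm_1st + COSTS_N_INSNS ((3 + 2 - 1) / 2) = ldm_1st + COSTS_N_INSNS (2).  */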
9458
9459 bool is_ldm = load_multiple_operation (x, SImode);
9460 bool is_stm = store_multiple_operation (x, SImode);
9461
9462 if (is_ldm || is_stm)
9463 {
9464 if (speed_p)
9465 {
9466 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9467 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9468 ? extra_cost->ldst.ldm_regs_per_insn_1st
9469 : extra_cost->ldst.stm_regs_per_insn_1st;
9470 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9471 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9472 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9473
9474 *cost += regs_per_insn_1st
9475 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9476 + regs_per_insn_sub - 1)
9477 / regs_per_insn_sub);
9478 return true;
9479 }
9480
9481 }
9482 return false;
9483 }
9484 case DIV:
9485 case UDIV:
9486 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9487 && (mode == SFmode || !TARGET_VFP_SINGLE))
9488 *cost += COSTS_N_INSNS (speed_p
9489 ? extra_cost->fp[mode != SFmode].div : 0);
9490 else if (mode == SImode && TARGET_IDIV)
9491 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9492 else
9493 *cost = LIBCALL_COST (2);
9494
9495 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9496 are possible, udiv is preferred. */
9497 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9498 return false; /* All arguments must be in registers. */
9499
9500 case MOD:
9501 /* MOD by a power of 2 can be expanded as:
9502 rsbs r1, r0, #0
9503 and r0, r0, #(n - 1)
9504 and r1, r1, #(n - 1)
9505 rsbpl r0, r1, #0. */
9506 if (CONST_INT_P (XEXP (x, 1))
9507 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9508 && mode == SImode)
9509 {
9510 *cost += COSTS_N_INSNS (3);
9511
9512 if (speed_p)
9513 *cost += 2 * extra_cost->alu.logical
9514 + extra_cost->alu.arith;
9515 return true;
9516 }
9517
9518 /* Fall-through. */
9519 case UMOD:
9520 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9521 are possible, udiv is preferred. */
9522 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9523 return false; /* All arguments must be in registers. */
9524
9525 case ROTATE:
9526 if (mode == SImode && REG_P (XEXP (x, 1)))
9527 {
9528 *cost += (COSTS_N_INSNS (1)
9529 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9530 if (speed_p)
9531 *cost += extra_cost->alu.shift_reg;
9532 return true;
9533 }
9534 /* Fall through */
9535 case ROTATERT:
9536 case ASHIFT:
9537 case LSHIFTRT:
9538 case ASHIFTRT:
9539 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9540 {
9541 *cost += (COSTS_N_INSNS (2)
9542 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9543 if (speed_p)
9544 *cost += 2 * extra_cost->alu.shift;
9545 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9546 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9547 *cost += 1;
9548 return true;
9549 }
9550 else if (mode == SImode)
9551 {
9552 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9553 /* Slightly disparage register shifts at -Os, but not by much. */
9554 if (!CONST_INT_P (XEXP (x, 1)))
9555 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9556 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9557 return true;
9558 }
9559 else if (GET_MODE_CLASS (mode) == MODE_INT
9560 && GET_MODE_SIZE (mode) < 4)
9561 {
9562 if (code == ASHIFT)
9563 {
9564 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9565 /* Slightly disparage register shifts at -Os, but not by
9566 much. */
9567 if (!CONST_INT_P (XEXP (x, 1)))
9568 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9569 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9570 }
9571 else if (code == LSHIFTRT || code == ASHIFTRT)
9572 {
9573 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9574 {
9575 /* Can use SBFX/UBFX. */
9576 if (speed_p)
9577 *cost += extra_cost->alu.bfx;
9578 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9579 }
9580 else
9581 {
9582 *cost += COSTS_N_INSNS (1);
9583 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9584 if (speed_p)
9585 {
9586 if (CONST_INT_P (XEXP (x, 1)))
9587 *cost += 2 * extra_cost->alu.shift;
9588 else
9589 *cost += (extra_cost->alu.shift
9590 + extra_cost->alu.shift_reg);
9591 }
9592 else
9593 /* Slightly disparage register shifts. */
9594 *cost += !CONST_INT_P (XEXP (x, 1));
9595 }
9596 }
9597 else /* Rotates. */
9598 {
9599 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9600 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9601 if (speed_p)
9602 {
9603 if (CONST_INT_P (XEXP (x, 1)))
9604 *cost += (2 * extra_cost->alu.shift
9605 + extra_cost->alu.log_shift);
9606 else
9607 *cost += (extra_cost->alu.shift
9608 + extra_cost->alu.shift_reg
9609 + extra_cost->alu.log_shift_reg);
9610 }
9611 }
9612 return true;
9613 }
9614
9615 *cost = LIBCALL_COST (2);
9616 return false;
9617
9618 case BSWAP:
9619 if (arm_arch6)
9620 {
9621 if (mode == SImode)
9622 {
9623 if (speed_p)
9624 *cost += extra_cost->alu.rev;
9625
9626 return false;
9627 }
9628 }
9629 else
9630 {
9631 /* No rev instruction available. Look at arm_legacy_rev
9632 and thumb_legacy_rev for the form of RTL used then. */
9633 if (TARGET_THUMB)
9634 {
9635 *cost += COSTS_N_INSNS (9);
9636
9637 if (speed_p)
9638 {
9639 *cost += 6 * extra_cost->alu.shift;
9640 *cost += 3 * extra_cost->alu.logical;
9641 }
9642 }
9643 else
9644 {
9645 *cost += COSTS_N_INSNS (4);
9646
9647 if (speed_p)
9648 {
9649 *cost += 2 * extra_cost->alu.shift;
9650 *cost += extra_cost->alu.arith_shift;
9651 *cost += 2 * extra_cost->alu.logical;
9652 }
9653 }
9654 return true;
9655 }
9656 return false;
9657
9658 case MINUS:
9659 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9660 && (mode == SFmode || !TARGET_VFP_SINGLE))
9661 {
9662 if (GET_CODE (XEXP (x, 0)) == MULT
9663 || GET_CODE (XEXP (x, 1)) == MULT)
9664 {
9665 rtx mul_op0, mul_op1, sub_op;
9666
9667 if (speed_p)
9668 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9669
9670 if (GET_CODE (XEXP (x, 0)) == MULT)
9671 {
9672 mul_op0 = XEXP (XEXP (x, 0), 0);
9673 mul_op1 = XEXP (XEXP (x, 0), 1);
9674 sub_op = XEXP (x, 1);
9675 }
9676 else
9677 {
9678 mul_op0 = XEXP (XEXP (x, 1), 0);
9679 mul_op1 = XEXP (XEXP (x, 1), 1);
9680 sub_op = XEXP (x, 0);
9681 }
9682
9683 /* The first operand of the multiply may be optionally
9684 negated. */
9685 if (GET_CODE (mul_op0) == NEG)
9686 mul_op0 = XEXP (mul_op0, 0);
9687
9688 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9689 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9690 + rtx_cost (sub_op, mode, code, 0, speed_p));
9691
9692 return true;
9693 }
9694
9695 if (speed_p)
9696 *cost += extra_cost->fp[mode != SFmode].addsub;
9697 return false;
9698 }
9699
9700 if (mode == SImode)
9701 {
9702 rtx shift_by_reg = NULL;
9703 rtx shift_op;
9704 rtx non_shift_op;
9705
9706 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9707 if (shift_op == NULL)
9708 {
9709 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9710 non_shift_op = XEXP (x, 0);
9711 }
9712 else
9713 non_shift_op = XEXP (x, 1);
9714
9715 if (shift_op != NULL)
9716 {
9717 if (shift_by_reg != NULL)
9718 {
9719 if (speed_p)
9720 *cost += extra_cost->alu.arith_shift_reg;
9721 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9722 }
9723 else if (speed_p)
9724 *cost += extra_cost->alu.arith_shift;
9725
9726 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9727 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9728 return true;
9729 }
9730
9731 if (arm_arch_thumb2
9732 && GET_CODE (XEXP (x, 1)) == MULT)
9733 {
9734 /* MLS. */
9735 if (speed_p)
9736 *cost += extra_cost->mult[0].add;
9737 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9738 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9739 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9740 return true;
9741 }
9742
9743 if (CONST_INT_P (XEXP (x, 0)))
9744 {
9745 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9746 INTVAL (XEXP (x, 0)), NULL_RTX,
9747 NULL_RTX, 1, 0);
9748 *cost = COSTS_N_INSNS (insns);
9749 if (speed_p)
9750 *cost += insns * extra_cost->alu.arith;
9751 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9752 return true;
9753 }
9754 else if (speed_p)
9755 *cost += extra_cost->alu.arith;
9756
9757 return false;
9758 }
9759
9760 if (GET_MODE_CLASS (mode) == MODE_INT
9761 && GET_MODE_SIZE (mode) < 4)
9762 {
9763 rtx shift_op, shift_reg;
9764 shift_reg = NULL;
9765
9766 /* We check both sides of the MINUS for shifter operands since,
9767 unlike PLUS, it's not commutative. */
9768
9769 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9770 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9771
9772 /* Slightly disparage, as we might need to widen the result. */
9773 *cost += 1;
9774 if (speed_p)
9775 *cost += extra_cost->alu.arith;
9776
9777 if (CONST_INT_P (XEXP (x, 0)))
9778 {
9779 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9780 return true;
9781 }
9782
9783 return false;
9784 }
9785
9786 if (mode == DImode)
9787 {
9788 *cost += COSTS_N_INSNS (1);
9789
9790 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9791 {
9792 rtx op1 = XEXP (x, 1);
9793
9794 if (speed_p)
9795 *cost += 2 * extra_cost->alu.arith;
9796
9797 if (GET_CODE (op1) == ZERO_EXTEND)
9798 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9799 0, speed_p);
9800 else
9801 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9802 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9803 0, speed_p);
9804 return true;
9805 }
9806 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9807 {
9808 if (speed_p)
9809 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9810 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9811 0, speed_p)
9812 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9813 return true;
9814 }
9815 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9816 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9817 {
9818 if (speed_p)
9819 *cost += (extra_cost->alu.arith
9820 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9821 ? extra_cost->alu.arith
9822 : extra_cost->alu.arith_shift));
9823 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9824 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9825 GET_CODE (XEXP (x, 1)), 0, speed_p));
9826 return true;
9827 }
9828
9829 if (speed_p)
9830 *cost += 2 * extra_cost->alu.arith;
9831 return false;
9832 }
9833
9834 /* Vector mode? */
9835
9836 *cost = LIBCALL_COST (2);
9837 return false;
9838
9839 case PLUS:
9840 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9841 && (mode == SFmode || !TARGET_VFP_SINGLE))
9842 {
9843 if (GET_CODE (XEXP (x, 0)) == MULT)
9844 {
9845 rtx mul_op0, mul_op1, add_op;
9846
9847 if (speed_p)
9848 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9849
9850 mul_op0 = XEXP (XEXP (x, 0), 0);
9851 mul_op1 = XEXP (XEXP (x, 0), 1);
9852 add_op = XEXP (x, 1);
9853
9854 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9855 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9856 + rtx_cost (add_op, mode, code, 0, speed_p));
9857
9858 return true;
9859 }
9860
9861 if (speed_p)
9862 *cost += extra_cost->fp[mode != SFmode].addsub;
9863 return false;
9864 }
9865 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9866 {
9867 *cost = LIBCALL_COST (2);
9868 return false;
9869 }
9870
9871 /* Narrow modes can be synthesized in SImode, but the range
9872 of useful sub-operations is limited. Check for shift operations
9873 on one of the operands. Only left shifts can be used in the
9874 narrow modes. */
9875 if (GET_MODE_CLASS (mode) == MODE_INT
9876 && GET_MODE_SIZE (mode) < 4)
9877 {
9878 rtx shift_op, shift_reg;
9879 shift_reg = NULL;
9880
9881 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9882
9883 if (CONST_INT_P (XEXP (x, 1)))
9884 {
9885 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9886 INTVAL (XEXP (x, 1)), NULL_RTX,
9887 NULL_RTX, 1, 0);
9888 *cost = COSTS_N_INSNS (insns);
9889 if (speed_p)
9890 *cost += insns * extra_cost->alu.arith;
9891 /* Slightly penalize a narrow operation as the result may
9892 need widening. */
9893 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9894 return true;
9895 }
9896
9897 /* Slightly penalize a narrow operation as the result may
9898 need widening. */
9899 *cost += 1;
9900 if (speed_p)
9901 *cost += extra_cost->alu.arith;
9902
9903 return false;
9904 }
9905
9906 if (mode == SImode)
9907 {
9908 rtx shift_op, shift_reg;
9909
9910 if (TARGET_INT_SIMD
9911 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9912 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9913 {
9914 /* UXTA[BH] or SXTA[BH]. */
9915 if (speed_p)
9916 *cost += extra_cost->alu.extend_arith;
9917 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9918 0, speed_p)
9919 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9920 return true;
9921 }
9922
9923 shift_reg = NULL;
9924 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9925 if (shift_op != NULL)
9926 {
9927 if (shift_reg)
9928 {
9929 if (speed_p)
9930 *cost += extra_cost->alu.arith_shift_reg;
9931 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9932 }
9933 else if (speed_p)
9934 *cost += extra_cost->alu.arith_shift;
9935
9936 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9937 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9938 return true;
9939 }
9940 if (GET_CODE (XEXP (x, 0)) == MULT)
9941 {
9942 rtx mul_op = XEXP (x, 0);
9943
9944 if (TARGET_DSP_MULTIPLY
9945 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9946 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9947 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9948 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9949 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9950 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9951 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9952 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9953 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9954 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9955 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9956 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9957 == 16))))))
9958 {
9959 /* SMLA[BT][BT]. */
9960 if (speed_p)
9961 *cost += extra_cost->mult[0].extend_add;
9962 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9963 SIGN_EXTEND, 0, speed_p)
9964 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9965 SIGN_EXTEND, 0, speed_p)
9966 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9967 return true;
9968 }
9969
9970 if (speed_p)
9971 *cost += extra_cost->mult[0].add;
9972 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9973 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9974 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9975 return true;
9976 }
9977 if (CONST_INT_P (XEXP (x, 1)))
9978 {
9979 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9980 INTVAL (XEXP (x, 1)), NULL_RTX,
9981 NULL_RTX, 1, 0);
9982 *cost = COSTS_N_INSNS (insns);
9983 if (speed_p)
9984 *cost += insns * extra_cost->alu.arith;
9985 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9986 return true;
9987 }
9988 else if (speed_p)
9989 *cost += extra_cost->alu.arith;
9990
9991 return false;
9992 }
9993
9994 if (mode == DImode)
9995 {
9996 if (GET_CODE (XEXP (x, 0)) == MULT
9997 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9998 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9999 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10000 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10001 {
10002 if (speed_p)
10003 *cost += extra_cost->mult[1].extend_add;
10004 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10005 ZERO_EXTEND, 0, speed_p)
10006 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10007 ZERO_EXTEND, 0, speed_p)
10008 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10009 return true;
10010 }
10011
10012 *cost += COSTS_N_INSNS (1);
10013
10014 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10015 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10016 {
10017 if (speed_p)
10018 *cost += (extra_cost->alu.arith
10019 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10020 ? extra_cost->alu.arith
10021 : extra_cost->alu.arith_shift));
10022
10023 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10024 0, speed_p)
10025 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10026 return true;
10027 }
10028
10029 if (speed_p)
10030 *cost += 2 * extra_cost->alu.arith;
10031 return false;
10032 }
10033
10034 /* Vector mode? */
10035 *cost = LIBCALL_COST (2);
10036 return false;
10037 case IOR:
10038 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10039 {
10040 if (speed_p)
10041 *cost += extra_cost->alu.rev;
10042
10043 return true;
10044 }
10045 /* Fall through. */
10046 case AND: case XOR:
10047 if (mode == SImode)
10048 {
10049 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10050 rtx op0 = XEXP (x, 0);
10051 rtx shift_op, shift_reg;
10052
10053 if (subcode == NOT
10054 && (code == AND
10055 || (code == IOR && TARGET_THUMB2)))
10056 op0 = XEXP (op0, 0);
10057
10058 shift_reg = NULL;
10059 shift_op = shifter_op_p (op0, &shift_reg);
10060 if (shift_op != NULL)
10061 {
10062 if (shift_reg)
10063 {
10064 if (speed_p)
10065 *cost += extra_cost->alu.log_shift_reg;
10066 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10067 }
10068 else if (speed_p)
10069 *cost += extra_cost->alu.log_shift;
10070
10071 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10072 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10073 return true;
10074 }
10075
10076 if (CONST_INT_P (XEXP (x, 1)))
10077 {
10078 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10079 INTVAL (XEXP (x, 1)), NULL_RTX,
10080 NULL_RTX, 1, 0);
10081
10082 *cost = COSTS_N_INSNS (insns);
10083 if (speed_p)
10084 *cost += insns * extra_cost->alu.logical;
10085 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10086 return true;
10087 }
10088
10089 if (speed_p)
10090 *cost += extra_cost->alu.logical;
10091 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10092 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10093 return true;
10094 }
10095
10096 if (mode == DImode)
10097 {
10098 rtx op0 = XEXP (x, 0);
10099 enum rtx_code subcode = GET_CODE (op0);
10100
10101 *cost += COSTS_N_INSNS (1);
10102
10103 if (subcode == NOT
10104 && (code == AND
10105 || (code == IOR && TARGET_THUMB2)))
10106 op0 = XEXP (op0, 0);
10107
10108 if (GET_CODE (op0) == ZERO_EXTEND)
10109 {
10110 if (speed_p)
10111 *cost += 2 * extra_cost->alu.logical;
10112
10113 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10114 0, speed_p)
10115 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10116 return true;
10117 }
10118 else if (GET_CODE (op0) == SIGN_EXTEND)
10119 {
10120 if (speed_p)
10121 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10122
10123 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10124 0, speed_p)
10125 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10126 return true;
10127 }
10128
10129 if (speed_p)
10130 *cost += 2 * extra_cost->alu.logical;
10131
10132 return true;
10133 }
10134 /* Vector mode? */
10135
10136 *cost = LIBCALL_COST (2);
10137 return false;
10138
10139 case MULT:
10140 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10141 && (mode == SFmode || !TARGET_VFP_SINGLE))
10142 {
10143 rtx op0 = XEXP (x, 0);
10144
10145 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10146 op0 = XEXP (op0, 0);
10147
10148 if (speed_p)
10149 *cost += extra_cost->fp[mode != SFmode].mult;
10150
10151 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10152 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10153 return true;
10154 }
10155 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10156 {
10157 *cost = LIBCALL_COST (2);
10158 return false;
10159 }
10160
10161 if (mode == SImode)
10162 {
10163 if (TARGET_DSP_MULTIPLY
10164 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10165 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10166 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10167 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10168 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10169 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10170 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10171 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10172 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10173 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10174 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10175 && (INTVAL (XEXP (XEXP (x, 1), 1))
10176 == 16))))))
10177 {
10178 /* SMUL[TB][TB]. */
10179 if (speed_p)
10180 *cost += extra_cost->mult[0].extend;
10181 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10182 SIGN_EXTEND, 0, speed_p);
10183 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10184 SIGN_EXTEND, 1, speed_p);
10185 return true;
10186 }
10187 if (speed_p)
10188 *cost += extra_cost->mult[0].simple;
10189 return false;
10190 }
10191
10192 if (mode == DImode)
10193 {
10194 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10195 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10196 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10197 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10198 {
10199 if (speed_p)
10200 *cost += extra_cost->mult[1].extend;
10201 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10202 ZERO_EXTEND, 0, speed_p)
10203 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10204 ZERO_EXTEND, 0, speed_p));
10205 return true;
10206 }
10207
10208 *cost = LIBCALL_COST (2);
10209 return false;
10210 }
10211
10212 /* Vector mode? */
10213 *cost = LIBCALL_COST (2);
10214 return false;
10215
10216 case NEG:
10217 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10218 && (mode == SFmode || !TARGET_VFP_SINGLE))
10219 {
10220 if (GET_CODE (XEXP (x, 0)) == MULT)
10221 {
10222 /* VNMUL. */
10223 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10224 return true;
10225 }
10226
10227 if (speed_p)
10228 *cost += extra_cost->fp[mode != SFmode].neg;
10229
10230 return false;
10231 }
10232 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10233 {
10234 *cost = LIBCALL_COST (1);
10235 return false;
10236 }
10237
10238 if (mode == SImode)
10239 {
10240 if (GET_CODE (XEXP (x, 0)) == ABS)
10241 {
10242 *cost += COSTS_N_INSNS (1);
10243 /* Assume the non-flag-changing variant. */
10244 if (speed_p)
10245 *cost += (extra_cost->alu.log_shift
10246 + extra_cost->alu.arith_shift);
10247 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10248 return true;
10249 }
10250
10251 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10252 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10253 {
10254 *cost += COSTS_N_INSNS (1);
10255 /* No extra cost for MOV imm and MVN imm. */
10256 /* If the comparison op is using the flags, there's no further
10257 cost, otherwise we need to add the cost of the comparison. */
10258 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10259 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10260 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10261 {
10262 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10263 *cost += (COSTS_N_INSNS (1)
10264 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10265 0, speed_p)
10266 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10267 1, speed_p));
10268 if (speed_p)
10269 *cost += extra_cost->alu.arith;
10270 }
10271 return true;
10272 }
10273
10274 if (speed_p)
10275 *cost += extra_cost->alu.arith;
10276 return false;
10277 }
10278
10279 if (GET_MODE_CLASS (mode) == MODE_INT
10280 && GET_MODE_SIZE (mode) < 4)
10281 {
10282 /* Slightly disparage, as we might need an extend operation. */
10283 *cost += 1;
10284 if (speed_p)
10285 *cost += extra_cost->alu.arith;
10286 return false;
10287 }
10288
10289 if (mode == DImode)
10290 {
10291 *cost += COSTS_N_INSNS (1);
10292 if (speed_p)
10293 *cost += 2 * extra_cost->alu.arith;
10294 return false;
10295 }
10296
10297 /* Vector mode? */
10298 *cost = LIBCALL_COST (1);
10299 return false;
10300
10301 case NOT:
10302 if (mode == SImode)
10303 {
10304 rtx shift_op;
10305 rtx shift_reg = NULL;
10306
10307 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10308
10309 if (shift_op)
10310 {
10311 if (shift_reg != NULL)
10312 {
10313 if (speed_p)
10314 *cost += extra_cost->alu.log_shift_reg;
10315 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10316 }
10317 else if (speed_p)
10318 *cost += extra_cost->alu.log_shift;
10319 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10320 return true;
10321 }
10322
10323 if (speed_p)
10324 *cost += extra_cost->alu.logical;
10325 return false;
10326 }
10327 if (mode == DImode)
10328 {
10329 *cost += COSTS_N_INSNS (1);
10330 return false;
10331 }
10332
10333 /* Vector mode? */
10334
10335 *cost += LIBCALL_COST (1);
10336 return false;
10337
10338 case IF_THEN_ELSE:
10339 {
10340 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10341 {
10342 *cost += COSTS_N_INSNS (3);
10343 return true;
10344 }
10345 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10346 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10347
10348 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10349 /* Assume that if one arm of the if_then_else is a register,
10350 that it will be tied with the result and eliminate the
10351 conditional insn. */
10352 if (REG_P (XEXP (x, 1)))
10353 *cost += op2cost;
10354 else if (REG_P (XEXP (x, 2)))
10355 *cost += op1cost;
10356 else
10357 {
10358 if (speed_p)
10359 {
10360 if (extra_cost->alu.non_exec_costs_exec)
10361 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10362 else
10363 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10364 }
10365 else
10366 *cost += op1cost + op2cost;
10367 }
10368 }
10369 return true;
10370
10371 case COMPARE:
10372 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10373 *cost = 0;
10374 else
10375 {
10376 machine_mode op0mode;
10377 /* We'll mostly assume that the cost of a compare is the cost of the
10378 LHS. However, there are some notable exceptions. */
10379
10380 /* Floating point compares are never done as side-effects. */
10381 op0mode = GET_MODE (XEXP (x, 0));
10382 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10383 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10384 {
10385 if (speed_p)
10386 *cost += extra_cost->fp[op0mode != SFmode].compare;
10387
10388 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10389 {
10390 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10391 return true;
10392 }
10393
10394 return false;
10395 }
10396 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10397 {
10398 *cost = LIBCALL_COST (2);
10399 return false;
10400 }
10401
10402 /* DImode compares normally take two insns. */
10403 if (op0mode == DImode)
10404 {
10405 *cost += COSTS_N_INSNS (1);
10406 if (speed_p)
10407 *cost += 2 * extra_cost->alu.arith;
10408 return false;
10409 }
10410
10411 if (op0mode == SImode)
10412 {
10413 rtx shift_op;
10414 rtx shift_reg;
10415
10416 if (XEXP (x, 1) == const0_rtx
10417 && !(REG_P (XEXP (x, 0))
10418 || (GET_CODE (XEXP (x, 0)) == SUBREG
10419 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10420 {
10421 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10422
10423 /* Multiply operations that set the flags are often
10424 significantly more expensive. */
10425 if (speed_p
10426 && GET_CODE (XEXP (x, 0)) == MULT
10427 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10428 *cost += extra_cost->mult[0].flag_setting;
10429
10430 if (speed_p
10431 && GET_CODE (XEXP (x, 0)) == PLUS
10432 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10433 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10434 0), 1), mode))
10435 *cost += extra_cost->mult[0].flag_setting;
10436 return true;
10437 }
10438
10439 shift_reg = NULL;
10440 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10441 if (shift_op != NULL)
10442 {
10443 if (shift_reg != NULL)
10444 {
10445 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10446 1, speed_p);
10447 if (speed_p)
10448 *cost += extra_cost->alu.arith_shift_reg;
10449 }
10450 else if (speed_p)
10451 *cost += extra_cost->alu.arith_shift;
10452 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10453 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10454 return true;
10455 }
10456
10457 if (speed_p)
10458 *cost += extra_cost->alu.arith;
10459 if (CONST_INT_P (XEXP (x, 1))
10460 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10461 {
10462 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10463 return true;
10464 }
10465 return false;
10466 }
10467
10468 /* Vector mode? */
10469
10470 *cost = LIBCALL_COST (2);
10471 return false;
10472 }
10473 return true;
10474
10475 case EQ:
10476 case NE:
10477 case LT:
10478 case LE:
10479 case GT:
10480 case GE:
10481 case LTU:
10482 case LEU:
10483 case GEU:
10484 case GTU:
10485 case ORDERED:
10486 case UNORDERED:
10487 case UNEQ:
10488 case UNLE:
10489 case UNLT:
10490 case UNGE:
10491 case UNGT:
10492 case LTGT:
10493 if (outer_code == SET)
10494 {
10495 /* Is it a store-flag operation? */
10496 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10497 && XEXP (x, 1) == const0_rtx)
10498 {
10499 /* Thumb also needs an IT insn. */
10500 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10501 return true;
10502 }
10503 if (XEXP (x, 1) == const0_rtx)
10504 {
10505 switch (code)
10506 {
10507 case LT:
10508 /* LSR Rd, Rn, #31. */
10509 if (speed_p)
10510 *cost += extra_cost->alu.shift;
10511 break;
10512
10513 case EQ:
10514 /* RSBS T1, Rn, #0
10515 ADC Rd, Rn, T1. */
10516
10517 case NE:
10518 /* SUBS T1, Rn, #1
10519 SBC Rd, Rn, T1. */
10520 *cost += COSTS_N_INSNS (1);
10521 break;
10522
10523 case LE:
10524 /* RSBS T1, Rn, Rn, LSR #31
10525 ADC Rd, Rn, T1. */
10526 *cost += COSTS_N_INSNS (1);
10527 if (speed_p)
10528 *cost += extra_cost->alu.arith_shift;
10529 break;
10530
10531 case GT:
10532 /* RSB Rd, Rn, Rn, ASR #1
10533 LSR Rd, Rd, #31. */
10534 *cost += COSTS_N_INSNS (1);
10535 if (speed_p)
10536 *cost += (extra_cost->alu.arith_shift
10537 + extra_cost->alu.shift);
10538 break;
10539
10540 case GE:
10541 /* ASR Rd, Rn, #31
10542 ADD Rd, Rn, #1. */
10543 *cost += COSTS_N_INSNS (1);
10544 if (speed_p)
10545 *cost += extra_cost->alu.shift;
10546 break;
10547
10548 default:
10549 /* Remaining cases are either meaningless or would take
10550 three insns anyway. */
10551 *cost = COSTS_N_INSNS (3);
10552 break;
10553 }
10554 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10555 return true;
10556 }
10557 else
10558 {
10559 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10560 if (CONST_INT_P (XEXP (x, 1))
10561 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10562 {
10563 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10564 return true;
10565 }
10566
10567 return false;
10568 }
10569 }
10570 /* Not directly inside a set. If it involves the condition code
10571 register, it must be the condition for a branch, cond_exec or
10572 if_then_else operation. Since the comparison is performed elsewhere,
10573 this is just the control part, which has no additional
10574 cost. */
10575 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10576 && XEXP (x, 1) == const0_rtx)
10577 {
10578 *cost = 0;
10579 return true;
10580 }
10581 return false;
10582
10583 case ABS:
10584 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10585 && (mode == SFmode || !TARGET_VFP_SINGLE))
10586 {
10587 if (speed_p)
10588 *cost += extra_cost->fp[mode != SFmode].neg;
10589
10590 return false;
10591 }
10592 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10593 {
10594 *cost = LIBCALL_COST (1);
10595 return false;
10596 }
10597
10598 if (mode == SImode)
10599 {
10600 if (speed_p)
10601 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10602 return false;
10603 }
10604 /* Vector mode? */
10605 *cost = LIBCALL_COST (1);
10606 return false;
10607
10608 case SIGN_EXTEND:
10609 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10610 && MEM_P (XEXP (x, 0)))
10611 {
10612 if (mode == DImode)
10613 *cost += COSTS_N_INSNS (1);
10614
10615 if (!speed_p)
10616 return true;
10617
10618 if (GET_MODE (XEXP (x, 0)) == SImode)
10619 *cost += extra_cost->ldst.load;
10620 else
10621 *cost += extra_cost->ldst.load_sign_extend;
10622
10623 if (mode == DImode)
10624 *cost += extra_cost->alu.shift;
10625
10626 return true;
10627 }
10628
10629 /* Widening from less than 32-bits requires an extend operation. */
10630 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10631 {
10632 /* We have SXTB/SXTH. */
10633 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10634 if (speed_p)
10635 *cost += extra_cost->alu.extend;
10636 }
10637 else if (GET_MODE (XEXP (x, 0)) != SImode)
10638 {
10639 /* Needs two shifts. */
10640 *cost += COSTS_N_INSNS (1);
10641 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10642 if (speed_p)
10643 *cost += 2 * extra_cost->alu.shift;
10644 }
10645
10646 /* Widening beyond 32-bits requires one more insn. */
10647 if (mode == DImode)
10648 {
10649 *cost += COSTS_N_INSNS (1);
10650 if (speed_p)
10651 *cost += extra_cost->alu.shift;
10652 }
10653
10654 return true;
10655
10656 case ZERO_EXTEND:
10657 if ((arm_arch4
10658 || GET_MODE (XEXP (x, 0)) == SImode
10659 || GET_MODE (XEXP (x, 0)) == QImode)
10660 && MEM_P (XEXP (x, 0)))
10661 {
10662 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10663
10664 if (mode == DImode)
10665 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10666
10667 return true;
10668 }
10669
10670 /* Widening from less than 32-bits requires an extend operation. */
10671 if (GET_MODE (XEXP (x, 0)) == QImode)
10672 {
10673 /* UXTB can be a shorter instruction in Thumb2, but it might
10674 be slower than the AND Rd, Rn, #255 alternative. When
10675 optimizing for speed it should never be slower to use
10676 AND, and we don't really model 16-bit vs 32-bit insns
10677 here. */
10678 if (speed_p)
10679 *cost += extra_cost->alu.logical;
10680 }
10681 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10682 {
10683 /* We have UXTB/UXTH. */
10684 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10685 if (speed_p)
10686 *cost += extra_cost->alu.extend;
10687 }
10688 else if (GET_MODE (XEXP (x, 0)) != SImode)
10689 {
10690 /* Needs two shifts. It's marginally preferable to use
10691 shifts rather than two BIC instructions as the second
10692 shift may merge with a subsequent insn as a shifter
10693 op. */
10694 *cost = COSTS_N_INSNS (2);
10695 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10696 if (speed_p)
10697 *cost += 2 * extra_cost->alu.shift;
10698 }
10699
10700 /* Widening beyond 32-bits requires one more insn. */
10701 if (mode == DImode)
10702 {
10703 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10704 }
10705
10706 return true;
10707
10708 case CONST_INT:
10709 *cost = 0;
10710 /* CONST_INT has no mode, so we cannot tell for sure how many
10711 insns are really going to be needed. The best we can do is
10712 look at the value passed. If it fits in SImode, then assume
10713 that's the mode it will be used for. Otherwise assume it
10714 will be used in DImode. */
10715 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10716 mode = SImode;
10717 else
10718 mode = DImode;
10719
10720 /* Avoid blowing up in arm_gen_constant (). */
10721 if (!(outer_code == PLUS
10722 || outer_code == AND
10723 || outer_code == IOR
10724 || outer_code == XOR
10725 || outer_code == MINUS))
10726 outer_code = SET;
10727
10728 const_int_cost:
10729 if (mode == SImode)
10730 {
10731 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10732 INTVAL (x), NULL, NULL,
10733 0, 0));
10734 /* Extra costs? */
10735 }
10736 else
10737 {
10738 *cost += COSTS_N_INSNS (arm_gen_constant
10739 (outer_code, SImode, NULL,
10740 trunc_int_for_mode (INTVAL (x), SImode),
10741 NULL, NULL, 0, 0)
10742 + arm_gen_constant (outer_code, SImode, NULL,
10743 INTVAL (x) >> 32, NULL,
10744 NULL, 0, 0));
10745 /* Extra costs? */
10746 }
10747
10748 return true;
10749
10750 case CONST:
10751 case LABEL_REF:
10752 case SYMBOL_REF:
10753 if (speed_p)
10754 {
10755 if (arm_arch_thumb2 && !flag_pic)
10756 *cost += COSTS_N_INSNS (1);
10757 else
10758 *cost += extra_cost->ldst.load;
10759 }
10760 else
10761 *cost += COSTS_N_INSNS (1);
10762
10763 if (flag_pic)
10764 {
10765 *cost += COSTS_N_INSNS (1);
10766 if (speed_p)
10767 *cost += extra_cost->alu.arith;
10768 }
10769
10770 return true;
10771
10772 case CONST_FIXED:
10773 *cost = COSTS_N_INSNS (4);
10774 /* Fixme. */
10775 return true;
10776
10777 case CONST_DOUBLE:
10778 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10779 && (mode == SFmode || !TARGET_VFP_SINGLE))
10780 {
10781 if (vfp3_const_double_rtx (x))
10782 {
10783 if (speed_p)
10784 *cost += extra_cost->fp[mode == DFmode].fpconst;
10785 return true;
10786 }
10787
10788 if (speed_p)
10789 {
10790 if (mode == DFmode)
10791 *cost += extra_cost->ldst.loadd;
10792 else
10793 *cost += extra_cost->ldst.loadf;
10794 }
10795 else
10796 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10797
10798 return true;
10799 }
10800 *cost = COSTS_N_INSNS (4);
10801 return true;
10802
10803 case CONST_VECTOR:
10804 /* Fixme. */
10805 if (TARGET_NEON
10806 && TARGET_HARD_FLOAT
10807 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10808 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10809 *cost = COSTS_N_INSNS (1);
10810 else
10811 *cost = COSTS_N_INSNS (4);
10812 return true;
10813
10814 case HIGH:
10815 case LO_SUM:
10816 /* When optimizing for size, we prefer constant pool entries to
10817 MOVW/MOVT pairs, so bump the cost of these slightly. */
10818 if (!speed_p)
10819 *cost += 1;
10820 return true;
10821
10822 case CLZ:
10823 if (speed_p)
10824 *cost += extra_cost->alu.clz;
10825 return false;
10826
10827 case SMIN:
10828 if (XEXP (x, 1) == const0_rtx)
10829 {
10830 if (speed_p)
10831 *cost += extra_cost->alu.log_shift;
10832 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10833 return true;
10834 }
10835 /* Fall through. */
10836 case SMAX:
10837 case UMIN:
10838 case UMAX:
10839 *cost += COSTS_N_INSNS (1);
10840 return false;
10841
10842 case TRUNCATE:
10843 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10844 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10845 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10846 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10847 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10848 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10849 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10850 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10851 == ZERO_EXTEND))))
10852 {
10853 if (speed_p)
10854 *cost += extra_cost->mult[1].extend;
10855 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10856 ZERO_EXTEND, 0, speed_p)
10857 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10858 ZERO_EXTEND, 0, speed_p));
10859 return true;
10860 }
10861 *cost = LIBCALL_COST (1);
10862 return false;
10863
10864 case UNSPEC_VOLATILE:
10865 case UNSPEC:
10866 return arm_unspec_cost (x, outer_code, speed_p, cost);
10867
10868 case PC:
10869 /* Reading the PC is like reading any other register. Writing it
10870 is more expensive, but we take that into account elsewhere. */
10871 *cost = 0;
10872 return true;
10873
10874 case ZERO_EXTRACT:
10875 /* TODO: Simple zero_extract of bottom bits using AND. */
10876 /* Fall through. */
10877 case SIGN_EXTRACT:
10878 if (arm_arch6
10879 && mode == SImode
10880 && CONST_INT_P (XEXP (x, 1))
10881 && CONST_INT_P (XEXP (x, 2)))
10882 {
10883 if (speed_p)
10884 *cost += extra_cost->alu.bfx;
10885 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10886 return true;
10887 }
10888 /* Without UBFX/SBFX, need to resort to shift operations. */
10889 *cost += COSTS_N_INSNS (1);
10890 if (speed_p)
10891 *cost += 2 * extra_cost->alu.shift;
10892 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10893 return true;
10894
10895 case FLOAT_EXTEND:
10896 if (TARGET_HARD_FLOAT)
10897 {
10898 if (speed_p)
10899 *cost += extra_cost->fp[mode == DFmode].widen;
10900 if (!TARGET_VFP5
10901 && GET_MODE (XEXP (x, 0)) == HFmode)
10902 {
10903 /* Pre v8, widening HF->DF is a two-step process, first
10904 widening to SFmode. */
10905 *cost += COSTS_N_INSNS (1);
10906 if (speed_p)
10907 *cost += extra_cost->fp[0].widen;
10908 }
10909 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10910 return true;
10911 }
10912
10913 *cost = LIBCALL_COST (1);
10914 return false;
10915
10916 case FLOAT_TRUNCATE:
10917 if (TARGET_HARD_FLOAT)
10918 {
10919 if (speed_p)
10920 *cost += extra_cost->fp[mode == DFmode].narrow;
10921 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10922 return true;
10923 /* Vector modes? */
10924 }
10925 *cost = LIBCALL_COST (1);
10926 return false;
10927
10928 case FMA:
10929 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10930 {
10931 rtx op0 = XEXP (x, 0);
10932 rtx op1 = XEXP (x, 1);
10933 rtx op2 = XEXP (x, 2);
10934
10935
10936 /* vfms or vfnma. */
10937 if (GET_CODE (op0) == NEG)
10938 op0 = XEXP (op0, 0);
10939
10940 /* vfnms or vfnma. */
10941 if (GET_CODE (op2) == NEG)
10942 op2 = XEXP (op2, 0);
10943
10944 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10945 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10946 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10947
10948 if (speed_p)
10949 *cost += extra_cost->fp[mode == DFmode].fma;
10950
10951 return true;
10952 }
10953
10954 *cost = LIBCALL_COST (3);
10955 return false;
10956
10957 case FIX:
10958 case UNSIGNED_FIX:
10959 if (TARGET_HARD_FLOAT)
10960 {
10961 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10962 a vcvt fixed-point conversion. */
10963 if (code == FIX && mode == SImode
10964 && GET_CODE (XEXP (x, 0)) == FIX
10965 && GET_MODE (XEXP (x, 0)) == SFmode
10966 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10967 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10968 > 0)
10969 {
10970 if (speed_p)
10971 *cost += extra_cost->fp[0].toint;
10972
10973 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10974 code, 0, speed_p);
10975 return true;
10976 }
10977
10978 if (GET_MODE_CLASS (mode) == MODE_INT)
10979 {
10980 mode = GET_MODE (XEXP (x, 0));
10981 if (speed_p)
10982 *cost += extra_cost->fp[mode == DFmode].toint;
10983 /* Strip off the 'cost' of rounding towards zero. */
10984 if (GET_CODE (XEXP (x, 0)) == FIX)
10985 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10986 0, speed_p);
10987 else
10988 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10989 /* ??? Increase the cost to deal with transferring from
10990 FP -> CORE registers? */
10991 return true;
10992 }
10993 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10994 && TARGET_VFP5)
10995 {
10996 if (speed_p)
10997 *cost += extra_cost->fp[mode == DFmode].roundint;
10998 return false;
10999 }
11000 /* Vector costs? */
11001 }
11002 *cost = LIBCALL_COST (1);
11003 return false;
11004
11005 case FLOAT:
11006 case UNSIGNED_FLOAT:
11007 if (TARGET_HARD_FLOAT)
11008 {
11009 /* ??? Increase the cost to deal with transferring from CORE
11010 -> FP registers? */
11011 if (speed_p)
11012 *cost += extra_cost->fp[mode == DFmode].fromint;
11013 return false;
11014 }
11015 *cost = LIBCALL_COST (1);
11016 return false;
11017
11018 case CALL:
11019 return true;
11020
11021 case ASM_OPERANDS:
11022 {
11023 /* Just a guess. Guess number of instructions in the asm
11024 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11025 though (see PR60663). */
11026 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11027 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11028
11029 *cost = COSTS_N_INSNS (asm_length + num_operands);
11030 return true;
11031 }
11032 default:
11033 if (mode != VOIDmode)
11034 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11035 else
11036 *cost = COSTS_N_INSNS (4); /* Who knows? */
11037 return false;
11038 }
11039 }
11040
11041 #undef HANDLE_NARROW_SHIFT_ARITH
11042
11043 /* RTX costs entry point. */
11044
11045 static bool
11046 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11047 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11048 {
11049 bool result;
11050 int code = GET_CODE (x);
11051 gcc_assert (current_tune->insn_extra_cost);
11052
11053 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11054 (enum rtx_code) outer_code,
11055 current_tune->insn_extra_cost,
11056 total, speed);
11057
11058 if (dump_file && arm_verbose_cost)
11059 {
11060 print_rtl_single (dump_file, x);
11061 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11062 *total, result ? "final" : "partial");
11063 }
11064 return result;
11065 }
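/* Editorial aside, deliberately excluded from the build: a minimal sketch of
   how the cost hook above is consulted through the generic rtx_cost and
   set_src_cost wrappers, assuming the usual COSTS_N_INSNS scaling
   (COSTS_N_INSNS (1) == 4).  The function name below is hypothetical and not
   part of GCC.  */
#if 0
static void
arm_rtx_costs_usage_sketch (void)
{
  /* Cost of materializing a constant into an SImode register.  The
     CONST_INT case above asks arm_gen_constant how many insns the value
     needs, so multi-insn constants report a correspondingly scaled cost.  */
  rtx c = GEN_INT (0x12345);
  int load_cost = set_src_cost (c, SImode, /*speed_p=*/true);

  /* The same constant queried as the second operand of an addition;
     OUTER_CODE lets the cost code pick the cheapest way of forming the
     constant in that context.  */
  int add_cost = rtx_cost (c, SImode, PLUS, 1, /*speed_p=*/true);

  gcc_assert (load_cost >= 0 && add_cost >= 0);
}
#endif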
11066
11067 /* All address computations that can be done are free, but rtx cost returns
11068 the same for practically all of them. So we weight the different types
11069 of address here in the order (most pref first):
11070 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
11071 static inline int
11072 arm_arm_address_cost (rtx x)
11073 {
11074 enum rtx_code c = GET_CODE (x);
11075
11076 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11077 return 0;
11078 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11079 return 10;
11080
11081 if (c == PLUS)
11082 {
11083 if (CONST_INT_P (XEXP (x, 1)))
11084 return 2;
11085
11086 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11087 return 3;
11088
11089 return 4;
11090 }
11091
11092 return 6;
11093 }
11094
11095 static inline int
11096 arm_thumb_address_cost (rtx x)
11097 {
11098 enum rtx_code c = GET_CODE (x);
11099
11100 if (c == REG)
11101 return 1;
11102 if (c == PLUS
11103 && REG_P (XEXP (x, 0))
11104 && CONST_INT_P (XEXP (x, 1)))
11105 return 1;
11106
11107 return 2;
11108 }
11109
11110 static int
11111 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11112 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11113 {
11114 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11115 }
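/* Editorial aside, excluded from the build: a small sketch of the relative
   weights arm_arm_address_cost assigns to a few common ARM-state addresses,
   derived directly from the function above.  Register numbers are arbitrary
   and the function name is hypothetical.  */
#if 0
static void
arm_address_cost_sketch (void)
{
  rtx r0 = gen_rtx_REG (SImode, 0);
  rtx r1 = gen_rtx_REG (SImode, 1);

  rtx post_inc  = gen_rtx_POST_INC (SImode, r0);                /* cost 0 */
  rtx reg_imm   = gen_rtx_PLUS (SImode, r0, GEN_INT (8));       /* cost 2 */
  rtx reg_shift = gen_rtx_PLUS (SImode,
                                gen_rtx_ASHIFT (SImode, r1, GEN_INT (2)),
                                r0);                            /* cost 3 */
  rtx reg_reg   = gen_rtx_PLUS (SImode, r0, r1);                /* cost 4 */

  gcc_assert (arm_arm_address_cost (post_inc) < arm_arm_address_cost (reg_imm)
              && arm_arm_address_cost (reg_imm) < arm_arm_address_cost (reg_shift)
              && arm_arm_address_cost (reg_shift) < arm_arm_address_cost (reg_reg)
              && arm_arm_address_cost (reg_reg) < arm_arm_address_cost (r0));
}
#endif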
11116
11117 /* Adjust cost hook for XScale. */
11118 static bool
11119 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11120 int * cost)
11121 {
11122 /* Some true dependencies can have a higher cost depending
11123 on precisely how certain input operands are used. */
11124 if (dep_type == 0
11125 && recog_memoized (insn) >= 0
11126 && recog_memoized (dep) >= 0)
11127 {
11128 int shift_opnum = get_attr_shift (insn);
11129 enum attr_type attr_type = get_attr_type (dep);
11130
11131 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11132 operand for INSN. If we have a shifted input operand and the
11133 instruction we depend on is another ALU instruction, then we may
11134 have to account for an additional stall. */
11135 if (shift_opnum != 0
11136 && (attr_type == TYPE_ALU_SHIFT_IMM
11137 || attr_type == TYPE_ALUS_SHIFT_IMM
11138 || attr_type == TYPE_LOGIC_SHIFT_IMM
11139 || attr_type == TYPE_LOGICS_SHIFT_IMM
11140 || attr_type == TYPE_ALU_SHIFT_REG
11141 || attr_type == TYPE_ALUS_SHIFT_REG
11142 || attr_type == TYPE_LOGIC_SHIFT_REG
11143 || attr_type == TYPE_LOGICS_SHIFT_REG
11144 || attr_type == TYPE_MOV_SHIFT
11145 || attr_type == TYPE_MVN_SHIFT
11146 || attr_type == TYPE_MOV_SHIFT_REG
11147 || attr_type == TYPE_MVN_SHIFT_REG))
11148 {
11149 rtx shifted_operand;
11150 int opno;
11151
11152 /* Get the shifted operand. */
11153 extract_insn (insn);
11154 shifted_operand = recog_data.operand[shift_opnum];
11155
11156 /* Iterate over all the operands in DEP. If we write an operand
11157 that overlaps with SHIFTED_OPERAND, then we have to increase the
11158 cost of this dependency. */
11159 extract_insn (dep);
11160 preprocess_constraints (dep);
11161 for (opno = 0; opno < recog_data.n_operands; opno++)
11162 {
11163 /* We can ignore strict inputs. */
11164 if (recog_data.operand_type[opno] == OP_IN)
11165 continue;
11166
11167 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11168 shifted_operand))
11169 {
11170 *cost = 2;
11171 return false;
11172 }
11173 }
11174 }
11175 }
11176 return true;
11177 }
11178
11179 /* Adjust cost hook for Cortex A9. */
11180 static bool
11181 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11182 int * cost)
11183 {
11184 switch (dep_type)
11185 {
11186 case REG_DEP_ANTI:
11187 *cost = 0;
11188 return false;
11189
11190 case REG_DEP_TRUE:
11191 case REG_DEP_OUTPUT:
11192 if (recog_memoized (insn) >= 0
11193 && recog_memoized (dep) >= 0)
11194 {
11195 if (GET_CODE (PATTERN (insn)) == SET)
11196 {
11197 if (GET_MODE_CLASS
11198 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11199 || GET_MODE_CLASS
11200 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11201 {
11202 enum attr_type attr_type_insn = get_attr_type (insn);
11203 enum attr_type attr_type_dep = get_attr_type (dep);
11204
11205 /* By default all dependencies of the form
11206 s0 = s0 <op> s1
11207 s0 = s0 <op> s2
11208 have an extra latency of 1 cycle because
11209 of the input and output dependency in this
11210 case. However this gets modeled as a true
11211 dependency and hence all these checks. */
11212 if (REG_P (SET_DEST (PATTERN (insn)))
11213 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11214 {
11215 /* FMACS is a special case where the dependent
11216 instruction can be issued 3 cycles before
11217 the normal latency in case of an output
11218 dependency. */
11219 if ((attr_type_insn == TYPE_FMACS
11220 || attr_type_insn == TYPE_FMACD)
11221 && (attr_type_dep == TYPE_FMACS
11222 || attr_type_dep == TYPE_FMACD))
11223 {
11224 if (dep_type == REG_DEP_OUTPUT)
11225 *cost = insn_default_latency (dep) - 3;
11226 else
11227 *cost = insn_default_latency (dep);
11228 return false;
11229 }
11230 else
11231 {
11232 if (dep_type == REG_DEP_OUTPUT)
11233 *cost = insn_default_latency (dep) + 1;
11234 else
11235 *cost = insn_default_latency (dep);
11236 }
11237 return false;
11238 }
11239 }
11240 }
11241 }
11242 break;
11243
11244 default:
11245 gcc_unreachable ();
11246 }
11247
11248 return true;
11249 }
11250
11251 /* Adjust cost hook for FA726TE. */
11252 static bool
11253 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11254 int * cost)
11255 {
11256 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11257 by a predicated one) has a penalty of 3. */
11258 if (dep_type == REG_DEP_TRUE
11259 && recog_memoized (insn) >= 0
11260 && recog_memoized (dep) >= 0
11261 && get_attr_conds (dep) == CONDS_SET)
11262 {
11263 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11264 if (get_attr_conds (insn) == CONDS_USE
11265 && get_attr_type (insn) != TYPE_BRANCH)
11266 {
11267 *cost = 3;
11268 return false;
11269 }
11270
11271 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11272 || get_attr_conds (insn) == CONDS_USE)
11273 {
11274 *cost = 0;
11275 return false;
11276 }
11277 }
11278
11279 return true;
11280 }
11281
11282 /* Implement TARGET_REGISTER_MOVE_COST.
11283
11284 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11285 such a move is typically more expensive than a single memory access. We set
11286 the cost to less than two memory accesses so that floating
11287 point to integer conversion does not go through memory. */
11288
11289 int
11290 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11291 reg_class_t from, reg_class_t to)
11292 {
11293 if (TARGET_32BIT)
11294 {
11295 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11296 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11297 return 15;
11298 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11299 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11300 return 4;
11301 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11302 return 20;
11303 else
11304 return 2;
11305 }
11306 else
11307 {
11308 if (from == HI_REGS || to == HI_REGS)
11309 return 4;
11310 else
11311 return 2;
11312 }
11313 }
11314
11315 /* Implement TARGET_MEMORY_MOVE_COST. */
11316
11317 int
11318 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11319 bool in ATTRIBUTE_UNUSED)
11320 {
11321 if (TARGET_32BIT)
11322 return 10;
11323 else
11324 {
11325 if (GET_MODE_SIZE (mode) < 4)
11326 return 8;
11327 else
11328 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11329 }
11330 }
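/* Editorial aside, excluded from the build: worked instances of the Thumb-1
   branch of the formula above (the 32-bit path simply returns 10).  The
   helper mirrors the formula rather than calling the hook, so it does not
   depend on the selected target; both names are hypothetical.  */
#if 0
static int
thumb1_memory_move_cost_model (int mode_size, bool lo_regs_p)
{
  /* Mirrors the !TARGET_32BIT branch of arm_memory_move_cost.  */
  if (mode_size < 4)
    return 8;
  return 2 * mode_size * (lo_regs_p ? 1 : 2);
}

static void
thumb1_memory_move_cost_examples (void)
{
  gcc_assert (thumb1_memory_move_cost_model (1, true) == 8);   /* QImode.  */
  gcc_assert (thumb1_memory_move_cost_model (4, true) == 8);   /* SImode, LO_REGS.  */
  gcc_assert (thumb1_memory_move_cost_model (8, true) == 16);  /* DImode, LO_REGS.  */
  gcc_assert (thumb1_memory_move_cost_model (8, false) == 32); /* DImode, other classes.  */
}
#endif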
11331
11332 /* Vectorizer cost model implementation. */
11333
11334 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11335 static int
11336 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11337 tree vectype,
11338 int misalign ATTRIBUTE_UNUSED)
11339 {
11340 unsigned elements;
11341
11342 switch (type_of_cost)
11343 {
11344 case scalar_stmt:
11345 return current_tune->vec_costs->scalar_stmt_cost;
11346
11347 case scalar_load:
11348 return current_tune->vec_costs->scalar_load_cost;
11349
11350 case scalar_store:
11351 return current_tune->vec_costs->scalar_store_cost;
11352
11353 case vector_stmt:
11354 return current_tune->vec_costs->vec_stmt_cost;
11355
11356 case vector_load:
11357 return current_tune->vec_costs->vec_align_load_cost;
11358
11359 case vector_store:
11360 return current_tune->vec_costs->vec_store_cost;
11361
11362 case vec_to_scalar:
11363 return current_tune->vec_costs->vec_to_scalar_cost;
11364
11365 case scalar_to_vec:
11366 return current_tune->vec_costs->scalar_to_vec_cost;
11367
11368 case unaligned_load:
11369 case vector_gather_load:
11370 return current_tune->vec_costs->vec_unalign_load_cost;
11371
11372 case unaligned_store:
11373 case vector_scatter_store:
11374 return current_tune->vec_costs->vec_unalign_store_cost;
11375
11376 case cond_branch_taken:
11377 return current_tune->vec_costs->cond_taken_branch_cost;
11378
11379 case cond_branch_not_taken:
11380 return current_tune->vec_costs->cond_not_taken_branch_cost;
11381
11382 case vec_perm:
11383 case vec_promote_demote:
11384 return current_tune->vec_costs->vec_stmt_cost;
11385
11386 case vec_construct:
11387 elements = TYPE_VECTOR_SUBPARTS (vectype);
11388 return elements / 2 + 1;
11389
11390 default:
11391 gcc_unreachable ();
11392 }
11393 }
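/* Editorial aside, excluded from the build: the only entry above that is not
   a straight table lookup is vec_construct, whose cost is ELEMENTS / 2 + 1.
   A few worked values, using a hypothetical helper that mirrors that
   expression.  */
#if 0
static unsigned
arm_vec_construct_cost_model (unsigned elements)
{
  return elements / 2 + 1;
}

static void
arm_vec_construct_cost_examples (void)
{
  gcc_assert (arm_vec_construct_cost_model (2) == 2);   /* e.g. V2SImode.  */
  gcc_assert (arm_vec_construct_cost_model (4) == 3);   /* e.g. V4SImode.  */
  gcc_assert (arm_vec_construct_cost_model (16) == 9);  /* e.g. V16QImode.  */
}
#endif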
11394
11395 /* Implement targetm.vectorize.add_stmt_cost. */
11396
11397 static unsigned
11398 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11399 struct _stmt_vec_info *stmt_info, int misalign,
11400 enum vect_cost_model_location where)
11401 {
11402 unsigned *cost = (unsigned *) data;
11403 unsigned retval = 0;
11404
11405 if (flag_vect_cost_model)
11406 {
11407 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11408 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11409
11410 /* Statements in an inner loop relative to the loop being
11411 vectorized are weighted more heavily. The value here is
11412 arbitrary and could potentially be improved with analysis. */
11413 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11414 count *= 50; /* FIXME. */
11415
11416 retval = (unsigned) (count * stmt_cost);
11417 cost[where] += retval;
11418 }
11419
11420 return retval;
11421 }
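/* Editorial aside, excluded from the build: a sketch of how the hook above
   accumulates costs into the per-location buckets when the vectorizer cost
   model is enabled.  A null stmt_info is accepted; the inner-loop scaling by
   50 only triggers for statements with a stmt_info that lies in an inner
   loop.  The function name is hypothetical.  */
#if 0
static void
arm_add_stmt_cost_sketch (void)
{
  unsigned costs[3] = { 0, 0, 0 };   /* prologue, body, epilogue buckets.  */

  /* Two copies of a generic vector statement in the loop body add
     2 * vec_stmt_cost to the body bucket.  */
  arm_add_stmt_cost (costs, 2, vector_stmt, NULL, 0, vect_body);

  /* A guard branch in the prologue is costed with the branch entries.  */
  arm_add_stmt_cost (costs, 1, cond_branch_taken, NULL, 0, vect_prologue);
}
#endif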
11422
11423 /* Return true if and only if this insn can dual-issue only as older. */
11424 static bool
11425 cortexa7_older_only (rtx_insn *insn)
11426 {
11427 if (recog_memoized (insn) < 0)
11428 return false;
11429
11430 switch (get_attr_type (insn))
11431 {
11432 case TYPE_ALU_DSP_REG:
11433 case TYPE_ALU_SREG:
11434 case TYPE_ALUS_SREG:
11435 case TYPE_LOGIC_REG:
11436 case TYPE_LOGICS_REG:
11437 case TYPE_ADC_REG:
11438 case TYPE_ADCS_REG:
11439 case TYPE_ADR:
11440 case TYPE_BFM:
11441 case TYPE_REV:
11442 case TYPE_MVN_REG:
11443 case TYPE_SHIFT_IMM:
11444 case TYPE_SHIFT_REG:
11445 case TYPE_LOAD_BYTE:
11446 case TYPE_LOAD_4:
11447 case TYPE_STORE_4:
11448 case TYPE_FFARITHS:
11449 case TYPE_FADDS:
11450 case TYPE_FFARITHD:
11451 case TYPE_FADDD:
11452 case TYPE_FMOV:
11453 case TYPE_F_CVT:
11454 case TYPE_FCMPS:
11455 case TYPE_FCMPD:
11456 case TYPE_FCONSTS:
11457 case TYPE_FCONSTD:
11458 case TYPE_FMULS:
11459 case TYPE_FMACS:
11460 case TYPE_FMULD:
11461 case TYPE_FMACD:
11462 case TYPE_FDIVS:
11463 case TYPE_FDIVD:
11464 case TYPE_F_MRC:
11465 case TYPE_F_MRRC:
11466 case TYPE_F_FLAG:
11467 case TYPE_F_LOADS:
11468 case TYPE_F_STORES:
11469 return true;
11470 default:
11471 return false;
11472 }
11473 }
11474
11475 /* Return true if and only if this insn can dual-issue as younger. */
11476 static bool
11477 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11478 {
11479 if (recog_memoized (insn) < 0)
11480 {
11481 if (verbose > 5)
11482 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11483 return false;
11484 }
11485
11486 switch (get_attr_type (insn))
11487 {
11488 case TYPE_ALU_IMM:
11489 case TYPE_ALUS_IMM:
11490 case TYPE_LOGIC_IMM:
11491 case TYPE_LOGICS_IMM:
11492 case TYPE_EXTEND:
11493 case TYPE_MVN_IMM:
11494 case TYPE_MOV_IMM:
11495 case TYPE_MOV_REG:
11496 case TYPE_MOV_SHIFT:
11497 case TYPE_MOV_SHIFT_REG:
11498 case TYPE_BRANCH:
11499 case TYPE_CALL:
11500 return true;
11501 default:
11502 return false;
11503 }
11504 }
11505
11506
11507 /* Look for an instruction that can dual issue only as an older
11508 instruction, and move it in front of any instructions that can
11509 dual-issue as younger, while preserving the relative order of all
11510 other instructions in the ready list. This is a heuristic to help
11511 dual-issue in later cycles, by postponing issue of more flexible
11512 instructions. This heuristic may affect dual issue opportunities
11513 in the current cycle. */
11514 static void
11515 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11516 int *n_readyp, int clock)
11517 {
11518 int i;
11519 int first_older_only = -1, first_younger = -1;
11520
11521 if (verbose > 5)
11522 fprintf (file,
11523 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11524 clock,
11525 *n_readyp);
11526
11527 /* Traverse the ready list from the head (the instruction to issue
11528 first), looking for the first instruction that can issue as
11529 younger and the first instruction that can dual-issue only as
11530 older. */
11531 for (i = *n_readyp - 1; i >= 0; i--)
11532 {
11533 rtx_insn *insn = ready[i];
11534 if (cortexa7_older_only (insn))
11535 {
11536 first_older_only = i;
11537 if (verbose > 5)
11538 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11539 break;
11540 }
11541 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11542 first_younger = i;
11543 }
11544
11545 /* Nothing to reorder because either no younger insn was found, or an insn
11546 that can dual-issue only as older appears before any insn that
11547 can dual-issue as younger. */
11548 if (first_younger == -1)
11549 {
11550 if (verbose > 5)
11551 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11552 return;
11553 }
11554
11555 /* Nothing to reorder because no older-only insn in the ready list. */
11556 if (first_older_only == -1)
11557 {
11558 if (verbose > 5)
11559 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11560 return;
11561 }
11562
11563 /* Move first_older_only insn before first_younger. */
11564 if (verbose > 5)
11565 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11566 INSN_UID(ready [first_older_only]),
11567 INSN_UID(ready [first_younger]));
11568 rtx_insn *first_older_only_insn = ready [first_older_only];
11569 for (i = first_older_only; i < first_younger; i++)
11570 {
11571 ready[i] = ready[i+1];
11572 }
11573
11574 ready[i] = first_older_only_insn;
11575 return;
11576 }
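/* Editorial aside, excluded from the build: the rotation above, replayed on a
   plain array.  Index N_READY - 1 is the head of the ready list (it issues
   first); a negative value stands for an older-only insn, positive values for
   insns that can also issue as younger.  The function name is hypothetical.  */
#if 0
static void
cortexa7_sched_reorder_example (void)
{
  int ready[4] = { 10, -1, 20, 30 };   /* 30 would issue first.  */
  int first_older_only = 1, first_younger = 3, i;

  /* Same shuffle as cortexa7_sched_reorder performs on the rtx_insn array.  */
  int older = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];
  ready[i] = older;

  /* The older-only insn now issues first and the relative order of the
     remaining insns is preserved: { 10, 20, 30, -1 }.  */
  gcc_assert (ready[3] == -1 && ready[2] == 30 && ready[1] == 20
              && ready[0] == 10);
}
#endif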
11577
11578 /* Implement TARGET_SCHED_REORDER. */
11579 static int
11580 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11581 int clock)
11582 {
11583 switch (arm_tune)
11584 {
11585 case TARGET_CPU_cortexa7:
11586 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11587 break;
11588 default:
11589 /* Do nothing for other cores. */
11590 break;
11591 }
11592
11593 return arm_issue_rate ();
11594 }
11595
11596 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11597 It corrects the value of COST based on the relationship between
11598 INSN and DEP and the dependence type DEP_TYPE. It returns the new
11599 value. There is a per-core adjust_cost hook to adjust scheduler costs
11600 and the per-core hook can choose to completely override the generic
11601 adjust_cost function. Only put bits of code into arm_adjust_cost that
11602 are common across all cores. */
11603 static int
11604 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11605 unsigned int)
11606 {
11607 rtx i_pat, d_pat;
11608
11609 /* When generating Thumb-1 code, we want to place flag-setting operations
11610 close to a conditional branch which depends on them, so that we can
11611 omit the comparison. */
11612 if (TARGET_THUMB1
11613 && dep_type == 0
11614 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11615 && recog_memoized (dep) >= 0
11616 && get_attr_conds (dep) == CONDS_SET)
11617 return 0;
11618
11619 if (current_tune->sched_adjust_cost != NULL)
11620 {
11621 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11622 return cost;
11623 }
11624
11625 /* XXX Is this strictly true? */
11626 if (dep_type == REG_DEP_ANTI
11627 || dep_type == REG_DEP_OUTPUT)
11628 return 0;
11629
11630 /* Call insns don't incur a stall, even if they follow a load. */
11631 if (dep_type == 0
11632 && CALL_P (insn))
11633 return 1;
11634
11635 if ((i_pat = single_set (insn)) != NULL
11636 && MEM_P (SET_SRC (i_pat))
11637 && (d_pat = single_set (dep)) != NULL
11638 && MEM_P (SET_DEST (d_pat)))
11639 {
11640 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11641 /* This is a load after a store, there is no conflict if the load reads
11642 from a cached area. Assume that loads from the stack, and from the
11643 constant pool are cached, and that others will miss. This is a
11644 hack. */
11645
11646 if ((GET_CODE (src_mem) == SYMBOL_REF
11647 && CONSTANT_POOL_ADDRESS_P (src_mem))
11648 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11649 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11650 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11651 return 1;
11652 }
11653
11654 return cost;
11655 }
11656
11657 int
11658 arm_max_conditional_execute (void)
11659 {
11660 return max_insns_skipped;
11661 }
11662
11663 static int
11664 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11665 {
11666 if (TARGET_32BIT)
11667 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11668 else
11669 return (optimize > 0) ? 2 : 0;
11670 }
11671
11672 static int
11673 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11674 {
11675 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11676 }
11677
11678 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11679 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11680 sequences of non-executed instructions in IT blocks probably take the same
11681 amount of time as executed instructions (and the IT instruction itself takes
11682 space in icache). This function was experimentally determined to give good
11683 results on a popular embedded benchmark. */
11684
11685 static int
11686 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11687 {
11688 return (TARGET_32BIT && speed_p) ? 1
11689 : arm_default_branch_cost (speed_p, predictable_p);
11690 }
11691
11692 static int
11693 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11694 {
11695 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11696 }
11697
11698 static bool fp_consts_inited = false;
11699
11700 static REAL_VALUE_TYPE value_fp0;
11701
11702 static void
11703 init_fp_table (void)
11704 {
11705 REAL_VALUE_TYPE r;
11706
11707 r = REAL_VALUE_ATOF ("0", DFmode);
11708 value_fp0 = r;
11709 fp_consts_inited = true;
11710 }
11711
11712 /* Return TRUE if rtx X is a valid immediate FP constant. */
11713 int
11714 arm_const_double_rtx (rtx x)
11715 {
11716 const REAL_VALUE_TYPE *r;
11717
11718 if (!fp_consts_inited)
11719 init_fp_table ();
11720
11721 r = CONST_DOUBLE_REAL_VALUE (x);
11722 if (REAL_VALUE_MINUS_ZERO (*r))
11723 return 0;
11724
11725 if (real_equal (r, &value_fp0))
11726 return 1;
11727
11728 return 0;
11729 }
11730
11731 /* VFPv3 has a fairly wide range of representable immediates, formed from
11732 "quarter-precision" floating-point values. These can be evaluated using this
11733 formula (with ^ for exponentiation):
11734
11735 -1^s * n * 2^-r
11736
11737 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11738 16 <= n <= 31 and 0 <= r <= 7.
11739
11740 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11741
11742 - A (most-significant) is the sign bit.
11743 - BCD are the exponent (encoded as r XOR 3).
11744 - EFGH are the mantissa (encoded as n - 16).
11745 */
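/* Editorial aside, excluded from the build: a worked example of the encoding
   described above.  The helper rebuilds the 8-bit fconst[sd] immediate
   straight from the (s, n, r) decomposition; the spot-checked values follow
   from the formula (e.g. 1.0 = 16 * 2^-4).  Both function names are
   hypothetical.  */
#if 0
static unsigned
vfp3_quarter_precision_encode (int sign, int n, int r)
{
  /* value = -1^sign * n * 2^-r, with 16 <= n <= 31 and 0 <= r <= 7.  */
  gcc_assert (n >= 16 && n <= 31 && r >= 0 && r <= 7);
  return (sign << 7) | ((r ^ 3) << 4) | (n - 16);
}

static void
vfp3_quarter_precision_examples (void)
{
  gcc_assert (vfp3_quarter_precision_encode (0, 16, 4) == 0x70);  /* 1.0  */
  gcc_assert (vfp3_quarter_precision_encode (0, 16, 5) == 0x60);  /* 0.5  */
  gcc_assert (vfp3_quarter_precision_encode (1, 24, 4) == 0xf8);  /* -1.5 */
}
#endif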
11746
11747 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11748 fconst[sd] instruction, or -1 if X isn't suitable. */
11749 static int
11750 vfp3_const_double_index (rtx x)
11751 {
11752 REAL_VALUE_TYPE r, m;
11753 int sign, exponent;
11754 unsigned HOST_WIDE_INT mantissa, mant_hi;
11755 unsigned HOST_WIDE_INT mask;
11756 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11757 bool fail;
11758
11759 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11760 return -1;
11761
11762 r = *CONST_DOUBLE_REAL_VALUE (x);
11763
11764 /* We can't represent these things, so detect them first. */
11765 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11766 return -1;
11767
11768 /* Extract sign, exponent and mantissa. */
11769 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11770 r = real_value_abs (&r);
11771 exponent = REAL_EXP (&r);
11772 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11773 highest (sign) bit, with a fixed binary point at bit point_pos.
11774 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11775 bits for the mantissa, this may fail (low bits would be lost). */
11776 real_ldexp (&m, &r, point_pos - exponent);
11777 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11778 mantissa = w.elt (0);
11779 mant_hi = w.elt (1);
11780
11781 /* If there are bits set in the low part of the mantissa, we can't
11782 represent this value. */
11783 if (mantissa != 0)
11784 return -1;
11785
11786 /* Now make it so that mantissa contains the most-significant bits, and move
11787 the point_pos to indicate that the least-significant bits have been
11788 discarded. */
11789 point_pos -= HOST_BITS_PER_WIDE_INT;
11790 mantissa = mant_hi;
11791
11792 /* We can permit four significant bits of mantissa only, plus a high bit
11793 which is always 1. */
11794 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11795 if ((mantissa & mask) != 0)
11796 return -1;
11797
11798 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11799 mantissa >>= point_pos - 5;
11800
11801 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11802 floating-point immediate zero with Neon using an integer-zero load, but
11803 that case is handled elsewhere.) */
11804 if (mantissa == 0)
11805 return -1;
11806
11807 gcc_assert (mantissa >= 16 && mantissa <= 31);
11808
11809 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11810 normalized significands are in the range [1, 2). (Our mantissa is shifted
11811 left 4 places at this point relative to normalized IEEE754 values). GCC
11812 internally uses [0.5, 1) (see real.c), so the exponent returned from
11813 REAL_EXP must be altered. */
11814 exponent = 5 - exponent;
11815
11816 if (exponent < 0 || exponent > 7)
11817 return -1;
11818
11819 /* Sign, mantissa and exponent are now in the correct form to plug into the
11820 formula described in the comment above. */
11821 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11822 }
11823
11824 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11825 int
11826 vfp3_const_double_rtx (rtx x)
11827 {
11828 if (!TARGET_VFP3)
11829 return 0;
11830
11831 return vfp3_const_double_index (x) != -1;
11832 }
11833
11834 /* Recognize immediates which can be used in various Neon instructions. Legal
11835 immediates are described by the following table (for VMVN variants, the
11836 bitwise inverse of the constant shown is recognized. In either case, VMOV
11837 is output and the correct instruction to use for a given constant is chosen
11838 by the assembler). The constant shown is replicated across all elements of
11839 the destination vector.
11840
11841 insn elems variant constant (binary)
11842 ---- ----- ------- -----------------
11843 vmov i32 0 00000000 00000000 00000000 abcdefgh
11844 vmov i32 1 00000000 00000000 abcdefgh 00000000
11845 vmov i32 2 00000000 abcdefgh 00000000 00000000
11846 vmov i32 3 abcdefgh 00000000 00000000 00000000
11847 vmov i16 4 00000000 abcdefgh
11848 vmov i16 5 abcdefgh 00000000
11849 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11850 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11851 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11852 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11853 vmvn i16 10 00000000 abcdefgh
11854 vmvn i16 11 abcdefgh 00000000
11855 vmov i32 12 00000000 00000000 abcdefgh 11111111
11856 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11857 vmov i32 14 00000000 abcdefgh 11111111 11111111
11858 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11859 vmov i8 16 abcdefgh
11860 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11861 eeeeeeee ffffffff gggggggg hhhhhhhh
11862 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11863 vmov f32 19 00000000 00000000 00000000 00000000
11864
11865 For case 18, B = !b. Representable values are exactly those accepted by
11866 vfp3_const_double_index, but are output as floating-point numbers rather
11867 than indices.
11868
11869 For case 19, we will change it to vmov.i32 when assembling.
11870
11871 Variants 0-5 (inclusive) may also be used as immediates for the second
11872 operand of VORR/VBIC instructions.
11873
11874 The INVERSE argument causes the bitwise inverse of the given operand to be
11875 recognized instead (used for recognizing legal immediates for the VAND/VORN
11876 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11877 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11878 output, rather than the real insns vbic/vorr).
11879
11880 INVERSE makes no difference to the recognition of float vectors.
11881
11882 The return value is the variant of immediate as shown in the above table, or
11883 -1 if the given value doesn't match any of the listed patterns.
11884 */
11885 static int
11886 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11887 rtx *modconst, int *elementwidth)
11888 {
11889 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11890 matches = 1; \
11891 for (i = 0; i < idx; i += (STRIDE)) \
11892 if (!(TEST)) \
11893 matches = 0; \
11894 if (matches) \
11895 { \
11896 immtype = (CLASS); \
11897 elsize = (ELSIZE); \
11898 break; \
11899 }
11900
11901 unsigned int i, elsize = 0, idx = 0, n_elts;
11902 unsigned int innersize;
11903 unsigned char bytes[16];
11904 int immtype = -1, matches;
11905 unsigned int invmask = inverse ? 0xff : 0;
11906 bool vector = GET_CODE (op) == CONST_VECTOR;
11907
11908 if (vector)
11909 n_elts = CONST_VECTOR_NUNITS (op);
11910 else
11911 {
11912 n_elts = 1;
11913 if (mode == VOIDmode)
11914 mode = DImode;
11915 }
11916
11917 innersize = GET_MODE_UNIT_SIZE (mode);
11918
11919 /* Vectors of float constants. */
11920 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11921 {
11922 rtx el0 = CONST_VECTOR_ELT (op, 0);
11923
11924 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11925 return -1;
11926
11927 /* FP16 vectors cannot be represented. */
11928 if (GET_MODE_INNER (mode) == HFmode)
11929 return -1;
11930
11931 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11932 are distinct in this context. */
11933 if (!const_vec_duplicate_p (op))
11934 return -1;
11935
11936 if (modconst)
11937 *modconst = CONST_VECTOR_ELT (op, 0);
11938
11939 if (elementwidth)
11940 *elementwidth = 0;
11941
11942 if (el0 == CONST0_RTX (GET_MODE (el0)))
11943 return 19;
11944 else
11945 return 18;
11946 }
11947
11948 /* The tricks done in the code below apply for little-endian vector layout.
11949 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11950 FIXME: Implement logic for big-endian vectors. */
11951 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11952 return -1;
11953
11954 /* Splat vector constant out into a byte vector. */
11955 for (i = 0; i < n_elts; i++)
11956 {
11957 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11958 unsigned HOST_WIDE_INT elpart;
11959
11960 gcc_assert (CONST_INT_P (el));
11961 elpart = INTVAL (el);
11962
11963 for (unsigned int byte = 0; byte < innersize; byte++)
11964 {
11965 bytes[idx++] = (elpart & 0xff) ^ invmask;
11966 elpart >>= BITS_PER_UNIT;
11967 }
11968 }
11969
11970 /* Sanity check. */
11971 gcc_assert (idx == GET_MODE_SIZE (mode));
11972
11973 do
11974 {
11975 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11976 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11977
11978 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11979 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11980
11981 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11982 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11983
11984 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11985 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11986
11987 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11988
11989 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11990
11991 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11992 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11993
11994 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11995 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11996
11997 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11998 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11999
12000 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12001 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12002
12003 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12004
12005 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12006
12007 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12008 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12009
12010 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12011 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12012
12013 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12014 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12015
12016 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12017 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12018
12019 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12020
12021 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12022 && bytes[i] == bytes[(i + 8) % idx]);
12023 }
12024 while (0);
12025
12026 if (immtype == -1)
12027 return -1;
12028
12029 if (elementwidth)
12030 *elementwidth = elsize;
12031
12032 if (modconst)
12033 {
12034 unsigned HOST_WIDE_INT imm = 0;
12035
12036 /* Un-invert bytes of recognized vector, if necessary. */
12037 if (invmask != 0)
12038 for (i = 0; i < idx; i++)
12039 bytes[i] ^= invmask;
12040
12041 if (immtype == 17)
12042 {
12043 /* FIXME: Broken on 32-bit H_W_I hosts. */
12044 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12045
12046 for (i = 0; i < 8; i++)
12047 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12048 << (i * BITS_PER_UNIT);
12049
12050 *modconst = GEN_INT (imm);
12051 }
12052 else
12053 {
12054 unsigned HOST_WIDE_INT imm = 0;
12055
12056 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12057 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12058
12059 *modconst = GEN_INT (imm);
12060 }
12061 }
12062
12063 return immtype;
12064 #undef CHECK
12065 }
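/* Editorial aside, excluded from the build: a worked instance of the byte
   matching above, using a plain array instead of a CONST_VECTOR.  A V2SImode
   constant with both elements equal to 0x4a splats (little-endian) to the
   bytes below and satisfies the variant-0 test, i.e. it can be emitted as
   "vmov.i32 dN, #0x4a" with element width 32.  The function name is
   hypothetical.  */
#if 0
static void
neon_valid_immediate_example (void)
{
  unsigned char bytes[8] = { 0x4a, 0, 0, 0, 0x4a, 0, 0, 0 };
  unsigned int i, matches = 1;

  /* Variant 0: bytes[i] == bytes[0] && bytes[i + 1..3] == 0, stride 4.  */
  for (i = 0; i < 8; i += 4)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
          && bytes[i + 2] == 0 && bytes[i + 3] == 0))
      matches = 0;

  gcc_assert (matches);
}
#endif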
12066
12067 /* Return TRUE if rtx OP is legal for use as either a Neon VMOV (or, implicitly,
12068 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12069 float elements), and a modified constant (whatever should be output for a
12070 VMOV) in *MODCONST. */
12071
12072 int
12073 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12074 rtx *modconst, int *elementwidth)
12075 {
12076 rtx tmpconst;
12077 int tmpwidth;
12078 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12079
12080 if (retval == -1)
12081 return 0;
12082
12083 if (modconst)
12084 *modconst = tmpconst;
12085
12086 if (elementwidth)
12087 *elementwidth = tmpwidth;
12088
12089 return 1;
12090 }
12091
12092 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction. If
12093 the immediate is valid, write a constant suitable for using as an operand
12094 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12095 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12096
12097 int
12098 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12099 rtx *modconst, int *elementwidth)
12100 {
12101 rtx tmpconst;
12102 int tmpwidth;
12103 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12104
12105 if (retval < 0 || retval > 5)
12106 return 0;
12107
12108 if (modconst)
12109 *modconst = tmpconst;
12110
12111 if (elementwidth)
12112 *elementwidth = tmpwidth;
12113
12114 return 1;
12115 }
12116
12117 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12118 the immediate is valid, write a constant suitable for using as an operand
12119 to VSHR/VSHL to *MODCONST and the corresponding element width to
12120 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
12121 shift, because the two have different range limitations.
12122
12123 int
12124 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12125 rtx *modconst, int *elementwidth,
12126 bool isleftshift)
12127 {
12128 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12129 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12130 unsigned HOST_WIDE_INT last_elt = 0;
12131 unsigned HOST_WIDE_INT maxshift;
12132
12133 /* All vector elements must be the same constant shift amount. */
12134 for (i = 0; i < n_elts; i++)
12135 {
12136 rtx el = CONST_VECTOR_ELT (op, i);
12137 unsigned HOST_WIDE_INT elpart;
12138
12139 if (CONST_INT_P (el))
12140 elpart = INTVAL (el);
12141 else if (CONST_DOUBLE_P (el))
12142 return 0;
12143 else
12144 gcc_unreachable ();
12145
12146 if (i != 0 && elpart != last_elt)
12147 return 0;
12148
12149 last_elt = elpart;
12150 }
12151
12152 /* Shift less than element size. */
12153 maxshift = innersize * 8;
12154
12155 if (isleftshift)
12156 {
12157 /* Left shift immediate value can be from 0 to <size>-1. */
12158 if (last_elt >= maxshift)
12159 return 0;
12160 }
12161 else
12162 {
12163 /* Right shift immediate value can be from 1 to <size>. */
12164 if (last_elt == 0 || last_elt > maxshift)
12165 return 0;
12166 }
12167
12168 if (elementwidth)
12169 *elementwidth = innersize * 8;
12170
12171 if (modconst)
12172 *modconst = CONST_VECTOR_ELT (op, 0);
12173
12174 return 1;
12175 }
12176
12177 /* Return a string suitable for output of Neon immediate logic operation
12178 MNEM. */
12179
12180 char *
12181 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12182 int inverse, int quad)
12183 {
12184 int width, is_valid;
12185 static char templ[40];
12186
12187 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12188
12189 gcc_assert (is_valid != 0);
12190
12191 if (quad)
12192 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12193 else
12194 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12195
12196 return templ;
12197 }
12198
12199 /* Return a string suitable for output of Neon immediate shift operation
12200 (VSHR or VSHL) MNEM. */
12201
12202 char *
12203 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12204 machine_mode mode, int quad,
12205 bool isleftshift)
12206 {
12207 int width, is_valid;
12208 static char templ[40];
12209
12210 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12211 gcc_assert (is_valid != 0);
12212
12213 if (quad)
12214 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12215 else
12216 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12217
12218 return templ;
12219 }
12220
12221 /* Output a sequence of pairwise operations to implement a reduction.
12222 NOTE: We do "too much work" here, because pairwise operations work on two
12223 registers-worth of operands in one go. Unfortunately we can't exploit those
12224 extra calculations to do the full operation in fewer steps, as far as we can tell.
12225 Although all vector elements of the result but the first are ignored, we
12226 actually calculate the same result in each of the elements. An alternative
12227 such as initially loading a vector with zero to use as each of the second
12228 operands would use up an additional register and take an extra instruction,
12229 for no particular gain. */
12230
12231 void
12232 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12233 rtx (*reduc) (rtx, rtx, rtx))
12234 {
12235 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12236 rtx tmpsum = op1;
12237
12238 for (i = parts / 2; i >= 1; i /= 2)
12239 {
12240 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12241 emit_insn (reduc (dest, tmpsum, tmpsum));
12242 tmpsum = dest;
12243 }
12244 }
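/* Editorial aside, excluded from the build: a simplified scalar model of the
   reduction loop above for a four-element vector.  Each step combines
   adjacent pairs; because both source operands are TMPSUM, the second half of
   the result duplicates the first, and after log2(4) = 2 steps lane 0 (the
   only lane the caller uses) holds the full reduction.  Lane handling on real
   NEON pairwise ops differs in detail; the function name is hypothetical.  */
#if 0
static void
neon_pairwise_reduce_model (void)
{
  float v[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
  int parts, j;

  for (parts = 4; parts > 1; parts /= 2)
    {
      float next[4];
      for (j = 0; j < parts / 2; j++)
        next[j] = v[2 * j] + v[2 * j + 1];      /* pairwise add */
      for (j = parts / 2; j < parts; j++)
        next[j] = next[j - parts / 2];          /* duplicated half */
      for (j = 0; j < parts; j++)
        v[j] = next[j];
    }

  gcc_assert (v[0] == 10.0f);   /* 1 + 2 + 3 + 4 */
}
#endif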
12245
12246 /* If VALS is a vector constant that can be loaded into a register
12247 using VDUP, generate instructions to do so and return an RTX to
12248 assign to the register. Otherwise return NULL_RTX. */
12249
12250 static rtx
12251 neon_vdup_constant (rtx vals)
12252 {
12253 machine_mode mode = GET_MODE (vals);
12254 machine_mode inner_mode = GET_MODE_INNER (mode);
12255 rtx x;
12256
12257 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12258 return NULL_RTX;
12259
12260 if (!const_vec_duplicate_p (vals, &x))
12261 /* The elements are not all the same. We could handle repeating
12262 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12263 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12264 vdup.i16). */
12265 return NULL_RTX;
12266
12267 /* We can load this constant by using VDUP and a constant in a
12268 single ARM register. This will be cheaper than a vector
12269 load. */
12270
12271 x = copy_to_mode_reg (inner_mode, x);
12272 return gen_vec_duplicate (mode, x);
12273 }
12274
12275 /* Generate code to load VALS, which is a PARALLEL containing only
12276 constants (for vec_init) or CONST_VECTOR, efficiently into a
12277 register. Returns an RTX to copy into the register, or NULL_RTX
12278 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12279
12280 rtx
12281 neon_make_constant (rtx vals)
12282 {
12283 machine_mode mode = GET_MODE (vals);
12284 rtx target;
12285 rtx const_vec = NULL_RTX;
12286 int n_elts = GET_MODE_NUNITS (mode);
12287 int n_const = 0;
12288 int i;
12289
12290 if (GET_CODE (vals) == CONST_VECTOR)
12291 const_vec = vals;
12292 else if (GET_CODE (vals) == PARALLEL)
12293 {
12294 /* A CONST_VECTOR must contain only CONST_INTs and
12295 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12296 Only store valid constants in a CONST_VECTOR. */
12297 for (i = 0; i < n_elts; ++i)
12298 {
12299 rtx x = XVECEXP (vals, 0, i);
12300 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12301 n_const++;
12302 }
12303 if (n_const == n_elts)
12304 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12305 }
12306 else
12307 gcc_unreachable ();
12308
12309 if (const_vec != NULL
12310 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12311 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12312 return const_vec;
12313 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12314 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12315 pipeline cycle; creating the constant takes one or two ARM
12316 pipeline cycles. */
12317 return target;
12318 else if (const_vec != NULL_RTX)
12319 /* Load from constant pool. On Cortex-A8 this takes two cycles
12320 (for either double or quad vectors). We can not take advantage
12321 of single-cycle VLD1 because we need a PC-relative addressing
12322 mode. */
12323 return const_vec;
12324 else
12325 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12326 We can not construct an initializer. */
12327 return NULL_RTX;
12328 }
12329
12330 /* Initialize vector TARGET to VALS. */
12331
12332 void
12333 neon_expand_vector_init (rtx target, rtx vals)
12334 {
12335 machine_mode mode = GET_MODE (target);
12336 machine_mode inner_mode = GET_MODE_INNER (mode);
12337 int n_elts = GET_MODE_NUNITS (mode);
12338 int n_var = 0, one_var = -1;
12339 bool all_same = true;
12340 rtx x, mem;
12341 int i;
12342
12343 for (i = 0; i < n_elts; ++i)
12344 {
12345 x = XVECEXP (vals, 0, i);
12346 if (!CONSTANT_P (x))
12347 ++n_var, one_var = i;
12348
12349 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12350 all_same = false;
12351 }
12352
12353 if (n_var == 0)
12354 {
12355 rtx constant = neon_make_constant (vals);
12356 if (constant != NULL_RTX)
12357 {
12358 emit_move_insn (target, constant);
12359 return;
12360 }
12361 }
12362
12363 /* Splat a single non-constant element if we can. */
12364 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12365 {
12366 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12367 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12368 return;
12369 }
12370
12371 /* One field is non-constant. Load constant then overwrite varying
12372 field. This is more efficient than using the stack. */
12373 if (n_var == 1)
12374 {
12375 rtx copy = copy_rtx (vals);
12376 rtx index = GEN_INT (one_var);
12377
12378 /* Load constant part of vector, substitute neighboring value for
12379 varying element. */
12380 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12381 neon_expand_vector_init (target, copy);
12382
12383 /* Insert variable. */
12384 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12385 switch (mode)
12386 {
12387 case E_V8QImode:
12388 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12389 break;
12390 case E_V16QImode:
12391 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12392 break;
12393 case E_V4HImode:
12394 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12395 break;
12396 case E_V8HImode:
12397 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12398 break;
12399 case E_V2SImode:
12400 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12401 break;
12402 case E_V4SImode:
12403 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12404 break;
12405 case E_V2SFmode:
12406 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12407 break;
12408 case E_V4SFmode:
12409 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12410 break;
12411 case E_V2DImode:
12412 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12413 break;
12414 default:
12415 gcc_unreachable ();
12416 }
12417 return;
12418 }
12419
12420 /* Construct the vector in memory one field at a time
12421 and load the whole vector. */
12422 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12423 for (i = 0; i < n_elts; i++)
12424 emit_move_insn (adjust_address_nv (mem, inner_mode,
12425 i * GET_MODE_SIZE (inner_mode)),
12426 XVECEXP (vals, 0, i));
12427 emit_move_insn (target, mem);
12428 }
12429
12430 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12431 an error if it doesn't. EXP indicates the source location, which includes the
12432 inlining history for intrinsics. */
12433
12434 static void
12435 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12436 const_tree exp, const char *desc)
12437 {
12438 HOST_WIDE_INT lane;
12439
12440 gcc_assert (CONST_INT_P (operand));
12441
12442 lane = INTVAL (operand);
12443
12444 if (lane < low || lane >= high)
12445 {
12446 if (exp)
12447 error ("%K%s %wd out of range %wd - %wd",
12448 exp, desc, lane, low, high - 1);
12449 else
12450 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12451 }
12452 }
12453
12454 /* Bounds-check lanes. */
12455
12456 void
12457 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12458 const_tree exp)
12459 {
12460 bounds_check (operand, low, high, exp, "lane");
12461 }
12462
12463 /* Bounds-check constants. */
12464
12465 void
12466 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12467 {
12468 bounds_check (operand, low, high, NULL_TREE, "constant");
12469 }
12470
12471 HOST_WIDE_INT
12472 neon_element_bits (machine_mode mode)
12473 {
12474 return GET_MODE_UNIT_BITSIZE (mode);
12475 }
12476
12477 \f
12478 /* Predicates for `match_operand' and `match_operator'. */
12479
12480 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12481 WB is true if full writeback address modes are allowed and is false
12482 if limited writeback address modes (POST_INC and PRE_DEC) are
12483 allowed. */
12484
12485 int
12486 arm_coproc_mem_operand (rtx op, bool wb)
12487 {
12488 rtx ind;
12489
12490 /* Reject eliminable registers. */
12491 if (! (reload_in_progress || reload_completed || lra_in_progress)
12492 && ( reg_mentioned_p (frame_pointer_rtx, op)
12493 || reg_mentioned_p (arg_pointer_rtx, op)
12494 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12495 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12496 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12497 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12498 return FALSE;
12499
12500 /* Constants are converted into offsets from labels. */
12501 if (!MEM_P (op))
12502 return FALSE;
12503
12504 ind = XEXP (op, 0);
12505
12506 if (reload_completed
12507 && (GET_CODE (ind) == LABEL_REF
12508 || (GET_CODE (ind) == CONST
12509 && GET_CODE (XEXP (ind, 0)) == PLUS
12510 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12511 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12512 return TRUE;
12513
12514 /* Match: (mem (reg)). */
12515 if (REG_P (ind))
12516 return arm_address_register_rtx_p (ind, 0);
12517
12518 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12519 acceptable in any case (subject to verification by
12520 arm_address_register_rtx_p). We need WB to be true to accept
12521 PRE_INC and POST_DEC. */
12522 if (GET_CODE (ind) == POST_INC
12523 || GET_CODE (ind) == PRE_DEC
12524 || (wb
12525 && (GET_CODE (ind) == PRE_INC
12526 || GET_CODE (ind) == POST_DEC)))
12527 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12528
12529 if (wb
12530 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12531 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12532 && GET_CODE (XEXP (ind, 1)) == PLUS
12533 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12534 ind = XEXP (ind, 1);
12535
12536 /* Match:
12537 (plus (reg)
12538 (const)). */
12539 if (GET_CODE (ind) == PLUS
12540 && REG_P (XEXP (ind, 0))
12541 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12542 && CONST_INT_P (XEXP (ind, 1))
12543 && INTVAL (XEXP (ind, 1)) > -1024
12544 && INTVAL (XEXP (ind, 1)) < 1024
12545 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12546 return TRUE;
12547
12548 return FALSE;
12549 }
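
/* Examples of addresses the predicate above accepts (register numbers are
   arbitrary and machine modes are omitted for brevity):

     (mem (reg r4))                             plain register
     (mem (post_inc (reg r4)))                  allowed regardless of WB
     (mem (pre_inc (reg r4)))                   allowed only when WB is true
     (mem (plus (reg r4) (const_int 508)))      offset in (-1024, 1024) and a
                                                multiple of 4

   An offset of 510 is rejected because it is not word-aligned; 1024 is
   rejected because it is out of range.  */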
12550
12551 /* Return TRUE if OP is a memory operand which we can load or store a vector
12552 to/from. TYPE is one of the following values:
12553 0 - Vector load/store (vldr)
12554 1 - Core registers (ldm)
12555 2 - Element/structure loads (vld1)
12556 */
12557 int
12558 neon_vector_mem_operand (rtx op, int type, bool strict)
12559 {
12560 rtx ind;
12561
12562 /* Reject eliminable registers. */
12563 if (strict && ! (reload_in_progress || reload_completed)
12564 && (reg_mentioned_p (frame_pointer_rtx, op)
12565 || reg_mentioned_p (arg_pointer_rtx, op)
12566 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12567 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12568 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12569 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12570 return FALSE;
12571
12572 /* Constants are converted into offsets from labels. */
12573 if (!MEM_P (op))
12574 return FALSE;
12575
12576 ind = XEXP (op, 0);
12577
12578 if (reload_completed
12579 && (GET_CODE (ind) == LABEL_REF
12580 || (GET_CODE (ind) == CONST
12581 && GET_CODE (XEXP (ind, 0)) == PLUS
12582 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12583 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12584 return TRUE;
12585
12586 /* Match: (mem (reg)). */
12587 if (REG_P (ind))
12588 return arm_address_register_rtx_p (ind, 0);
12589
12590 /* Allow post-increment with Neon registers. */
12591 if ((type != 1 && GET_CODE (ind) == POST_INC)
12592 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12593 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12594
12595 /* Allow post-increment by register for VLDn.  */
12596 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12597 && GET_CODE (XEXP (ind, 1)) == PLUS
12598 && REG_P (XEXP (XEXP (ind, 1), 1)))
12599 return true;
12600
12601 /* Match:
12602 (plus (reg)
12603 (const)). */
12604 if (type == 0
12605 && GET_CODE (ind) == PLUS
12606 && REG_P (XEXP (ind, 0))
12607 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12608 && CONST_INT_P (XEXP (ind, 1))
12609 && INTVAL (XEXP (ind, 1)) > -1024
12610 /* For quad modes, we restrict the constant offset to be slightly less
12611 than what the instruction format permits. We have no such constraint
12612 on double mode offsets. (This must match arm_legitimate_index_p.) */
12613 && (INTVAL (XEXP (ind, 1))
12614 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12615 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12616 return TRUE;
12617
12618 return FALSE;
12619 }
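
/* A sketch of what each TYPE above accepts in addition to a plain
   (mem (reg)) (register numbers arbitrary, modes omitted):

     TYPE 0 (vldr):  (mem (post_inc (reg r4)))
                     (mem (pre_dec (reg r4)))
                     (mem (plus (reg r4) (const_int off)))
                       where -1024 < off < 1016 for quad modes
                       (off < 1024 for double modes) and off is a multiple of 4
     TYPE 1 (ldm):   no additional forms
     TYPE 2 (vld1):  (mem (post_inc (reg r4)))
                     (mem (post_modify (reg r4) (plus (reg r4) (reg r5))))  */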
12620
12621 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12622 type. */
12623 int
12624 neon_struct_mem_operand (rtx op)
12625 {
12626 rtx ind;
12627
12628 /* Reject eliminable registers. */
12629 if (! (reload_in_progress || reload_completed)
12630 && ( reg_mentioned_p (frame_pointer_rtx, op)
12631 || reg_mentioned_p (arg_pointer_rtx, op)
12632 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12633 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12634 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12635 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12636 return FALSE;
12637
12638 /* Constants are converted into offsets from labels. */
12639 if (!MEM_P (op))
12640 return FALSE;
12641
12642 ind = XEXP (op, 0);
12643
12644 if (reload_completed
12645 && (GET_CODE (ind) == LABEL_REF
12646 || (GET_CODE (ind) == CONST
12647 && GET_CODE (XEXP (ind, 0)) == PLUS
12648 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12649 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12650 return TRUE;
12651
12652 /* Match: (mem (reg)). */
12653 if (REG_P (ind))
12654 return arm_address_register_rtx_p (ind, 0);
12655
12656 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12657 if (GET_CODE (ind) == POST_INC
12658 || GET_CODE (ind) == PRE_DEC)
12659 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12660
12661 return FALSE;
12662 }
12663
12664 /* Return true if X is a register that will be eliminated later on. */
12665 int
12666 arm_eliminable_register (rtx x)
12667 {
12668 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12669 || REGNO (x) == ARG_POINTER_REGNUM
12670 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12671 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12672 }
12673
12674 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12675 coprocessor registers.  Otherwise return NO_REGS. */
12676
12677 enum reg_class
12678 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12679 {
12680 if (mode == HFmode)
12681 {
12682 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12683 return GENERAL_REGS;
12684 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12685 return NO_REGS;
12686 return GENERAL_REGS;
12687 }
12688
12689 /* The neon move patterns handle all legitimate vector and struct
12690 addresses. */
12691 if (TARGET_NEON
12692 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12693 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12694 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12695 || VALID_NEON_STRUCT_MODE (mode)))
12696 return NO_REGS;
12697
12698 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12699 return NO_REGS;
12700
12701 return GENERAL_REGS;
12702 }
12703
12704 /* Return TRUE for values which must be returned in the most-significant end
12705 of the return register. */
12706
12707 static bool
12708 arm_return_in_msb (const_tree valtype)
12709 {
12710 return (TARGET_AAPCS_BASED
12711 && BYTES_BIG_ENDIAN
12712 && (AGGREGATE_TYPE_P (valtype)
12713 || TREE_CODE (valtype) == COMPLEX_TYPE
12714 || FIXED_POINT_TYPE_P (valtype)));
12715 }
12716
12717 /* Return TRUE if X references a SYMBOL_REF. */
12718 int
12719 symbol_mentioned_p (rtx x)
12720 {
12721 const char * fmt;
12722 int i;
12723
12724 if (GET_CODE (x) == SYMBOL_REF)
12725 return 1;
12726
12727 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12728 are constant offsets, not symbols. */
12729 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12730 return 0;
12731
12732 fmt = GET_RTX_FORMAT (GET_CODE (x));
12733
12734 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12735 {
12736 if (fmt[i] == 'E')
12737 {
12738 int j;
12739
12740 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12741 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12742 return 1;
12743 }
12744 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12745 return 1;
12746 }
12747
12748 return 0;
12749 }
12750
12751 /* Return TRUE if X references a LABEL_REF. */
12752 int
12753 label_mentioned_p (rtx x)
12754 {
12755 const char * fmt;
12756 int i;
12757
12758 if (GET_CODE (x) == LABEL_REF)
12759 return 1;
12760
12761 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12762 instruction, but they are constant offsets, not symbols. */
12763 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12764 return 0;
12765
12766 fmt = GET_RTX_FORMAT (GET_CODE (x));
12767 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12768 {
12769 if (fmt[i] == 'E')
12770 {
12771 int j;
12772
12773 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12774 if (label_mentioned_p (XVECEXP (x, i, j)))
12775 return 1;
12776 }
12777 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12778 return 1;
12779 }
12780
12781 return 0;
12782 }
12783
12784 int
12785 tls_mentioned_p (rtx x)
12786 {
12787 switch (GET_CODE (x))
12788 {
12789 case CONST:
12790 return tls_mentioned_p (XEXP (x, 0));
12791
12792 case UNSPEC:
12793 if (XINT (x, 1) == UNSPEC_TLS)
12794 return 1;
12795
12796 /* Fall through. */
12797 default:
12798 return 0;
12799 }
12800 }
12801
12802 /* Must not copy any rtx that uses a pc-relative address.
12803 Also, disallow copying of load-exclusive instructions that
12804 may appear after splitting of compare-and-swap-style operations
12805 so as to prevent those loops from being transformed away from their
12806 canonical forms (see PR 69904). */
12807
12808 static bool
12809 arm_cannot_copy_insn_p (rtx_insn *insn)
12810 {
12811 /* The tls call insn cannot be copied, as it is paired with a data
12812 word. */
12813 if (recog_memoized (insn) == CODE_FOR_tlscall)
12814 return true;
12815
12816 subrtx_iterator::array_type array;
12817 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12818 {
12819 const_rtx x = *iter;
12820 if (GET_CODE (x) == UNSPEC
12821 && (XINT (x, 1) == UNSPEC_PIC_BASE
12822 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12823 return true;
12824 }
12825
12826 rtx set = single_set (insn);
12827 if (set)
12828 {
12829 rtx src = SET_SRC (set);
12830 if (GET_CODE (src) == ZERO_EXTEND)
12831 src = XEXP (src, 0);
12832
12833 /* Catch the load-exclusive and load-acquire operations. */
12834 if (GET_CODE (src) == UNSPEC_VOLATILE
12835 && (XINT (src, 1) == VUNSPEC_LL
12836 || XINT (src, 1) == VUNSPEC_LAX))
12837 return true;
12838 }
12839 return false;
12840 }
12841
12842 enum rtx_code
12843 minmax_code (rtx x)
12844 {
12845 enum rtx_code code = GET_CODE (x);
12846
12847 switch (code)
12848 {
12849 case SMAX:
12850 return GE;
12851 case SMIN:
12852 return LE;
12853 case UMIN:
12854 return LEU;
12855 case UMAX:
12856 return GEU;
12857 default:
12858 gcc_unreachable ();
12859 }
12860 }
12861
12862 /* Match a pair of min/max operators that can be implemented via usat/ssat. */
12863
12864 bool
12865 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12866 int *mask, bool *signed_sat)
12867 {
12868 /* The high bound must be a power of two minus one. */
12869 int log = exact_log2 (INTVAL (hi_bound) + 1);
12870 if (log == -1)
12871 return false;
12872
12873 /* The low bound is either zero (for usat) or one less than the
12874 negation of the high bound (for ssat). */
12875 if (INTVAL (lo_bound) == 0)
12876 {
12877 if (mask)
12878 *mask = log;
12879 if (signed_sat)
12880 *signed_sat = false;
12881
12882 return true;
12883 }
12884
12885 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12886 {
12887 if (mask)
12888 *mask = log + 1;
12889 if (signed_sat)
12890 *signed_sat = true;
12891
12892 return true;
12893 }
12894
12895 return false;
12896 }
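
/* Worked examples of the matching above:

     lo_bound = 0,    hi_bound = 255:  log = 8, *mask = 8, *signed_sat = false
                                       (the usat #8 case, range [0, 255])
     lo_bound = -256, hi_bound = 255:  log = 8, lo_bound == -hi_bound - 1,
                                       *mask = 9, *signed_sat = true
                                       (the ssat #9 case, range [-256, 255])
     lo_bound = -100, hi_bound = 255:  no match; the low bound is neither zero
                                       nor -hi_bound - 1.  */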
12897
12898 /* Return 1 if memory locations are adjacent. */
12899 int
12900 adjacent_mem_locations (rtx a, rtx b)
12901 {
12902 /* We don't guarantee to preserve the order of these memory refs. */
12903 if (volatile_refs_p (a) || volatile_refs_p (b))
12904 return 0;
12905
12906 if ((REG_P (XEXP (a, 0))
12907 || (GET_CODE (XEXP (a, 0)) == PLUS
12908 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12909 && (REG_P (XEXP (b, 0))
12910 || (GET_CODE (XEXP (b, 0)) == PLUS
12911 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12912 {
12913 HOST_WIDE_INT val0 = 0, val1 = 0;
12914 rtx reg0, reg1;
12915 int val_diff;
12916
12917 if (GET_CODE (XEXP (a, 0)) == PLUS)
12918 {
12919 reg0 = XEXP (XEXP (a, 0), 0);
12920 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12921 }
12922 else
12923 reg0 = XEXP (a, 0);
12924
12925 if (GET_CODE (XEXP (b, 0)) == PLUS)
12926 {
12927 reg1 = XEXP (XEXP (b, 0), 0);
12928 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12929 }
12930 else
12931 reg1 = XEXP (b, 0);
12932
12933 /* Don't accept any offset that will require multiple
12934 instructions to handle, since this would cause the
12935 arith_adjacentmem pattern to output an overlong sequence. */
12936 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12937 return 0;
12938
12939 /* Don't allow an eliminable register: register elimination can make
12940 the offset too large. */
12941 if (arm_eliminable_register (reg0))
12942 return 0;
12943
12944 val_diff = val1 - val0;
12945
12946 if (arm_ld_sched)
12947 {
12948 /* If the target has load delay slots, then there's no benefit
12949 to using an ldm instruction unless the offset is zero and
12950 we are optimizing for size. */
12951 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12952 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12953 && (val_diff == 4 || val_diff == -4));
12954 }
12955
12956 return ((REGNO (reg0) == REGNO (reg1))
12957 && (val_diff == 4 || val_diff == -4));
12958 }
12959
12960 return 0;
12961 }
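
/* For example, (mem (plus (reg r4) (const_int 8))) and
   (mem (plus (reg r4) (const_int 12))) are adjacent: same base register and
   offsets differing by exactly 4.  Offsets 8 and 16 are not.  When
   arm_ld_sched is set, the pair is only accepted when optimizing for size
   and one of the offsets is 0 or 4.  */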
12962
12963 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12964 for load operations, false for store operations. CONSECUTIVE is true
12965 if the register numbers in the operation must be consecutive in the register
12966 bank. RETURN_PC is true if the value is to be loaded into the PC.
12967 The pattern we are trying to match for load is:
12968 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12969 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12970 :
12971 :
12972 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12973 ]
12974 where
12975 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12976 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12977 3. If consecutive is TRUE, then for kth register being loaded,
12978 REGNO (R_dk) = REGNO (R_d0) + k.
12979 The pattern for store is similar. */
12980 bool
12981 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12982 bool consecutive, bool return_pc)
12983 {
12984 HOST_WIDE_INT count = XVECLEN (op, 0);
12985 rtx reg, mem, addr;
12986 unsigned regno;
12987 unsigned first_regno;
12988 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12989 rtx elt;
12990 bool addr_reg_in_reglist = false;
12991 bool update = false;
12992 int reg_increment;
12993 int offset_adj;
12994 int regs_per_val;
12995
12996 /* If not in SImode, then registers must be consecutive
12997 (e.g., VLDM instructions for DFmode). */
12998 gcc_assert ((mode == SImode) || consecutive);
12999 /* Setting return_pc for stores is illegal. */
13000 gcc_assert (!return_pc || load);
13001
13002 /* Set up the increments and the regs per val based on the mode. */
13003 reg_increment = GET_MODE_SIZE (mode);
13004 regs_per_val = reg_increment / 4;
13005 offset_adj = return_pc ? 1 : 0;
13006
13007 if (count <= 1
13008 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13009 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13010 return false;
13011
13012 /* Check if this is a write-back. */
13013 elt = XVECEXP (op, 0, offset_adj);
13014 if (GET_CODE (SET_SRC (elt)) == PLUS)
13015 {
13016 i++;
13017 base = 1;
13018 update = true;
13019
13020 /* The offset adjustment must be the number of registers being
13021 popped times the size of a single register. */
13022 if (!REG_P (SET_DEST (elt))
13023 || !REG_P (XEXP (SET_SRC (elt), 0))
13024 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13025 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13026 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13027 ((count - 1 - offset_adj) * reg_increment))
13028 return false;
13029 }
13030
13031 i = i + offset_adj;
13032 base = base + offset_adj;
13033 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13034 success depends on the type: VLDM can do just one reg,
13035 LDM must do at least two. */
13036 if ((count <= i) && (mode == SImode))
13037 return false;
13038
13039 elt = XVECEXP (op, 0, i - 1);
13040 if (GET_CODE (elt) != SET)
13041 return false;
13042
13043 if (load)
13044 {
13045 reg = SET_DEST (elt);
13046 mem = SET_SRC (elt);
13047 }
13048 else
13049 {
13050 reg = SET_SRC (elt);
13051 mem = SET_DEST (elt);
13052 }
13053
13054 if (!REG_P (reg) || !MEM_P (mem))
13055 return false;
13056
13057 regno = REGNO (reg);
13058 first_regno = regno;
13059 addr = XEXP (mem, 0);
13060 if (GET_CODE (addr) == PLUS)
13061 {
13062 if (!CONST_INT_P (XEXP (addr, 1)))
13063 return false;
13064
13065 offset = INTVAL (XEXP (addr, 1));
13066 addr = XEXP (addr, 0);
13067 }
13068
13069 if (!REG_P (addr))
13070 return false;
13071
13072 /* Don't allow SP to be loaded unless it is also the base register. It
13073 guarantees that SP is reset correctly when an LDM instruction
13074 is interrupted. Otherwise, we might end up with a corrupt stack. */
13075 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13076 return false;
13077
13078 for (; i < count; i++)
13079 {
13080 elt = XVECEXP (op, 0, i);
13081 if (GET_CODE (elt) != SET)
13082 return false;
13083
13084 if (load)
13085 {
13086 reg = SET_DEST (elt);
13087 mem = SET_SRC (elt);
13088 }
13089 else
13090 {
13091 reg = SET_SRC (elt);
13092 mem = SET_DEST (elt);
13093 }
13094
13095 if (!REG_P (reg)
13096 || GET_MODE (reg) != mode
13097 || REGNO (reg) <= regno
13098 || (consecutive
13099 && (REGNO (reg) !=
13100 (unsigned int) (first_regno + regs_per_val * (i - base))))
13101 /* Don't allow SP to be loaded unless it is also the base register. It
13102 guarantees that SP is reset correctly when an LDM instruction
13103 is interrupted. Otherwise, we might end up with a corrupt stack. */
13104 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13105 || !MEM_P (mem)
13106 || GET_MODE (mem) != mode
13107 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13108 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13109 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13110 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13111 offset + (i - base) * reg_increment))
13112 && (!REG_P (XEXP (mem, 0))
13113 || offset + (i - base) * reg_increment != 0)))
13114 return false;
13115
13116 regno = REGNO (reg);
13117 if (regno == REGNO (addr))
13118 addr_reg_in_reglist = true;
13119 }
13120
13121 if (load)
13122 {
13123 if (update && addr_reg_in_reglist)
13124 return false;
13125
13126 /* For Thumb-1, the address register is always modified, either by write-back
13127 or by an explicit load. If the pattern does not describe an update,
13128 then the address register must be in the list of loaded registers. */
13129 if (TARGET_THUMB1)
13130 return update || addr_reg_in_reglist;
13131 }
13132
13133 return true;
13134 }
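
/* As an illustration, a three-register pop with update (an LDM with
   write-back) reaches this function as a PARALLEL of the form

     [(set (reg sp) (plus (reg sp) (const_int 12)))
      (set (reg r4) (mem (reg sp)))
      (set (reg r5) (mem (plus (reg sp) (const_int 4))))
      (set (reg r6) (mem (plus (reg sp) (const_int 8))))]

   (modes omitted).  COUNT is 4, the first element is recognized as the
   write-back since 12 == (4 - 1) * 4, and r4 < r5 < r6 with offsets
   increasing by the register size, so the function returns true.  */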
13135
13136 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13137 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13138 instruction. ADD_OFFSET is nonzero if the base address register needs
13139 to be modified with an add instruction before we can use it. */
13140
13141 static bool
13142 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13143 int nops, HOST_WIDE_INT add_offset)
13144 {
13145 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13146 if the offset isn't small enough. The reason 2 ldrs are faster
13147 is because these ARMs are able to do more than one cache access
13148 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13149 whilst the ARM8 has a double bandwidth cache. This means that
13150 these cores can do both an instruction fetch and a data fetch in
13151 a single cycle, so the trick of calculating the address into a
13152 scratch register (one of the result regs) and then doing a load
13153 multiple actually becomes slower (and no smaller in code size).
13154 That is the transformation
13155
13156 ldr rd1, [rbase + offset]
13157 ldr rd2, [rbase + offset + 4]
13158
13159 to
13160
13161 add rd1, rbase, offset
13162 ldmia rd1, {rd1, rd2}
13163
13164 produces worse code -- '3 cycles + any stalls on rd2' instead of
13165 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13166 access per cycle, the first sequence could never complete in less
13167 than 6 cycles, whereas the ldm sequence would only take 5 and
13168 would make better use of sequential accesses if not hitting the
13169 cache.
13170
13171 We cheat here and test 'arm_ld_sched' which we currently know to
13172 only be true for the ARM8, ARM9 and StrongARM. If this ever
13173 changes, then the test below needs to be reworked. */
13174 if (nops == 2 && arm_ld_sched && add_offset != 0)
13175 return false;
13176
13177 /* XScale has load-store double instructions, but they have stricter
13178 alignment requirements than load-store multiple, so we cannot
13179 use them.
13180
13181 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13182 the pipeline until completion.
13183
13184 NREGS CYCLES
13185 1 3
13186 2 4
13187 3 5
13188 4 6
13189
13190 An ldr instruction takes 1-3 cycles, but does not block the
13191 pipeline.
13192
13193 NREGS CYCLES
13194 1 1-3
13195 2 2-6
13196 3 3-9
13197 4 4-12
13198
13199 Best case ldr will always win. However, the more ldr instructions
13200 we issue, the less likely we are to be able to schedule them well.
13201 Using ldr instructions also increases code size.
13202
13203 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13204 for counts of 3 or 4 regs. */
13205 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13206 return false;
13207 return true;
13208 }
13209
13210 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13211 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13212 an array ORDER which describes the sequence to use when accessing the
13213 offsets that produces an ascending order. In this sequence, each
13214 offset must be larger by exactly 4 than the previous one. ORDER[0]
13215 must have been filled in with the lowest offset by the caller.
13216 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13217 we use to verify that ORDER produces an ascending order of registers.
13218 Return true if it was possible to construct such an order, false if
13219 not. */
13220
13221 static bool
13222 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13223 int *unsorted_regs)
13224 {
13225 int i;
13226 for (i = 1; i < nops; i++)
13227 {
13228 int j;
13229
13230 order[i] = order[i - 1];
13231 for (j = 0; j < nops; j++)
13232 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13233 {
13234 /* We must find exactly one offset that is higher than the
13235 previous one by 4. */
13236 if (order[i] != order[i - 1])
13237 return false;
13238 order[i] = j;
13239 }
13240 if (order[i] == order[i - 1])
13241 return false;
13242 /* The register numbers must be ascending. */
13243 if (unsorted_regs != NULL
13244 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13245 return false;
13246 }
13247 return true;
13248 }
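
/* A worked example of the ordering above: with NOPS == 4 and
   UNSORTED_OFFSETS == {8, 0, 4, 12}, the caller fills in ORDER[0] == 1 (the
   index of the lowest offset); the loop then finds offsets 4, 8 and 12 in
   turn, giving ORDER == {1, 2, 0, 3}.  If some step cannot find an offset
   exactly 4 larger than the previous one (e.g. offsets {0, 4, 12}), the
   function returns false.  */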
13249
13250 /* Used to determine in a peephole whether a sequence of load
13251 instructions can be changed into a load-multiple instruction.
13252 NOPS is the number of separate load instructions we are examining. The
13253 first NOPS entries in OPERANDS are the destination registers, the
13254 next NOPS entries are memory operands. If this function is
13255 successful, *BASE is set to the common base register of the memory
13256 accesses; *LOAD_OFFSET is set to the first memory location's offset
13257 from that base register.
13258 REGS is an array filled in with the destination register numbers.
13259 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13260 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13261 the sequence of registers in REGS matches the loads from ascending memory
13262 locations, and the function verifies that the register numbers are
13263 themselves ascending. If CHECK_REGS is false, the register numbers
13264 are stored in the order they are found in the operands. */
13265 static int
13266 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13267 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13268 {
13269 int unsorted_regs[MAX_LDM_STM_OPS];
13270 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13271 int order[MAX_LDM_STM_OPS];
13272 rtx base_reg_rtx = NULL;
13273 int base_reg = -1;
13274 int i, ldm_case;
13275
13276 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13277 easily extended if required. */
13278 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13279
13280 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13281
13282 /* Loop over the operands and check that the memory references are
13283 suitable (i.e. immediate offsets from the same base register). At
13284 the same time, extract the target register, and the memory
13285 offsets. */
13286 for (i = 0; i < nops; i++)
13287 {
13288 rtx reg;
13289 rtx offset;
13290
13291 /* Convert a subreg of a mem into the mem itself. */
13292 if (GET_CODE (operands[nops + i]) == SUBREG)
13293 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13294
13295 gcc_assert (MEM_P (operands[nops + i]));
13296
13297 /* Don't reorder volatile memory references; it doesn't seem worth
13298 looking for the case where the order is ok anyway. */
13299 if (MEM_VOLATILE_P (operands[nops + i]))
13300 return 0;
13301
13302 offset = const0_rtx;
13303
13304 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13305 || (GET_CODE (reg) == SUBREG
13306 && REG_P (reg = SUBREG_REG (reg))))
13307 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13308 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13309 || (GET_CODE (reg) == SUBREG
13310 && REG_P (reg = SUBREG_REG (reg))))
13311 && (CONST_INT_P (offset
13312 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13313 {
13314 if (i == 0)
13315 {
13316 base_reg = REGNO (reg);
13317 base_reg_rtx = reg;
13318 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13319 return 0;
13320 }
13321 else if (base_reg != (int) REGNO (reg))
13322 /* Not addressed from the same base register. */
13323 return 0;
13324
13325 unsorted_regs[i] = (REG_P (operands[i])
13326 ? REGNO (operands[i])
13327 : REGNO (SUBREG_REG (operands[i])));
13328
13329 /* If it isn't an integer register, or if it overwrites the
13330 base register but isn't the last insn in the list, then
13331 we can't do this. */
13332 if (unsorted_regs[i] < 0
13333 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13334 || unsorted_regs[i] > 14
13335 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13336 return 0;
13337
13338 /* Don't allow SP to be loaded unless it is also the base
13339 register. It guarantees that SP is reset correctly when
13340 an LDM instruction is interrupted. Otherwise, we might
13341 end up with a corrupt stack. */
13342 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13343 return 0;
13344
13345 unsorted_offsets[i] = INTVAL (offset);
13346 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13347 order[0] = i;
13348 }
13349 else
13350 /* Not a suitable memory address. */
13351 return 0;
13352 }
13353
13354 /* All the useful information has now been extracted from the
13355 operands into unsorted_regs and unsorted_offsets; additionally,
13356 order[0] has been set to the lowest offset in the list. Sort
13357 the offsets into order, verifying that they are adjacent, and
13358 check that the register numbers are ascending. */
13359 if (!compute_offset_order (nops, unsorted_offsets, order,
13360 check_regs ? unsorted_regs : NULL))
13361 return 0;
13362
13363 if (saved_order)
13364 memcpy (saved_order, order, sizeof order);
13365
13366 if (base)
13367 {
13368 *base = base_reg;
13369
13370 for (i = 0; i < nops; i++)
13371 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13372
13373 *load_offset = unsorted_offsets[order[0]];
13374 }
13375
13376 if (TARGET_THUMB1
13377 && !peep2_reg_dead_p (nops, base_reg_rtx))
13378 return 0;
13379
13380 if (unsorted_offsets[order[0]] == 0)
13381 ldm_case = 1; /* ldmia */
13382 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13383 ldm_case = 2; /* ldmib */
13384 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13385 ldm_case = 3; /* ldmda */
13386 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13387 ldm_case = 4; /* ldmdb */
13388 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13389 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13390 ldm_case = 5;
13391 else
13392 return 0;
13393
13394 if (!multiple_operation_profitable_p (false, nops,
13395 ldm_case == 5
13396 ? unsorted_offsets[order[0]] : 0))
13397 return 0;
13398
13399 return ldm_case;
13400 }
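
/* For example, three loads of r0, r1 and r2 from [r4], [r4, #4] and
   [r4, #8] yield REGS == {0, 1, 2}, *BASE == 4, *LOAD_OFFSET == 0 and a
   return value of 1 (ldmia).  Had the lowest offset been 4 the result would
   be 2 (ldmib, ARM state only); a larger but ARM-encodable lowest offset
   such as 256 gives case 5, where the caller must first add the offset to
   the base register.  */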
13401
13402 /* Used to determine in a peephole whether a sequence of store instructions can
13403 be changed into a store-multiple instruction.
13404 NOPS is the number of separate store instructions we are examining.
13405 NOPS_TOTAL is the total number of instructions recognized by the peephole
13406 pattern.
13407 The first NOPS entries in OPERANDS are the source registers, the next
13408 NOPS entries are memory operands. If this function is successful, *BASE is
13409 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13410 to the first memory location's offset from that base register. REGS is an
13411 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13412 likewise filled with the corresponding rtx's.
13413 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13414 numbers to an ascending order of stores.
13415 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13416 from ascending memory locations, and the function verifies that the register
13417 numbers are themselves ascending. If CHECK_REGS is false, the register
13418 numbers are stored in the order they are found in the operands. */
13419 static int
13420 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13421 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13422 HOST_WIDE_INT *load_offset, bool check_regs)
13423 {
13424 int unsorted_regs[MAX_LDM_STM_OPS];
13425 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13426 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13427 int order[MAX_LDM_STM_OPS];
13428 int base_reg = -1;
13429 rtx base_reg_rtx = NULL;
13430 int i, stm_case;
13431
13432 /* Write-back of the base register is currently only supported for Thumb-1. */
13433 int base_writeback = TARGET_THUMB1;
13434
13435 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13436 easily extended if required. */
13437 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13438
13439 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13440
13441 /* Loop over the operands and check that the memory references are
13442 suitable (i.e. immediate offsets from the same base register). At
13443 the same time, extract the source registers and the memory
13444 offsets. */
13445 for (i = 0; i < nops; i++)
13446 {
13447 rtx reg;
13448 rtx offset;
13449
13450 /* Convert a subreg of a mem into the mem itself. */
13451 if (GET_CODE (operands[nops + i]) == SUBREG)
13452 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13453
13454 gcc_assert (MEM_P (operands[nops + i]));
13455
13456 /* Don't reorder volatile memory references; it doesn't seem worth
13457 looking for the case where the order is ok anyway. */
13458 if (MEM_VOLATILE_P (operands[nops + i]))
13459 return 0;
13460
13461 offset = const0_rtx;
13462
13463 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13464 || (GET_CODE (reg) == SUBREG
13465 && REG_P (reg = SUBREG_REG (reg))))
13466 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13467 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13468 || (GET_CODE (reg) == SUBREG
13469 && REG_P (reg = SUBREG_REG (reg))))
13470 && (CONST_INT_P (offset
13471 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13472 {
13473 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13474 ? operands[i] : SUBREG_REG (operands[i]));
13475 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13476
13477 if (i == 0)
13478 {
13479 base_reg = REGNO (reg);
13480 base_reg_rtx = reg;
13481 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13482 return 0;
13483 }
13484 else if (base_reg != (int) REGNO (reg))
13485 /* Not addressed from the same base register. */
13486 return 0;
13487
13488 /* If it isn't an integer register, then we can't do this. */
13489 if (unsorted_regs[i] < 0
13490 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13491 /* The effects are unpredictable if the base register is
13492 both updated and stored. */
13493 || (base_writeback && unsorted_regs[i] == base_reg)
13494 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13495 || unsorted_regs[i] > 14)
13496 return 0;
13497
13498 unsorted_offsets[i] = INTVAL (offset);
13499 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13500 order[0] = i;
13501 }
13502 else
13503 /* Not a suitable memory address. */
13504 return 0;
13505 }
13506
13507 /* All the useful information has now been extracted from the
13508 operands into unsorted_regs and unsorted_offsets; additionally,
13509 order[0] has been set to the lowest offset in the list. Sort
13510 the offsets into order, verifying that they are adjacent, and
13511 check that the register numbers are ascending. */
13512 if (!compute_offset_order (nops, unsorted_offsets, order,
13513 check_regs ? unsorted_regs : NULL))
13514 return 0;
13515
13516 if (saved_order)
13517 memcpy (saved_order, order, sizeof order);
13518
13519 if (base)
13520 {
13521 *base = base_reg;
13522
13523 for (i = 0; i < nops; i++)
13524 {
13525 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13526 if (reg_rtxs)
13527 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13528 }
13529
13530 *load_offset = unsorted_offsets[order[0]];
13531 }
13532
13533 if (TARGET_THUMB1
13534 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13535 return 0;
13536
13537 if (unsorted_offsets[order[0]] == 0)
13538 stm_case = 1; /* stmia */
13539 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13540 stm_case = 2; /* stmib */
13541 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13542 stm_case = 3; /* stmda */
13543 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13544 stm_case = 4; /* stmdb */
13545 else
13546 return 0;
13547
13548 if (!multiple_operation_profitable_p (false, nops, 0))
13549 return 0;
13550
13551 return stm_case;
13552 }
13553 \f
13554 /* Routines for use in generating RTL. */
13555
13556 /* Generate a load-multiple instruction. COUNT is the number of loads in
13557 the instruction; REGS and MEMS are arrays containing the operands.
13558 BASEREG is the base register to be used in addressing the memory operands.
13559 WBACK_OFFSET is nonzero if the instruction should update the base
13560 register. */
13561
13562 static rtx
13563 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13564 HOST_WIDE_INT wback_offset)
13565 {
13566 int i = 0, j;
13567 rtx result;
13568
13569 if (!multiple_operation_profitable_p (false, count, 0))
13570 {
13571 rtx seq;
13572
13573 start_sequence ();
13574
13575 for (i = 0; i < count; i++)
13576 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13577
13578 if (wback_offset != 0)
13579 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13580
13581 seq = get_insns ();
13582 end_sequence ();
13583
13584 return seq;
13585 }
13586
13587 result = gen_rtx_PARALLEL (VOIDmode,
13588 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13589 if (wback_offset != 0)
13590 {
13591 XVECEXP (result, 0, 0)
13592 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13593 i = 1;
13594 count++;
13595 }
13596
13597 for (j = 0; i < count; i++, j++)
13598 XVECEXP (result, 0, i)
13599 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13600
13601 return result;
13602 }
13603
13604 /* Generate a store-multiple instruction. COUNT is the number of stores in
13605 the instruction; REGS and MEMS are arrays containing the operands.
13606 BASEREG is the base register to be used in addressing the memory operands.
13607 WBACK_OFFSET is nonzero if the instruction should update the base
13608 register. */
13609
13610 static rtx
13611 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13612 HOST_WIDE_INT wback_offset)
13613 {
13614 int i = 0, j;
13615 rtx result;
13616
13617 if (GET_CODE (basereg) == PLUS)
13618 basereg = XEXP (basereg, 0);
13619
13620 if (!multiple_operation_profitable_p (false, count, 0))
13621 {
13622 rtx seq;
13623
13624 start_sequence ();
13625
13626 for (i = 0; i < count; i++)
13627 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13628
13629 if (wback_offset != 0)
13630 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13631
13632 seq = get_insns ();
13633 end_sequence ();
13634
13635 return seq;
13636 }
13637
13638 result = gen_rtx_PARALLEL (VOIDmode,
13639 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13640 if (wback_offset != 0)
13641 {
13642 XVECEXP (result, 0, 0)
13643 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13644 i = 1;
13645 count++;
13646 }
13647
13648 for (j = 0; i < count; i++, j++)
13649 XVECEXP (result, 0, i)
13650 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13651
13652 return result;
13653 }
13654
13655 /* Generate either a load-multiple or a store-multiple instruction. This
13656 function can be used in situations where we can start with a single MEM
13657 rtx and adjust its address upwards.
13658 COUNT is the number of operations in the instruction, not counting a
13659 possible update of the base register. REGS is an array containing the
13660 register operands.
13661 BASEREG is the base register to be used in addressing the memory operands,
13662 which are constructed from BASEMEM.
13663 WRITE_BACK specifies whether the generated instruction should include an
13664 update of the base register.
13665 OFFSETP is used to pass an offset to and from this function; this offset
13666 is not used when constructing the address (instead BASEMEM should have an
13667 appropriate offset in its address); it is used only for setting
13668 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13669
13670 static rtx
13671 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13672 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13673 {
13674 rtx mems[MAX_LDM_STM_OPS];
13675 HOST_WIDE_INT offset = *offsetp;
13676 int i;
13677
13678 gcc_assert (count <= MAX_LDM_STM_OPS);
13679
13680 if (GET_CODE (basereg) == PLUS)
13681 basereg = XEXP (basereg, 0);
13682
13683 for (i = 0; i < count; i++)
13684 {
13685 rtx addr = plus_constant (Pmode, basereg, i * 4);
13686 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13687 offset += 4;
13688 }
13689
13690 if (write_back)
13691 *offsetp = offset;
13692
13693 if (is_load)
13694 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13695 write_back ? 4 * count : 0);
13696 else
13697 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13698 write_back ? 4 * count : 0);
13699 }
13700
13701 rtx
13702 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13703 rtx basemem, HOST_WIDE_INT *offsetp)
13704 {
13705 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13706 offsetp);
13707 }
13708
13709 rtx
13710 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13711 rtx basemem, HOST_WIDE_INT *offsetp)
13712 {
13713 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13714 offsetp);
13715 }
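
/* An illustrative use of the generators above (a sketch; BASEMEM is assumed
   to describe word-aligned memory whose address is already in r4):

     int regs[3] = { 0, 1, 2 };
     HOST_WIDE_INT offset = 0;
     rtx base = gen_rtx_REG (SImode, 4);
     emit_insn (arm_gen_load_multiple (regs, 3, base, FALSE, basemem,
                                       &offset));

   This emits a single ldmia r4, {r0, r1, r2}, or, where
   multiple_operation_profitable_p decides an LDM is not worthwhile, the
   equivalent sequence of three ldr instructions.  */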
13716
13717 /* Called from a peephole2 expander to turn a sequence of loads into an
13718 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13719 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13720 is true if we can reorder the registers because their values are
13721 subsequently used commutatively.
13722 Returns true iff we could generate a new instruction. */
13723
13724 bool
13725 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13726 {
13727 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13728 rtx mems[MAX_LDM_STM_OPS];
13729 int i, j, base_reg;
13730 rtx base_reg_rtx;
13731 HOST_WIDE_INT offset;
13732 int write_back = FALSE;
13733 int ldm_case;
13734 rtx addr;
13735
13736 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13737 &base_reg, &offset, !sort_regs);
13738
13739 if (ldm_case == 0)
13740 return false;
13741
13742 if (sort_regs)
13743 for (i = 0; i < nops - 1; i++)
13744 for (j = i + 1; j < nops; j++)
13745 if (regs[i] > regs[j])
13746 {
13747 int t = regs[i];
13748 regs[i] = regs[j];
13749 regs[j] = t;
13750 }
13751 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13752
13753 if (TARGET_THUMB1)
13754 {
13755 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13756 gcc_assert (ldm_case == 1 || ldm_case == 5);
13757 write_back = TRUE;
13758 }
13759
13760 if (ldm_case == 5)
13761 {
13762 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13763 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13764 offset = 0;
13765 if (!TARGET_THUMB1)
13766 base_reg_rtx = newbase;
13767 }
13768
13769 for (i = 0; i < nops; i++)
13770 {
13771 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13772 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13773 SImode, addr, 0);
13774 }
13775 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13776 write_back ? offset + i * 4 : 0));
13777 return true;
13778 }
13779
13780 /* Called from a peephole2 expander to turn a sequence of stores into an
13781 STM instruction. OPERANDS are the operands found by the peephole matcher;
13782 NOPS indicates how many separate stores we are trying to combine.
13783 Returns true iff we could generate a new instruction. */
13784
13785 bool
13786 gen_stm_seq (rtx *operands, int nops)
13787 {
13788 int i;
13789 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13790 rtx mems[MAX_LDM_STM_OPS];
13791 int base_reg;
13792 rtx base_reg_rtx;
13793 HOST_WIDE_INT offset;
13794 int write_back = FALSE;
13795 int stm_case;
13796 rtx addr;
13797 bool base_reg_dies;
13798
13799 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13800 mem_order, &base_reg, &offset, true);
13801
13802 if (stm_case == 0)
13803 return false;
13804
13805 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13806
13807 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13808 if (TARGET_THUMB1)
13809 {
13810 gcc_assert (base_reg_dies);
13811 write_back = TRUE;
13812 }
13813
13814 if (stm_case == 5)
13815 {
13816 gcc_assert (base_reg_dies);
13817 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13818 offset = 0;
13819 }
13820
13821 addr = plus_constant (Pmode, base_reg_rtx, offset);
13822
13823 for (i = 0; i < nops; i++)
13824 {
13825 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13826 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13827 SImode, addr, 0);
13828 }
13829 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13830 write_back ? offset + i * 4 : 0));
13831 return true;
13832 }
13833
13834 /* Called from a peephole2 expander to turn a sequence of stores that are
13835 preceded by constant loads into an STM instruction. OPERANDS are the
13836 operands found by the peephole matcher; NOPS indicates how many
13837 separate stores we are trying to combine; there are 2 * NOPS
13838 instructions in the peephole.
13839 Returns true iff we could generate a new instruction. */
13840
13841 bool
13842 gen_const_stm_seq (rtx *operands, int nops)
13843 {
13844 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13845 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13846 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13847 rtx mems[MAX_LDM_STM_OPS];
13848 int base_reg;
13849 rtx base_reg_rtx;
13850 HOST_WIDE_INT offset;
13851 int write_back = FALSE;
13852 int stm_case;
13853 rtx addr;
13854 bool base_reg_dies;
13855 int i, j;
13856 HARD_REG_SET allocated;
13857
13858 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13859 mem_order, &base_reg, &offset, false);
13860
13861 if (stm_case == 0)
13862 return false;
13863
13864 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13865
13866 /* If the same register is used more than once, try to find a free
13867 register. */
13868 CLEAR_HARD_REG_SET (allocated);
13869 for (i = 0; i < nops; i++)
13870 {
13871 for (j = i + 1; j < nops; j++)
13872 if (regs[i] == regs[j])
13873 {
13874 rtx t = peep2_find_free_register (0, nops * 2,
13875 TARGET_THUMB1 ? "l" : "r",
13876 SImode, &allocated);
13877 if (t == NULL_RTX)
13878 return false;
13879 reg_rtxs[i] = t;
13880 regs[i] = REGNO (t);
13881 }
13882 }
13883
13884 /* Compute an ordering that maps the register numbers to an ascending
13885 sequence. */
13886 reg_order[0] = 0;
13887 for (i = 0; i < nops; i++)
13888 if (regs[i] < regs[reg_order[0]])
13889 reg_order[0] = i;
13890
13891 for (i = 1; i < nops; i++)
13892 {
13893 int this_order = reg_order[i - 1];
13894 for (j = 0; j < nops; j++)
13895 if (regs[j] > regs[reg_order[i - 1]]
13896 && (this_order == reg_order[i - 1]
13897 || regs[j] < regs[this_order]))
13898 this_order = j;
13899 reg_order[i] = this_order;
13900 }
13901
13902 /* Ensure that registers that must be live after the instruction end
13903 up with the correct value. */
13904 for (i = 0; i < nops; i++)
13905 {
13906 int this_order = reg_order[i];
13907 if ((this_order != mem_order[i]
13908 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13909 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13910 return false;
13911 }
13912
13913 /* Load the constants. */
13914 for (i = 0; i < nops; i++)
13915 {
13916 rtx op = operands[2 * nops + mem_order[i]];
13917 sorted_regs[i] = regs[reg_order[i]];
13918 emit_move_insn (reg_rtxs[reg_order[i]], op);
13919 }
13920
13921 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13922
13923 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13924 if (TARGET_THUMB1)
13925 {
13926 gcc_assert (base_reg_dies);
13927 write_back = TRUE;
13928 }
13929
13930 if (stm_case == 5)
13931 {
13932 gcc_assert (base_reg_dies);
13933 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13934 offset = 0;
13935 }
13936
13937 addr = plus_constant (Pmode, base_reg_rtx, offset);
13938
13939 for (i = 0; i < nops; i++)
13940 {
13941 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13942 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13943 SImode, addr, 0);
13944 }
13945 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13946 write_back ? offset + i * 4 : 0));
13947 return true;
13948 }
13949
13950 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13951 unaligned copies on processors which support unaligned semantics for those
13952 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13953 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13954 An interleave factor of 1 (the minimum) will perform no interleaving.
13955 Load/store multiple are used for aligned addresses where possible. */
13956
13957 static void
13958 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13959 HOST_WIDE_INT length,
13960 unsigned int interleave_factor)
13961 {
13962 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13963 int *regnos = XALLOCAVEC (int, interleave_factor);
13964 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13965 HOST_WIDE_INT i, j;
13966 HOST_WIDE_INT remaining = length, words;
13967 rtx halfword_tmp = NULL, byte_tmp = NULL;
13968 rtx dst, src;
13969 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13970 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13971 HOST_WIDE_INT srcoffset, dstoffset;
13972 HOST_WIDE_INT src_autoinc, dst_autoinc;
13973 rtx mem, addr;
13974
13975 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
13976
13977 /* Use hard registers if we have aligned source or destination so we can use
13978 load/store multiple with contiguous registers. */
13979 if (dst_aligned || src_aligned)
13980 for (i = 0; i < interleave_factor; i++)
13981 regs[i] = gen_rtx_REG (SImode, i);
13982 else
13983 for (i = 0; i < interleave_factor; i++)
13984 regs[i] = gen_reg_rtx (SImode);
13985
13986 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13987 src = copy_addr_to_reg (XEXP (srcbase, 0));
13988
13989 srcoffset = dstoffset = 0;
13990
13991 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13992 For copying the last bytes we want to subtract this offset again. */
13993 src_autoinc = dst_autoinc = 0;
13994
13995 for (i = 0; i < interleave_factor; i++)
13996 regnos[i] = i;
13997
13998 /* Copy BLOCK_SIZE_BYTES chunks. */
13999
14000 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14001 {
14002 /* Load words. */
14003 if (src_aligned && interleave_factor > 1)
14004 {
14005 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14006 TRUE, srcbase, &srcoffset));
14007 src_autoinc += UNITS_PER_WORD * interleave_factor;
14008 }
14009 else
14010 {
14011 for (j = 0; j < interleave_factor; j++)
14012 {
14013 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14014 - src_autoinc));
14015 mem = adjust_automodify_address (srcbase, SImode, addr,
14016 srcoffset + j * UNITS_PER_WORD);
14017 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14018 }
14019 srcoffset += block_size_bytes;
14020 }
14021
14022 /* Store words. */
14023 if (dst_aligned && interleave_factor > 1)
14024 {
14025 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14026 TRUE, dstbase, &dstoffset));
14027 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14028 }
14029 else
14030 {
14031 for (j = 0; j < interleave_factor; j++)
14032 {
14033 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14034 - dst_autoinc));
14035 mem = adjust_automodify_address (dstbase, SImode, addr,
14036 dstoffset + j * UNITS_PER_WORD);
14037 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14038 }
14039 dstoffset += block_size_bytes;
14040 }
14041
14042 remaining -= block_size_bytes;
14043 }
14044
14045 /* Copy any whole words left (note these aren't interleaved with any
14046 subsequent halfword/byte load/stores in the interests of simplicity). */
14047
14048 words = remaining / UNITS_PER_WORD;
14049
14050 gcc_assert (words < interleave_factor);
14051
14052 if (src_aligned && words > 1)
14053 {
14054 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14055 &srcoffset));
14056 src_autoinc += UNITS_PER_WORD * words;
14057 }
14058 else
14059 {
14060 for (j = 0; j < words; j++)
14061 {
14062 addr = plus_constant (Pmode, src,
14063 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14064 mem = adjust_automodify_address (srcbase, SImode, addr,
14065 srcoffset + j * UNITS_PER_WORD);
14066 if (src_aligned)
14067 emit_move_insn (regs[j], mem);
14068 else
14069 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14070 }
14071 srcoffset += words * UNITS_PER_WORD;
14072 }
14073
14074 if (dst_aligned && words > 1)
14075 {
14076 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14077 &dstoffset));
14078 dst_autoinc += words * UNITS_PER_WORD;
14079 }
14080 else
14081 {
14082 for (j = 0; j < words; j++)
14083 {
14084 addr = plus_constant (Pmode, dst,
14085 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14086 mem = adjust_automodify_address (dstbase, SImode, addr,
14087 dstoffset + j * UNITS_PER_WORD);
14088 if (dst_aligned)
14089 emit_move_insn (mem, regs[j]);
14090 else
14091 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14092 }
14093 dstoffset += words * UNITS_PER_WORD;
14094 }
14095
14096 remaining -= words * UNITS_PER_WORD;
14097
14098 gcc_assert (remaining < 4);
14099
14100 /* Copy a halfword if necessary. */
14101
14102 if (remaining >= 2)
14103 {
14104 halfword_tmp = gen_reg_rtx (SImode);
14105
14106 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14107 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14108 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14109
14110 /* Either write out immediately, or delay until we've loaded the last
14111 byte, depending on interleave factor. */
14112 if (interleave_factor == 1)
14113 {
14114 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14115 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14116 emit_insn (gen_unaligned_storehi (mem,
14117 gen_lowpart (HImode, halfword_tmp)));
14118 halfword_tmp = NULL;
14119 dstoffset += 2;
14120 }
14121
14122 remaining -= 2;
14123 srcoffset += 2;
14124 }
14125
14126 gcc_assert (remaining < 2);
14127
14128 /* Copy last byte. */
14129
14130 if ((remaining & 1) != 0)
14131 {
14132 byte_tmp = gen_reg_rtx (SImode);
14133
14134 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14135 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14136 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14137
14138 if (interleave_factor == 1)
14139 {
14140 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14141 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14142 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14143 byte_tmp = NULL;
14144 dstoffset++;
14145 }
14146
14147 remaining--;
14148 srcoffset++;
14149 }
14150
14151 /* Store last halfword if we haven't done so already. */
14152
14153 if (halfword_tmp)
14154 {
14155 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14156 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14157 emit_insn (gen_unaligned_storehi (mem,
14158 gen_lowpart (HImode, halfword_tmp)));
14159 dstoffset += 2;
14160 }
14161
14162 /* Likewise for last byte. */
14163
14164 if (byte_tmp)
14165 {
14166 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14167 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14168 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14169 dstoffset++;
14170 }
14171
14172 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14173 }
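
/* For example, a 7-byte copy with INTERLEAVE_FACTOR == 1 becomes one
   unaligned word load/store (bytes 0-3), no leftover whole words, one
   halfword load/store (bytes 4-5) and one byte load/store (byte 6), leaving
   REMAINING at 0 and SRCOFFSET == DSTOFFSET == 7 for the final assertion.
   With an aligned source or destination and a larger factor, the word
   copies use ldm/stm instead.  */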
14174
14175 /* From mips_adjust_block_mem:
14176
14177 Helper function for doing a loop-based block operation on memory
14178 reference MEM. Each iteration of the loop will operate on LENGTH
14179 bytes of MEM.
14180
14181 Create a new base register for use within the loop and point it to
14182 the start of MEM. Create a new memory reference that uses this
14183 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14184
14185 static void
14186 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14187 rtx *loop_mem)
14188 {
14189 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14190
14191 /* Although the new mem does not refer to a known location,
14192 it does keep up to LENGTH bytes of alignment. */
14193 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14194 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14195 }
14196
14197 /* From mips_block_move_loop:
14198
14199 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14200 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14201 the memory regions do not overlap. */
14202
14203 static void
14204 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14205 unsigned int interleave_factor,
14206 HOST_WIDE_INT bytes_per_iter)
14207 {
14208 rtx src_reg, dest_reg, final_src, test;
14209 HOST_WIDE_INT leftover;
14210
14211 leftover = length % bytes_per_iter;
14212 length -= leftover;
14213
14214 /* Create registers and memory references for use within the loop. */
14215 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14216 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14217
14218 /* Calculate the value that SRC_REG should have after the last iteration of
14219 the loop. */
14220 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14221 0, 0, OPTAB_WIDEN);
14222
14223 /* Emit the start of the loop. */
14224 rtx_code_label *label = gen_label_rtx ();
14225 emit_label (label);
14226
14227 /* Emit the loop body. */
14228 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14229 interleave_factor);
14230
14231 /* Move on to the next block. */
14232 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14233 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14234
14235 /* Emit the loop condition. */
14236 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14237 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14238
14239 /* Mop up any left-over bytes. */
14240 if (leftover)
14241 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14242 }
14243
14244 /* Emit a block move when either the source or destination is unaligned (not
14245 aligned to a four-byte boundary). This may need further tuning depending on
14246 core type, optimize_size setting, etc. */
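/* Purely illustrative sketch of the kind of sequence this produces: a
   7-byte copy, when optimizing for size with neither side word aligned
   (so interleave_factor == 1), becomes roughly

	ldr	rT, [src]		@ one unaligned word
	str	rT, [dst]
	ldrh	rT, [src, #4]		@ then a halfword
	strh	rT, [dst, #4]
	ldrb	rT, [src, #6]		@ and the final byte
	strb	rT, [dst, #6]

   via the unaligned load/store patterns; rT stands for whatever scratch
   registers are allocated, and scheduling may reorder the accesses. */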
14247
14248 static int
14249 arm_movmemqi_unaligned (rtx *operands)
14250 {
14251 HOST_WIDE_INT length = INTVAL (operands[2]);
14252
14253 if (optimize_size)
14254 {
14255 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14256 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14257 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14258 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14259 or dst_aligned though: allow more interleaving in those cases since the
14260 resulting code can be smaller. */
14261 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14262 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14263
14264 if (length > 12)
14265 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14266 interleave_factor, bytes_per_iter);
14267 else
14268 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14269 interleave_factor);
14270 }
14271 else
14272 {
14273 /* Note that the loop created by arm_block_move_unaligned_loop may be
14274 subject to loop unrolling, which makes tuning this condition a little
14275 redundant. */
14276 if (length > 32)
14277 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14278 else
14279 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14280 }
14281
14282 return 1;
14283 }
14284
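/* Expand a movmemqi block copy using core registers.  When unaligned access
   is available and the operands are not word aligned, the work is handed off
   to arm_movmemqi_unaligned below.  Purely as an illustration: a 16-byte
   copy with both addresses word aligned and no trailing bytes boils down to
   one load-multiple/store-multiple pair, roughly

	ldmia	rsrc, {rA, rB, rC, rD}
	stmia	rdst, {rA, rB, rC, rD}

   with the register set taken from arm_regs_in_sequence; longer copies
   repeat the pair with write-back addressing, and any trailing 1-3 bytes
   are finished off by the halfword/byte stores at the end. */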
14285 int
14286 arm_gen_movmemqi (rtx *operands)
14287 {
14288 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14289 HOST_WIDE_INT srcoffset, dstoffset;
14290 rtx src, dst, srcbase, dstbase;
14291 rtx part_bytes_reg = NULL;
14292 rtx mem;
14293
14294 if (!CONST_INT_P (operands[2])
14295 || !CONST_INT_P (operands[3])
14296 || INTVAL (operands[2]) > 64)
14297 return 0;
14298
14299 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14300 return arm_movmemqi_unaligned (operands);
14301
14302 if (INTVAL (operands[3]) & 3)
14303 return 0;
14304
14305 dstbase = operands[0];
14306 srcbase = operands[1];
14307
14308 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14309 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14310
14311 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14312 out_words_to_go = INTVAL (operands[2]) / 4;
14313 last_bytes = INTVAL (operands[2]) & 3;
14314 dstoffset = srcoffset = 0;
14315
14316 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14317 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14318
14319 while (in_words_to_go >= 2)
14320 {
14321 if (in_words_to_go > 4)
14322 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14323 TRUE, srcbase, &srcoffset));
14324 else
14325 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14326 src, FALSE, srcbase,
14327 &srcoffset));
14328
14329 if (out_words_to_go)
14330 {
14331 if (out_words_to_go > 4)
14332 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14333 TRUE, dstbase, &dstoffset));
14334 else if (out_words_to_go != 1)
14335 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14336 out_words_to_go, dst,
14337 (last_bytes == 0
14338 ? FALSE : TRUE),
14339 dstbase, &dstoffset));
14340 else
14341 {
14342 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14343 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14344 if (last_bytes != 0)
14345 {
14346 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14347 dstoffset += 4;
14348 }
14349 }
14350 }
14351
14352 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14353 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14354 }
14355
14356 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14357 if (out_words_to_go)
14358 {
14359 rtx sreg;
14360
14361 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14362 sreg = copy_to_reg (mem);
14363
14364 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14365 emit_move_insn (mem, sreg);
14366 in_words_to_go--;
14367
14368 gcc_assert (!in_words_to_go); /* Sanity check */
14369 }
14370
14371 if (in_words_to_go)
14372 {
14373 gcc_assert (in_words_to_go > 0);
14374
14375 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14376 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14377 }
14378
14379 gcc_assert (!last_bytes || part_bytes_reg);
14380
14381 if (BYTES_BIG_ENDIAN && last_bytes)
14382 {
14383 rtx tmp = gen_reg_rtx (SImode);
14384
14385 /* The bytes we want are in the top end of the word. */
14386 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14387 GEN_INT (8 * (4 - last_bytes))));
14388 part_bytes_reg = tmp;
14389
14390 while (last_bytes)
14391 {
14392 mem = adjust_automodify_address (dstbase, QImode,
14393 plus_constant (Pmode, dst,
14394 last_bytes - 1),
14395 dstoffset + last_bytes - 1);
14396 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14397
14398 if (--last_bytes)
14399 {
14400 tmp = gen_reg_rtx (SImode);
14401 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14402 part_bytes_reg = tmp;
14403 }
14404 }
14405
14406 }
14407 else
14408 {
14409 if (last_bytes > 1)
14410 {
14411 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14412 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14413 last_bytes -= 2;
14414 if (last_bytes)
14415 {
14416 rtx tmp = gen_reg_rtx (SImode);
14417 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14418 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14419 part_bytes_reg = tmp;
14420 dstoffset += 2;
14421 }
14422 }
14423
14424 if (last_bytes)
14425 {
14426 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14427 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14428 }
14429 }
14430
14431 return 1;
14432 }
14433
14434 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14435 by mode size. */
14436 inline static rtx
14437 next_consecutive_mem (rtx mem)
14438 {
14439 machine_mode mode = GET_MODE (mem);
14440 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14441 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14442
14443 return adjust_automodify_address (mem, mode, addr, offset);
14444 }
14445
14446 /* Copy using LDRD/STRD instructions whenever possible.
14447 Returns true upon success. */
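/* Illustrative only: with both SRC and DST word aligned, a 16-byte copy is
   expanded by the word loop below as two DImode register moves, which
   normally end up as

	ldrd	rA, rB, [rsrc]
	strd	rA, rB, [rdst]
	ldrd	rA, rB, [rsrc, #8]
	strd	rA, rB, [rdst, #8]

   whereas an unaligned side is accessed as pairs of unaligned LDR or STR
   instructions on the SImode halves instead. */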
14448 bool
14449 gen_movmem_ldrd_strd (rtx *operands)
14450 {
14451 unsigned HOST_WIDE_INT len;
14452 HOST_WIDE_INT align;
14453 rtx src, dst, base;
14454 rtx reg0;
14455 bool src_aligned, dst_aligned;
14456 bool src_volatile, dst_volatile;
14457
14458 gcc_assert (CONST_INT_P (operands[2]));
14459 gcc_assert (CONST_INT_P (operands[3]));
14460
14461 len = UINTVAL (operands[2]);
14462 if (len > 64)
14463 return false;
14464
14465 /* Maximum alignment we can assume for both src and dst buffers. */
14466 align = INTVAL (operands[3]);
14467
14468 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14469 return false;
14470
14471 /* Place src and dst addresses in registers
14472 and update the corresponding mem rtx. */
14473 dst = operands[0];
14474 dst_volatile = MEM_VOLATILE_P (dst);
14475 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14476 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14477 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14478
14479 src = operands[1];
14480 src_volatile = MEM_VOLATILE_P (src);
14481 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14482 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14483 src = adjust_automodify_address (src, VOIDmode, base, 0);
14484
14485 if (!unaligned_access && !(src_aligned && dst_aligned))
14486 return false;
14487
14488 if (src_volatile || dst_volatile)
14489 return false;
14490
14491 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14492 if (!(dst_aligned || src_aligned))
14493 return arm_gen_movmemqi (operands);
14494
14495 /* If either src or dst is unaligned we'll be accessing it as pairs
14496 of unaligned SImode accesses. Otherwise we can generate DImode
14497 ldrd/strd instructions. */
14498 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14499 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14500
14501 while (len >= 8)
14502 {
14503 len -= 8;
14504 reg0 = gen_reg_rtx (DImode);
14505 rtx low_reg = NULL_RTX;
14506 rtx hi_reg = NULL_RTX;
14507
14508 if (!src_aligned || !dst_aligned)
14509 {
14510 low_reg = gen_lowpart (SImode, reg0);
14511 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14512 }
14513 if (src_aligned)
14514 emit_move_insn (reg0, src);
14515 else
14516 {
14517 emit_insn (gen_unaligned_loadsi (low_reg, src));
14518 src = next_consecutive_mem (src);
14519 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14520 }
14521
14522 if (dst_aligned)
14523 emit_move_insn (dst, reg0);
14524 else
14525 {
14526 emit_insn (gen_unaligned_storesi (dst, low_reg));
14527 dst = next_consecutive_mem (dst);
14528 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14529 }
14530
14531 src = next_consecutive_mem (src);
14532 dst = next_consecutive_mem (dst);
14533 }
14534
14535 gcc_assert (len < 8);
14536 if (len >= 4)
14537 {
14538 /* At least a word but less than a double-word left to copy. Copy one word. */
14539 reg0 = gen_reg_rtx (SImode);
14540 src = adjust_address (src, SImode, 0);
14541 dst = adjust_address (dst, SImode, 0);
14542 if (src_aligned)
14543 emit_move_insn (reg0, src);
14544 else
14545 emit_insn (gen_unaligned_loadsi (reg0, src));
14546
14547 if (dst_aligned)
14548 emit_move_insn (dst, reg0);
14549 else
14550 emit_insn (gen_unaligned_storesi (dst, reg0));
14551
14552 src = next_consecutive_mem (src);
14553 dst = next_consecutive_mem (dst);
14554 len -= 4;
14555 }
14556
14557 if (len == 0)
14558 return true;
14559
14560 /* Copy the remaining bytes. */
14561 if (len >= 2)
14562 {
14563 dst = adjust_address (dst, HImode, 0);
14564 src = adjust_address (src, HImode, 0);
14565 reg0 = gen_reg_rtx (SImode);
14566 if (src_aligned)
14567 emit_insn (gen_zero_extendhisi2 (reg0, src));
14568 else
14569 emit_insn (gen_unaligned_loadhiu (reg0, src));
14570
14571 if (dst_aligned)
14572 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
14573 else
14574 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14575
14576 src = next_consecutive_mem (src);
14577 dst = next_consecutive_mem (dst);
14578 if (len == 2)
14579 return true;
14580 }
14581
14582 dst = adjust_address (dst, QImode, 0);
14583 src = adjust_address (src, QImode, 0);
14584 reg0 = gen_reg_rtx (QImode);
14585 emit_move_insn (reg0, src);
14586 emit_move_insn (dst, reg0);
14587 return true;
14588 }
14589
14590 /* Select a dominance comparison mode if possible for a test of the general
14591 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14592 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14593 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14594 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14595 In all cases OP will be either EQ or NE, but we don't need to know which
14596 here. If we are unable to support a dominance comparison we return
14597 CC mode. This will then fail to match for the RTL expressions that
14598 generate this call. */
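/* For example (illustrative, and assuming the individual comparisons are
   simple enough that arm_select_cc_mode returns CCmode for each of them):
   with COND_OR == DOM_CC_X_OR_Y, an EQ comparison for X and a GE comparison
   for Y gives CC_DGEmode, since EQ dominates GE; pairing LT with GTU gives
   plain CCmode because neither condition dominates the other, and the
   calling pattern then fails to match. */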
14599 machine_mode
14600 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14601 {
14602 enum rtx_code cond1, cond2;
14603 int swapped = 0;
14604
14605 /* Currently we will probably get the wrong result if the individual
14606 comparisons are not simple. This also ensures that it is safe to
14607 reverse a comparison if necessary. */
14608 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14609 != CCmode)
14610 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14611 != CCmode))
14612 return CCmode;
14613
14614 /* The if_then_else variant of this tests the second condition if the
14615 first passes, but is true if the first fails. Reverse the first
14616 condition to get a true "inclusive-or" expression. */
14617 if (cond_or == DOM_CC_NX_OR_Y)
14618 cond1 = reverse_condition (cond1);
14619
14620 /* If the comparisons are not equal, and one doesn't dominate the other,
14621 then we can't do this. */
14622 if (cond1 != cond2
14623 && !comparison_dominates_p (cond1, cond2)
14624 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14625 return CCmode;
14626
14627 if (swapped)
14628 std::swap (cond1, cond2);
14629
14630 switch (cond1)
14631 {
14632 case EQ:
14633 if (cond_or == DOM_CC_X_AND_Y)
14634 return CC_DEQmode;
14635
14636 switch (cond2)
14637 {
14638 case EQ: return CC_DEQmode;
14639 case LE: return CC_DLEmode;
14640 case LEU: return CC_DLEUmode;
14641 case GE: return CC_DGEmode;
14642 case GEU: return CC_DGEUmode;
14643 default: gcc_unreachable ();
14644 }
14645
14646 case LT:
14647 if (cond_or == DOM_CC_X_AND_Y)
14648 return CC_DLTmode;
14649
14650 switch (cond2)
14651 {
14652 case LT:
14653 return CC_DLTmode;
14654 case LE:
14655 return CC_DLEmode;
14656 case NE:
14657 return CC_DNEmode;
14658 default:
14659 gcc_unreachable ();
14660 }
14661
14662 case GT:
14663 if (cond_or == DOM_CC_X_AND_Y)
14664 return CC_DGTmode;
14665
14666 switch (cond2)
14667 {
14668 case GT:
14669 return CC_DGTmode;
14670 case GE:
14671 return CC_DGEmode;
14672 case NE:
14673 return CC_DNEmode;
14674 default:
14675 gcc_unreachable ();
14676 }
14677
14678 case LTU:
14679 if (cond_or == DOM_CC_X_AND_Y)
14680 return CC_DLTUmode;
14681
14682 switch (cond2)
14683 {
14684 case LTU:
14685 return CC_DLTUmode;
14686 case LEU:
14687 return CC_DLEUmode;
14688 case NE:
14689 return CC_DNEmode;
14690 default:
14691 gcc_unreachable ();
14692 }
14693
14694 case GTU:
14695 if (cond_or == DOM_CC_X_AND_Y)
14696 return CC_DGTUmode;
14697
14698 switch (cond2)
14699 {
14700 case GTU:
14701 return CC_DGTUmode;
14702 case GEU:
14703 return CC_DGEUmode;
14704 case NE:
14705 return CC_DNEmode;
14706 default:
14707 gcc_unreachable ();
14708 }
14709
14710 /* The remaining cases only occur when both comparisons are the
14711 same. */
14712 case NE:
14713 gcc_assert (cond1 == cond2);
14714 return CC_DNEmode;
14715
14716 case LE:
14717 gcc_assert (cond1 == cond2);
14718 return CC_DLEmode;
14719
14720 case GE:
14721 gcc_assert (cond1 == cond2);
14722 return CC_DGEmode;
14723
14724 case LEU:
14725 gcc_assert (cond1 == cond2);
14726 return CC_DLEUmode;
14727
14728 case GEU:
14729 gcc_assert (cond1 == cond2);
14730 return CC_DGEUmode;
14731
14732 default:
14733 gcc_unreachable ();
14734 }
14735 }
14736
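/* Select the CC mode used to compare X against Y with operator OP.  A few
   illustrative results from the cases below: a QImode equality test gives
   CC_Zmode; an SImode LTU/GEU comparison of (a + b) against a gives
   CC_Cmode, i.e. a carry-flag test on the addition; and a DImode signed
   comparison gives CC_NCVmode. */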
14737 machine_mode
14738 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14739 {
14740 /* All floating point compares return CCFP if it is an equality
14741 comparison, and CCFPE otherwise. */
14742 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14743 {
14744 switch (op)
14745 {
14746 case EQ:
14747 case NE:
14748 case UNORDERED:
14749 case ORDERED:
14750 case UNLT:
14751 case UNLE:
14752 case UNGT:
14753 case UNGE:
14754 case UNEQ:
14755 case LTGT:
14756 return CCFPmode;
14757
14758 case LT:
14759 case LE:
14760 case GT:
14761 case GE:
14762 return CCFPEmode;
14763
14764 default:
14765 gcc_unreachable ();
14766 }
14767 }
14768
14769 /* A compare with a shifted operand. Because of canonicalization, the
14770 comparison will have to be swapped when we emit the assembler. */
14771 if (GET_MODE (y) == SImode
14772 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14773 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14774 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14775 || GET_CODE (x) == ROTATERT))
14776 return CC_SWPmode;
14777
14778 /* This operation is performed swapped, but since we only rely on the Z
14779 flag we don't need an additional mode. */
14780 if (GET_MODE (y) == SImode
14781 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14782 && GET_CODE (x) == NEG
14783 && (op == EQ || op == NE))
14784 return CC_Zmode;
14785
14786 /* This is a special case that is used by combine to allow a
14787 comparison of a shifted byte load to be split into a zero-extend
14788 followed by a comparison of the shifted integer (only valid for
14789 equalities and unsigned inequalities). */
14790 if (GET_MODE (x) == SImode
14791 && GET_CODE (x) == ASHIFT
14792 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14793 && GET_CODE (XEXP (x, 0)) == SUBREG
14794 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14795 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14796 && (op == EQ || op == NE
14797 || op == GEU || op == GTU || op == LTU || op == LEU)
14798 && CONST_INT_P (y))
14799 return CC_Zmode;
14800
14801 /* A construct for a conditional compare, if the false arm contains
14802 0, then both conditions must be true, otherwise either condition
14803 must be true. Not all conditions are possible, so CCmode is
14804 returned if it can't be done. */
14805 if (GET_CODE (x) == IF_THEN_ELSE
14806 && (XEXP (x, 2) == const0_rtx
14807 || XEXP (x, 2) == const1_rtx)
14808 && COMPARISON_P (XEXP (x, 0))
14809 && COMPARISON_P (XEXP (x, 1)))
14810 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14811 INTVAL (XEXP (x, 2)));
14812
14813 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14814 if (GET_CODE (x) == AND
14815 && (op == EQ || op == NE)
14816 && COMPARISON_P (XEXP (x, 0))
14817 && COMPARISON_P (XEXP (x, 1)))
14818 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14819 DOM_CC_X_AND_Y);
14820
14821 if (GET_CODE (x) == IOR
14822 && (op == EQ || op == NE)
14823 && COMPARISON_P (XEXP (x, 0))
14824 && COMPARISON_P (XEXP (x, 1)))
14825 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14826 DOM_CC_X_OR_Y);
14827
14828 /* An operation (on Thumb) where we want to test for a single bit.
14829 This is done by shifting that bit up into the top bit of a
14830 scratch register; we can then branch on the sign bit. */
14831 if (TARGET_THUMB1
14832 && GET_MODE (x) == SImode
14833 && (op == EQ || op == NE)
14834 && GET_CODE (x) == ZERO_EXTRACT
14835 && XEXP (x, 1) == const1_rtx)
14836 return CC_Nmode;
14837
14838 /* An operation that sets the condition codes as a side-effect, the
14839 V flag is not set correctly, so we can only use comparisons where
14840 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14841 instead.) */
14842 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14843 if (GET_MODE (x) == SImode
14844 && y == const0_rtx
14845 && (op == EQ || op == NE || op == LT || op == GE)
14846 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14847 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14848 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14849 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14850 || GET_CODE (x) == LSHIFTRT
14851 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14852 || GET_CODE (x) == ROTATERT
14853 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14854 return CC_NOOVmode;
14855
14856 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14857 return CC_Zmode;
14858
14859 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14860 && GET_CODE (x) == PLUS
14861 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14862 return CC_Cmode;
14863
14864 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14865 {
14866 switch (op)
14867 {
14868 case EQ:
14869 case NE:
14870 /* A DImode comparison against zero can be implemented by
14871 or'ing the two halves together. */
14872 if (y == const0_rtx)
14873 return CC_Zmode;
14874
14875 /* We can do an equality test in three Thumb instructions. */
14876 if (!TARGET_32BIT)
14877 return CC_Zmode;
14878
14879 /* FALLTHROUGH */
14880
14881 case LTU:
14882 case LEU:
14883 case GTU:
14884 case GEU:
14885 /* DImode unsigned comparisons can be implemented by cmp +
14886 cmpeq without a scratch register. Not worth doing in
14887 Thumb-2. */
14888 if (TARGET_32BIT)
14889 return CC_CZmode;
14890
14891 /* FALLTHROUGH */
14892
14893 case LT:
14894 case LE:
14895 case GT:
14896 case GE:
14897 /* DImode signed and unsigned comparisons can be implemented
14898 by cmp + sbcs with a scratch register, but that does not
14899 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14900 gcc_assert (op != EQ && op != NE);
14901 return CC_NCVmode;
14902
14903 default:
14904 gcc_unreachable ();
14905 }
14906 }
14907
14908 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14909 return GET_MODE (x);
14910
14911 return CCmode;
14912 }
14913
14914 /* X and Y are two things to compare using CODE. Emit the compare insn and
14915 return the rtx for the CC register in the proper mode. SCRATCH is a
14916 scratch register that some DImode comparisons need after reload. */
14917 rtx
14918 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14919 {
14920 machine_mode mode;
14921 rtx cc_reg;
14922 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14923
14924 /* We might have X as a constant, Y as a register because of the predicates
14925 used for cmpdi. If so, force X to a register here. */
14926 if (dimode_comparison && !REG_P (x))
14927 x = force_reg (DImode, x);
14928
14929 mode = SELECT_CC_MODE (code, x, y);
14930 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14931
14932 if (dimode_comparison
14933 && mode != CC_CZmode)
14934 {
14935 rtx clobber, set;
14936
14937 /* To compare two non-zero values for equality, XOR them and
14938 then compare against zero. Not used for ARM mode; there
14939 CC_CZmode is cheaper. */
14940 if (mode == CC_Zmode && y != const0_rtx)
14941 {
14942 gcc_assert (!reload_completed);
14943 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14944 y = const0_rtx;
14945 }
14946
14947 /* A scratch register is required. */
14948 if (reload_completed)
14949 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14950 else
14951 scratch = gen_rtx_SCRATCH (SImode);
14952
14953 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14954 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14955 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14956 }
14957 else
14958 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14959
14960 return cc_reg;
14961 }
14962
14963 /* Generate a sequence of insns that will generate the correct return
14964 address mask depending on the physical architecture that the program
14965 is running on. */
14966 rtx
14967 arm_gen_return_addr_mask (void)
14968 {
14969 rtx reg = gen_reg_rtx (Pmode);
14970
14971 emit_insn (gen_return_addr_mask (reg));
14972 return reg;
14973 }
14974
14975 void
14976 arm_reload_in_hi (rtx *operands)
14977 {
14978 rtx ref = operands[1];
14979 rtx base, scratch;
14980 HOST_WIDE_INT offset = 0;
14981
14982 if (GET_CODE (ref) == SUBREG)
14983 {
14984 offset = SUBREG_BYTE (ref);
14985 ref = SUBREG_REG (ref);
14986 }
14987
14988 if (REG_P (ref))
14989 {
14990 /* We have a pseudo which has been spilt onto the stack; there
14991 are two cases here: the first where there is a simple
14992 stack-slot replacement and a second where the stack-slot is
14993 out of range, or is used as a subreg. */
14994 if (reg_equiv_mem (REGNO (ref)))
14995 {
14996 ref = reg_equiv_mem (REGNO (ref));
14997 base = find_replacement (&XEXP (ref, 0));
14998 }
14999 else
15000 /* The slot is out of range, or was dressed up in a SUBREG. */
15001 base = reg_equiv_address (REGNO (ref));
15002
15003 /* PR 62554: If there is no equivalent memory location then just move
15004 the value as an SImode register move. This happens when the target
15005 architecture variant does not have an HImode register move. */
15006 if (base == NULL)
15007 {
15008 gcc_assert (REG_P (operands[0]));
15009 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15010 gen_rtx_SUBREG (SImode, ref, 0)));
15011 return;
15012 }
15013 }
15014 else
15015 base = find_replacement (&XEXP (ref, 0));
15016
15017 /* Handle the case where the address is too complex to be offset by 1. */
15018 if (GET_CODE (base) == MINUS
15019 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15020 {
15021 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15022
15023 emit_set_insn (base_plus, base);
15024 base = base_plus;
15025 }
15026 else if (GET_CODE (base) == PLUS)
15027 {
15028 /* The addend must be CONST_INT, or we would have dealt with it above. */
15029 HOST_WIDE_INT hi, lo;
15030
15031 offset += INTVAL (XEXP (base, 1));
15032 base = XEXP (base, 0);
15033
15034 /* Rework the address into a legal sequence of insns. */
15035 /* Valid range for lo is -4095 -> 4095 */
15036 lo = (offset >= 0
15037 ? (offset & 0xfff)
15038 : -((-offset) & 0xfff));
15039
15040 /* Corner case, if lo is the max offset then we would be out of range
15041 once we have added the additional 1 below, so bump the msb into the
15042 pre-loading insn(s). */
15043 if (lo == 4095)
15044 lo &= 0x7ff;
15045
15046 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15047 ^ (HOST_WIDE_INT) 0x80000000)
15048 - (HOST_WIDE_INT) 0x80000000);
15049
15050 gcc_assert (hi + lo == offset);
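/* Worked example (illustrative values only): offset 0x1234 splits into
   lo = 0x234 and hi = 0x1000, while offset -0x1234 splits into lo = -0x234
   and hi = -0x1000, so base + hi + lo always reconstructs the original
   address exactly. */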
15051
15052 if (hi != 0)
15053 {
15054 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15055
15056 /* Get the base address; addsi3 knows how to handle constants
15057 that require more than one insn. */
15058 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15059 base = base_plus;
15060 offset = lo;
15061 }
15062 }
15063
15064 /* Operands[2] may overlap operands[0] (though it won't overlap
15065 operands[1]), that's why we asked for a DImode reg -- so we can
15066 use the bit that does not overlap. */
15067 if (REGNO (operands[2]) == REGNO (operands[0]))
15068 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15069 else
15070 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15071
15072 emit_insn (gen_zero_extendqisi2 (scratch,
15073 gen_rtx_MEM (QImode,
15074 plus_constant (Pmode, base,
15075 offset))));
15076 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15077 gen_rtx_MEM (QImode,
15078 plus_constant (Pmode, base,
15079 offset + 1))));
15080 if (!BYTES_BIG_ENDIAN)
15081 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15082 gen_rtx_IOR (SImode,
15083 gen_rtx_ASHIFT
15084 (SImode,
15085 gen_rtx_SUBREG (SImode, operands[0], 0),
15086 GEN_INT (8)),
15087 scratch));
15088 else
15089 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15090 gen_rtx_IOR (SImode,
15091 gen_rtx_ASHIFT (SImode, scratch,
15092 GEN_INT (8)),
15093 gen_rtx_SUBREG (SImode, operands[0], 0)));
15094 }
15095
15096 /* Handle storing a half-word to memory during reload by synthesizing as two
15097 byte stores. Take care not to clobber the input values until after we
15098 have moved them somewhere safe. This code assumes that if the DImode
15099 scratch in operands[2] overlaps either the input value or output address
15100 in some way, then that value must die in this insn (we absolutely need
15101 two scratch registers for some corner cases). */
15102 void
15103 arm_reload_out_hi (rtx *operands)
15104 {
15105 rtx ref = operands[0];
15106 rtx outval = operands[1];
15107 rtx base, scratch;
15108 HOST_WIDE_INT offset = 0;
15109
15110 if (GET_CODE (ref) == SUBREG)
15111 {
15112 offset = SUBREG_BYTE (ref);
15113 ref = SUBREG_REG (ref);
15114 }
15115
15116 if (REG_P (ref))
15117 {
15118 /* We have a pseudo which has been spilt onto the stack; there
15119 are two cases here: the first where there is a simple
15120 stack-slot replacement and a second where the stack-slot is
15121 out of range, or is used as a subreg. */
15122 if (reg_equiv_mem (REGNO (ref)))
15123 {
15124 ref = reg_equiv_mem (REGNO (ref));
15125 base = find_replacement (&XEXP (ref, 0));
15126 }
15127 else
15128 /* The slot is out of range, or was dressed up in a SUBREG. */
15129 base = reg_equiv_address (REGNO (ref));
15130
15131 /* PR 62254: If there is no equivalent memory location then just move
15132 the value as an SImode register move. This happens when the target
15133 architecture variant does not have an HImode register move. */
15134 if (base == NULL)
15135 {
15136 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15137
15138 if (REG_P (outval))
15139 {
15140 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15141 gen_rtx_SUBREG (SImode, outval, 0)));
15142 }
15143 else /* SUBREG_P (outval) */
15144 {
15145 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15146 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15147 SUBREG_REG (outval)));
15148 else
15149 /* FIXME: Handle other cases ? */
15150 gcc_unreachable ();
15151 }
15152 return;
15153 }
15154 }
15155 else
15156 base = find_replacement (&XEXP (ref, 0));
15157
15158 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15159
15160 /* Handle the case where the address is too complex to be offset by 1. */
15161 if (GET_CODE (base) == MINUS
15162 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15163 {
15164 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15165
15166 /* Be careful not to destroy OUTVAL. */
15167 if (reg_overlap_mentioned_p (base_plus, outval))
15168 {
15169 /* Updating base_plus might destroy outval, see if we can
15170 swap the scratch and base_plus. */
15171 if (!reg_overlap_mentioned_p (scratch, outval))
15172 std::swap (scratch, base_plus);
15173 else
15174 {
15175 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15176
15177 /* Be conservative and copy OUTVAL into the scratch now,
15178 this should only be necessary if outval is a subreg
15179 of something larger than a word. */
15180 /* XXX Might this clobber base? I can't see how it can,
15181 since scratch is known to overlap with OUTVAL, and
15182 must be wider than a word. */
15183 emit_insn (gen_movhi (scratch_hi, outval));
15184 outval = scratch_hi;
15185 }
15186 }
15187
15188 emit_set_insn (base_plus, base);
15189 base = base_plus;
15190 }
15191 else if (GET_CODE (base) == PLUS)
15192 {
15193 /* The addend must be CONST_INT, or we would have dealt with it above. */
15194 HOST_WIDE_INT hi, lo;
15195
15196 offset += INTVAL (XEXP (base, 1));
15197 base = XEXP (base, 0);
15198
15199 /* Rework the address into a legal sequence of insns. */
15200 /* Valid range for lo is -4095 -> 4095 */
15201 lo = (offset >= 0
15202 ? (offset & 0xfff)
15203 : -((-offset) & 0xfff));
15204
15205 /* Corner case, if lo is the max offset then we would be out of range
15206 once we have added the additional 1 below, so bump the msb into the
15207 pre-loading insn(s). */
15208 if (lo == 4095)
15209 lo &= 0x7ff;
15210
15211 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15212 ^ (HOST_WIDE_INT) 0x80000000)
15213 - (HOST_WIDE_INT) 0x80000000);
15214
15215 gcc_assert (hi + lo == offset);
15216
15217 if (hi != 0)
15218 {
15219 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15220
15221 /* Be careful not to destroy OUTVAL. */
15222 if (reg_overlap_mentioned_p (base_plus, outval))
15223 {
15224 /* Updating base_plus might destroy outval, see if we
15225 can swap the scratch and base_plus. */
15226 if (!reg_overlap_mentioned_p (scratch, outval))
15227 std::swap (scratch, base_plus);
15228 else
15229 {
15230 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15231
15232 /* Be conservative and copy outval into scratch now,
15233 this should only be necessary if outval is a
15234 subreg of something larger than a word. */
15235 /* XXX Might this clobber base? I can't see how it
15236 can, since scratch is known to overlap with
15237 outval. */
15238 emit_insn (gen_movhi (scratch_hi, outval));
15239 outval = scratch_hi;
15240 }
15241 }
15242
15243 /* Get the base address; addsi3 knows how to handle constants
15244 that require more than one insn. */
15245 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15246 base = base_plus;
15247 offset = lo;
15248 }
15249 }
15250
15251 if (BYTES_BIG_ENDIAN)
15252 {
15253 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15254 plus_constant (Pmode, base,
15255 offset + 1)),
15256 gen_lowpart (QImode, outval)));
15257 emit_insn (gen_lshrsi3 (scratch,
15258 gen_rtx_SUBREG (SImode, outval, 0),
15259 GEN_INT (8)));
15260 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15261 offset)),
15262 gen_lowpart (QImode, scratch)));
15263 }
15264 else
15265 {
15266 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15267 offset)),
15268 gen_lowpart (QImode, outval)));
15269 emit_insn (gen_lshrsi3 (scratch,
15270 gen_rtx_SUBREG (SImode, outval, 0),
15271 GEN_INT (8)));
15272 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15273 plus_constant (Pmode, base,
15274 offset + 1)),
15275 gen_lowpart (QImode, scratch)));
15276 }
15277 }
15278
15279 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15280 (padded to the size of a word) should be passed in a register. */
15281
15282 static bool
15283 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15284 {
15285 if (TARGET_AAPCS_BASED)
15286 return must_pass_in_stack_var_size (mode, type);
15287 else
15288 return must_pass_in_stack_var_size_or_pad (mode, type);
15289 }
15290
15291
15292 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15293 byte of a stack argument has useful data. For legacy APCS ABIs we use
15294 the default. For AAPCS based ABIs small aggregate types are placed
15295 in the lowest memory address. */
15296
15297 static pad_direction
15298 arm_function_arg_padding (machine_mode mode, const_tree type)
15299 {
15300 if (!TARGET_AAPCS_BASED)
15301 return default_function_arg_padding (mode, type);
15302
15303 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15304 return PAD_DOWNWARD;
15305
15306 return PAD_UPWARD;
15307 }
15308
15309
15310 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15311 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15312 register has useful data, and return the opposite if the most
15313 significant byte does. */
15314
15315 bool
15316 arm_pad_reg_upward (machine_mode mode,
15317 tree type, int first ATTRIBUTE_UNUSED)
15318 {
15319 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15320 {
15321 /* For AAPCS, small aggregates, small fixed-point types,
15322 and small complex types are always padded upwards. */
15323 if (type)
15324 {
15325 if ((AGGREGATE_TYPE_P (type)
15326 || TREE_CODE (type) == COMPLEX_TYPE
15327 || FIXED_POINT_TYPE_P (type))
15328 && int_size_in_bytes (type) <= 4)
15329 return true;
15330 }
15331 else
15332 {
15333 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15334 && GET_MODE_SIZE (mode) <= 4)
15335 return true;
15336 }
15337 }
15338
15339 /* Otherwise, use default padding. */
15340 return !BYTES_BIG_ENDIAN;
15341 }
15342
15343 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15344 assuming that the address in the base register is word aligned. */
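/* For instance, under the checks below an offset of 1020 is accepted in
   Thumb-2 mode, 1022 is rejected there (not a multiple of 4), and ARM mode
   only accepts offsets in the range -255..255. */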
15345 bool
15346 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15347 {
15348 HOST_WIDE_INT max_offset;
15349
15350 /* The offset must be a multiple of 4 in Thumb-2 mode. */
15351 if (TARGET_THUMB2 && ((offset & 3) != 0))
15352 return false;
15353
15354 if (TARGET_THUMB2)
15355 max_offset = 1020;
15356 else if (TARGET_ARM)
15357 max_offset = 255;
15358 else
15359 return false;
15360
15361 return ((offset <= max_offset) && (offset >= -max_offset));
15362 }
15363
15364 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15365 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15366 Assumes that the address in the base register RN is word aligned. Pattern
15367 guarantees that both memory accesses use the same base register,
15368 the offsets are constants within the range, and the gap between the offsets is 4.
15369 If reload is complete, then check that the registers are legal. WBACK indicates whether
15370 address is updated. LOAD indicates whether memory access is load or store. */
15371 bool
15372 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15373 bool wback, bool load)
15374 {
15375 unsigned int t, t2, n;
15376
15377 if (!reload_completed)
15378 return true;
15379
15380 if (!offset_ok_for_ldrd_strd (offset))
15381 return false;
15382
15383 t = REGNO (rt);
15384 t2 = REGNO (rt2);
15385 n = REGNO (rn);
15386
15387 if ((TARGET_THUMB2)
15388 && ((wback && (n == t || n == t2))
15389 || (t == SP_REGNUM)
15390 || (t == PC_REGNUM)
15391 || (t2 == SP_REGNUM)
15392 || (t2 == PC_REGNUM)
15393 || (!load && (n == PC_REGNUM))
15394 || (load && (t == t2))
15395 /* Triggers the Cortex-M3 LDRD erratum. */
15396 || (!wback && load && fix_cm3_ldrd && (n == t))))
15397 return false;
15398
15399 if ((TARGET_ARM)
15400 && ((wback && (n == t || n == t2))
15401 || (t2 == PC_REGNUM)
15402 || (t % 2 != 0) /* First destination register is not even. */
15403 || (t2 != t + 1)
15404 /* PC can be used as base register (for offset addressing only),
15405 but it is deprecated. */
15406 || (n == PC_REGNUM)))
15407 return false;
15408
15409 return true;
15410 }
15411
15412 /* Return true if a 64-bit access with alignment ALIGN and with a
15413 constant offset OFFSET from the base pointer is permitted on this
15414 architecture. */
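/* For example, with unaligned access enabled a word-aligned access at
   offset 4 is permitted, whereas without it the access must be at least
   doubleword aligned and the offset a multiple of 8. */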
15415 static bool
15416 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15417 {
15418 return (unaligned_access
15419 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15420 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15421 }
15422
15423 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15424 operand MEM's address contains an immediate offset from the base
15425 register and has no side effects, in which case it sets BASE,
15426 OFFSET and ALIGN accordingly. */
15427 static bool
15428 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15429 {
15430 rtx addr;
15431
15432 gcc_assert (base != NULL && offset != NULL);
15433
15434 /* TODO: Handle more general memory operand patterns, such as
15435 PRE_DEC and PRE_INC. */
15436
15437 if (side_effects_p (mem))
15438 return false;
15439
15440 /* Can't deal with subregs. */
15441 if (GET_CODE (mem) == SUBREG)
15442 return false;
15443
15444 gcc_assert (MEM_P (mem));
15445
15446 *offset = const0_rtx;
15447 *align = MEM_ALIGN (mem);
15448
15449 addr = XEXP (mem, 0);
15450
15451 /* If addr isn't valid for DImode, then we can't handle it. */
15452 if (!arm_legitimate_address_p (DImode, addr,
15453 reload_in_progress || reload_completed))
15454 return false;
15455
15456 if (REG_P (addr))
15457 {
15458 *base = addr;
15459 return true;
15460 }
15461 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15462 {
15463 *base = XEXP (addr, 0);
15464 *offset = XEXP (addr, 1);
15465 return (REG_P (*base) && CONST_INT_P (*offset));
15466 }
15467
15468 return false;
15469 }
15470
15471 /* Called from a peephole2 to replace two word-size accesses with a
15472 single LDRD/STRD instruction. Returns true iff we can generate a
15473 new instruction sequence. That is, both accesses use the same base
15474 register and the gap between constant offsets is 4. This function
15475 may reorder its operands to match ldrd/strd RTL templates.
15476 OPERANDS are the operands found by the peephole matcher;
15477 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15478 corresponding memory operands. LOAD indicates whether the access
15479 is load or store. CONST_STORE indicates a store of constant
15480 integer values held in OPERANDS[4,5] and assumes that the pattern
15481 is four insns long, for the purpose of checking dead registers.
15482 COMMUTE indicates that register operands may be reordered. */
15483 bool
15484 gen_operands_ldrd_strd (rtx *operands, bool load,
15485 bool const_store, bool commute)
15486 {
15487 int nops = 2;
15488 HOST_WIDE_INT offsets[2], offset, align[2];
15489 rtx base = NULL_RTX;
15490 rtx cur_base, cur_offset, tmp;
15491 int i, gap;
15492 HARD_REG_SET regset;
15493
15494 gcc_assert (!const_store || !load);
15495 /* Check that the memory references are immediate offsets from the
15496 same base register. Extract the base register, the destination
15497 registers, and the corresponding memory offsets. */
15498 for (i = 0; i < nops; i++)
15499 {
15500 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15501 &align[i]))
15502 return false;
15503
15504 if (i == 0)
15505 base = cur_base;
15506 else if (REGNO (base) != REGNO (cur_base))
15507 return false;
15508
15509 offsets[i] = INTVAL (cur_offset);
15510 if (GET_CODE (operands[i]) == SUBREG)
15511 {
15512 tmp = SUBREG_REG (operands[i]);
15513 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15514 operands[i] = tmp;
15515 }
15516 }
15517
15518 /* Make sure there is no dependency between the individual loads. */
15519 if (load && REGNO (operands[0]) == REGNO (base))
15520 return false; /* RAW */
15521
15522 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15523 return false; /* WAW */
15524
15525 /* If the same input register is used in both stores
15526 when storing different constants, try to find a free register.
15527 For example, the code
15528 mov r0, 0
15529 str r0, [r2]
15530 mov r0, 1
15531 str r0, [r2, #4]
15532 can be transformed into
15533 mov r1, 0
15534 mov r0, 1
15535 strd r1, r0, [r2]
15536 in Thumb mode assuming that r1 is free.
15537 For ARM mode do the same but only if the starting register
15538 can be made to be even. */
15539 if (const_store
15540 && REGNO (operands[0]) == REGNO (operands[1])
15541 && INTVAL (operands[4]) != INTVAL (operands[5]))
15542 {
15543 if (TARGET_THUMB2)
15544 {
15545 CLEAR_HARD_REG_SET (regset);
15546 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15547 if (tmp == NULL_RTX)
15548 return false;
15549
15550 /* Use the new register in the first load to ensure that
15551 if the original input register is not dead after peephole,
15552 then it will have the correct constant value. */
15553 operands[0] = tmp;
15554 }
15555 else if (TARGET_ARM)
15556 {
15557 int regno = REGNO (operands[0]);
15558 if (!peep2_reg_dead_p (4, operands[0]))
15559 {
15560 /* When the input register is even and is not dead after the
15561 pattern, it has to hold the second constant but we cannot
15562 form a legal STRD in ARM mode with this register as the second
15563 register. */
15564 if (regno % 2 == 0)
15565 return false;
15566
15567 /* Is regno-1 free? */
15568 SET_HARD_REG_SET (regset);
15569 CLEAR_HARD_REG_BIT (regset, regno - 1);
15570 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15571 if (tmp == NULL_RTX)
15572 return false;
15573
15574 operands[0] = tmp;
15575 }
15576 else
15577 {
15578 /* Find a DImode register. */
15579 CLEAR_HARD_REG_SET (regset);
15580 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15581 if (tmp != NULL_RTX)
15582 {
15583 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15584 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15585 }
15586 else
15587 {
15588 /* Can we use the input register to form a DI register? */
15589 SET_HARD_REG_SET (regset);
15590 CLEAR_HARD_REG_BIT (regset,
15591 regno % 2 == 0 ? regno + 1 : regno - 1);
15592 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15593 if (tmp == NULL_RTX)
15594 return false;
15595 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15596 }
15597 }
15598
15599 gcc_assert (operands[0] != NULL_RTX);
15600 gcc_assert (operands[1] != NULL_RTX);
15601 gcc_assert (REGNO (operands[0]) % 2 == 0);
15602 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15603 }
15604 }
15605
15606 /* Make sure the instructions are ordered with lower memory access first. */
15607 if (offsets[0] > offsets[1])
15608 {
15609 gap = offsets[0] - offsets[1];
15610 offset = offsets[1];
15611
15612 /* Swap the instructions such that lower memory is accessed first. */
15613 std::swap (operands[0], operands[1]);
15614 std::swap (operands[2], operands[3]);
15615 std::swap (align[0], align[1]);
15616 if (const_store)
15617 std::swap (operands[4], operands[5]);
15618 }
15619 else
15620 {
15621 gap = offsets[1] - offsets[0];
15622 offset = offsets[0];
15623 }
15624
15625 /* Make sure accesses are to consecutive memory locations. */
15626 if (gap != 4)
15627 return false;
15628
15629 if (!align_ok_ldrd_strd (align[0], offset))
15630 return false;
15631
15632 /* Make sure we generate legal instructions. */
15633 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15634 false, load))
15635 return true;
15636
15637 /* In Thumb state, where registers are almost unconstrained, there
15638 is little hope to fix it. */
15639 if (TARGET_THUMB2)
15640 return false;
15641
15642 if (load && commute)
15643 {
15644 /* Try reordering registers. */
15645 std::swap (operands[0], operands[1]);
15646 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15647 false, load))
15648 return true;
15649 }
15650
15651 if (const_store)
15652 {
15653 /* If input registers are dead after this pattern, they can be
15654 reordered or replaced by other registers that are free in the
15655 current pattern. */
15656 if (!peep2_reg_dead_p (4, operands[0])
15657 || !peep2_reg_dead_p (4, operands[1]))
15658 return false;
15659
15660 /* Try to reorder the input registers. */
15661 /* For example, the code
15662 mov r0, 0
15663 mov r1, 1
15664 str r1, [r2]
15665 str r0, [r2, #4]
15666 can be transformed into
15667 mov r1, 0
15668 mov r0, 1
15669 strd r0, [r2]
15670 */
15671 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15672 false, false))
15673 {
15674 std::swap (operands[0], operands[1]);
15675 return true;
15676 }
15677
15678 /* Try to find a free DI register. */
15679 CLEAR_HARD_REG_SET (regset);
15680 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15681 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15682 while (true)
15683 {
15684 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15685 if (tmp == NULL_RTX)
15686 return false;
15687
15688 /* DREG must be an even-numbered register in DImode.
15689 Split it into SI registers. */
15690 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15691 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15692 gcc_assert (operands[0] != NULL_RTX);
15693 gcc_assert (operands[1] != NULL_RTX);
15694 gcc_assert (REGNO (operands[0]) % 2 == 0);
15695 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15696
15697 return (operands_ok_ldrd_strd (operands[0], operands[1],
15698 base, offset,
15699 false, load));
15700 }
15701 }
15702
15703 return false;
15704 }
15705
15706
15707
15708 \f
15709 /* Print a symbolic form of X to the debug file, F. */
15710 static void
15711 arm_print_value (FILE *f, rtx x)
15712 {
15713 switch (GET_CODE (x))
15714 {
15715 case CONST_INT:
15716 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15717 return;
15718
15719 case CONST_DOUBLE:
15720 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15721 return;
15722
15723 case CONST_VECTOR:
15724 {
15725 int i;
15726
15727 fprintf (f, "<");
15728 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15729 {
15730 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15731 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15732 fputc (',', f);
15733 }
15734 fprintf (f, ">");
15735 }
15736 return;
15737
15738 case CONST_STRING:
15739 fprintf (f, "\"%s\"", XSTR (x, 0));
15740 return;
15741
15742 case SYMBOL_REF:
15743 fprintf (f, "`%s'", XSTR (x, 0));
15744 return;
15745
15746 case LABEL_REF:
15747 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15748 return;
15749
15750 case CONST:
15751 arm_print_value (f, XEXP (x, 0));
15752 return;
15753
15754 case PLUS:
15755 arm_print_value (f, XEXP (x, 0));
15756 fprintf (f, "+");
15757 arm_print_value (f, XEXP (x, 1));
15758 return;
15759
15760 case PC:
15761 fprintf (f, "pc");
15762 return;
15763
15764 default:
15765 fprintf (f, "????");
15766 return;
15767 }
15768 }
15769 \f
15770 /* Routines for manipulation of the constant pool. */
15771
15772 /* Arm instructions cannot load a large constant directly into a
15773 register; they have to come from a pc relative load. The constant
15774 must therefore be placed in the addressable range of the pc
15775 relative load. Depending on the precise pc relative load
15776 instruction the range is somewhere between 256 bytes and 4k. This
15777 means that we often have to dump a constant inside a function, and
15778 generate code to branch around it.
15779
15780 It is important to minimize this, since the branches will slow
15781 things down and make the code larger.
15782
15783 Normally we can hide the table after an existing unconditional
15784 branch so that there is no interruption of the flow, but in the
15785 worst case the code looks like this:
15786
15787 ldr rn, L1
15788 ...
15789 b L2
15790 align
15791 L1: .long value
15792 L2:
15793 ...
15794
15795 ldr rn, L3
15796 ...
15797 b L4
15798 align
15799 L3: .long value
15800 L4:
15801 ...
15802
15803 We fix this by performing a scan after scheduling, which notices
15804 which instructions need to have their operands fetched from the
15805 constant table and builds the table.
15806
15807 The algorithm starts by building a table of all the constants that
15808 need fixing up and all the natural barriers in the function (places
15809 where a constant table can be dropped without breaking the flow).
15810 For each fixup we note how far the pc-relative replacement will be
15811 able to reach and the offset of the instruction into the function.
15812
15813 Having built the table we then group the fixes together to form
15814 tables that are as large as possible (subject to addressing
15815 constraints) and emit each table of constants after the last
15816 barrier that is within range of all the instructions in the group.
15817 If a group does not contain a barrier, then we forcibly create one
15818 by inserting a jump instruction into the flow. Once the table has
15819 been inserted, the insns are then modified to reference the
15820 relevant entry in the pool.
15821
15822 Possible enhancements to the algorithm (not implemented) are:
15823
15824 1) For some processors and object formats, there may be benefit in
15825 aligning the pools to the start of cache lines; this alignment
15826 would need to be taken into account when calculating addressability
15827 of a pool. */
15828
15829 /* These typedefs are located at the start of this file, so that
15830 they can be used in the prototypes there. This comment is to
15831 remind readers of that fact so that the following structures
15832 can be understood more easily.
15833
15834 typedef struct minipool_node Mnode;
15835 typedef struct minipool_fixup Mfix; */
15836
15837 struct minipool_node
15838 {
15839 /* Doubly linked chain of entries. */
15840 Mnode * next;
15841 Mnode * prev;
15842 /* The maximum offset into the code at which this entry can be placed. While
15843 pushing fixes for forward references, all entries are sorted in order
15844 of increasing max_address. */
15845 HOST_WIDE_INT max_address;
15846 /* Similarly for an entry inserted for a backwards ref. */
15847 HOST_WIDE_INT min_address;
15848 /* The number of fixes referencing this entry. This can become zero
15849 if we "unpush" an entry. In this case we ignore the entry when we
15850 come to emit the code. */
15851 int refcount;
15852 /* The offset from the start of the minipool. */
15853 HOST_WIDE_INT offset;
15854 /* The value in table. */
15855 rtx value;
15856 /* The mode of value. */
15857 machine_mode mode;
15858 /* The size of the value. With iWMMXt enabled
15859 sizes > 4 also imply an alignment of 8 bytes. */
15860 int fix_size;
15861 };
15862
15863 struct minipool_fixup
15864 {
15865 Mfix * next;
15866 rtx_insn * insn;
15867 HOST_WIDE_INT address;
15868 rtx * loc;
15869 machine_mode mode;
15870 int fix_size;
15871 rtx value;
15872 Mnode * minipool;
15873 HOST_WIDE_INT forwards;
15874 HOST_WIDE_INT backwards;
15875 };
15876
15877 /* Fixes less than a word need padding out to a word boundary. */
15878 #define MINIPOOL_FIX_SIZE(mode) \
15879 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
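/* For example, an HImode fix (2 bytes) still occupies 4 bytes in the pool,
   while DImode and DFmode fixes keep their natural 8-byte size. */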
15880
15881 static Mnode * minipool_vector_head;
15882 static Mnode * minipool_vector_tail;
15883 static rtx_code_label *minipool_vector_label;
15884 static int minipool_pad;
15885
15886 /* The linked list of all minipool fixes required for this function. */
15887 Mfix * minipool_fix_head;
15888 Mfix * minipool_fix_tail;
15889 /* The fix entry for the current minipool, once it has been placed. */
15890 Mfix * minipool_barrier;
15891
15892 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15893 #define JUMP_TABLES_IN_TEXT_SECTION 0
15894 #endif
15895
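/* Return the number of bytes the jump table INSN will occupy in the text
   section, or zero if the table is emitted into a separate read-only data
   section instead.  Illustrative only: a 5-entry QImode ADDR_DIFF_VEC (a
   Thumb-2 TBB table) counts as 6 bytes after rounding up to a halfword,
   and a 3-entry SImode table on Thumb counts as 12 + 2 = 14 bytes,
   including the alignment padding. */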
15896 static HOST_WIDE_INT
15897 get_jump_table_size (rtx_jump_table_data *insn)
15898 {
15899 /* ADDR_VECs only take room if read-only data goes into the text
15900 section. */
15901 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15902 {
15903 rtx body = PATTERN (insn);
15904 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15905 HOST_WIDE_INT size;
15906 HOST_WIDE_INT modesize;
15907
15908 modesize = GET_MODE_SIZE (GET_MODE (body));
15909 size = modesize * XVECLEN (body, elt);
15910 switch (modesize)
15911 {
15912 case 1:
15913 /* Round up size of TBB table to a halfword boundary. */
15914 size = (size + 1) & ~HOST_WIDE_INT_1;
15915 break;
15916 case 2:
15917 /* No padding necessary for TBH. */
15918 break;
15919 case 4:
15920 /* Add two bytes for alignment on Thumb. */
15921 if (TARGET_THUMB)
15922 size += 2;
15923 break;
15924 default:
15925 gcc_unreachable ();
15926 }
15927 return size;
15928 }
15929
15930 return 0;
15931 }
15932
15933 /* Return the maximum amount of padding that will be inserted before
15934 label LABEL. */
15935
15936 static HOST_WIDE_INT
15937 get_label_padding (rtx label)
15938 {
15939 HOST_WIDE_INT align, min_insn_size;
15940
15941 align = 1 << label_to_alignment (label);
15942 min_insn_size = TARGET_THUMB ? 2 : 4;
15943 return align > min_insn_size ? align - min_insn_size : 0;
15944 }
15945
15946 /* Move a minipool fix MP from its current location to before MAX_MP.
15947 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15948 constraints may need updating. */
15949 static Mnode *
15950 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15951 HOST_WIDE_INT max_address)
15952 {
15953 /* The code below assumes these are different. */
15954 gcc_assert (mp != max_mp);
15955
15956 if (max_mp == NULL)
15957 {
15958 if (max_address < mp->max_address)
15959 mp->max_address = max_address;
15960 }
15961 else
15962 {
15963 if (max_address > max_mp->max_address - mp->fix_size)
15964 mp->max_address = max_mp->max_address - mp->fix_size;
15965 else
15966 mp->max_address = max_address;
15967
15968 /* Unlink MP from its current position. Since max_mp is non-null,
15969 mp->prev must be non-null. */
15970 mp->prev->next = mp->next;
15971 if (mp->next != NULL)
15972 mp->next->prev = mp->prev;
15973 else
15974 minipool_vector_tail = mp->prev;
15975
15976 /* Re-insert it before MAX_MP. */
15977 mp->next = max_mp;
15978 mp->prev = max_mp->prev;
15979 max_mp->prev = mp;
15980
15981 if (mp->prev != NULL)
15982 mp->prev->next = mp;
15983 else
15984 minipool_vector_head = mp;
15985 }
15986
15987 /* Save the new entry. */
15988 max_mp = mp;
15989
15990 /* Scan over the preceding entries and adjust their addresses as
15991 required. */
15992 while (mp->prev != NULL
15993 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15994 {
15995 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15996 mp = mp->prev;
15997 }
15998
15999 return max_mp;
16000 }
16001
16002 /* Add a constant to the minipool for a forward reference. Returns the
16003 node added or NULL if the constant will not fit in this pool. */
16004 static Mnode *
16005 add_minipool_forward_ref (Mfix *fix)
16006 {
16007 /* If set, max_mp is the first pool_entry that has a lower
16008 constraint than the one we are trying to add. */
16009 Mnode * max_mp = NULL;
16010 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16011 Mnode * mp;
16012
16013 /* If the minipool starts before the end of FIX->INSN then this FIX
16014 cannot be placed into the current pool. Furthermore, adding the
16015 new constant pool entry may cause the pool to start FIX_SIZE bytes
16016 earlier. */
16017 if (minipool_vector_head &&
16018 (fix->address + get_attr_length (fix->insn)
16019 >= minipool_vector_head->max_address - fix->fix_size))
16020 return NULL;
16021
16022 /* Scan the pool to see if a constant with the same value has
16023 already been added. While we are doing this, also note the
16024 location where we must insert the constant if it doesn't already
16025 exist. */
16026 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16027 {
16028 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16029 && fix->mode == mp->mode
16030 && (!LABEL_P (fix->value)
16031 || (CODE_LABEL_NUMBER (fix->value)
16032 == CODE_LABEL_NUMBER (mp->value)))
16033 && rtx_equal_p (fix->value, mp->value))
16034 {
16035 /* More than one fix references this entry. */
16036 mp->refcount++;
16037 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16038 }
16039
16040 /* Note the insertion point if necessary. */
16041 if (max_mp == NULL
16042 && mp->max_address > max_address)
16043 max_mp = mp;
16044
16045 /* If we are inserting an 8-byte aligned quantity and
16046 we have not already found an insertion point, then
16047 make sure that all such 8-byte aligned quantities are
16048 placed at the start of the pool. */
16049 if (ARM_DOUBLEWORD_ALIGN
16050 && max_mp == NULL
16051 && fix->fix_size >= 8
16052 && mp->fix_size < 8)
16053 {
16054 max_mp = mp;
16055 max_address = mp->max_address;
16056 }
16057 }
16058
16059 /* The value is not currently in the minipool, so we need to create
16060 a new entry for it. If MAX_MP is NULL, the entry will be put on
16061 the end of the list since the placement is less constrained than
16062 any existing entry. Otherwise, we insert the new fix before
16063 MAX_MP and, if necessary, adjust the constraints on the other
16064 entries. */
16065 mp = XNEW (Mnode);
16066 mp->fix_size = fix->fix_size;
16067 mp->mode = fix->mode;
16068 mp->value = fix->value;
16069 mp->refcount = 1;
16070 /* Not yet required for a backwards ref. */
16071 mp->min_address = -65536;
16072
16073 if (max_mp == NULL)
16074 {
16075 mp->max_address = max_address;
16076 mp->next = NULL;
16077 mp->prev = minipool_vector_tail;
16078
16079 if (mp->prev == NULL)
16080 {
16081 minipool_vector_head = mp;
16082 minipool_vector_label = gen_label_rtx ();
16083 }
16084 else
16085 mp->prev->next = mp;
16086
16087 minipool_vector_tail = mp;
16088 }
16089 else
16090 {
16091 if (max_address > max_mp->max_address - mp->fix_size)
16092 mp->max_address = max_mp->max_address - mp->fix_size;
16093 else
16094 mp->max_address = max_address;
16095
16096 mp->next = max_mp;
16097 mp->prev = max_mp->prev;
16098 max_mp->prev = mp;
16099 if (mp->prev != NULL)
16100 mp->prev->next = mp;
16101 else
16102 minipool_vector_head = mp;
16103 }
16104
16105 /* Save the new entry. */
16106 max_mp = mp;
16107
16108 /* Scan over the preceding entries and adjust their addresses as
16109 required. */
16110 while (mp->prev != NULL
16111 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16112 {
16113 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16114 mp = mp->prev;
16115 }
16116
16117 return max_mp;
16118 }
16119
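/* Counterpart of move_minipool_fix_forward_ref for a backwards reference:
   move the existing pool entry MP to just after MIN_MP, or, if MIN_MP is
   NULL, simply tighten MP's minimum-address constraint to at least
   MIN_ADDRESS.  The offsets and min_address constraints of the whole pool
   are then recomputed.  Returns the new MIN_MP.  */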
16120 static Mnode *
16121 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16122 HOST_WIDE_INT min_address)
16123 {
16124 HOST_WIDE_INT offset;
16125
16126 /* The code below assumes these are different. */
16127 gcc_assert (mp != min_mp);
16128
16129 if (min_mp == NULL)
16130 {
16131 if (min_address > mp->min_address)
16132 mp->min_address = min_address;
16133 }
16134 else
16135 {
16136 /* We will adjust this below if it is too loose. */
16137 mp->min_address = min_address;
16138
16139 /* Unlink MP from its current position. Since min_mp is non-null,
16140 mp->next must be non-null. */
16141 mp->next->prev = mp->prev;
16142 if (mp->prev != NULL)
16143 mp->prev->next = mp->next;
16144 else
16145 minipool_vector_head = mp->next;
16146
16147 /* Reinsert it after MIN_MP. */
16148 mp->prev = min_mp;
16149 mp->next = min_mp->next;
16150 min_mp->next = mp;
16151 if (mp->next != NULL)
16152 mp->next->prev = mp;
16153 else
16154 minipool_vector_tail = mp;
16155 }
16156
16157 min_mp = mp;
16158
16159 offset = 0;
16160 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16161 {
16162 mp->offset = offset;
16163 if (mp->refcount > 0)
16164 offset += mp->fix_size;
16165
16166 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16167 mp->next->min_address = mp->min_address + mp->fix_size;
16168 }
16169
16170 return min_mp;
16171 }
16172
16173 /* Add a constant to the minipool for a backward reference. Returns the
16174 node added or NULL if the constant will not fit in this pool.
16175
16176 Note that the code for insertion for a backwards reference can be
16177 somewhat confusing because the calculated offsets for each fix do
16178 not take into account the size of the pool (which is still under
16179 construction). */
16180 static Mnode *
16181 add_minipool_backward_ref (Mfix *fix)
16182 {
16183 /* If set, min_mp is the last pool_entry that has a lower constraint
16184 than the one we are trying to add. */
16185 Mnode *min_mp = NULL;
16186 /* This can be negative, since it is only a constraint. */
16187 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16188 Mnode *mp;
16189
16190 /* If we can't reach the current pool from this insn, or if we can't
16191 insert this entry at the end of the pool without pushing other
16192 fixes out of range, then we don't try. This ensures that we
16193 can't fail later on. */
16194 if (min_address >= minipool_barrier->address
16195 || (minipool_vector_tail->min_address + fix->fix_size
16196 >= minipool_barrier->address))
16197 return NULL;
16198
16199 /* Scan the pool to see if a constant with the same value has
16200 already been added. While we are doing this, also note the
16201 location where we must insert the constant if it doesn't already
16202 exist. */
16203 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16204 {
16205 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16206 && fix->mode == mp->mode
16207 && (!LABEL_P (fix->value)
16208 || (CODE_LABEL_NUMBER (fix->value)
16209 == CODE_LABEL_NUMBER (mp->value)))
16210 && rtx_equal_p (fix->value, mp->value)
16211 /* Check that there is enough slack to move this entry to the
16212 end of the table (this is conservative). */
16213 && (mp->max_address
16214 > (minipool_barrier->address
16215 + minipool_vector_tail->offset
16216 + minipool_vector_tail->fix_size)))
16217 {
16218 mp->refcount++;
16219 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16220 }
16221
16222 if (min_mp != NULL)
16223 mp->min_address += fix->fix_size;
16224 else
16225 {
16226 /* Note the insertion point if necessary. */
16227 if (mp->min_address < min_address)
16228 {
16229 /* For now, we do not allow the insertion of 8-byte alignment
16230 requiring nodes anywhere but at the start of the pool. */
16231 if (ARM_DOUBLEWORD_ALIGN
16232 && fix->fix_size >= 8 && mp->fix_size < 8)
16233 return NULL;
16234 else
16235 min_mp = mp;
16236 }
16237 else if (mp->max_address
16238 < minipool_barrier->address + mp->offset + fix->fix_size)
16239 {
16240 /* Inserting before this entry would push the fix beyond
16241 its maximum address (which can happen if we have
16242 re-located a forwards fix); force the new fix to come
16243 after it. */
16244 if (ARM_DOUBLEWORD_ALIGN
16245 && fix->fix_size >= 8 && mp->fix_size < 8)
16246 return NULL;
16247 else
16248 {
16249 min_mp = mp;
16250 min_address = mp->min_address + fix->fix_size;
16251 }
16252 }
16253 /* Do not insert a non-8-byte aligned quantity before 8-byte
16254 aligned quantities. */
16255 else if (ARM_DOUBLEWORD_ALIGN
16256 && fix->fix_size < 8
16257 && mp->fix_size >= 8)
16258 {
16259 min_mp = mp;
16260 min_address = mp->min_address + fix->fix_size;
16261 }
16262 }
16263 }
16264
16265 /* We need to create a new entry. */
16266 mp = XNEW (Mnode);
16267 mp->fix_size = fix->fix_size;
16268 mp->mode = fix->mode;
16269 mp->value = fix->value;
16270 mp->refcount = 1;
16271 mp->max_address = minipool_barrier->address + 65536;
16272
16273 mp->min_address = min_address;
16274
16275 if (min_mp == NULL)
16276 {
16277 mp->prev = NULL;
16278 mp->next = minipool_vector_head;
16279
16280 if (mp->next == NULL)
16281 {
16282 minipool_vector_tail = mp;
16283 minipool_vector_label = gen_label_rtx ();
16284 }
16285 else
16286 mp->next->prev = mp;
16287
16288 minipool_vector_head = mp;
16289 }
16290 else
16291 {
16292 mp->next = min_mp->next;
16293 mp->prev = min_mp;
16294 min_mp->next = mp;
16295
16296 if (mp->next != NULL)
16297 mp->next->prev = mp;
16298 else
16299 minipool_vector_tail = mp;
16300 }
16301
16302 /* Save the new entry. */
16303 min_mp = mp;
16304
16305 if (mp->prev)
16306 mp = mp->prev;
16307 else
16308 mp->offset = 0;
16309
16310 /* Scan over the following entries and adjust their offsets. */
16311 while (mp->next != NULL)
16312 {
16313 if (mp->next->min_address < mp->min_address + mp->fix_size)
16314 mp->next->min_address = mp->min_address + mp->fix_size;
16315
16316 if (mp->refcount)
16317 mp->next->offset = mp->offset + mp->fix_size;
16318 else
16319 mp->next->offset = mp->offset;
16320
16321 mp = mp->next;
16322 }
16323
16324 return min_mp;
16325 }
16326
16327 static void
16328 assign_minipool_offsets (Mfix *barrier)
16329 {
16330 HOST_WIDE_INT offset = 0;
16331 Mnode *mp;
16332
16333 minipool_barrier = barrier;
16334
16335 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16336 {
16337 mp->offset = offset;
16338
16339 if (mp->refcount > 0)
16340 offset += mp->fix_size;
16341 }
16342 }
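
/* For example, a pool containing live entries of 4, 8 and 4 bytes is
   assigned the offsets 0, 4 and 12; an entry whose refcount has dropped to
   zero still gets an offset but contributes no space.  */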
16343
16344 /* Output the literal table.  */
16345 static void
16346 dump_minipool (rtx_insn *scan)
16347 {
16348 Mnode * mp;
16349 Mnode * nmp;
16350 int align64 = 0;
16351
16352 if (ARM_DOUBLEWORD_ALIGN)
16353 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16354 if (mp->refcount > 0 && mp->fix_size >= 8)
16355 {
16356 align64 = 1;
16357 break;
16358 }
16359
16360 if (dump_file)
16361 fprintf (dump_file,
16362 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16363 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16364
16365 scan = emit_label_after (gen_label_rtx (), scan);
16366 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16367 scan = emit_label_after (minipool_vector_label, scan);
16368
16369 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16370 {
16371 if (mp->refcount > 0)
16372 {
16373 if (dump_file)
16374 {
16375 fprintf (dump_file,
16376 ";; Offset %u, min %ld, max %ld ",
16377 (unsigned) mp->offset, (unsigned long) mp->min_address,
16378 (unsigned long) mp->max_address);
16379 arm_print_value (dump_file, mp->value);
16380 fputc ('\n', dump_file);
16381 }
16382
16383 rtx val = copy_rtx (mp->value);
16384
16385 switch (GET_MODE_SIZE (mp->mode))
16386 {
16387 #ifdef HAVE_consttable_1
16388 case 1:
16389 scan = emit_insn_after (gen_consttable_1 (val), scan);
16390 break;
16391
16392 #endif
16393 #ifdef HAVE_consttable_2
16394 case 2:
16395 scan = emit_insn_after (gen_consttable_2 (val), scan);
16396 break;
16397
16398 #endif
16399 #ifdef HAVE_consttable_4
16400 case 4:
16401 scan = emit_insn_after (gen_consttable_4 (val), scan);
16402 break;
16403
16404 #endif
16405 #ifdef HAVE_consttable_8
16406 case 8:
16407 scan = emit_insn_after (gen_consttable_8 (val), scan);
16408 break;
16409
16410 #endif
16411 #ifdef HAVE_consttable_16
16412 case 16:
16413 scan = emit_insn_after (gen_consttable_16 (val), scan);
16414 break;
16415
16416 #endif
16417 default:
16418 gcc_unreachable ();
16419 }
16420 }
16421
16422 nmp = mp->next;
16423 free (mp);
16424 }
16425
16426 minipool_vector_head = minipool_vector_tail = NULL;
16427 scan = emit_insn_after (gen_consttable_end (), scan);
16428 scan = emit_barrier_after (scan);
16429 }
16430
16431 /* Return the cost of forcibly inserting a barrier after INSN. */
16432 static int
16433 arm_barrier_cost (rtx_insn *insn)
16434 {
16435 /* Basing the location of the pool on the loop depth is preferable,
16436 but at the moment, the basic block information seems to be
16437 corrupt by this stage of the compilation. */
16438 int base_cost = 50;
16439 rtx_insn *next = next_nonnote_insn (insn);
16440
16441 if (next != NULL && LABEL_P (next))
16442 base_cost -= 20;
16443
16444 switch (GET_CODE (insn))
16445 {
16446 case CODE_LABEL:
16447 /* It will always be better to place the table before the label, rather
16448 than after it. */
16449 return 50;
16450
16451 case INSN:
16452 case CALL_INSN:
16453 return base_cost;
16454
16455 case JUMP_INSN:
16456 return base_cost - 10;
16457
16458 default:
16459 return base_cost + 10;
16460 }
16461 }
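
/* Lower is better here: an ordinary insn costs 50, or 30 when the next
   insn is a label; a jump insn in that position costs 20, so the cheapest
   typical spot is immediately after a branch that precedes a label.  */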
16462
16463 /* Find the best place in the insn stream in the range
16464 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16465 Create the barrier by inserting a jump and add a new fix entry for
16466 it. */
16467 static Mfix *
16468 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16469 {
16470 HOST_WIDE_INT count = 0;
16471 rtx_barrier *barrier;
16472 rtx_insn *from = fix->insn;
16473 /* The instruction after which we will insert the jump. */
16474 rtx_insn *selected = NULL;
16475 int selected_cost;
16476 /* The address at which the jump instruction will be placed. */
16477 HOST_WIDE_INT selected_address;
16478 Mfix * new_fix;
16479 HOST_WIDE_INT max_count = max_address - fix->address;
16480 rtx_code_label *label = gen_label_rtx ();
16481
16482 selected_cost = arm_barrier_cost (from);
16483 selected_address = fix->address;
16484
16485 while (from && count < max_count)
16486 {
16487 rtx_jump_table_data *tmp;
16488 int new_cost;
16489
16490 /* This code shouldn't have been called if there was a natural barrier
16491 within range. */
16492 gcc_assert (!BARRIER_P (from));
16493
16494 /* Count the length of this insn. This must stay in sync with the
16495 code that pushes minipool fixes. */
16496 if (LABEL_P (from))
16497 count += get_label_padding (from);
16498 else
16499 count += get_attr_length (from);
16500
16501 /* If there is a jump table, add its length. */
16502 if (tablejump_p (from, NULL, &tmp))
16503 {
16504 count += get_jump_table_size (tmp);
16505
16506 /* Jump tables aren't in a basic block, so base the cost on
16507 the dispatch insn. If we select this location, we will
16508 still put the pool after the table. */
16509 new_cost = arm_barrier_cost (from);
16510
16511 if (count < max_count
16512 && (!selected || new_cost <= selected_cost))
16513 {
16514 selected = tmp;
16515 selected_cost = new_cost;
16516 selected_address = fix->address + count;
16517 }
16518
16519 /* Continue after the dispatch table. */
16520 from = NEXT_INSN (tmp);
16521 continue;
16522 }
16523
16524 new_cost = arm_barrier_cost (from);
16525
16526 if (count < max_count
16527 && (!selected || new_cost <= selected_cost))
16528 {
16529 selected = from;
16530 selected_cost = new_cost;
16531 selected_address = fix->address + count;
16532 }
16533
16534 from = NEXT_INSN (from);
16535 }
16536
16537 /* Make sure that we found a place to insert the jump. */
16538 gcc_assert (selected);
16539
16540 /* Create a new JUMP_INSN that branches around a barrier. */
16541 from = emit_jump_insn_after (gen_jump (label), selected);
16542 JUMP_LABEL (from) = label;
16543 barrier = emit_barrier_after (from);
16544 emit_label_after (label, barrier);
16545
16546 /* Create a minipool barrier entry for the new barrier. */
16547 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16548 new_fix->insn = barrier;
16549 new_fix->address = selected_address;
16550 new_fix->next = fix->next;
16551 fix->next = new_fix;
16552
16553 return new_fix;
16554 }
16555
16556 /* Record that there is a natural barrier in the insn stream at
16557 ADDRESS. */
16558 static void
16559 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16560 {
16561 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16562
16563 fix->insn = insn;
16564 fix->address = address;
16565
16566 fix->next = NULL;
16567 if (minipool_fix_head != NULL)
16568 minipool_fix_tail->next = fix;
16569 else
16570 minipool_fix_head = fix;
16571
16572 minipool_fix_tail = fix;
16573 }
16574
16575 /* Record INSN, which will need fixing up to load a value from the
16576 minipool. ADDRESS is the offset of the insn since the start of the
16577 function; LOC is a pointer to the part of the insn which requires
16578 fixing; VALUE is the constant that must be loaded, which is of type
16579 MODE. */
16580 static void
16581 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16582 machine_mode mode, rtx value)
16583 {
16584 gcc_assert (!arm_disable_literal_pool);
16585 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16586
16587 fix->insn = insn;
16588 fix->address = address;
16589 fix->loc = loc;
16590 fix->mode = mode;
16591 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16592 fix->value = value;
16593 fix->forwards = get_attr_pool_range (insn);
16594 fix->backwards = get_attr_neg_pool_range (insn);
16595 fix->minipool = NULL;
16596
16597 /* If an insn doesn't have a range defined for it, then it isn't
16598 expecting to be reworked by this code. Better to stop now than
16599 to generate duff assembly code. */
16600 gcc_assert (fix->forwards || fix->backwards);
16601
16602 /* If an entry requires 8-byte alignment then assume all constant pools
16603 require 4 bytes of padding. Trying to do this later on a per-pool
16604 basis is awkward because existing pool entries have to be modified. */
16605 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16606 minipool_pad = 4;
16607
16608 if (dump_file)
16609 {
16610 fprintf (dump_file,
16611 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16612 GET_MODE_NAME (mode),
16613 INSN_UID (insn), (unsigned long) address,
16614 -1 * (long)fix->backwards, (long)fix->forwards);
16615 arm_print_value (dump_file, fix->value);
16616 fprintf (dump_file, "\n");
16617 }
16618
16619 /* Add it to the chain of fixes. */
16620 fix->next = NULL;
16621
16622 if (minipool_fix_head != NULL)
16623 minipool_fix_tail->next = fix;
16624 else
16625 minipool_fix_head = fix;
16626
16627 minipool_fix_tail = fix;
16628 }
16629
16630 /* Return the maximum number of insns that we are prepared to use to
16631 synthesize a 64-bit constant inline; see arm_const_double_inline_cost
16632 for the cost of a particular value. */
16633 int
16634 arm_max_const_double_inline_cost ()
16635 {
16636 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16637 }
16638
16639 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16640 Returns the number of insns needed, or 99 if we don't know how to
16641 do it. */
16642 int
16643 arm_const_double_inline_cost (rtx val)
16644 {
16645 rtx lowpart, highpart;
16646 machine_mode mode;
16647
16648 mode = GET_MODE (val);
16649
16650 if (mode == VOIDmode)
16651 mode = DImode;
16652
16653 gcc_assert (GET_MODE_SIZE (mode) == 8);
16654
16655 lowpart = gen_lowpart (SImode, val);
16656 highpart = gen_highpart_mode (SImode, mode, val);
16657
16658 gcc_assert (CONST_INT_P (lowpart));
16659 gcc_assert (CONST_INT_P (highpart));
16660
16661 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16662 NULL_RTX, NULL_RTX, 0, 0)
16663 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16664 NULL_RTX, NULL_RTX, 0, 0));
16665 }
16666
16667 /* Cost of loading a SImode constant. */
16668 static inline int
16669 arm_const_inline_cost (enum rtx_code code, rtx val)
16670 {
16671 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16672 NULL_RTX, NULL_RTX, 1, 0);
16673 }
16674
16675 /* Return true if it is worthwhile to split a 64-bit constant into two
16676 32-bit operations. This is the case if optimizing for size, or
16677 if we have load delay slots, or if one 32-bit part can be done with
16678 a single data operation. */
16679 bool
16680 arm_const_double_by_parts (rtx val)
16681 {
16682 machine_mode mode = GET_MODE (val);
16683 rtx part;
16684
16685 if (optimize_size || arm_ld_sched)
16686 return true;
16687
16688 if (mode == VOIDmode)
16689 mode = DImode;
16690
16691 part = gen_highpart_mode (SImode, mode, val);
16692
16693 gcc_assert (CONST_INT_P (part));
16694
16695 if (const_ok_for_arm (INTVAL (part))
16696 || const_ok_for_arm (~INTVAL (part)))
16697 return true;
16698
16699 part = gen_lowpart (SImode, val);
16700
16701 gcc_assert (CONST_INT_P (part));
16702
16703 if (const_ok_for_arm (INTVAL (part))
16704 || const_ok_for_arm (~INTVAL (part)))
16705 return true;
16706
16707 return false;
16708 }
16709
16710 /* Return true if it is possible to inline both the high and low parts
16711 of a 64-bit constant into 32-bit data processing instructions. */
16712 bool
16713 arm_const_double_by_immediates (rtx val)
16714 {
16715 machine_mode mode = GET_MODE (val);
16716 rtx part;
16717
16718 if (mode == VOIDmode)
16719 mode = DImode;
16720
16721 part = gen_highpart_mode (SImode, mode, val);
16722
16723 gcc_assert (CONST_INT_P (part));
16724
16725 if (!const_ok_for_arm (INTVAL (part)))
16726 return false;
16727
16728 part = gen_lowpart (SImode, val);
16729
16730 gcc_assert (CONST_INT_P (part));
16731
16732 if (!const_ok_for_arm (INTVAL (part)))
16733 return false;
16734
16735 return true;
16736 }
16737
16738 /* Scan INSN and note any of its operands that need fixing.
16739 If DO_PUSHES is false we do not actually push any of the fixups
16740 needed. */
16741 static void
16742 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16743 {
16744 int opno;
16745
16746 extract_constrain_insn (insn);
16747
16748 if (recog_data.n_alternatives == 0)
16749 return;
16750
16751 /* Fill in recog_op_alt with information about the constraints of
16752 this insn. */
16753 preprocess_constraints (insn);
16754
16755 const operand_alternative *op_alt = which_op_alt ();
16756 for (opno = 0; opno < recog_data.n_operands; opno++)
16757 {
16758 /* Things we need to fix can only occur in inputs. */
16759 if (recog_data.operand_type[opno] != OP_IN)
16760 continue;
16761
16762 /* If this alternative is a memory reference, then any mention
16763 of constants in this alternative is really to fool reload
16764 into allowing us to accept one there. We need to fix them up
16765 now so that we output the right code. */
16766 if (op_alt[opno].memory_ok)
16767 {
16768 rtx op = recog_data.operand[opno];
16769
16770 if (CONSTANT_P (op))
16771 {
16772 if (do_pushes)
16773 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16774 recog_data.operand_mode[opno], op);
16775 }
16776 else if (MEM_P (op)
16777 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16778 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16779 {
16780 if (do_pushes)
16781 {
16782 rtx cop = avoid_constant_pool_reference (op);
16783
16784 /* Casting the address of something to a mode narrower
16785 than a word can cause avoid_constant_pool_reference()
16786 to return the pool reference itself. That's no good to
16787 us here. Let's just hope that we can use the
16788 constant pool value directly. */
16789 if (op == cop)
16790 cop = get_pool_constant (XEXP (op, 0));
16791
16792 push_minipool_fix (insn, address,
16793 recog_data.operand_loc[opno],
16794 recog_data.operand_mode[opno], cop);
16795 }
16796
16797 }
16798 }
16799 }
16800
16801 return;
16802 }
16803
16804 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16805 and unions in the context of ARMv8-M Security Extensions. It is used as a
16806 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16807 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16808 or four masks, depending on whether it is being computed for a
16809 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16810 respectively. The tree for the type of the argument or a field within an
16811 argument is passed in ARG_TYPE, the current register this argument or field
16812 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16813 argument or field starts at is passed in STARTING_BIT and the last used bit
16814 is kept in LAST_USED_BIT which is also updated accordingly. */
16815
16816 static unsigned HOST_WIDE_INT
16817 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16818 uint32_t * padding_bits_to_clear,
16819 unsigned starting_bit, int * last_used_bit)
16820
16821 {
16822 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16823
16824 if (TREE_CODE (arg_type) == RECORD_TYPE)
16825 {
16826 unsigned current_bit = starting_bit;
16827 tree field;
16828 long int offset, size;
16829
16830
16831 field = TYPE_FIELDS (arg_type);
16832 while (field)
16833 {
16834 /* The offset within a structure is always an offset from
16835 the start of that structure. Make sure we take that into the
16836 calculation of the register based offset that we use here. */
16837 offset = starting_bit;
16838 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16839 offset %= 32;
16840
16841 /* This is the actual size of the field, for bitfields this is the
16842 bitfield width and not the container size. */
16843 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16844
16845 if (*last_used_bit != offset)
16846 {
16847 if (offset < *last_used_bit)
16848 {
16849 /* This field's offset is before the 'last_used_bit', that
16850 means this field goes on the next register. So we need to
16851 pad the rest of the current register and increase the
16852 register number. */
16853 uint32_t mask;
16854 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16855 mask++;
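              /* E.g. with *last_used_bit == 24 this gives
                 mask == 0xff000000: bits 24..31 of the current register
                 are padding and must be cleared.  */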
16856
16857 padding_bits_to_clear[*regno] |= mask;
16858 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16859 (*regno)++;
16860 }
16861 else
16862 {
16863 /* Otherwise we pad the bits between the last field's end and
16864 the start of the new field. */
16865 uint32_t mask;
16866
16867 mask = ((uint32_t)-1) >> (32 - offset);
16868 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
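              /* E.g. with offset == 16 and *last_used_bit == 8 this gives
                 mask == 0x0000ff00: only the gap between the previous
                 field's end and this field's start is treated as
                 padding.  */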
16869 padding_bits_to_clear[*regno] |= mask;
16870 }
16871 current_bit = offset;
16872 }
16873
16874 /* Calculate further padding bits for inner structs/unions too. */
16875 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16876 {
16877 *last_used_bit = current_bit;
16878 not_to_clear_reg_mask
16879 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16880 padding_bits_to_clear, offset,
16881 last_used_bit);
16882 }
16883 else
16884 {
16885 /* Update 'current_bit' with this field's size. If the
16886 'current_bit' lies in a subsequent register, update 'regno' and
16887 reset 'current_bit' to point to the current bit in that new
16888 register. */
16889 current_bit += size;
16890 while (current_bit >= 32)
16891 {
16892 current_bit-=32;
16893 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16894 (*regno)++;
16895 }
16896 *last_used_bit = current_bit;
16897 }
16898
16899 field = TREE_CHAIN (field);
16900 }
16901 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16902 }
16903 else if (TREE_CODE (arg_type) == UNION_TYPE)
16904 {
16905 tree field, field_t;
16906 int i, regno_t, field_size;
16907 int max_reg = -1;
16908 int max_bit = -1;
16909 uint32_t mask;
16910 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16911 = {-1, -1, -1, -1};
16912
16913 /* To compute the padding bits in a union we only consider bits as
16914 padding bits if, for every field in the union, they are either padding
16915 bits of that field or fall outside that field's size. */
16916 field = TYPE_FIELDS (arg_type);
16917 while (field)
16918 {
16919 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16920 = {0U, 0U, 0U, 0U};
16921 int last_used_bit_t = *last_used_bit;
16922 regno_t = *regno;
16923 field_t = TREE_TYPE (field);
16924
16925 /* If the field's type is either a record or a union make sure to
16926 compute their padding bits too. */
16927 if (RECORD_OR_UNION_TYPE_P (field_t))
16928 not_to_clear_reg_mask
16929 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16930 &padding_bits_to_clear_t[0],
16931 starting_bit, &last_used_bit_t);
16932 else
16933 {
16934 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16935 regno_t = (field_size / 32) + *regno;
16936 last_used_bit_t = (starting_bit + field_size) % 32;
16937 }
16938
16939 for (i = *regno; i < regno_t; i++)
16940 {
16941 /* For all but the last register used by this field only keep the
16942 padding bits that were padding bits in this field. */
16943 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16944 }
16945
16946 /* For the last register, keep all padding bits that were padding
16947 bits in this field and any padding bits that are still valid
16948 as padding bits but fall outside of this field's size. */
16949 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16950 padding_bits_to_clear_res[regno_t]
16951 &= padding_bits_to_clear_t[regno_t] | mask;
16952
16953 /* Update the maximum size of the fields in terms of registers used
16954 ('max_reg') and the 'last_used_bit' in said register. */
16955 if (max_reg < regno_t)
16956 {
16957 max_reg = regno_t;
16958 max_bit = last_used_bit_t;
16959 }
16960 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16961 max_bit = last_used_bit_t;
16962
16963 field = TREE_CHAIN (field);
16964 }
16965
16966 /* Update the current padding_bits_to_clear using the intersection of the
16967 padding bits of all the fields. */
16968 for (i=*regno; i < max_reg; i++)
16969 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16970
16971 /* Do not keep trailing padding bits, we do not know yet whether this
16972 is the end of the argument. */
16973 mask = ((uint32_t) 1 << max_bit) - 1;
16974 padding_bits_to_clear[max_reg]
16975 |= padding_bits_to_clear_res[max_reg] & mask;
16976
16977 *regno = max_reg;
16978 *last_used_bit = max_bit;
16979 }
16980 else
16981 /* This function should only be used for structs and unions. */
16982 gcc_unreachable ();
16983
16984 return not_to_clear_reg_mask;
16985 }
16986
16987 /* In the context of ARMv8-M Security Extensions, this function is used for both
16988 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16989 registers are used when returning or passing arguments, which is then
16990 returned as a mask. It will also compute a mask to indicate padding/unused
16991 bits for each of these registers, and passes this through the
16992 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16993 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16994 the starting register used to pass this argument or return value is passed
16995 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16996 for struct and union types. */
16997
16998 static unsigned HOST_WIDE_INT
16999 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17000 uint32_t * padding_bits_to_clear)
17001
17002 {
17003 int last_used_bit = 0;
17004 unsigned HOST_WIDE_INT not_to_clear_mask;
17005
17006 if (RECORD_OR_UNION_TYPE_P (arg_type))
17007 {
17008 not_to_clear_mask
17009 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17010 padding_bits_to_clear, 0,
17011 &last_used_bit);
17012
17013
17014 /* If the 'last_used_bit' is not zero, that means we are still using a
17015 part of the last 'regno'. In such cases we must clear the trailing
17016 bits. Otherwise we are not using regno and we should mark it as to
17017 clear. */
17018 if (last_used_bit != 0)
17019 padding_bits_to_clear[regno]
17020 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17021 else
17022 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17023 }
17024 else
17025 {
17026 not_to_clear_mask = 0;
17027 /* We are not dealing with structs or unions, so these arguments may be
17028 passed in floating point registers too. In some cases a BLKmode is
17029 used when returning or passing arguments in multiple VFP registers. */
17030 if (GET_MODE (arg_rtx) == BLKmode)
17031 {
17032 int i, arg_regs;
17033 rtx reg;
17034
17035 /* This should really only occur when dealing with the hard-float
17036 ABI. */
17037 gcc_assert (TARGET_HARD_FLOAT_ABI);
17038
17039 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17040 {
17041 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17042 gcc_assert (REG_P (reg));
17043
17044 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17045
17046 /* If we are dealing with DF mode, make sure we don't
17047 clear either of the registers it addresses. */
17048 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17049 if (arg_regs > 1)
17050 {
17051 unsigned HOST_WIDE_INT mask;
17052 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17053 mask -= HOST_WIDE_INT_1U << REGNO (reg);
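              /* This sets bits REGNO (reg) .. REGNO (reg) + arg_regs - 1;
                 e.g. for a DFmode value (arg_regs == 2) both halves of the
                 double register are protected from clearing.  */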
17054 not_to_clear_mask |= mask;
17055 }
17056 }
17057 }
17058 else
17059 {
17060 /* Otherwise we can rely on the MODE to determine how many registers
17061 are being used by this argument. */
17062 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17063 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17064 if (arg_regs > 1)
17065 {
17066 unsigned HOST_WIDE_INT
17067 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17068 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17069 not_to_clear_mask |= mask;
17070 }
17071 }
17072 }
17073
17074 return not_to_clear_mask;
17075 }
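
/* For instance, a plain 64-bit integer argument starting in r0 yields a
   mask with bits 0 and 1 set and leaves PADDING_BITS_TO_CLEAR untouched,
   so the caller removes r0 and r1 from the set of registers to be cleared
   before the non-secure call.  */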
17076
17077 /* Clear secret register values before doing a cmse_nonsecure_call or returning from
17078 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
17079 are to be fully cleared, using the value in register CLEARING_REG if more
17080 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
17081 the bits that need to be cleared in caller-saved core registers, with
17082 SCRATCH_REG used as a scratch register for that clearing.
17083
17084 NOTE: one of the three following conditions must hold:
17085 - SCRATCH_REG is a low register
17086 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17087 in TO_CLEAR_BITMAP)
17088 - CLEARING_REG is a low register. */
17089
17090 static void
17091 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17092 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17093 {
17094 bool saved_clearing = false;
17095 rtx saved_clearing_reg = NULL_RTX;
17096 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17097
17098 gcc_assert (arm_arch_cmse);
17099
17100 if (!bitmap_empty_p (to_clear_bitmap))
17101 {
17102 minregno = bitmap_first_set_bit (to_clear_bitmap);
17103 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17104 }
17105 clearing_regno = REGNO (clearing_reg);
17106
17107 /* Clear padding bits. */
17108 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17109 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17110 {
17111 uint64_t mask;
17112 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17113
17114 if (padding_bits_to_clear[i] == 0)
17115 continue;
17116
17117 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17118 CLEARING_REG as scratch. */
17119 if (TARGET_THUMB1
17120 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17121 {
17122 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17123 such that we can use clearing_reg to clear the unused bits in the
17124 arguments. */
17125 if ((clearing_regno > maxregno
17126 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17127 && !saved_clearing)
17128 {
17129 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17130 emit_move_insn (scratch_reg, clearing_reg);
17131 saved_clearing = true;
17132 saved_clearing_reg = scratch_reg;
17133 }
17134 scratch_reg = clearing_reg;
17135 }
17136
17137 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17138 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17139 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17140
17141 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17142 mask = (~padding_bits_to_clear[i]) >> 16;
17143 rtx16 = gen_int_mode (16, SImode);
17144 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17145 if (mask)
17146 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17147
17148 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
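          /* E.g. for padding_bits_to_clear[i] == 0xff000000 the scratch
             register is built up as 0x0000ffff and then 0x00ffffff (the
             zero_extract is effectively a movt), and the AND clears bits
             24..31 of the argument register while preserving the rest.  */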
17149 }
17150 if (saved_clearing)
17151 emit_move_insn (clearing_reg, saved_clearing_reg);
17152
17153
17154 /* Clear full registers. */
17155
17156 /* If not marked for clearing, clearing_reg already does not contain
17157 any secret. */
17158 if (clearing_regno <= maxregno
17159 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17160 {
17161 emit_move_insn (clearing_reg, const0_rtx);
17162 emit_use (clearing_reg);
17163 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17164 }
17165
17166 for (regno = minregno; regno <= maxregno; regno++)
17167 {
17168 if (!bitmap_bit_p (to_clear_bitmap, regno))
17169 continue;
17170
17171 if (IS_VFP_REGNUM (regno))
17172 {
17173 /* If regno is an even vfp register and its successor is also to
17174 be cleared, use vmov. */
17175 if (TARGET_VFP_DOUBLE
17176 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17177 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17178 {
17179 emit_move_insn (gen_rtx_REG (DFmode, regno),
17180 CONST1_RTX (DFmode));
17181 emit_use (gen_rtx_REG (DFmode, regno));
17182 regno++;
17183 }
17184 else
17185 {
17186 emit_move_insn (gen_rtx_REG (SFmode, regno),
17187 CONST1_RTX (SFmode));
17188 emit_use (gen_rtx_REG (SFmode, regno));
17189 }
17190 }
17191 else
17192 {
17193 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17194 emit_use (gen_rtx_REG (SImode, regno));
17195 }
17196 }
17197 }
17198
17199 /* Clear caller-saved registers not used to pass arguments before a
17200 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17201 registers is done in __gnu_cmse_nonsecure_call libcall.
17202 See libgcc/config/arm/cmse_nonsecure_call.S. */
17203
17204 static void
17205 cmse_nonsecure_call_clear_caller_saved (void)
17206 {
17207 basic_block bb;
17208
17209 FOR_EACH_BB_FN (bb, cfun)
17210 {
17211 rtx_insn *insn;
17212
17213 FOR_BB_INSNS (bb, insn)
17214 {
17215 unsigned address_regnum, regno, maxregno =
17216 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17217 auto_sbitmap to_clear_bitmap (maxregno + 1);
17218 rtx_insn *seq;
17219 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17220 rtx address;
17221 CUMULATIVE_ARGS args_so_far_v;
17222 cumulative_args_t args_so_far;
17223 tree arg_type, fntype;
17224 bool first_param = true;
17225 function_args_iterator args_iter;
17226 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17227
17228 if (!NONDEBUG_INSN_P (insn))
17229 continue;
17230
17231 if (!CALL_P (insn))
17232 continue;
17233
17234 pat = PATTERN (insn);
17235 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17236 call = XVECEXP (pat, 0, 0);
17237
17238 /* Get the real call RTX if the insn sets a value, ie. returns. */
17239 if (GET_CODE (call) == SET)
17240 call = SET_SRC (call);
17241
17242 /* Check if it is a cmse_nonsecure_call. */
17243 unspec = XEXP (call, 0);
17244 if (GET_CODE (unspec) != UNSPEC
17245 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17246 continue;
17247
17248 /* Determine the caller-saved registers we need to clear. */
17249 bitmap_clear (to_clear_bitmap);
17250 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17251
17252 /* Only look at the caller-saved floating point registers in case of
17253 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17254 lazy store and loads which clear both caller- and callee-saved
17255 registers. */
17256 if (TARGET_HARD_FLOAT_ABI)
17257 {
17258 auto_sbitmap float_bitmap (maxregno + 1);
17259
17260 bitmap_clear (float_bitmap);
17261 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17262 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17263 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17264 }
17265
17266 /* Make sure the register used to hold the function address is not
17267 cleared. */
17268 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17269 gcc_assert (MEM_P (address));
17270 gcc_assert (REG_P (XEXP (address, 0)));
17271 address_regnum = REGNO (XEXP (address, 0));
17272 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17273 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17274
17275 /* Set basic block of call insn so that df rescan is performed on
17276 insns inserted here. */
17277 set_block_for_insn (insn, bb);
17278 df_set_flags (DF_DEFER_INSN_RESCAN);
17279 start_sequence ();
17280
17281 /* Make sure the scheduler doesn't schedule other insns beyond
17282 here. */
17283 emit_insn (gen_blockage ());
17284
17285 /* Walk through all arguments and clear registers appropriately.  */
17287 fntype = TREE_TYPE (MEM_EXPR (address));
17288 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17289 NULL_TREE);
17290 args_so_far = pack_cumulative_args (&args_so_far_v);
17291 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17292 {
17293 rtx arg_rtx;
17294 uint64_t to_clear_args_mask;
17295 machine_mode arg_mode = TYPE_MODE (arg_type);
17296
17297 if (VOID_TYPE_P (arg_type))
17298 continue;
17299
17300 if (!first_param)
17301 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17302 true);
17303
17304 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17305 true);
17306 gcc_assert (REG_P (arg_rtx));
17307 to_clear_args_mask
17308 = compute_not_to_clear_mask (arg_type, arg_rtx,
17309 REGNO (arg_rtx),
17310 &padding_bits_to_clear[0]);
17311 if (to_clear_args_mask)
17312 {
17313 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17314 {
17315 if (to_clear_args_mask & (1ULL << regno))
17316 bitmap_clear_bit (to_clear_bitmap, regno);
17317 }
17318 }
17319
17320 first_param = false;
17321 }
17322
17323 /* We use right shift and left shift to clear the LSB of the address
17324 we jump to instead of using bic, to avoid having to use an extra
17325 register on Thumb-1. */
17326 clearing_reg = XEXP (address, 0);
17327 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17328 emit_insn (gen_rtx_SET (clearing_reg, shift));
17329 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17330 emit_insn (gen_rtx_SET (clearing_reg, shift));
17331
17332 /* Clear caller-saved registers that leak before doing a non-secure
17333 call. */
17334 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17335 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17336 NUM_ARG_REGS, ip_reg, clearing_reg);
17337
17338 seq = get_insns ();
17339 end_sequence ();
17340 emit_insn_before (seq, insn);
17341 }
17342 }
17343 }
17344
17345 /* Rewrite move insn into subtract of 0 if the condition codes will
17346 be useful in next conditional jump insn. */
17347
17348 static void
17349 thumb1_reorg (void)
17350 {
17351 basic_block bb;
17352
17353 FOR_EACH_BB_FN (bb, cfun)
17354 {
17355 rtx dest, src;
17356 rtx cmp, op0, op1, set = NULL;
17357 rtx_insn *prev, *insn = BB_END (bb);
17358 bool insn_clobbered = false;
17359
17360 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17361 insn = PREV_INSN (insn);
17362
17363 /* Find the last cbranchsi4_insn in basic block BB. */
17364 if (insn == BB_HEAD (bb)
17365 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17366 continue;
17367
17368 /* Get the register with which we are comparing. */
17369 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17370 op0 = XEXP (cmp, 0);
17371 op1 = XEXP (cmp, 1);
17372
17373 /* Check that comparison is against ZERO. */
17374 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17375 continue;
17376
17377 /* Find the first flag setting insn before INSN in basic block BB. */
17378 gcc_assert (insn != BB_HEAD (bb));
17379 for (prev = PREV_INSN (insn);
17380 (!insn_clobbered
17381 && prev != BB_HEAD (bb)
17382 && (NOTE_P (prev)
17383 || DEBUG_INSN_P (prev)
17384 || ((set = single_set (prev)) != NULL
17385 && get_attr_conds (prev) == CONDS_NOCOND)));
17386 prev = PREV_INSN (prev))
17387 {
17388 if (reg_set_p (op0, prev))
17389 insn_clobbered = true;
17390 }
17391
17392 /* Skip if op0 is clobbered by insn other than prev. */
17393 if (insn_clobbered)
17394 continue;
17395
17396 if (!set)
17397 continue;
17398
17399 dest = SET_DEST (set);
17400 src = SET_SRC (set);
17401 if (!low_register_operand (dest, SImode)
17402 || !low_register_operand (src, SImode))
17403 continue;
17404
17405 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17406 in INSN. Both src and dest of the move insn are checked. */
17407 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17408 {
17409 dest = copy_rtx (dest);
17410 src = copy_rtx (src);
17411 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17412 PATTERN (prev) = gen_rtx_SET (dest, src);
17413 INSN_CODE (prev) = -1;
17414 /* Set test register in INSN to dest. */
17415 XEXP (cmp, 0) = copy_rtx (dest);
17416 INSN_CODE (insn) = -1;
17417 }
17418 }
17419 }
17420
17421 /* Convert instructions to their cc-clobbering variant if possible, since
17422 that allows us to use smaller encodings. */
17423
17424 static void
17425 thumb2_reorg (void)
17426 {
17427 basic_block bb;
17428 regset_head live;
17429
17430 INIT_REG_SET (&live);
17431
17432 /* We are freeing block_for_insn in the toplev to keep compatibility
17433 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17434 compute_bb_for_insn ();
17435 df_analyze ();
17436
17437 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17438
17439 FOR_EACH_BB_FN (bb, cfun)
17440 {
17441 if ((current_tune->disparage_flag_setting_t16_encodings
17442 == tune_params::DISPARAGE_FLAGS_ALL)
17443 && optimize_bb_for_speed_p (bb))
17444 continue;
17445
17446 rtx_insn *insn;
17447 Convert_Action action = SKIP;
17448 Convert_Action action_for_partial_flag_setting
17449 = ((current_tune->disparage_flag_setting_t16_encodings
17450 != tune_params::DISPARAGE_FLAGS_NEITHER)
17451 && optimize_bb_for_speed_p (bb))
17452 ? SKIP : CONV;
17453
17454 COPY_REG_SET (&live, DF_LR_OUT (bb));
17455 df_simulate_initialize_backwards (bb, &live);
17456 FOR_BB_INSNS_REVERSE (bb, insn)
17457 {
17458 if (NONJUMP_INSN_P (insn)
17459 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17460 && GET_CODE (PATTERN (insn)) == SET)
17461 {
17462 action = SKIP;
17463 rtx pat = PATTERN (insn);
17464 rtx dst = XEXP (pat, 0);
17465 rtx src = XEXP (pat, 1);
17466 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17467
17468 if (UNARY_P (src) || BINARY_P (src))
17469 op0 = XEXP (src, 0);
17470
17471 if (BINARY_P (src))
17472 op1 = XEXP (src, 1);
17473
17474 if (low_register_operand (dst, SImode))
17475 {
17476 switch (GET_CODE (src))
17477 {
17478 case PLUS:
17479 /* Adding two registers and storing the result
17480 in the first source is already a 16-bit
17481 operation. */
17482 if (rtx_equal_p (dst, op0)
17483 && register_operand (op1, SImode))
17484 break;
17485
17486 if (low_register_operand (op0, SImode))
17487 {
17488 /* ADDS <Rd>,<Rn>,<Rm> */
17489 if (low_register_operand (op1, SImode))
17490 action = CONV;
17491 /* ADDS <Rdn>,#<imm8> */
17492 /* SUBS <Rdn>,#<imm8> */
17493 else if (rtx_equal_p (dst, op0)
17494 && CONST_INT_P (op1)
17495 && IN_RANGE (INTVAL (op1), -255, 255))
17496 action = CONV;
17497 /* ADDS <Rd>,<Rn>,#<imm3> */
17498 /* SUBS <Rd>,<Rn>,#<imm3> */
17499 else if (CONST_INT_P (op1)
17500 && IN_RANGE (INTVAL (op1), -7, 7))
17501 action = CONV;
17502 }
17503 /* ADCS <Rd>, <Rn> */
17504 else if (GET_CODE (XEXP (src, 0)) == PLUS
17505 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17506 && low_register_operand (XEXP (XEXP (src, 0), 1),
17507 SImode)
17508 && COMPARISON_P (op1)
17509 && cc_register (XEXP (op1, 0), VOIDmode)
17510 && maybe_get_arm_condition_code (op1) == ARM_CS
17511 && XEXP (op1, 1) == const0_rtx)
17512 action = CONV;
17513 break;
17514
17515 case MINUS:
17516 /* RSBS <Rd>,<Rn>,#0
17517 Not handled here: see NEG below. */
17518 /* SUBS <Rd>,<Rn>,#<imm3>
17519 SUBS <Rdn>,#<imm8>
17520 Not handled here: see PLUS above. */
17521 /* SUBS <Rd>,<Rn>,<Rm> */
17522 if (low_register_operand (op0, SImode)
17523 && low_register_operand (op1, SImode))
17524 action = CONV;
17525 break;
17526
17527 case MULT:
17528 /* MULS <Rdm>,<Rn>,<Rdm>
17529 As an exception to the rule, this is only used
17530 when optimizing for size since MULS is slow on all
17531 known implementations. We do not even want to use
17532 MULS in cold code, if optimizing for speed, so we
17533 test the global flag here. */
17534 if (!optimize_size)
17535 break;
17536 /* Fall through. */
17537 case AND:
17538 case IOR:
17539 case XOR:
17540 /* ANDS <Rdn>,<Rm> */
17541 if (rtx_equal_p (dst, op0)
17542 && low_register_operand (op1, SImode))
17543 action = action_for_partial_flag_setting;
17544 else if (rtx_equal_p (dst, op1)
17545 && low_register_operand (op0, SImode))
17546 action = action_for_partial_flag_setting == SKIP
17547 ? SKIP : SWAP_CONV;
17548 break;
17549
17550 case ASHIFTRT:
17551 case ASHIFT:
17552 case LSHIFTRT:
17553 /* ASRS <Rdn>,<Rm> */
17554 /* LSRS <Rdn>,<Rm> */
17555 /* LSLS <Rdn>,<Rm> */
17556 if (rtx_equal_p (dst, op0)
17557 && low_register_operand (op1, SImode))
17558 action = action_for_partial_flag_setting;
17559 /* ASRS <Rd>,<Rm>,#<imm5> */
17560 /* LSRS <Rd>,<Rm>,#<imm5> */
17561 /* LSLS <Rd>,<Rm>,#<imm5> */
17562 else if (low_register_operand (op0, SImode)
17563 && CONST_INT_P (op1)
17564 && IN_RANGE (INTVAL (op1), 0, 31))
17565 action = action_for_partial_flag_setting;
17566 break;
17567
17568 case ROTATERT:
17569 /* RORS <Rdn>,<Rm> */
17570 if (rtx_equal_p (dst, op0)
17571 && low_register_operand (op1, SImode))
17572 action = action_for_partial_flag_setting;
17573 break;
17574
17575 case NOT:
17576 /* MVNS <Rd>,<Rm> */
17577 if (low_register_operand (op0, SImode))
17578 action = action_for_partial_flag_setting;
17579 break;
17580
17581 case NEG:
17582 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17583 if (low_register_operand (op0, SImode))
17584 action = CONV;
17585 break;
17586
17587 case CONST_INT:
17588 /* MOVS <Rd>,#<imm8> */
17589 if (CONST_INT_P (src)
17590 && IN_RANGE (INTVAL (src), 0, 255))
17591 action = action_for_partial_flag_setting;
17592 break;
17593
17594 case REG:
17595 /* MOVS and MOV<c> with registers have different
17596 encodings, so are not relevant here. */
17597 break;
17598
17599 default:
17600 break;
17601 }
17602 }
17603
17604 if (action != SKIP)
17605 {
17606 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17607 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17608 rtvec vec;
17609
17610 if (action == SWAP_CONV)
17611 {
17612 src = copy_rtx (src);
17613 XEXP (src, 0) = op1;
17614 XEXP (src, 1) = op0;
17615 pat = gen_rtx_SET (dst, src);
17616 vec = gen_rtvec (2, pat, clobber);
17617 }
17618 else /* action == CONV */
17619 vec = gen_rtvec (2, pat, clobber);
17620
17621 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17622 INSN_CODE (insn) = -1;
17623 }
17624 }
17625
17626 if (NONDEBUG_INSN_P (insn))
17627 df_simulate_one_insn_backwards (bb, insn, &live);
17628 }
17629 }
17630
17631 CLEAR_REG_SET (&live);
17632 }
17633
17634 /* Gcc puts the pool in the wrong place for ARM, since we can only
17635 load addresses a limited distance around the pc. We do some
17636 special munging to move the constant pool values to the correct
17637 point in the code. */
17638 static void
17639 arm_reorg (void)
17640 {
17641 rtx_insn *insn;
17642 HOST_WIDE_INT address = 0;
17643 Mfix * fix;
17644
17645 if (use_cmse)
17646 cmse_nonsecure_call_clear_caller_saved ();
17647 if (TARGET_THUMB1)
17648 thumb1_reorg ();
17649 else if (TARGET_THUMB2)
17650 thumb2_reorg ();
17651
17652 /* Ensure all insns that must be split have been split at this point.
17653 Otherwise, the pool placement code below may compute incorrect
17654 insn lengths. Note that when optimizing, all insns have already
17655 been split at this point. */
17656 if (!optimize)
17657 split_all_insns_noflow ();
17658
17659 /* Make sure we do not attempt to create a literal pool even though it should
17660 no longer be necessary to create any. */
17661 if (arm_disable_literal_pool)
17662 return;
17663
17664 minipool_fix_head = minipool_fix_tail = NULL;
17665
17666 /* The first insn must always be a note, or the code below won't
17667 scan it properly. */
17668 insn = get_insns ();
17669 gcc_assert (NOTE_P (insn));
17670 minipool_pad = 0;
17671
17672 /* Scan all the insns and record the operands that will need fixing. */
17673 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17674 {
17675 if (BARRIER_P (insn))
17676 push_minipool_barrier (insn, address);
17677 else if (INSN_P (insn))
17678 {
17679 rtx_jump_table_data *table;
17680
17681 note_invalid_constants (insn, address, true);
17682 address += get_attr_length (insn);
17683
17684 /* If the insn is a vector jump, add the size of the table
17685 and skip the table. */
17686 if (tablejump_p (insn, NULL, &table))
17687 {
17688 address += get_jump_table_size (table);
17689 insn = table;
17690 }
17691 }
17692 else if (LABEL_P (insn))
17693 /* Add the worst-case padding due to alignment. We don't add
17694 the _current_ padding because the minipool insertions
17695 themselves might change it. */
17696 address += get_label_padding (insn);
17697 }
17698
17699 fix = minipool_fix_head;
17700
17701 /* Now scan the fixups and perform the required changes. */
17702 while (fix)
17703 {
17704 Mfix * ftmp;
17705 Mfix * fdel;
17706 Mfix * last_added_fix;
17707 Mfix * last_barrier = NULL;
17708 Mfix * this_fix;
17709
17710 /* Skip any further barriers before the next fix. */
17711 while (fix && BARRIER_P (fix->insn))
17712 fix = fix->next;
17713
17714 /* No more fixes. */
17715 if (fix == NULL)
17716 break;
17717
17718 last_added_fix = NULL;
17719
17720 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17721 {
17722 if (BARRIER_P (ftmp->insn))
17723 {
17724 if (ftmp->address >= minipool_vector_head->max_address)
17725 break;
17726
17727 last_barrier = ftmp;
17728 }
17729 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17730 break;
17731
17732 last_added_fix = ftmp; /* Keep track of the last fix added. */
17733 }
17734
17735 /* If we found a barrier, drop back to that; any fixes that we
17736 could have reached but come after the barrier will now go in
17737 the next mini-pool. */
17738 if (last_barrier != NULL)
17739 {
17740 /* Reduce the refcount for those fixes that won't go into this
17741 pool after all. */
17742 for (fdel = last_barrier->next;
17743 fdel && fdel != ftmp;
17744 fdel = fdel->next)
17745 {
17746 fdel->minipool->refcount--;
17747 fdel->minipool = NULL;
17748 }
17749
17750 ftmp = last_barrier;
17751 }
17752 else
17753 {
17754 /* ftmp is the first fix that we can't fit into this pool and
17755 there are no natural barriers that we could use. Insert a
17756 new barrier in the code somewhere between the previous
17757 fix and this one, and arrange to jump around it. */
17758 HOST_WIDE_INT max_address;
17759
17760 /* The last item on the list of fixes must be a barrier, so
17761 we can never run off the end of the list of fixes without
17762 last_barrier being set. */
17763 gcc_assert (ftmp);
17764
17765 max_address = minipool_vector_head->max_address;
17766 /* Check that there isn't another fix that is in range that
17767 we couldn't fit into this pool because the pool was
17768 already too large: we need to put the pool before such an
17769 instruction. The pool itself may come just after the
17770 fix because create_fix_barrier also allows space for a
17771 jump instruction. */
17772 if (ftmp->address < max_address)
17773 max_address = ftmp->address + 1;
17774
17775 last_barrier = create_fix_barrier (last_added_fix, max_address);
17776 }
17777
17778 assign_minipool_offsets (last_barrier);
17779
17780 while (ftmp)
17781 {
17782 if (!BARRIER_P (ftmp->insn)
17783 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17784 == NULL))
17785 break;
17786
17787 ftmp = ftmp->next;
17788 }
17789
17790 /* Scan over the fixes we have identified for this pool, fixing them
17791 up and adding the constants to the pool itself. */
17792 for (this_fix = fix; this_fix && ftmp != this_fix;
17793 this_fix = this_fix->next)
17794 if (!BARRIER_P (this_fix->insn))
17795 {
17796 rtx addr
17797 = plus_constant (Pmode,
17798 gen_rtx_LABEL_REF (VOIDmode,
17799 minipool_vector_label),
17800 this_fix->minipool->offset);
17801 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17802 }
17803
17804 dump_minipool (last_barrier->insn);
17805 fix = ftmp;
17806 }
17807
17808 /* From now on we must synthesize any constants that we can't handle
17809 directly. This can happen if the RTL gets split during final
17810 instruction generation. */
17811 cfun->machine->after_arm_reorg = 1;
17812
17813 /* Free the minipool memory. */
17814 obstack_free (&minipool_obstack, minipool_startobj);
17815 }
17816 \f
17817 /* Routines to output assembly language. */
17818
17819 /* Return the string representation of the real value R. */
17820 static const char *
17821 fp_const_from_val (REAL_VALUE_TYPE *r)
17822 {
17823 if (!fp_consts_inited)
17824 init_fp_table ();
17825
17826 gcc_assert (real_equal (r, &value_fp0));
17827 return "0";
17828 }
17829
17830 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17831 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17832 is in the list, and UPDATE is true iff the list contains an explicit
17833 update of the base register. */
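/* Illustrative examples (not part of the original sources; register names
   are arbitrary): popping r4, r5 and the PC through SP with an update
   would be emitted as

       pop     {r4, r5, pc}

   while the same list inside an interrupt handler, where POP cannot be
   used for the exception return, comes out as

       ldmfd   sp!, {r4, r5, pc}^  */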
17834 void
17835 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17836 bool update)
17837 {
17838 int i;
17839 char pattern[100];
17840 int offset;
17841 const char *conditional;
17842 int num_saves = XVECLEN (operands[0], 0);
17843 unsigned int regno;
17844 unsigned int regno_base = REGNO (operands[1]);
17845 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17846
17847 offset = 0;
17848 offset += update ? 1 : 0;
17849 offset += return_pc ? 1 : 0;
17850
17851 /* Is the base register in the list? */
17852 for (i = offset; i < num_saves; i++)
17853 {
17854 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17855 /* If SP is in the list, then the base register must be SP. */
17856 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17857 /* If base register is in the list, there must be no explicit update. */
17858 if (regno == regno_base)
17859 gcc_assert (!update);
17860 }
17861
17862 conditional = reverse ? "%?%D0" : "%?%d0";
17863 /* Can't use POP if returning from an interrupt. */
17864 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17865 sprintf (pattern, "pop%s\t{", conditional);
17866 else
17867 {
17868 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17869 It's just a convention; their semantics are identical. */
17870 if (regno_base == SP_REGNUM)
17871 sprintf (pattern, "ldmfd%s\t", conditional);
17872 else if (update)
17873 sprintf (pattern, "ldmia%s\t", conditional);
17874 else
17875 sprintf (pattern, "ldm%s\t", conditional);
17876
17877 strcat (pattern, reg_names[regno_base]);
17878 if (update)
17879 strcat (pattern, "!, {");
17880 else
17881 strcat (pattern, ", {");
17882 }
17883
17884 /* Output the first destination register. */
17885 strcat (pattern,
17886 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17887
17888 /* Output the rest of the destination registers. */
17889 for (i = offset + 1; i < num_saves; i++)
17890 {
17891 strcat (pattern, ", ");
17892 strcat (pattern,
17893 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17894 }
17895
17896 strcat (pattern, "}");
17897
17898 if (interrupt_p && return_pc)
17899 strcat (pattern, "^");
17900
17901 output_asm_insn (pattern, &cond);
17902 }
17903
17904
17905 /* Output the assembly for a VFP store multiple. */
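/* For instance (illustrative only): storing three D registers starting at
   d8 gives "vpush.64 {d8, d9, d10}" when the base address register is SP,
   and "vstmdb.64 r4!, {d8, d9, d10}" for any other base register.  */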
17906
17907 const char *
17908 vfp_output_vstmd (rtx * operands)
17909 {
17910 char pattern[100];
17911 int p;
17912 int base;
17913 int i;
17914 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17915 ? XEXP (operands[0], 0)
17916 : XEXP (XEXP (operands[0], 0), 0);
17917 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17918
17919 if (push_p)
17920 strcpy (pattern, "vpush%?.64\t{%P1");
17921 else
17922 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17923
17924 p = strlen (pattern);
17925
17926 gcc_assert (REG_P (operands[1]));
17927
17928 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17929 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17930 {
17931 p += sprintf (&pattern[p], ", d%d", base + i);
17932 }
17933 strcpy (&pattern[p], "}");
17934
17935 output_asm_insn (pattern, operands);
17936 return "";
17937 }
17938
17939
17940 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17941 number of bytes pushed. */
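/* Illustrative shape of the generated RTL (the register choice is
   arbitrary): pushing d8-d11 (COUNT = 4) emits a PARALLEL that stores
   32 bytes below SP via a PRE_MODIFY of the stack pointer, and attaches a
   REG_FRAME_RELATED_EXPR note recording the SP adjustment plus one DFmode
   store per register at offsets 0, 8, 16 and 24.  */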
17942
17943 static int
17944 vfp_emit_fstmd (int base_reg, int count)
17945 {
17946 rtx par;
17947 rtx dwarf;
17948 rtx tmp, reg;
17949 int i;
17950
17951 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17952 register pairs are stored by a store multiple insn. We avoid this
17953 by pushing an extra pair. */
17954 if (count == 2 && !arm_arch6)
17955 {
17956 if (base_reg == LAST_VFP_REGNUM - 3)
17957 base_reg -= 2;
17958 count++;
17959 }
17960
17961 /* FSTMD may not store more than 16 doubleword registers at once. Split
17962 larger stores into multiple parts (up to a maximum of two, in
17963 practice). */
17964 if (count > 16)
17965 {
17966 int saved;
17967 /* NOTE: base_reg is an internal register number, so each D register
17968 counts as 2. */
17969 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17970 saved += vfp_emit_fstmd (base_reg, 16);
17971 return saved;
17972 }
17973
17974 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17975 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17976
17977 reg = gen_rtx_REG (DFmode, base_reg);
17978 base_reg += 2;
17979
17980 XVECEXP (par, 0, 0)
17981 = gen_rtx_SET (gen_frame_mem
17982 (BLKmode,
17983 gen_rtx_PRE_MODIFY (Pmode,
17984 stack_pointer_rtx,
17985 plus_constant
17986 (Pmode, stack_pointer_rtx,
17987 - (count * 8)))
17988 ),
17989 gen_rtx_UNSPEC (BLKmode,
17990 gen_rtvec (1, reg),
17991 UNSPEC_PUSH_MULT));
17992
17993 tmp = gen_rtx_SET (stack_pointer_rtx,
17994 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17995 RTX_FRAME_RELATED_P (tmp) = 1;
17996 XVECEXP (dwarf, 0, 0) = tmp;
17997
17998 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17999 RTX_FRAME_RELATED_P (tmp) = 1;
18000 XVECEXP (dwarf, 0, 1) = tmp;
18001
18002 for (i = 1; i < count; i++)
18003 {
18004 reg = gen_rtx_REG (DFmode, base_reg);
18005 base_reg += 2;
18006 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18007
18008 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18009 plus_constant (Pmode,
18010 stack_pointer_rtx,
18011 i * 8)),
18012 reg);
18013 RTX_FRAME_RELATED_P (tmp) = 1;
18014 XVECEXP (dwarf, 0, i + 1) = tmp;
18015 }
18016
18017 par = emit_insn (par);
18018 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18019 RTX_FRAME_RELATED_P (par) = 1;
18020
18021 return count * 8;
18022 }
18023
18024 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
18025 has the cmse_nonsecure_call attribute; return false otherwise. */
18026
18027 bool
18028 detect_cmse_nonsecure_call (tree addr)
18029 {
18030 if (!addr)
18031 return FALSE;
18032
18033 tree fntype = TREE_TYPE (addr);
18034 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18035 TYPE_ATTRIBUTES (fntype)))
18036 return TRUE;
18037 return FALSE;
18038 }
18039
18040
18041 /* Emit a call instruction with pattern PAT. ADDR is the address of
18042 the call target. */
18043
18044 void
18045 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18046 {
18047 rtx insn;
18048
18049 insn = emit_call_insn (pat);
18050
18051 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18052 If the call might use such an entry, add a use of the PIC register
18053 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18054 if (TARGET_VXWORKS_RTP
18055 && flag_pic
18056 && !sibcall
18057 && GET_CODE (addr) == SYMBOL_REF
18058 && (SYMBOL_REF_DECL (addr)
18059 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18060 : !SYMBOL_REF_LOCAL_P (addr)))
18061 {
18062 require_pic_register ();
18063 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18064 }
18065
18066 if (TARGET_AAPCS_BASED)
18067 {
18068 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18069 linker. We need to add an IP clobber to allow setting
18070 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18071 is not needed since it's a fixed register. */
18072 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18073 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18074 }
18075 }
18076
18077 /* Output a 'call' insn. */
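/* As an illustration (arbitrary register): on a pre-ARMv5 target a call
   through r2 is emitted as

       mov     lr, pc
       bx      r2          @ "mov pc, r2" without interworking/ARMv4T

   and a call through LR first copies LR into IP, since LR is about to be
   overwritten with the return address.  */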
18078 const char *
18079 output_call (rtx *operands)
18080 {
18081 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18082
18083 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18084 if (REGNO (operands[0]) == LR_REGNUM)
18085 {
18086 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18087 output_asm_insn ("mov%?\t%0, %|lr", operands);
18088 }
18089
18090 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18091
18092 if (TARGET_INTERWORK || arm_arch4t)
18093 output_asm_insn ("bx%?\t%0", operands);
18094 else
18095 output_asm_insn ("mov%?\t%|pc, %0", operands);
18096
18097 return "";
18098 }
18099
18100 /* Output a move from ARM registers to ARM registers of a long double.
18101 OPERANDS[0] is the destination.
18102 OPERANDS[1] is the source. */
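/* For example (illustrative register numbers): moving {r1, r2, r3} into
   {r0, r1, r2} walks upwards (mov r0, r1; mov r1, r2; mov r2, r3), while
   moving {r0, r1, r2} into {r1, r2, r3} walks downwards, so an
   overlapping source register is always read before it is overwritten.  */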
18103 const char *
18104 output_mov_long_double_arm_from_arm (rtx *operands)
18105 {
18106 /* We have to be careful here because the two might overlap. */
18107 int dest_start = REGNO (operands[0]);
18108 int src_start = REGNO (operands[1]);
18109 rtx ops[2];
18110 int i;
18111
18112 if (dest_start < src_start)
18113 {
18114 for (i = 0; i < 3; i++)
18115 {
18116 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18117 ops[1] = gen_rtx_REG (SImode, src_start + i);
18118 output_asm_insn ("mov%?\t%0, %1", ops);
18119 }
18120 }
18121 else
18122 {
18123 for (i = 2; i >= 0; i--)
18124 {
18125 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18126 ops[1] = gen_rtx_REG (SImode, src_start + i);
18127 output_asm_insn ("mov%?\t%0, %1", ops);
18128 }
18129 }
18130
18131 return "";
18132 }
18133
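/* Emit instructions to load the 32-bit value SRC into DEST.  A constant is
   split into a move of the low 16 bits followed, when the high half is
   non-zero, by a zero_extract SET of the high 16 bits (which movt-style
   patterns can match); any other SRC is loaded through a HIGH/LO_SUM pair.
   As an illustrative example (arbitrary register and value),
   SRC = 0x12345678 would typically assemble to "movw rD, #0x5678"
   followed by "movt rD, #0x1234".  */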
18134 void
18135 arm_emit_movpair (rtx dest, rtx src)
18136 {
18137 /* If the src is an immediate, simplify it. */
18138 if (CONST_INT_P (src))
18139 {
18140 HOST_WIDE_INT val = INTVAL (src);
18141 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18142 if ((val >> 16) & 0x0000ffff)
18143 {
18144 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18145 GEN_INT (16)),
18146 GEN_INT ((val >> 16) & 0x0000ffff));
18147 rtx_insn *insn = get_last_insn ();
18148 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18149 }
18150 return;
18151 }
18152 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18153 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18154 rtx_insn *insn = get_last_insn ();
18155 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18156 }
18157
18158 /* Output a move between double words. It must be REG<-MEM
18159 or MEM<-REG. */
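/* Illustrative outputs (arbitrary register numbers): a DImode load from
   [r1] is emitted as "ldrd r4, [r1]" when LDRD is available, or as
   "ldmia r1, {r4-r5}" otherwise; the store direction uses strd/stm in the
   same way.  */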
18160 const char *
18161 output_move_double (rtx *operands, bool emit, int *count)
18162 {
18163 enum rtx_code code0 = GET_CODE (operands[0]);
18164 enum rtx_code code1 = GET_CODE (operands[1]);
18165 rtx otherops[3];
18166 if (count)
18167 *count = 1;
18168
18169 /* The only case when this might happen is when
18170 you are looking at the length of a DImode instruction
18171 that has an invalid constant in it. */
18172 if (code0 == REG && code1 != MEM)
18173 {
18174 gcc_assert (!emit);
18175 *count = 2;
18176 return "";
18177 }
18178
18179 if (code0 == REG)
18180 {
18181 unsigned int reg0 = REGNO (operands[0]);
18182
18183 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18184
18185 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18186
18187 switch (GET_CODE (XEXP (operands[1], 0)))
18188 {
18189 case REG:
18190
18191 if (emit)
18192 {
18193 if (TARGET_LDRD
18194 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18195 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18196 else
18197 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18198 }
18199 break;
18200
18201 case PRE_INC:
18202 gcc_assert (TARGET_LDRD);
18203 if (emit)
18204 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18205 break;
18206
18207 case PRE_DEC:
18208 if (emit)
18209 {
18210 if (TARGET_LDRD)
18211 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18212 else
18213 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18214 }
18215 break;
18216
18217 case POST_INC:
18218 if (emit)
18219 {
18220 if (TARGET_LDRD)
18221 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18222 else
18223 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18224 }
18225 break;
18226
18227 case POST_DEC:
18228 gcc_assert (TARGET_LDRD);
18229 if (emit)
18230 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18231 break;
18232
18233 case PRE_MODIFY:
18234 case POST_MODIFY:
18235 /* Autoincrement addressing modes should never have overlapping
18236 base and destination registers, and overlapping index registers
18237 are already prohibited, so this doesn't need to worry about
18238 fix_cm3_ldrd. */
18239 otherops[0] = operands[0];
18240 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18241 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18242
18243 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18244 {
18245 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18246 {
18247 /* Registers overlap so split out the increment. */
18248 if (emit)
18249 {
18250 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18251 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18252 }
18253 if (count)
18254 *count = 2;
18255 }
18256 else
18257 {
18258 /* Use a single insn if we can.
18259 FIXME: IWMMXT allows offsets larger than ldrd can
18260 handle, fix these up with a pair of ldr. */
18261 if (TARGET_THUMB2
18262 || !CONST_INT_P (otherops[2])
18263 || (INTVAL (otherops[2]) > -256
18264 && INTVAL (otherops[2]) < 256))
18265 {
18266 if (emit)
18267 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18268 }
18269 else
18270 {
18271 if (emit)
18272 {
18273 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18274 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18275 }
18276 if (count)
18277 *count = 2;
18278
18279 }
18280 }
18281 }
18282 else
18283 {
18284 /* Use a single insn if we can.
18285 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18286 fix these up with a pair of ldr. */
18287 if (TARGET_THUMB2
18288 || !CONST_INT_P (otherops[2])
18289 || (INTVAL (otherops[2]) > -256
18290 && INTVAL (otherops[2]) < 256))
18291 {
18292 if (emit)
18293 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18294 }
18295 else
18296 {
18297 if (emit)
18298 {
18299 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18300 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18301 }
18302 if (count)
18303 *count = 2;
18304 }
18305 }
18306 break;
18307
18308 case LABEL_REF:
18309 case CONST:
18310 /* We might be able to use ldrd %0, %1 here. However the range is
18311 different to ldr/adr, and it is broken on some ARMv7-M
18312 implementations. */
18313 /* Use the second register of the pair to avoid problematic
18314 overlap. */
18315 otherops[1] = operands[1];
18316 if (emit)
18317 output_asm_insn ("adr%?\t%0, %1", otherops);
18318 operands[1] = otherops[0];
18319 if (emit)
18320 {
18321 if (TARGET_LDRD)
18322 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18323 else
18324 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18325 }
18326
18327 if (count)
18328 *count = 2;
18329 break;
18330
18331 /* ??? This needs checking for thumb2. */
18332 default:
18333 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18334 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18335 {
18336 otherops[0] = operands[0];
18337 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18338 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18339
18340 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18341 {
18342 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18343 {
18344 switch ((int) INTVAL (otherops[2]))
18345 {
18346 case -8:
18347 if (emit)
18348 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18349 return "";
18350 case -4:
18351 if (TARGET_THUMB2)
18352 break;
18353 if (emit)
18354 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18355 return "";
18356 case 4:
18357 if (TARGET_THUMB2)
18358 break;
18359 if (emit)
18360 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18361 return "";
18362 }
18363 }
18364 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18365 operands[1] = otherops[0];
18366 if (TARGET_LDRD
18367 && (REG_P (otherops[2])
18368 || TARGET_THUMB2
18369 || (CONST_INT_P (otherops[2])
18370 && INTVAL (otherops[2]) > -256
18371 && INTVAL (otherops[2]) < 256)))
18372 {
18373 if (reg_overlap_mentioned_p (operands[0],
18374 otherops[2]))
18375 {
18376 /* Swap base and index registers over to
18377 avoid a conflict. */
18378 std::swap (otherops[1], otherops[2]);
18379 }
18380 /* If both registers conflict, it will usually
18381 have been fixed by a splitter. */
18382 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18383 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18384 {
18385 if (emit)
18386 {
18387 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18388 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18389 }
18390 if (count)
18391 *count = 2;
18392 }
18393 else
18394 {
18395 otherops[0] = operands[0];
18396 if (emit)
18397 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18398 }
18399 return "";
18400 }
18401
18402 if (CONST_INT_P (otherops[2]))
18403 {
18404 if (emit)
18405 {
18406 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18407 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18408 else
18409 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18410 }
18411 }
18412 else
18413 {
18414 if (emit)
18415 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18416 }
18417 }
18418 else
18419 {
18420 if (emit)
18421 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18422 }
18423
18424 if (count)
18425 *count = 2;
18426
18427 if (TARGET_LDRD)
18428 return "ldrd%?\t%0, [%1]";
18429
18430 return "ldmia%?\t%1, %M0";
18431 }
18432 else
18433 {
18434 otherops[1] = adjust_address (operands[1], SImode, 4);
18435 /* Take care of overlapping base/data reg. */
18436 if (reg_mentioned_p (operands[0], operands[1]))
18437 {
18438 if (emit)
18439 {
18440 output_asm_insn ("ldr%?\t%0, %1", otherops);
18441 output_asm_insn ("ldr%?\t%0, %1", operands);
18442 }
18443 if (count)
18444 *count = 2;
18445
18446 }
18447 else
18448 {
18449 if (emit)
18450 {
18451 output_asm_insn ("ldr%?\t%0, %1", operands);
18452 output_asm_insn ("ldr%?\t%0, %1", otherops);
18453 }
18454 if (count)
18455 *count = 2;
18456 }
18457 }
18458 }
18459 }
18460 else
18461 {
18462 /* Constraints should ensure this. */
18463 gcc_assert (code0 == MEM && code1 == REG);
18464 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18465 || (TARGET_ARM && TARGET_LDRD));
18466
18467 /* For TARGET_ARM the first source register of an STRD
18468 must be even. This is usually the case for double-word
18469 values but user assembly constraints can force an odd
18470 starting register. */
18471 bool allow_strd = TARGET_LDRD
18472 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18473 switch (GET_CODE (XEXP (operands[0], 0)))
18474 {
18475 case REG:
18476 if (emit)
18477 {
18478 if (allow_strd)
18479 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18480 else
18481 output_asm_insn ("stm%?\t%m0, %M1", operands);
18482 }
18483 break;
18484
18485 case PRE_INC:
18486 gcc_assert (allow_strd);
18487 if (emit)
18488 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18489 break;
18490
18491 case PRE_DEC:
18492 if (emit)
18493 {
18494 if (allow_strd)
18495 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18496 else
18497 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18498 }
18499 break;
18500
18501 case POST_INC:
18502 if (emit)
18503 {
18504 if (allow_strd)
18505 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18506 else
18507 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18508 }
18509 break;
18510
18511 case POST_DEC:
18512 gcc_assert (allow_strd);
18513 if (emit)
18514 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18515 break;
18516
18517 case PRE_MODIFY:
18518 case POST_MODIFY:
18519 otherops[0] = operands[1];
18520 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18521 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18522
18523 /* IWMMXT allows offsets larger than strd can handle,
18524 fix these up with a pair of str. */
18525 if (!TARGET_THUMB2
18526 && CONST_INT_P (otherops[2])
18527 && (INTVAL(otherops[2]) <= -256
18528 || INTVAL(otherops[2]) >= 256))
18529 {
18530 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18531 {
18532 if (emit)
18533 {
18534 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18535 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18536 }
18537 if (count)
18538 *count = 2;
18539 }
18540 else
18541 {
18542 if (emit)
18543 {
18544 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18545 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18546 }
18547 if (count)
18548 *count = 2;
18549 }
18550 }
18551 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18552 {
18553 if (emit)
18554 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18555 }
18556 else
18557 {
18558 if (emit)
18559 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18560 }
18561 break;
18562
18563 case PLUS:
18564 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18565 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18566 {
18567 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18568 {
18569 case -8:
18570 if (emit)
18571 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18572 return "";
18573
18574 case -4:
18575 if (TARGET_THUMB2)
18576 break;
18577 if (emit)
18578 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18579 return "";
18580
18581 case 4:
18582 if (TARGET_THUMB2)
18583 break;
18584 if (emit)
18585 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18586 return "";
18587 }
18588 }
18589 if (allow_strd
18590 && (REG_P (otherops[2])
18591 || TARGET_THUMB2
18592 || (CONST_INT_P (otherops[2])
18593 && INTVAL (otherops[2]) > -256
18594 && INTVAL (otherops[2]) < 256)))
18595 {
18596 otherops[0] = operands[1];
18597 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18598 if (emit)
18599 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18600 return "";
18601 }
18602 /* Fall through */
18603
18604 default:
18605 otherops[0] = adjust_address (operands[0], SImode, 4);
18606 otherops[1] = operands[1];
18607 if (emit)
18608 {
18609 output_asm_insn ("str%?\t%1, %0", operands);
18610 output_asm_insn ("str%?\t%H1, %0", otherops);
18611 }
18612 if (count)
18613 *count = 2;
18614 }
18615 }
18616
18617 return "";
18618 }
18619
18620 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18621 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
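/* For instance (illustrative), a quad-word load whose address is a plain
   register comes out as "ldmia r2, {r4-r7}", and a register-to-register
   move expands into four mov instructions ordered so that overlapping
   source registers are read before they are written.  */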
18622
18623 const char *
18624 output_move_quad (rtx *operands)
18625 {
18626 if (REG_P (operands[0]))
18627 {
18628 /* Load, or reg->reg move. */
18629
18630 if (MEM_P (operands[1]))
18631 {
18632 switch (GET_CODE (XEXP (operands[1], 0)))
18633 {
18634 case REG:
18635 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18636 break;
18637
18638 case LABEL_REF:
18639 case CONST:
18640 output_asm_insn ("adr%?\t%0, %1", operands);
18641 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18642 break;
18643
18644 default:
18645 gcc_unreachable ();
18646 }
18647 }
18648 else
18649 {
18650 rtx ops[2];
18651 int dest, src, i;
18652
18653 gcc_assert (REG_P (operands[1]));
18654
18655 dest = REGNO (operands[0]);
18656 src = REGNO (operands[1]);
18657
18658 /* This seems pretty dumb, but hopefully GCC won't try to do it
18659 very often. */
18660 if (dest < src)
18661 for (i = 0; i < 4; i++)
18662 {
18663 ops[0] = gen_rtx_REG (SImode, dest + i);
18664 ops[1] = gen_rtx_REG (SImode, src + i);
18665 output_asm_insn ("mov%?\t%0, %1", ops);
18666 }
18667 else
18668 for (i = 3; i >= 0; i--)
18669 {
18670 ops[0] = gen_rtx_REG (SImode, dest + i);
18671 ops[1] = gen_rtx_REG (SImode, src + i);
18672 output_asm_insn ("mov%?\t%0, %1", ops);
18673 }
18674 }
18675 }
18676 else
18677 {
18678 gcc_assert (MEM_P (operands[0]));
18679 gcc_assert (REG_P (operands[1]));
18680 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18681
18682 switch (GET_CODE (XEXP (operands[0], 0)))
18683 {
18684 case REG:
18685 output_asm_insn ("stm%?\t%m0, %M1", operands);
18686 break;
18687
18688 default:
18689 gcc_unreachable ();
18690 }
18691 }
18692
18693 return "";
18694 }
18695
18696 /* Output a VFP load or store instruction. */
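/* Examples of the templates produced here (illustrative operands): a
   DFmode load is "vldr.64 d8, [r3]", an SFmode store is
   "vstr.32 s15, [r3, #4]", and a PRE_DEC store of a D register becomes
   "vstmdb.64 sp!, {d8}".  */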
18697
18698 const char *
18699 output_move_vfp (rtx *operands)
18700 {
18701 rtx reg, mem, addr, ops[2];
18702 int load = REG_P (operands[0]);
18703 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18704 int sp = (!TARGET_VFP_FP16INST
18705 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18706 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18707 const char *templ;
18708 char buff[50];
18709 machine_mode mode;
18710
18711 reg = operands[!load];
18712 mem = operands[load];
18713
18714 mode = GET_MODE (reg);
18715
18716 gcc_assert (REG_P (reg));
18717 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18718 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18719 || mode == SFmode
18720 || mode == DFmode
18721 || mode == HImode
18722 || mode == SImode
18723 || mode == DImode
18724 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18725 gcc_assert (MEM_P (mem));
18726
18727 addr = XEXP (mem, 0);
18728
18729 switch (GET_CODE (addr))
18730 {
18731 case PRE_DEC:
18732 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18733 ops[0] = XEXP (addr, 0);
18734 ops[1] = reg;
18735 break;
18736
18737 case POST_INC:
18738 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18739 ops[0] = XEXP (addr, 0);
18740 ops[1] = reg;
18741 break;
18742
18743 default:
18744 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18745 ops[0] = reg;
18746 ops[1] = mem;
18747 break;
18748 }
18749
18750 sprintf (buff, templ,
18751 load ? "ld" : "st",
18752 dp ? "64" : sp ? "32" : "16",
18753 dp ? "P" : "",
18754 integer_p ? "\t%@ int" : "");
18755 output_asm_insn (buff, ops);
18756
18757 return "";
18758 }
18759
18760 /* Output a Neon double-word or quad-word load or store, or a load
18761 or store for larger structure modes.
18762
18763 WARNING: The ordering of elements is weird in big-endian mode,
18764 because the EABI requires that vectors stored in memory appear
18765 as though they were stored by a VSTM instruction.
18766 GCC RTL defines element ordering based on in-memory order.
18767 This can be different from the architectural ordering of elements
18768 within a NEON register. The intrinsics defined in arm_neon.h use the
18769 NEON register element ordering, not the GCC RTL element ordering.
18770
18771 For example, the in-memory ordering of a big-endian quadword
18772 vector with 16-bit elements when stored from register pair {d0,d1}
18773 will be (lowest address first, d0[N] is NEON register element N):
18774
18775 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18776
18777 When necessary, quadword registers (dN, dN+1) are moved to ARM
18778 registers from rN in the order:
18779
18780 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18781
18782 So that STM/LDM can be used on vectors in ARM registers, and the
18783 same memory layout will result as if VSTM/VLDM were used.
18784
18785 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18786 possible, which allows use of appropriate alignment tags.
18787 Note that the choice of "64" is independent of the actual vector
18788 element size; this size simply ensures that the behavior is
18789 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18790
18791 Due to limitations of those instructions, use of VST1.64/VLD1.64
18792 is not possible if:
18793 - the address contains PRE_DEC, or
18794 - the mode refers to more than 4 double-word registers
18795
18796 In those cases, it would be possible to replace VSTM/VLDM by a
18797 sequence of instructions; this is not currently implemented since
18798 this is not certain to actually improve performance. */
18799
18800 const char *
18801 output_move_neon (rtx *operands)
18802 {
18803 rtx reg, mem, addr, ops[2];
18804 int regno, nregs, load = REG_P (operands[0]);
18805 const char *templ;
18806 char buff[50];
18807 machine_mode mode;
18808
18809 reg = operands[!load];
18810 mem = operands[load];
18811
18812 mode = GET_MODE (reg);
18813
18814 gcc_assert (REG_P (reg));
18815 regno = REGNO (reg);
18816 nregs = REG_NREGS (reg) / 2;
18817 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18818 || NEON_REGNO_OK_FOR_QUAD (regno));
18819 gcc_assert (VALID_NEON_DREG_MODE (mode)
18820 || VALID_NEON_QREG_MODE (mode)
18821 || VALID_NEON_STRUCT_MODE (mode));
18822 gcc_assert (MEM_P (mem));
18823
18824 addr = XEXP (mem, 0);
18825
18826 /* Strip off const from addresses like (const (plus (...))). */
18827 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18828 addr = XEXP (addr, 0);
18829
18830 switch (GET_CODE (addr))
18831 {
18832 case POST_INC:
18833 /* We have to use vldm / vstm for too-large modes. */
18834 if (nregs > 4)
18835 {
18836 templ = "v%smia%%?\t%%0!, %%h1";
18837 ops[0] = XEXP (addr, 0);
18838 }
18839 else
18840 {
18841 templ = "v%s1.64\t%%h1, %%A0";
18842 ops[0] = mem;
18843 }
18844 ops[1] = reg;
18845 break;
18846
18847 case PRE_DEC:
18848 /* We have to use vldm / vstm in this case, since there is no
18849 pre-decrement form of the vld1 / vst1 instructions. */
18850 templ = "v%smdb%%?\t%%0!, %%h1";
18851 ops[0] = XEXP (addr, 0);
18852 ops[1] = reg;
18853 break;
18854
18855 case POST_MODIFY:
18856 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18857 gcc_unreachable ();
18858
18859 case REG:
18860 /* We have to use vldm / vstm for too-large modes. */
18861 if (nregs > 1)
18862 {
18863 if (nregs > 4)
18864 templ = "v%smia%%?\t%%m0, %%h1";
18865 else
18866 templ = "v%s1.64\t%%h1, %%A0";
18867
18868 ops[0] = mem;
18869 ops[1] = reg;
18870 break;
18871 }
18872 /* Fall through. */
18873 case LABEL_REF:
18874 case PLUS:
18875 {
18876 int i;
18877 int overlap = -1;
18878 for (i = 0; i < nregs; i++)
18879 {
18880 /* We're only using DImode here because it's a convenient size. */
18881 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18882 ops[1] = adjust_address (mem, DImode, 8 * i);
18883 if (reg_overlap_mentioned_p (ops[0], mem))
18884 {
18885 gcc_assert (overlap == -1);
18886 overlap = i;
18887 }
18888 else
18889 {
18890 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18891 output_asm_insn (buff, ops);
18892 }
18893 }
18894 if (overlap != -1)
18895 {
18896 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18897 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18898 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18899 output_asm_insn (buff, ops);
18900 }
18901
18902 return "";
18903 }
18904
18905 default:
18906 gcc_unreachable ();
18907 }
18908
18909 sprintf (buff, templ, load ? "ld" : "st");
18910 output_asm_insn (buff, ops);
18911
18912 return "";
18913 }
18914
18915 /* Compute and return the length of neon_mov<mode>, where <mode> is
18916 one of VSTRUCT modes: EI, OI, CI or XI. */
18917 int
18918 arm_attr_length_move_neon (rtx_insn *insn)
18919 {
18920 rtx reg, mem, addr;
18921 int load;
18922 machine_mode mode;
18923
18924 extract_insn_cached (insn);
18925
18926 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18927 {
18928 mode = GET_MODE (recog_data.operand[0]);
18929 switch (mode)
18930 {
18931 case E_EImode:
18932 case E_OImode:
18933 return 8;
18934 case E_CImode:
18935 return 12;
18936 case E_XImode:
18937 return 16;
18938 default:
18939 gcc_unreachable ();
18940 }
18941 }
18942
18943 load = REG_P (recog_data.operand[0]);
18944 reg = recog_data.operand[!load];
18945 mem = recog_data.operand[load];
18946
18947 gcc_assert (MEM_P (mem));
18948
18949 addr = XEXP (mem, 0);
18950
18951 /* Strip off const from addresses like (const (plus (...))). */
18952 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18953 addr = XEXP (addr, 0);
18954
18955 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18956 {
18957 int insns = REG_NREGS (reg) / 2;
18958 return insns * 4;
18959 }
18960 else
18961 return 4;
18962 }
18963
18964 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18965 return zero. */
18966
18967 int
18968 arm_address_offset_is_imm (rtx_insn *insn)
18969 {
18970 rtx mem, addr;
18971
18972 extract_insn_cached (insn);
18973
18974 if (REG_P (recog_data.operand[0]))
18975 return 0;
18976
18977 mem = recog_data.operand[0];
18978
18979 gcc_assert (MEM_P (mem));
18980
18981 addr = XEXP (mem, 0);
18982
18983 if (REG_P (addr)
18984 || (GET_CODE (addr) == PLUS
18985 && REG_P (XEXP (addr, 0))
18986 && CONST_INT_P (XEXP (addr, 1))))
18987 return 1;
18988 else
18989 return 0;
18990 }
18991
18992 /* Output an ADD r, s, #n where n may be too big for one instruction.
18993 If n is zero and the source and destination are the same register, output nothing. */
18994 const char *
18995 output_add_immediate (rtx *operands)
18996 {
18997 HOST_WIDE_INT n = INTVAL (operands[2]);
18998
18999 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19000 {
19001 if (n < 0)
19002 output_multi_immediate (operands,
19003 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19004 -n);
19005 else
19006 output_multi_immediate (operands,
19007 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19008 n);
19009 }
19010
19011 return "";
19012 }
19013
19014 /* Output a multiple immediate operation.
19015 OPERANDS is the vector of operands referred to in the output patterns.
19016 INSTR1 is the output pattern to use for the first constant.
19017 INSTR2 is the output pattern to use for subsequent constants.
19018 IMMED_OP is the index of the constant slot in OPERANDS.
19019 N is the constant value. */
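/* Worked example (illustrative operand strings and registers): with
   INSTR1 = "add %0, %1, %2" and INSTR2 = "add %0, %0, %2", N = 0x10004 is
   split into the chunks 0x4 and 0x10000, giving

       add     r0, r1, #4
       add     r0, r0, #65536

   Each chunk spans at most eight consecutive bits, which is what an ARM
   data-processing immediate can encode.  */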
19020 static const char *
19021 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19022 int immed_op, HOST_WIDE_INT n)
19023 {
19024 #if HOST_BITS_PER_WIDE_INT > 32
19025 n &= 0xffffffff;
19026 #endif
19027
19028 if (n == 0)
19029 {
19030 /* Quick and easy output. */
19031 operands[immed_op] = const0_rtx;
19032 output_asm_insn (instr1, operands);
19033 }
19034 else
19035 {
19036 int i;
19037 const char * instr = instr1;
19038
19039 /* Note that n is never zero here (which would give no output). */
19040 for (i = 0; i < 32; i += 2)
19041 {
19042 if (n & (3 << i))
19043 {
19044 operands[immed_op] = GEN_INT (n & (255 << i));
19045 output_asm_insn (instr, operands);
19046 instr = instr2;
19047 i += 6;
19048 }
19049 }
19050 }
19051
19052 return "";
19053 }
19054
19055 /* Return the name of a shifter operation. */
19056 static const char *
19057 arm_shift_nmem (enum rtx_code code)
19058 {
19059 switch (code)
19060 {
19061 case ASHIFT:
19062 return ARM_LSL_NAME;
19063
19064 case ASHIFTRT:
19065 return "asr";
19066
19067 case LSHIFTRT:
19068 return "lsr";
19069
19070 case ROTATERT:
19071 return "ror";
19072
19073 default:
19074 abort();
19075 }
19076 }
19077
19078 /* Return the appropriate ARM instruction for the operation code.
19079 The returned result should not be overwritten. OP is the rtx of the
19080 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19081 was shifted. */
19082 const char *
19083 arithmetic_instr (rtx op, int shift_first_arg)
19084 {
19085 switch (GET_CODE (op))
19086 {
19087 case PLUS:
19088 return "add";
19089
19090 case MINUS:
19091 return shift_first_arg ? "rsb" : "sub";
19092
19093 case IOR:
19094 return "orr";
19095
19096 case XOR:
19097 return "eor";
19098
19099 case AND:
19100 return "and";
19101
19102 case ASHIFT:
19103 case ASHIFTRT:
19104 case LSHIFTRT:
19105 case ROTATERT:
19106 return arm_shift_nmem(GET_CODE(op));
19107
19108 default:
19109 gcc_unreachable ();
19110 }
19111 }
19112
19113 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19114 for the operation code. The returned result should not be overwritten.
19115 OP is the rtx of the shift.
19116 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant
19117 shift amount otherwise. */
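/* A few illustrative mappings: (ashiftrt x (const_int 5)) yields "asr"
   with *AMOUNTP = 5; (mult x (const_int 8)) is treated as a left shift
   and yields "lsl" with *AMOUNTP = 3; (rotate x (const_int 8)) is
   rewritten as a rotate-right and yields "ror" with *AMOUNTP = 24.  */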
19118 static const char *
19119 shift_op (rtx op, HOST_WIDE_INT *amountp)
19120 {
19121 const char * mnem;
19122 enum rtx_code code = GET_CODE (op);
19123
19124 switch (code)
19125 {
19126 case ROTATE:
19127 if (!CONST_INT_P (XEXP (op, 1)))
19128 {
19129 output_operand_lossage ("invalid shift operand");
19130 return NULL;
19131 }
19132
19133 code = ROTATERT;
19134 *amountp = 32 - INTVAL (XEXP (op, 1));
19135 mnem = "ror";
19136 break;
19137
19138 case ASHIFT:
19139 case ASHIFTRT:
19140 case LSHIFTRT:
19141 case ROTATERT:
19142 mnem = arm_shift_nmem(code);
19143 if (CONST_INT_P (XEXP (op, 1)))
19144 {
19145 *amountp = INTVAL (XEXP (op, 1));
19146 }
19147 else if (REG_P (XEXP (op, 1)))
19148 {
19149 *amountp = -1;
19150 return mnem;
19151 }
19152 else
19153 {
19154 output_operand_lossage ("invalid shift operand");
19155 return NULL;
19156 }
19157 break;
19158
19159 case MULT:
19160 /* We never have to worry about the amount being other than a
19161 power of 2, since this case can never be reloaded from a reg. */
19162 if (!CONST_INT_P (XEXP (op, 1)))
19163 {
19164 output_operand_lossage ("invalid shift operand");
19165 return NULL;
19166 }
19167
19168 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19169
19170 /* Amount must be a power of two. */
19171 if (*amountp & (*amountp - 1))
19172 {
19173 output_operand_lossage ("invalid shift operand");
19174 return NULL;
19175 }
19176
19177 *amountp = exact_log2 (*amountp);
19178 gcc_assert (IN_RANGE (*amountp, 0, 31));
19179 return ARM_LSL_NAME;
19180
19181 default:
19182 output_operand_lossage ("invalid shift operand");
19183 return NULL;
19184 }
19185
19186 /* This is not 100% correct, but follows from the desire to merge
19187 multiplication by a power of 2 with the recognizer for a
19188 shift. >=32 is not a valid shift for "lsl", so we must try to
19189 output a shift that produces the correct arithmetic result.
19190 Using lsr #32 is identical except for the fact that the carry bit
19191 is not set correctly if we set the flags; but we never use the
19192 carry bit from such an operation, so we can ignore that. */
19193 if (code == ROTATERT)
19194 /* Rotate is just modulo 32. */
19195 *amountp &= 31;
19196 else if (*amountp != (*amountp & 31))
19197 {
19198 if (code == ASHIFT)
19199 mnem = "lsr";
19200 *amountp = 32;
19201 }
19202
19203 /* Shifts of 0 are no-ops. */
19204 if (*amountp == 0)
19205 return NULL;
19206
19207 return mnem;
19208 }
19209
19210 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19211 because /bin/as is horribly restrictive. The judgement about
19212 whether or not each character is 'printable' (and can be output as
19213 is) or not (and must be printed with an octal escape) must be made
19214 with reference to the *host* character set -- the situation is
19215 similar to that discussed in the comments above pp_c_char in
19216 c-pretty-print.c. */
19217
19218 #define MAX_ASCII_LEN 51
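/* For example (illustrative), the three bytes 'h', 'i', '\n' come out as

	.ascii	"hi\012"

   Printable characters are passed through (with '\\' and '"' escaped) and
   anything else is written as a three-digit octal escape; once
   MAX_ASCII_LEN characters have been emitted, the string is restarted on
   a fresh .ascii directive.  */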
19219
19220 void
19221 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19222 {
19223 int i;
19224 int len_so_far = 0;
19225
19226 fputs ("\t.ascii\t\"", stream);
19227
19228 for (i = 0; i < len; i++)
19229 {
19230 int c = p[i];
19231
19232 if (len_so_far >= MAX_ASCII_LEN)
19233 {
19234 fputs ("\"\n\t.ascii\t\"", stream);
19235 len_so_far = 0;
19236 }
19237
19238 if (ISPRINT (c))
19239 {
19240 if (c == '\\' || c == '\"')
19241 {
19242 putc ('\\', stream);
19243 len_so_far++;
19244 }
19245 putc (c, stream);
19246 len_so_far++;
19247 }
19248 else
19249 {
19250 fprintf (stream, "\\%03o", c);
19251 len_so_far += 4;
19252 }
19253 }
19254
19255 fputs ("\"\n", stream);
19256 }
19257 \f
19258 /* Whether a register is callee saved or not. This is necessary because high
19259 registers, although callee saved, are marked as caller saved when optimizing
19260 for size on Thumb-1 targets so that GCC avoids using them. */
19261 #define callee_saved_reg_p(reg) \
19262 (!call_used_regs[reg] \
19263 || (TARGET_THUMB1 && optimize_size \
19264 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19265
19266 /* Compute the register save mask for registers 0 through 12
19267 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19268
19269 static unsigned long
19270 arm_compute_save_reg0_reg12_mask (void)
19271 {
19272 unsigned long func_type = arm_current_func_type ();
19273 unsigned long save_reg_mask = 0;
19274 unsigned int reg;
19275
19276 if (IS_INTERRUPT (func_type))
19277 {
19278 unsigned int max_reg;
19279 /* Interrupt functions must not corrupt any registers,
19280 even call clobbered ones. If this is a leaf function
19281 we can just examine the registers used by the RTL, but
19282 otherwise we have to assume that whatever function is
19283 called might clobber anything, and so we have to save
19284 all the call-clobbered registers as well. */
19285 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19286 /* FIQ handlers have registers r8 - r12 banked, so
19287 we only need to check r0 - r7. Normal ISRs only
19288 bank r14 and r15, so for those we must check up to r12.
19289 r13 is the stack pointer which is always preserved,
19290 so we do not need to consider it here. */
19291 max_reg = 7;
19292 else
19293 max_reg = 12;
19294
19295 for (reg = 0; reg <= max_reg; reg++)
19296 if (df_regs_ever_live_p (reg)
19297 || (! crtl->is_leaf && call_used_regs[reg]))
19298 save_reg_mask |= (1 << reg);
19299
19300 /* Also save the pic base register if necessary. */
19301 if (flag_pic
19302 && !TARGET_SINGLE_PIC_BASE
19303 && arm_pic_register != INVALID_REGNUM
19304 && crtl->uses_pic_offset_table)
19305 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19306 }
19307 else if (IS_VOLATILE(func_type))
19308 {
19309 /* For noreturn functions we historically omitted register saves
19310 altogether. However this really messes up debugging. As a
19311 compromise save just the frame pointers. Combined with the link
19312 register saved elsewhere this should be sufficient to get
19313 a backtrace. */
19314 if (frame_pointer_needed)
19315 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19316 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19317 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19318 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19319 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19320 }
19321 else
19322 {
19323 /* In the normal case we only need to save those registers
19324 which are call saved and which are used by this function. */
19325 for (reg = 0; reg <= 11; reg++)
19326 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19327 save_reg_mask |= (1 << reg);
19328
19329 /* Handle the frame pointer as a special case. */
19330 if (frame_pointer_needed)
19331 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19332
19333 /* If we aren't loading the PIC register,
19334 don't stack it even though it may be live. */
19335 if (flag_pic
19336 && !TARGET_SINGLE_PIC_BASE
19337 && arm_pic_register != INVALID_REGNUM
19338 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19339 || crtl->uses_pic_offset_table))
19340 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19341
19342 /* The prologue will copy SP into R0, so save it. */
19343 if (IS_STACKALIGN (func_type))
19344 save_reg_mask |= 1;
19345 }
19346
19347 /* Save registers so the exception handler can modify them. */
19348 if (crtl->calls_eh_return)
19349 {
19350 unsigned int i;
19351
19352 for (i = 0; ; i++)
19353 {
19354 reg = EH_RETURN_DATA_REGNO (i);
19355 if (reg == INVALID_REGNUM)
19356 break;
19357 save_reg_mask |= 1 << reg;
19358 }
19359 }
19360
19361 return save_reg_mask;
19362 }
19363
19364 /* Return true if r3 is live at the start of the function. */
19365
19366 static bool
19367 arm_r3_live_at_start_p (void)
19368 {
19369 /* Just look at cfg info, which is still close enough to correct at this
19370 point. This gives false positives for broken functions that might use
19371 uninitialized data that happens to be allocated in r3, but who cares? */
19372 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19373 }
19374
19375 /* Compute the number of bytes used to store the static chain register on the
19376 stack, above the stack frame. We need to know this accurately to get the
19377 alignment of the rest of the stack frame correct. */
19378
19379 static int
19380 arm_compute_static_chain_stack_bytes (void)
19381 {
19382 /* Once the value is updated from the init value of -1, do not
19383 re-compute. */
19384 if (cfun->machine->static_chain_stack_bytes != -1)
19385 return cfun->machine->static_chain_stack_bytes;
19386
19387 /* See the defining assertion in arm_expand_prologue. */
19388 if (IS_NESTED (arm_current_func_type ())
19389 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19390 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19391 || flag_stack_clash_protection)
19392 && !df_regs_ever_live_p (LR_REGNUM)))
19393 && arm_r3_live_at_start_p ()
19394 && crtl->args.pretend_args_size == 0)
19395 return 4;
19396
19397 return 0;
19398 }
19399
19400 /* Compute a bit mask of which core registers need to be
19401 saved on the stack for the current function.
19402 This is used by arm_compute_frame_layout, which may add extra registers. */
19403
19404 static unsigned long
19405 arm_compute_save_core_reg_mask (void)
19406 {
19407 unsigned int save_reg_mask = 0;
19408 unsigned long func_type = arm_current_func_type ();
19409 unsigned int reg;
19410
19411 if (IS_NAKED (func_type))
19412 /* This should never really happen. */
19413 return 0;
19414
19415 /* If we are creating a stack frame, then we must save the frame pointer,
19416 IP (which will hold the old stack pointer), LR and the PC. */
19417 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19418 save_reg_mask |=
19419 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19420 | (1 << IP_REGNUM)
19421 | (1 << LR_REGNUM)
19422 | (1 << PC_REGNUM);
19423
19424 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19425
19426 /* Decide if we need to save the link register.
19427 Interrupt routines have their own banked link register,
19428 so they never need to save it.
19429 Otherwise if we do not use the link register we do not need to save
19430 it. If we are pushing other registers onto the stack however, we
19431 can save an instruction in the epilogue by pushing the link register
19432 now and then popping it back into the PC. This incurs extra memory
19433 accesses though, so we only do it when optimizing for size, and only
19434 if we know that we will not need a fancy return sequence. */
19435 if (df_regs_ever_live_p (LR_REGNUM)
19436 || (save_reg_mask
19437 && optimize_size
19438 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19439 && !crtl->tail_call_emit
19440 && !crtl->calls_eh_return))
19441 save_reg_mask |= 1 << LR_REGNUM;
19442
19443 if (cfun->machine->lr_save_eliminated)
19444 save_reg_mask &= ~ (1 << LR_REGNUM);
19445
19446 if (TARGET_REALLY_IWMMXT
19447 && ((bit_count (save_reg_mask)
19448 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19449 arm_compute_static_chain_stack_bytes())
19450 ) % 2) != 0)
19451 {
19452 /* The total number of registers that are going to be pushed
19453 onto the stack is odd. We need to ensure that the stack
19454 is 64-bit aligned before we start to save iWMMXt registers,
19455 and also before we start to create locals. (A local variable
19456 might be a double or long long which we will load/store using
19457 an iWMMXt instruction). Therefore we need to push another
19458 ARM register, so that the stack will be 64-bit aligned. We
19459 try to avoid using the arg registers (r0 -r3) as they might be
19460 used to pass values in a tail call. */
19461 for (reg = 4; reg <= 12; reg++)
19462 if ((save_reg_mask & (1 << reg)) == 0)
19463 break;
19464
19465 if (reg <= 12)
19466 save_reg_mask |= (1 << reg);
19467 else
19468 {
19469 cfun->machine->sibcall_blocked = 1;
19470 save_reg_mask |= (1 << 3);
19471 }
19472 }
19473
19474 /* We may need to push an additional register for use initializing the
19475 PIC base register. */
19476 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19477 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19478 {
19479 reg = thumb_find_work_register (1 << 4);
19480 if (!call_used_regs[reg])
19481 save_reg_mask |= (1 << reg);
19482 }
19483
19484 return save_reg_mask;
19485 }
19486
19487 /* Compute a bit mask of which core registers need to be
19488 saved on the stack for the current function. */
19489 static unsigned long
19490 thumb1_compute_save_core_reg_mask (void)
19491 {
19492 unsigned long mask;
19493 unsigned reg;
19494
19495 mask = 0;
19496 for (reg = 0; reg < 12; reg ++)
19497 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19498 mask |= 1 << reg;
19499
19500 /* Handle the frame pointer as a special case. */
19501 if (frame_pointer_needed)
19502 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19503
19504 if (flag_pic
19505 && !TARGET_SINGLE_PIC_BASE
19506 && arm_pic_register != INVALID_REGNUM
19507 && crtl->uses_pic_offset_table)
19508 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19509
19510 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19511 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19512 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19513
19514 /* LR will also be pushed if any lo regs are pushed. */
19515 if (mask & 0xff || thumb_force_lr_save ())
19516 mask |= (1 << LR_REGNUM);
19517
19518 /* Make sure we have a low work register if we need one.
19519 We will need one if we are going to push a high register,
19520 but we are not currently intending to push a low register. */
19521 if ((mask & 0xff) == 0
19522 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19523 {
19524 /* Use thumb_find_work_register to choose which register
19525 we will use. If the register is live then we will
19526 have to push it. Use LAST_LO_REGNUM as our fallback
19527 choice for the register to select. */
19528 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19529 /* Make sure the register returned by thumb_find_work_register is
19530 not part of the return value. */
19531 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19532 reg = LAST_LO_REGNUM;
19533
19534 if (callee_saved_reg_p (reg))
19535 mask |= 1 << reg;
19536 }
19537
19538 /* The 504 below is 8 bytes less than 512 because there are two possible
19539 alignment words. We can't tell here if they will be present or not so we
19540 have to play it safe and assume that they are. */
19541 if ((CALLER_INTERWORKING_SLOT_SIZE +
19542 ROUND_UP_WORD (get_frame_size ()) +
19543 crtl->outgoing_args_size) >= 504)
19544 {
19545 /* This is the same as the code in thumb1_expand_prologue() which
19546 determines which register to use for stack decrement. */
19547 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19548 if (mask & (1 << reg))
19549 break;
19550
19551 if (reg > LAST_LO_REGNUM)
19552 {
19553 /* Make sure we have a register available for stack decrement. */
19554 mask |= 1 << LAST_LO_REGNUM;
19555 }
19556 }
19557
19558 return mask;
19559 }
19560
19561
19562 /* Return the number of bytes required to save VFP registers. */
19563 static int
19564 arm_get_vfp_saved_size (void)
19565 {
19566 unsigned int regno;
19567 int count;
19568 int saved;
19569
19570 saved = 0;
19571 /* Space for saved VFP registers. */
19572 if (TARGET_HARD_FLOAT)
19573 {
19574 count = 0;
19575 for (regno = FIRST_VFP_REGNUM;
19576 regno < LAST_VFP_REGNUM;
19577 regno += 2)
19578 {
19579 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19580 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19581 {
19582 if (count > 0)
19583 {
19584 /* Work around the ARM10 VFPr1 bug. */
19585 if (count == 2 && !arm_arch6)
19586 count++;
19587 saved += count * 8;
19588 }
19589 count = 0;
19590 }
19591 else
19592 count++;
19593 }
19594 if (count > 0)
19595 {
19596 if (count == 2 && !arm_arch6)
19597 count++;
19598 saved += count * 8;
19599 }
19600 }
19601 return saved;
19602 }
19603
19604
19605 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19606 everything bar the final return instruction. If SIMPLE_RETURN is true,
19607 then do not output the epilogue, because it has already been emitted in RTL.
19608
19609 Note: do not forget to update the length attribute of the corresponding insn
19610 pattern when changing assembly output (e.g. the length attribute of
19611 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19612 register clearing sequences). */
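/* Typical outputs (illustrative): a normal return that restores saved
   registers is "pop {r4, r5, pc}"; a leaf return with nothing to restore
   is "bx lr" (or "mov pc, lr" before ARMv4T); an IRQ handler returns with
   "subs pc, lr, #4" so that the SPSR is copied back into the CPSR.  */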
19613 const char *
19614 output_return_instruction (rtx operand, bool really_return, bool reverse,
19615 bool simple_return)
19616 {
19617 char conditional[10];
19618 char instr[100];
19619 unsigned reg;
19620 unsigned long live_regs_mask;
19621 unsigned long func_type;
19622 arm_stack_offsets *offsets;
19623
19624 func_type = arm_current_func_type ();
19625
19626 if (IS_NAKED (func_type))
19627 return "";
19628
19629 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19630 {
19631 /* If this function was declared non-returning, and we have
19632 found a tail call, then we have to trust that the called
19633 function won't return. */
19634 if (really_return)
19635 {
19636 rtx ops[2];
19637
19638 /* Otherwise, trap an attempted return by aborting. */
19639 ops[0] = operand;
19640 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19641 : "abort");
19642 assemble_external_libcall (ops[1]);
19643 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19644 }
19645
19646 return "";
19647 }
19648
19649 gcc_assert (!cfun->calls_alloca || really_return);
19650
19651 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19652
19653 cfun->machine->return_used_this_function = 1;
19654
19655 offsets = arm_get_frame_offsets ();
19656 live_regs_mask = offsets->saved_regs_mask;
19657
19658 if (!simple_return && live_regs_mask)
19659 {
19660 const char * return_reg;
19661
19662 /* If we do not have any special requirements for function exit
19663 (e.g. interworking) then we can load the return address
19664 directly into the PC. Otherwise we must load it into LR. */
19665 if (really_return
19666 && !IS_CMSE_ENTRY (func_type)
19667 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19668 return_reg = reg_names[PC_REGNUM];
19669 else
19670 return_reg = reg_names[LR_REGNUM];
19671
19672 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19673 {
19674 /* There are three possible reasons for the IP register
19675 being saved: 1) a stack frame was created, in which case
19676 IP contains the old stack pointer, or 2) an ISR routine
19677 corrupted it, or 3) it was saved to align the stack on
19678 iWMMXt. In case 1, restore IP into SP, otherwise just
19679 restore IP. */
19680 if (frame_pointer_needed)
19681 {
19682 live_regs_mask &= ~ (1 << IP_REGNUM);
19683 live_regs_mask |= (1 << SP_REGNUM);
19684 }
19685 else
19686 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19687 }
19688
19689 /* On some ARM architectures it is faster to use LDR rather than
19690 LDM to load a single register. On other architectures, the
19691 cost is the same. In 26 bit mode, or for exception handlers,
19692 we have to use LDM to load the PC so that the CPSR is also
19693 restored. */
19694 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19695 if (live_regs_mask == (1U << reg))
19696 break;
19697
19698 if (reg <= LAST_ARM_REGNUM
19699 && (reg != LR_REGNUM
19700 || ! really_return
19701 || ! IS_INTERRUPT (func_type)))
19702 {
19703 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19704 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19705 }
19706 else
19707 {
19708 char *p;
19709 int first = 1;
19710
19711 /* Generate the load multiple instruction to restore the
19712 registers. Note we can get here, even if
19713 frame_pointer_needed is true, but only if sp already
19714 points to the base of the saved core registers. */
19715 if (live_regs_mask & (1 << SP_REGNUM))
19716 {
19717 unsigned HOST_WIDE_INT stack_adjust;
19718
19719 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19720 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19721
19722 if (stack_adjust && arm_arch5t && TARGET_ARM)
19723 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19724 else
19725 {
19726 /* If we can't use ldmib (SA110 bug),
19727 then try to pop r3 instead. */
19728 if (stack_adjust)
19729 live_regs_mask |= 1 << 3;
19730
19731 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19732 }
19733 }
19734 /* For interrupt returns we have to use an LDM rather than
19735 a POP so that we can use the exception return variant. */
19736 else if (IS_INTERRUPT (func_type))
19737 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19738 else
19739 sprintf (instr, "pop%s\t{", conditional);
19740
19741 p = instr + strlen (instr);
19742
19743 for (reg = 0; reg <= SP_REGNUM; reg++)
19744 if (live_regs_mask & (1 << reg))
19745 {
19746 int l = strlen (reg_names[reg]);
19747
19748 if (first)
19749 first = 0;
19750 else
19751 {
19752 memcpy (p, ", ", 2);
19753 p += 2;
19754 }
19755
19756 memcpy (p, "%|", 2);
19757 memcpy (p + 2, reg_names[reg], l);
19758 p += l + 2;
19759 }
19760
19761 if (live_regs_mask & (1 << LR_REGNUM))
19762 {
19763 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19764 /* If returning from an interrupt, restore the CPSR. */
19765 if (IS_INTERRUPT (func_type))
19766 strcat (p, "^");
19767 }
19768 else
19769 strcpy (p, "}");
19770 }
19771
19772 output_asm_insn (instr, & operand);
19773
19774 /* See if we need to generate an extra instruction to
19775 perform the actual function return. */
19776 if (really_return
19777 && func_type != ARM_FT_INTERWORKED
19778 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19779 {
19780 /* The return has already been handled
19781 by loading the LR into the PC. */
19782 return "";
19783 }
19784 }
19785
19786 if (really_return)
19787 {
19788 switch ((int) ARM_FUNC_TYPE (func_type))
19789 {
19790 case ARM_FT_ISR:
19791 case ARM_FT_FIQ:
19792 /* ??? This is wrong for unified assembly syntax. */
19793 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19794 break;
19795
19796 case ARM_FT_INTERWORKED:
19797 gcc_assert (arm_arch5t || arm_arch4t);
19798 sprintf (instr, "bx%s\t%%|lr", conditional);
19799 break;
19800
19801 case ARM_FT_EXCEPTION:
19802 /* ??? This is wrong for unified assembly syntax. */
19803 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19804 break;
19805
19806 default:
19807 if (IS_CMSE_ENTRY (func_type))
19808 {
19809 /* Check if we have to clear the 'GE bits', which are only used if
19810 parallel add and subtract instructions are available. */
19811 if (TARGET_INT_SIMD)
19812 snprintf (instr, sizeof (instr),
19813 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19814 else
19815 snprintf (instr, sizeof (instr),
19816 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19817
19818 output_asm_insn (instr, & operand);
19819 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19820 {
19821 /* Clear the cumulative exception-status bits (0-4,7) and the
19822 condition code bits (28-31) of the FPSCR. We need to
19823 remember to clear the first scratch register used (IP) and
19824 save and restore the second (r4). */
19825 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19826 output_asm_insn (instr, & operand);
19827 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19828 output_asm_insn (instr, & operand);
19829 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19830 output_asm_insn (instr, & operand);
19831 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19832 output_asm_insn (instr, & operand);
19833 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19834 output_asm_insn (instr, & operand);
19835 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19836 output_asm_insn (instr, & operand);
19837 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19838 output_asm_insn (instr, & operand);
19839 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19840 output_asm_insn (instr, & operand);
19841 }
19842 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19843 }
19844 /* Use bx if it's available. */
19845 else if (arm_arch5t || arm_arch4t)
19846 sprintf (instr, "bx%s\t%%|lr", conditional);
19847 else
19848 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19849 break;
19850 }
19851
19852 output_asm_insn (instr, & operand);
19853 }
19854
19855 return "";
19856 }
19857
19858 /* Output in FILE asm statements needed to declare the NAME of the function
19859 defined by its DECL node. */
19860
19861 void
19862 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19863 {
19864 size_t cmse_name_len;
19865 char *cmse_name = 0;
19866 char cmse_prefix[] = "__acle_se_";
19867
19868 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19869 extra function label for each function with the 'cmse_nonsecure_entry'
19870 attribute. This extra function label should be prepended with
19871 '__acle_se_', telling the linker that it needs to create secure gateway
19872 veneers for this function. */
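/* As an illustrative sketch (for a hypothetical function "foo" with the
   cmse_nonsecure_entry attribute; not taken from actual compiler output),
   the directives emitted below would look roughly like:

       .globl  __acle_se_foo
       .type   __acle_se_foo, %function
       .type   foo, %function
   foo:
   __acle_se_foo:  */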
19873 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19874 DECL_ATTRIBUTES (decl)))
19875 {
19876 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19877 cmse_name = XALLOCAVEC (char, cmse_name_len);
19878 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19879 targetm.asm_out.globalize_label (file, cmse_name);
19880
19881 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19882 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19883 }
19884
19885 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19886 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19887 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19888 ASM_OUTPUT_LABEL (file, name);
19889
19890 if (cmse_name)
19891 ASM_OUTPUT_LABEL (file, cmse_name);
19892
19893 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19894 }
19895
19896 /* Write the function name into the code section, directly preceding
19897 the function prologue.
19898
19899 Code will be output similar to this:
19900 t0
19901 .ascii "arm_poke_function_name", 0
19902 .align
19903 t1
19904 .word 0xff000000 + (t1 - t0)
19905 arm_poke_function_name
19906 mov ip, sp
19907 stmfd sp!, {fp, ip, lr, pc}
19908 sub fp, ip, #4
19909
19910 When performing a stack backtrace, code can inspect the value
19911 of 'pc' stored at 'fp' + 0. If the trace function then looks
19912 at location pc - 12 and the top 8 bits are set, then we know
19913 that there is a function name embedded immediately preceding this
19914 location, whose length is ((pc[-3]) & ~0xff000000).
19915
19916 We assume that pc is declared as a pointer to an unsigned long.
19917
19918 It is of no benefit to output the function name if we are assembling
19919 a leaf function. These function types will not contain a stack
19920 backtrace structure, therefore it is not possible to determine the
19921 function name. */
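/* As a worked example of the marker word (a sketch for a hypothetical
   name "foo"): length = strlen ("foo") + 1 = 4 and alignlength =
   ROUND_UP_WORD (4) = 4, so the word emitted is 0xff000000 + 4
   = 0xff000004, and a backtracer recovers the name length as
   0xff000004 & ~0xff000000 = 4.  */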
19922 void
19923 arm_poke_function_name (FILE *stream, const char *name)
19924 {
19925 unsigned long alignlength;
19926 unsigned long length;
19927 rtx x;
19928
19929 length = strlen (name) + 1;
19930 alignlength = ROUND_UP_WORD (length);
19931
19932 ASM_OUTPUT_ASCII (stream, name, length);
19933 ASM_OUTPUT_ALIGN (stream, 2);
19934 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19935 assemble_aligned_integer (UNITS_PER_WORD, x);
19936 }
19937
19938 /* Place some comments into the assembler stream
19939 describing the current function. */
19940 static void
19941 arm_output_function_prologue (FILE *f)
19942 {
19943 unsigned long func_type;
19944
19945 /* Sanity check. */
19946 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19947
19948 func_type = arm_current_func_type ();
19949
19950 switch ((int) ARM_FUNC_TYPE (func_type))
19951 {
19952 default:
19953 case ARM_FT_NORMAL:
19954 break;
19955 case ARM_FT_INTERWORKED:
19956 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19957 break;
19958 case ARM_FT_ISR:
19959 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19960 break;
19961 case ARM_FT_FIQ:
19962 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19963 break;
19964 case ARM_FT_EXCEPTION:
19965 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19966 break;
19967 }
19968
19969 if (IS_NAKED (func_type))
19970 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19971
19972 if (IS_VOLATILE (func_type))
19973 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19974
19975 if (IS_NESTED (func_type))
19976 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19977 if (IS_STACKALIGN (func_type))
19978 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19979 if (IS_CMSE_ENTRY (func_type))
19980 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19981
19982 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
19983 (HOST_WIDE_INT) crtl->args.size,
19984 crtl->args.pretend_args_size,
19985 (HOST_WIDE_INT) get_frame_size ());
19986
19987 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19988 frame_pointer_needed,
19989 cfun->machine->uses_anonymous_args);
19990
19991 if (cfun->machine->lr_save_eliminated)
19992 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19993
19994 if (crtl->calls_eh_return)
19995 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19996
19997 }
19998
19999 static void
20000 arm_output_function_epilogue (FILE *)
20001 {
20002 arm_stack_offsets *offsets;
20003
20004 if (TARGET_THUMB1)
20005 {
20006 int regno;
20007
20008 /* Emit any call-via-reg trampolines that are needed for v4t support
20009 of call_reg and call_value_reg type insns. */
20010 for (regno = 0; regno < LR_REGNUM; regno++)
20011 {
20012 rtx label = cfun->machine->call_via[regno];
20013
20014 if (label != NULL)
20015 {
20016 switch_to_section (function_section (current_function_decl));
20017 targetm.asm_out.internal_label (asm_out_file, "L",
20018 CODE_LABEL_NUMBER (label));
20019 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20020 }
20021 }
20022
20023 /* ??? Probably not safe to set this here, since it assumes that a
20024 function will be emitted as assembly immediately after we generate
20025 RTL for it. This does not happen for inline functions. */
20026 cfun->machine->return_used_this_function = 0;
20027 }
20028 else /* TARGET_32BIT */
20029 {
20030 /* We need to take into account any stack-frame rounding. */
20031 offsets = arm_get_frame_offsets ();
20032
20033 gcc_assert (!use_return_insn (FALSE, NULL)
20034 || (cfun->machine->return_used_this_function != 0)
20035 || offsets->saved_regs == offsets->outgoing_args
20036 || frame_pointer_needed);
20037 }
20038 }
20039
20040 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20041 STR and STRD. If an even number of registers is being pushed, one
20042 STRD pattern is created for each register pair. If an
20043 odd number of registers is pushed, emit an initial STR followed by
20044 as many STRD instructions as are needed. This works best when the
20045 stack is initially 64-bit aligned (the normal case), since it
20046 ensures that each STRD is also 64-bit aligned. */
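/* As an illustrative sketch (assuming a mask of {r4, r5, r6}, i.e. an odd
   number of registers; not taken from actual compiler output), the RTL
   emitted here corresponds to a sequence like:

       str     r4, [sp, #-12]!         @ single STR allocates all 12 bytes
       strd    r5, r6, [sp, #4]        @ remaining pair stays 64-bit aligned  */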
20047 static void
20048 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20049 {
20050 int num_regs = 0;
20051 int i;
20052 int regno;
20053 rtx par = NULL_RTX;
20054 rtx dwarf = NULL_RTX;
20055 rtx tmp;
20056 bool first = true;
20057
20058 num_regs = bit_count (saved_regs_mask);
20059
20060 /* Must be at least one register to save, and can't save SP or PC. */
20061 gcc_assert (num_regs > 0 && num_regs <= 14);
20062 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20063 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20064
20065 /* Create sequence for DWARF info. All the frame-related data for
20066 debugging is held in this wrapper. */
20067 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20068
20069 /* Describe the stack adjustment. */
20070 tmp = gen_rtx_SET (stack_pointer_rtx,
20071 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20072 RTX_FRAME_RELATED_P (tmp) = 1;
20073 XVECEXP (dwarf, 0, 0) = tmp;
20074
20075 /* Find the first register. */
20076 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20077 ;
20078
20079 i = 0;
20080
20081 /* If there's an odd number of registers to push, start off by
20082 pushing a single register. This ensures that subsequent strd
20083 operations are dword aligned (assuming that SP was originally
20084 64-bit aligned). */
20085 if ((num_regs & 1) != 0)
20086 {
20087 rtx reg, mem, insn;
20088
20089 reg = gen_rtx_REG (SImode, regno);
20090 if (num_regs == 1)
20091 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20092 stack_pointer_rtx));
20093 else
20094 mem = gen_frame_mem (Pmode,
20095 gen_rtx_PRE_MODIFY
20096 (Pmode, stack_pointer_rtx,
20097 plus_constant (Pmode, stack_pointer_rtx,
20098 -4 * num_regs)));
20099
20100 tmp = gen_rtx_SET (mem, reg);
20101 RTX_FRAME_RELATED_P (tmp) = 1;
20102 insn = emit_insn (tmp);
20103 RTX_FRAME_RELATED_P (insn) = 1;
20104 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20105 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20106 RTX_FRAME_RELATED_P (tmp) = 1;
20107 i++;
20108 regno++;
20109 XVECEXP (dwarf, 0, i) = tmp;
20110 first = false;
20111 }
20112
20113 while (i < num_regs)
20114 if (saved_regs_mask & (1 << regno))
20115 {
20116 rtx reg1, reg2, mem1, mem2;
20117 rtx tmp0, tmp1, tmp2;
20118 int regno2;
20119
20120 /* Find the register to pair with this one. */
20121 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20122 regno2++)
20123 ;
20124
20125 reg1 = gen_rtx_REG (SImode, regno);
20126 reg2 = gen_rtx_REG (SImode, regno2);
20127
20128 if (first)
20129 {
20130 rtx insn;
20131
20132 first = false;
20133 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20134 stack_pointer_rtx,
20135 -4 * num_regs));
20136 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20137 stack_pointer_rtx,
20138 -4 * (num_regs - 1)));
20139 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20140 plus_constant (Pmode, stack_pointer_rtx,
20141 -4 * (num_regs)));
20142 tmp1 = gen_rtx_SET (mem1, reg1);
20143 tmp2 = gen_rtx_SET (mem2, reg2);
20144 RTX_FRAME_RELATED_P (tmp0) = 1;
20145 RTX_FRAME_RELATED_P (tmp1) = 1;
20146 RTX_FRAME_RELATED_P (tmp2) = 1;
20147 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20148 XVECEXP (par, 0, 0) = tmp0;
20149 XVECEXP (par, 0, 1) = tmp1;
20150 XVECEXP (par, 0, 2) = tmp2;
20151 insn = emit_insn (par);
20152 RTX_FRAME_RELATED_P (insn) = 1;
20153 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20154 }
20155 else
20156 {
20157 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20158 stack_pointer_rtx,
20159 4 * i));
20160 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20161 stack_pointer_rtx,
20162 4 * (i + 1)));
20163 tmp1 = gen_rtx_SET (mem1, reg1);
20164 tmp2 = gen_rtx_SET (mem2, reg2);
20165 RTX_FRAME_RELATED_P (tmp1) = 1;
20166 RTX_FRAME_RELATED_P (tmp2) = 1;
20167 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20168 XVECEXP (par, 0, 0) = tmp1;
20169 XVECEXP (par, 0, 1) = tmp2;
20170 emit_insn (par);
20171 }
20172
20173 /* Create unwind information. This is an approximation. */
20174 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20175 plus_constant (Pmode,
20176 stack_pointer_rtx,
20177 4 * i)),
20178 reg1);
20179 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20180 plus_constant (Pmode,
20181 stack_pointer_rtx,
20182 4 * (i + 1))),
20183 reg2);
20184
20185 RTX_FRAME_RELATED_P (tmp1) = 1;
20186 RTX_FRAME_RELATED_P (tmp2) = 1;
20187 XVECEXP (dwarf, 0, i + 1) = tmp1;
20188 XVECEXP (dwarf, 0, i + 2) = tmp2;
20189 i += 2;
20190 regno = regno2 + 1;
20191 }
20192 else
20193 regno++;
20194
20195 return;
20196 }
20197
20198 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20199 whenever possible, otherwise it emits single-word stores. The first store
20200 also allocates stack space for all saved registers, using pre-indexed
20201 addressing with writeback. All other stores use offset addressing. If no STRD
20202 can be emitted, this function emits a sequence of single-word stores,
20203 and not an STM as before, because single-word stores provide more
20204 scheduling freedom and can be turned into an STM by peephole optimizations. */
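/* As an illustrative sketch (assuming a mask of {r4, r5, r7}; not taken
   from actual compiler output), the stores emitted here correspond to:

       strd    r4, r5, [sp, #-12]!     @ first store allocates all 12 bytes
       str     r7, [sp, #8]            @ unpaired register uses offset addressing  */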
20205 static void
20206 arm_emit_strd_push (unsigned long saved_regs_mask)
20207 {
20208 int num_regs = 0;
20209 int i, j, dwarf_index = 0;
20210 int offset = 0;
20211 rtx dwarf = NULL_RTX;
20212 rtx insn = NULL_RTX;
20213 rtx tmp, mem;
20214
20215 /* TODO: More efficient code could be emitted by changing the
20216 layout, e.g., first push all pairs that can use STRD to keep the
20217 stack aligned, and then push all other registers. */
20218 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20219 if (saved_regs_mask & (1 << i))
20220 num_regs++;
20221
20222 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20223 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20224 gcc_assert (num_regs > 0);
20225
20226 /* Create sequence for DWARF info. */
20227 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20228
20229 /* For dwarf info, we generate an explicit stack update. */
20230 tmp = gen_rtx_SET (stack_pointer_rtx,
20231 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20232 RTX_FRAME_RELATED_P (tmp) = 1;
20233 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20234
20235 /* Save registers. */
20236 offset = - 4 * num_regs;
20237 j = 0;
20238 while (j <= LAST_ARM_REGNUM)
20239 if (saved_regs_mask & (1 << j))
20240 {
20241 if ((j % 2 == 0)
20242 && (saved_regs_mask & (1 << (j + 1))))
20243 {
20244 /* The current register and the next register form a register pair
20245 for which STRD can be generated. */
20246 if (offset < 0)
20247 {
20248 /* Allocate stack space for all saved registers. */
20249 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20250 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20251 mem = gen_frame_mem (DImode, tmp);
20252 offset = 0;
20253 }
20254 else if (offset > 0)
20255 mem = gen_frame_mem (DImode,
20256 plus_constant (Pmode,
20257 stack_pointer_rtx,
20258 offset));
20259 else
20260 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20261
20262 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20263 RTX_FRAME_RELATED_P (tmp) = 1;
20264 tmp = emit_insn (tmp);
20265
20266 /* Record the first store insn. */
20267 if (dwarf_index == 1)
20268 insn = tmp;
20269
20270 /* Generate dwarf info. */
20271 mem = gen_frame_mem (SImode,
20272 plus_constant (Pmode,
20273 stack_pointer_rtx,
20274 offset));
20275 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20276 RTX_FRAME_RELATED_P (tmp) = 1;
20277 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20278
20279 mem = gen_frame_mem (SImode,
20280 plus_constant (Pmode,
20281 stack_pointer_rtx,
20282 offset + 4));
20283 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20284 RTX_FRAME_RELATED_P (tmp) = 1;
20285 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20286
20287 offset += 8;
20288 j += 2;
20289 }
20290 else
20291 {
20292 /* Emit a single word store. */
20293 if (offset < 0)
20294 {
20295 /* Allocate stack space for all saved registers. */
20296 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20297 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20298 mem = gen_frame_mem (SImode, tmp);
20299 offset = 0;
20300 }
20301 else if (offset > 0)
20302 mem = gen_frame_mem (SImode,
20303 plus_constant (Pmode,
20304 stack_pointer_rtx,
20305 offset));
20306 else
20307 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20308
20309 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20310 RTX_FRAME_RELATED_P (tmp) = 1;
20311 tmp = emit_insn (tmp);
20312
20313 /* Record the first store insn. */
20314 if (dwarf_index == 1)
20315 insn = tmp;
20316
20317 /* Generate dwarf info. */
20318 mem = gen_frame_mem (SImode,
20319 plus_constant(Pmode,
20320 stack_pointer_rtx,
20321 offset));
20322 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20323 RTX_FRAME_RELATED_P (tmp) = 1;
20324 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20325
20326 offset += 4;
20327 j += 1;
20328 }
20329 }
20330 else
20331 j++;
20332
20333 /* Attach dwarf info to the first insn we generate. */
20334 gcc_assert (insn != NULL_RTX);
20335 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20336 RTX_FRAME_RELATED_P (insn) = 1;
20337 }
20338
20339 /* Generate and emit an insn that we will recognize as a push_multi.
20340 Unfortunately, since this insn does not reflect very well the actual
20341 semantics of the operation, we need to annotate the insn for the benefit
20342 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20343 MASK for registers that should be annotated for DWARF2 frame unwind
20344 information. */
20345 static rtx
20346 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20347 {
20348 int num_regs = 0;
20349 int num_dwarf_regs = 0;
20350 int i, j;
20351 rtx par;
20352 rtx dwarf;
20353 int dwarf_par_index;
20354 rtx tmp, reg;
20355
20356 /* We don't record the PC in the dwarf frame information. */
20357 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20358
20359 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20360 {
20361 if (mask & (1 << i))
20362 num_regs++;
20363 if (dwarf_regs_mask & (1 << i))
20364 num_dwarf_regs++;
20365 }
20366
20367 gcc_assert (num_regs && num_regs <= 16);
20368 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20369
20370 /* For the body of the insn we are going to generate an UNSPEC in
20371 parallel with several USEs. This allows the insn to be recognized
20372 by the push_multi pattern in the arm.md file.
20373
20374 The body of the insn looks something like this:
20375
20376 (parallel [
20377 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20378 (const_int:SI <num>)))
20379 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20380 (use (reg:SI XX))
20381 (use (reg:SI YY))
20382 ...
20383 ])
20384
20385 For the frame note however, we try to be more explicit and actually
20386 show each register being stored into the stack frame, plus a (single)
20387 decrement of the stack pointer. We do it this way in order to be
20388 friendly to the stack unwinding code, which only wants to see a single
20389 stack decrement per instruction. The RTL we generate for the note looks
20390 something like this:
20391
20392 (sequence [
20393 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20394 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20395 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20396 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20397 ...
20398 ])
20399
20400 FIXME: In an ideal world the PRE_MODIFY would not exist and
20401 instead we'd have a parallel expression detailing all
20402 the stores to the various memory addresses so that debug
20403 information is more up-to-date. Remember, however, when rewriting
20404 this to take care of the constraints of the push instruction.
20405
20406 Note also that this has to be taken care of for the VFP registers.
20407
20408 For more see PR43399. */
20409
20410 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20411 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20412 dwarf_par_index = 1;
20413
20414 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20415 {
20416 if (mask & (1 << i))
20417 {
20418 reg = gen_rtx_REG (SImode, i);
20419
20420 XVECEXP (par, 0, 0)
20421 = gen_rtx_SET (gen_frame_mem
20422 (BLKmode,
20423 gen_rtx_PRE_MODIFY (Pmode,
20424 stack_pointer_rtx,
20425 plus_constant
20426 (Pmode, stack_pointer_rtx,
20427 -4 * num_regs))
20428 ),
20429 gen_rtx_UNSPEC (BLKmode,
20430 gen_rtvec (1, reg),
20431 UNSPEC_PUSH_MULT));
20432
20433 if (dwarf_regs_mask & (1 << i))
20434 {
20435 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20436 reg);
20437 RTX_FRAME_RELATED_P (tmp) = 1;
20438 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20439 }
20440
20441 break;
20442 }
20443 }
20444
20445 for (j = 1, i++; j < num_regs; i++)
20446 {
20447 if (mask & (1 << i))
20448 {
20449 reg = gen_rtx_REG (SImode, i);
20450
20451 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20452
20453 if (dwarf_regs_mask & (1 << i))
20454 {
20455 tmp
20456 = gen_rtx_SET (gen_frame_mem
20457 (SImode,
20458 plus_constant (Pmode, stack_pointer_rtx,
20459 4 * j)),
20460 reg);
20461 RTX_FRAME_RELATED_P (tmp) = 1;
20462 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20463 }
20464
20465 j++;
20466 }
20467 }
20468
20469 par = emit_insn (par);
20470
20471 tmp = gen_rtx_SET (stack_pointer_rtx,
20472 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20473 RTX_FRAME_RELATED_P (tmp) = 1;
20474 XVECEXP (dwarf, 0, 0) = tmp;
20475
20476 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20477
20478 return par;
20479 }
20480
20481 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20482 SIZE is the offset to be adjusted.
20483 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20484 static void
20485 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20486 {
20487 rtx dwarf;
20488
20489 RTX_FRAME_RELATED_P (insn) = 1;
20490 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20491 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20492 }
20493
20494 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20495 SAVED_REGS_MASK shows which registers need to be restored.
20496
20497 Unfortunately, since this insn does not reflect very well the actual
20498 semantics of the operation, we need to annotate the insn for the benefit
20499 of DWARF2 frame unwind information. */
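/* As an illustrative sketch (assuming SAVED_REGS_MASK covers {r4, r5, pc};
   not taken from actual compiler output), the PARALLEL built below
   corresponds to "pop {r4, r5, pc}" and looks roughly like:

       (parallel [(return)
                  (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
                  (set (reg:SI r4) (mem:SI (reg:SI sp)))
                  (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
                  (set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])  */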
20500 static void
20501 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20502 {
20503 int num_regs = 0;
20504 int i, j;
20505 rtx par;
20506 rtx dwarf = NULL_RTX;
20507 rtx tmp, reg;
20508 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20509 int offset_adj;
20510 int emit_update;
20511
20512 offset_adj = return_in_pc ? 1 : 0;
20513 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20514 if (saved_regs_mask & (1 << i))
20515 num_regs++;
20516
20517 gcc_assert (num_regs && num_regs <= 16);
20518
20519 /* If SP is in the reglist, then we don't emit the SP update insn. */
20520 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20521
20522 /* The parallel needs to hold num_regs SETs
20523 and one SET for the stack update. */
20524 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20525
20526 if (return_in_pc)
20527 XVECEXP (par, 0, 0) = ret_rtx;
20528
20529 if (emit_update)
20530 {
20531 /* Increment the stack pointer, based on there being
20532 num_regs 4-byte registers to restore. */
20533 tmp = gen_rtx_SET (stack_pointer_rtx,
20534 plus_constant (Pmode,
20535 stack_pointer_rtx,
20536 4 * num_regs));
20537 RTX_FRAME_RELATED_P (tmp) = 1;
20538 XVECEXP (par, 0, offset_adj) = tmp;
20539 }
20540
20541 /* Now restore every reg, which may include PC. */
20542 for (j = 0, i = 0; j < num_regs; i++)
20543 if (saved_regs_mask & (1 << i))
20544 {
20545 reg = gen_rtx_REG (SImode, i);
20546 if ((num_regs == 1) && emit_update && !return_in_pc)
20547 {
20548 /* Emit single load with writeback. */
20549 tmp = gen_frame_mem (SImode,
20550 gen_rtx_POST_INC (Pmode,
20551 stack_pointer_rtx));
20552 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20553 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20554 return;
20555 }
20556
20557 tmp = gen_rtx_SET (reg,
20558 gen_frame_mem
20559 (SImode,
20560 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20561 RTX_FRAME_RELATED_P (tmp) = 1;
20562 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20563
20564 /* We need to maintain a sequence for DWARF info too. As dwarf info
20565 should not have PC, skip PC. */
20566 if (i != PC_REGNUM)
20567 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20568
20569 j++;
20570 }
20571
20572 if (return_in_pc)
20573 par = emit_jump_insn (par);
20574 else
20575 par = emit_insn (par);
20576
20577 REG_NOTES (par) = dwarf;
20578 if (!return_in_pc)
20579 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20580 stack_pointer_rtx, stack_pointer_rtx);
20581 }
20582
20583 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20584 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20585
20586 Unfortunately, since this insn does not reflect very well the actual
20587 semantics of the operation, we need to annotate the insn for the benefit
20588 of DWARF2 frame unwind information. */
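/* As an illustrative sketch (assuming three D-registers d8-d10 are restored
   with BASE_REG = sp; not taken from actual compiler output), the PARALLEL
   built below corresponds to "vldm sp!, {d8-d10}":

       (parallel [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 24)))
                  (set (reg:DF d8)  (mem:DF (reg:SI sp)))
                  (set (reg:DF d9)  (mem:DF (plus:SI (reg:SI sp) (const_int 8))))
                  (set (reg:DF d10) (mem:DF (plus:SI (reg:SI sp) (const_int 16))))])  */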
20589 static void
20590 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20591 {
20592 int i, j;
20593 rtx par;
20594 rtx dwarf = NULL_RTX;
20595 rtx tmp, reg;
20596
20597 gcc_assert (num_regs && num_regs <= 32);
20598
20599 /* Workaround ARM10 VFPr1 bug. */
20600 if (num_regs == 2 && !arm_arch6)
20601 {
20602 if (first_reg == 15)
20603 first_reg--;
20604
20605 num_regs++;
20606 }
20607
20608 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20609 there could be up to 32 D-registers to restore.
20610 If there are more than 16 D-registers, make two recursive calls,
20611 each of which emits one pop_multi instruction. */
20612 if (num_regs > 16)
20613 {
20614 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20615 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20616 return;
20617 }
20618
20619 /* The parallel needs to hold num_regs SETs
20620 and one SET for the stack update. */
20621 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20622
20623 /* Increment the stack pointer, based on there being
20624 num_regs 8-byte registers to restore. */
20625 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20626 RTX_FRAME_RELATED_P (tmp) = 1;
20627 XVECEXP (par, 0, 0) = tmp;
20628
20629 /* Now show every reg that will be restored, using a SET for each. */
20630 for (j = 0, i=first_reg; j < num_regs; i += 2)
20631 {
20632 reg = gen_rtx_REG (DFmode, i);
20633
20634 tmp = gen_rtx_SET (reg,
20635 gen_frame_mem
20636 (DFmode,
20637 plus_constant (Pmode, base_reg, 8 * j)));
20638 RTX_FRAME_RELATED_P (tmp) = 1;
20639 XVECEXP (par, 0, j + 1) = tmp;
20640
20641 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20642
20643 j++;
20644 }
20645
20646 par = emit_insn (par);
20647 REG_NOTES (par) = dwarf;
20648
20649 /* Make sure the CFA doesn't end up based on IP_REGNUM, to allow unwinding from FP. */
20650 if (REGNO (base_reg) == IP_REGNUM)
20651 {
20652 RTX_FRAME_RELATED_P (par) = 1;
20653 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20654 }
20655 else
20656 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20657 base_reg, base_reg);
20658 }
20659
20660 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20661 even number of registers is being popped, one LDRD pattern is created for
20662 each register pair. If an odd number of registers is popped, the last register
20663 is loaded using an LDR pattern. */
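/* As an illustrative sketch (assuming SAVED_REGS_MASK covers {r4, r5, r6},
   i.e. an odd number of registers and no PC; not taken from actual compiler
   output), the sequence emitted below corresponds to:

       ldrd    r4, r5, [sp]            @ one LDRD per register pair
       add     sp, sp, #8              @ single stack adjustment for the pairs
       ldr     r6, [sp], #4            @ odd register popped with post-increment  */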
20664 static void
20665 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20666 {
20667 int num_regs = 0;
20668 int i, j;
20669 rtx par = NULL_RTX;
20670 rtx dwarf = NULL_RTX;
20671 rtx tmp, reg, tmp1;
20672 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20673
20674 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20675 if (saved_regs_mask & (1 << i))
20676 num_regs++;
20677
20678 gcc_assert (num_regs && num_regs <= 16);
20679
20680 /* We cannot generate LDRD for PC, so reduce the count if PC is
20681 to be popped. If num_regs was even, it now becomes odd,
20682 and we can generate a pop with PC. If num_regs was odd, it is
20683 now even, and an LDR with return can be generated for PC. */
20684 if (return_in_pc)
20685 num_regs--;
20686
20687 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20688
20689 /* Var j iterates over all the registers, gathering those in
20690 saved_regs_mask. Var i gives the index of each saved register in the stack frame.
20691 A PARALLEL RTX of a register pair is created here, so that the pattern for
20692 LDRD can be matched. As PC is always the last register to be popped, and
20693 we have already decremented num_regs if PC is present, we don't have to worry
20694 about PC in this loop. */
20695 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20696 if (saved_regs_mask & (1 << j))
20697 {
20698 /* Create RTX for memory load. */
20699 reg = gen_rtx_REG (SImode, j);
20700 tmp = gen_rtx_SET (reg,
20701 gen_frame_mem (SImode,
20702 plus_constant (Pmode,
20703 stack_pointer_rtx, 4 * i)));
20704 RTX_FRAME_RELATED_P (tmp) = 1;
20705
20706 if (i % 2 == 0)
20707 {
20708 /* When saved-register index (i) is even, the RTX to be emitted is
20709 yet to be created. Hence create it first. The LDRD pattern we
20710 are generating is :
20711 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20712 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20713 where target registers need not be consecutive. */
20714 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20715 dwarf = NULL_RTX;
20716 }
20717
20718 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20719 added as the 0th element and if i is odd, reg_i is added as the 1st element
20720 of the LDRD pattern shown above. */
20721 XVECEXP (par, 0, (i % 2)) = tmp;
20722 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20723
20724 if ((i % 2) == 1)
20725 {
20726 /* When the saved-register index (i) is odd, the RTXs for both registers
20727 to be loaded have been generated in the LDRD pattern given above, and the
20728 pattern can be emitted now. */
20729 par = emit_insn (par);
20730 REG_NOTES (par) = dwarf;
20731 RTX_FRAME_RELATED_P (par) = 1;
20732 }
20733
20734 i++;
20735 }
20736
20737 /* If the number of registers popped is odd and return_in_pc is false, or the
20738 number of registers is even and return_in_pc is true, the last register is
20739 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20740 then emit an LDR with post-increment. */
20741
20742 /* Increment the stack pointer, based on there being
20743 num_regs 4-byte registers to restore. */
20744 tmp = gen_rtx_SET (stack_pointer_rtx,
20745 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20746 RTX_FRAME_RELATED_P (tmp) = 1;
20747 tmp = emit_insn (tmp);
20748 if (!return_in_pc)
20749 {
20750 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20751 stack_pointer_rtx, stack_pointer_rtx);
20752 }
20753
20754 dwarf = NULL_RTX;
20755
20756 if (((num_regs % 2) == 1 && !return_in_pc)
20757 || ((num_regs % 2) == 0 && return_in_pc))
20758 {
20759 /* Scan for the single register to be popped. Skip until the saved
20760 register is found. */
20761 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20762
20763 /* Gen LDR with post increment here. */
20764 tmp1 = gen_rtx_MEM (SImode,
20765 gen_rtx_POST_INC (SImode,
20766 stack_pointer_rtx));
20767 set_mem_alias_set (tmp1, get_frame_alias_set ());
20768
20769 reg = gen_rtx_REG (SImode, j);
20770 tmp = gen_rtx_SET (reg, tmp1);
20771 RTX_FRAME_RELATED_P (tmp) = 1;
20772 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20773
20774 if (return_in_pc)
20775 {
20776 /* If return_in_pc, j must be PC_REGNUM. */
20777 gcc_assert (j == PC_REGNUM);
20778 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20779 XVECEXP (par, 0, 0) = ret_rtx;
20780 XVECEXP (par, 0, 1) = tmp;
20781 par = emit_jump_insn (par);
20782 }
20783 else
20784 {
20785 par = emit_insn (tmp);
20786 REG_NOTES (par) = dwarf;
20787 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20788 stack_pointer_rtx, stack_pointer_rtx);
20789 }
20790
20791 }
20792 else if ((num_regs % 2) == 1 && return_in_pc)
20793 {
20794 /* There are 2 registers to be popped. So, generate the pattern
20795 pop_multiple_with_stack_update_and_return to pop in PC. */
20796 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20797 }
20798
20799 return;
20800 }
20801
20802 /* LDRD in ARM mode needs consecutive registers as operands. This function
20803 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20804 offset addressing and then generates one separate stack update. This provides
20805 more scheduling freedom, compared to writeback on every load. However,
20806 if the function returns using load into PC directly
20807 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20808 before the last load. TODO: Add a peephole optimization to recognize
20809 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20810 a peephole optimization to merge the load at stack-offset zero
20811 with the stack update instruction using load with writeback
20812 in post-index addressing mode. */
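/* As an illustrative sketch (assuming SAVED_REGS_MASK covers {r4, r5, r6, pc};
   not taken from actual compiler output), the sequence emitted below
   corresponds to:

       ldrd    r4, r5, [sp]            @ consecutive pair via LDRD
       ldr     r6, [sp, #8]            @ single-word load, offset addressing
       add     sp, sp, #12             @ one separate stack update
       ldr     pc, [sp], #4            @ return by loading PC last  */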
20813 static void
20814 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20815 {
20816 int j = 0;
20817 int offset = 0;
20818 rtx par = NULL_RTX;
20819 rtx dwarf = NULL_RTX;
20820 rtx tmp, mem;
20821
20822 /* Restore saved registers. */
20823 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20824 j = 0;
20825 while (j <= LAST_ARM_REGNUM)
20826 if (saved_regs_mask & (1 << j))
20827 {
20828 if ((j % 2) == 0
20829 && (saved_regs_mask & (1 << (j + 1)))
20830 && (j + 1) != PC_REGNUM)
20831 {
20832 /* The current register and the next register form a register pair for which
20833 LDRD can be generated. PC is always the last register popped, and
20834 we handle it separately. */
20835 if (offset > 0)
20836 mem = gen_frame_mem (DImode,
20837 plus_constant (Pmode,
20838 stack_pointer_rtx,
20839 offset));
20840 else
20841 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20842
20843 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20844 tmp = emit_insn (tmp);
20845 RTX_FRAME_RELATED_P (tmp) = 1;
20846
20847 /* Generate dwarf info. */
20848
20849 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20850 gen_rtx_REG (SImode, j),
20851 NULL_RTX);
20852 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20853 gen_rtx_REG (SImode, j + 1),
20854 dwarf);
20855
20856 REG_NOTES (tmp) = dwarf;
20857
20858 offset += 8;
20859 j += 2;
20860 }
20861 else if (j != PC_REGNUM)
20862 {
20863 /* Emit a single word load. */
20864 if (offset > 0)
20865 mem = gen_frame_mem (SImode,
20866 plus_constant (Pmode,
20867 stack_pointer_rtx,
20868 offset));
20869 else
20870 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20871
20872 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20873 tmp = emit_insn (tmp);
20874 RTX_FRAME_RELATED_P (tmp) = 1;
20875
20876 /* Generate dwarf info. */
20877 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20878 gen_rtx_REG (SImode, j),
20879 NULL_RTX);
20880
20881 offset += 4;
20882 j += 1;
20883 }
20884 else /* j == PC_REGNUM */
20885 j++;
20886 }
20887 else
20888 j++;
20889
20890 /* Update the stack. */
20891 if (offset > 0)
20892 {
20893 tmp = gen_rtx_SET (stack_pointer_rtx,
20894 plus_constant (Pmode,
20895 stack_pointer_rtx,
20896 offset));
20897 tmp = emit_insn (tmp);
20898 arm_add_cfa_adjust_cfa_note (tmp, offset,
20899 stack_pointer_rtx, stack_pointer_rtx);
20900 offset = 0;
20901 }
20902
20903 if (saved_regs_mask & (1 << PC_REGNUM))
20904 {
20905 /* Only PC is to be popped. */
20906 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20907 XVECEXP (par, 0, 0) = ret_rtx;
20908 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20909 gen_frame_mem (SImode,
20910 gen_rtx_POST_INC (SImode,
20911 stack_pointer_rtx)));
20912 RTX_FRAME_RELATED_P (tmp) = 1;
20913 XVECEXP (par, 0, 1) = tmp;
20914 par = emit_jump_insn (par);
20915
20916 /* Generate dwarf info. */
20917 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20918 gen_rtx_REG (SImode, PC_REGNUM),
20919 NULL_RTX);
20920 REG_NOTES (par) = dwarf;
20921 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20922 stack_pointer_rtx, stack_pointer_rtx);
20923 }
20924 }
20925
20926 /* Calculate the size of the return value that is passed in registers. */
20927 static unsigned
20928 arm_size_return_regs (void)
20929 {
20930 machine_mode mode;
20931
20932 if (crtl->return_rtx != 0)
20933 mode = GET_MODE (crtl->return_rtx);
20934 else
20935 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20936
20937 return GET_MODE_SIZE (mode);
20938 }
20939
20940 /* Return true if the current function needs to save/restore LR. */
20941 static bool
20942 thumb_force_lr_save (void)
20943 {
20944 return !cfun->machine->lr_save_eliminated
20945 && (!crtl->is_leaf
20946 || thumb_far_jump_used_p ()
20947 || df_regs_ever_live_p (LR_REGNUM));
20948 }
20949
20950 /* We do not know whether r3 will be available, because
20951 there is an indirect tail call happening in this
20952 particular case. */
20953 static bool
20954 is_indirect_tailcall_p (rtx call)
20955 {
20956 rtx pat = PATTERN (call);
20957
20958 /* Indirect tail call. */
20959 pat = XVECEXP (pat, 0, 0);
20960 if (GET_CODE (pat) == SET)
20961 pat = SET_SRC (pat);
20962
20963 pat = XEXP (XEXP (pat, 0), 0);
20964 return REG_P (pat);
20965 }
20966
20967 /* Return true if r3 is used by any of the tail call insns in the
20968 current function. */
20969 static bool
20970 any_sibcall_could_use_r3 (void)
20971 {
20972 edge_iterator ei;
20973 edge e;
20974
20975 if (!crtl->tail_call_emit)
20976 return false;
20977 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20978 if (e->flags & EDGE_SIBCALL)
20979 {
20980 rtx_insn *call = BB_END (e->src);
20981 if (!CALL_P (call))
20982 call = prev_nonnote_nondebug_insn (call);
20983 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20984 if (find_regno_fusage (call, USE, 3)
20985 || is_indirect_tailcall_p (call))
20986 return true;
20987 }
20988 return false;
20989 }
20990
20991
20992 /* Compute the distance from register FROM to register TO.
20993 These can be the arg pointer (26), the soft frame pointer (25),
20994 the stack pointer (13) or the hard frame pointer (11).
20995 In thumb mode r7 is used as the soft frame pointer, if needed.
20996 Typical stack layout looks like this:
20997
20998 old stack pointer -> | |
20999 ----
21000 | | \
21001 | | saved arguments for
21002 | | vararg functions
21003 | | /
21004 --
21005 hard FP & arg pointer -> | | \
21006 | | stack
21007 | | frame
21008 | | /
21009 --
21010 | | \
21011 | | call saved
21012 | | registers
21013 soft frame pointer -> | | /
21014 --
21015 | | \
21016 | | local
21017 | | variables
21018 locals base pointer -> | | /
21019 --
21020 | | \
21021 | | outgoing
21022 | | arguments
21023 current stack pointer -> | | /
21024 --
21025
21026 For a given function some or all of these stack components
21027 may not be needed, giving rise to the possibility of
21028 eliminating some of the registers.
21029
21030 The values returned by this function must reflect the behavior
21031 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21032
21033 The sign of the number returned reflects the direction of stack
21034 growth, so the values are positive for all eliminations except
21035 from the soft frame pointer to the hard frame pointer.
21036
21037 SFP may point just inside the local variables block to ensure correct
21038 alignment. */
21039
21040
21041 /* Return cached stack offsets. */
21042
21043 static arm_stack_offsets *
21044 arm_get_frame_offsets (void)
21045 {
21046 struct arm_stack_offsets *offsets;
21047
21048 offsets = &cfun->machine->stack_offsets;
21049
21050 return offsets;
21051 }
21052
21053
21054 /* Calculate stack offsets. These are used to calculate register elimination
21055 offsets and in prologue/epilogue code. Also calculates which registers
21056 should be saved. */
21057
21058 static void
21059 arm_compute_frame_layout (void)
21060 {
21061 struct arm_stack_offsets *offsets;
21062 unsigned long func_type;
21063 int saved;
21064 int core_saved;
21065 HOST_WIDE_INT frame_size;
21066 int i;
21067
21068 offsets = &cfun->machine->stack_offsets;
21069
21070 /* Initially this is the size of the local variables. It will be translated
21071 into an offset once we have determined the size of the preceding data. */
21072 frame_size = ROUND_UP_WORD (get_frame_size ());
21073
21074 /* Space for variadic functions. */
21075 offsets->saved_args = crtl->args.pretend_args_size;
21076
21077 /* In Thumb mode this is incorrect, but never used. */
21078 offsets->frame
21079 = (offsets->saved_args
21080 + arm_compute_static_chain_stack_bytes ()
21081 + (frame_pointer_needed ? 4 : 0));
21082
21083 if (TARGET_32BIT)
21084 {
21085 unsigned int regno;
21086
21087 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21088 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21089 saved = core_saved;
21090
21091 /* We know that SP will be doubleword aligned on entry, and we must
21092 preserve that condition at any subroutine call. We also require the
21093 soft frame pointer to be doubleword aligned. */
21094
21095 if (TARGET_REALLY_IWMMXT)
21096 {
21097 /* Check for the call-saved iWMMXt registers. */
21098 for (regno = FIRST_IWMMXT_REGNUM;
21099 regno <= LAST_IWMMXT_REGNUM;
21100 regno++)
21101 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21102 saved += 8;
21103 }
21104
21105 func_type = arm_current_func_type ();
21106 /* Space for saved VFP registers. */
21107 if (! IS_VOLATILE (func_type)
21108 && TARGET_HARD_FLOAT)
21109 saved += arm_get_vfp_saved_size ();
21110 }
21111 else /* TARGET_THUMB1 */
21112 {
21113 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21114 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21115 saved = core_saved;
21116 if (TARGET_BACKTRACE)
21117 saved += 16;
21118 }
21119
21120 /* Saved registers include the stack frame. */
21121 offsets->saved_regs
21122 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21123 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21124
21125 /* A leaf function does not need any stack alignment if it has nothing
21126 on the stack. */
21127 if (crtl->is_leaf && frame_size == 0
21128 /* However if it calls alloca(), we have a dynamically allocated
21129 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21130 && ! cfun->calls_alloca)
21131 {
21132 offsets->outgoing_args = offsets->soft_frame;
21133 offsets->locals_base = offsets->soft_frame;
21134 return;
21135 }
21136
21137 /* Ensure SFP has the correct alignment. */
21138 if (ARM_DOUBLEWORD_ALIGN
21139 && (offsets->soft_frame & 7))
21140 {
21141 offsets->soft_frame += 4;
21142 /* Try to align stack by pushing an extra reg. Don't bother doing this
21143 when there is a stack frame as the alignment will be rolled into
21144 the normal stack adjustment. */
21145 if (frame_size + crtl->outgoing_args_size == 0)
21146 {
21147 int reg = -1;
21148
21149 /* Register r3 is caller-saved. Normally it does not need to be
21150 saved on entry by the prologue. However if we choose to save
21151 it for padding then we may confuse the compiler into thinking
21152 a prologue sequence is required when in fact it is not. This
21153 will occur when shrink-wrapping if r3 is used as a scratch
21154 register and there are no other callee-saved writes.
21155
21156 This situation can be avoided when other callee-saved registers
21157 are available and r3 is not mandatory if we choose a callee-saved
21158 register for padding. */
21159 bool prefer_callee_reg_p = false;
21160
21161 /* If it is safe to use r3, then do so. This sometimes
21162 generates better code on Thumb-2 by avoiding the need to
21163 use 32-bit push/pop instructions. */
21164 if (! any_sibcall_could_use_r3 ()
21165 && arm_size_return_regs () <= 12
21166 && (offsets->saved_regs_mask & (1 << 3)) == 0
21167 && (TARGET_THUMB2
21168 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21169 {
21170 reg = 3;
21171 if (!TARGET_THUMB2)
21172 prefer_callee_reg_p = true;
21173 }
21174 if (reg == -1
21175 || prefer_callee_reg_p)
21176 {
21177 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21178 {
21179 /* Avoid fixed registers; they may be changed at
21180 arbitrary times so it's unsafe to restore them
21181 during the epilogue. */
21182 if (!fixed_regs[i]
21183 && (offsets->saved_regs_mask & (1 << i)) == 0)
21184 {
21185 reg = i;
21186 break;
21187 }
21188 }
21189 }
21190
21191 if (reg != -1)
21192 {
21193 offsets->saved_regs += 4;
21194 offsets->saved_regs_mask |= (1 << reg);
21195 }
21196 }
21197 }
21198
21199 offsets->locals_base = offsets->soft_frame + frame_size;
21200 offsets->outgoing_args = (offsets->locals_base
21201 + crtl->outgoing_args_size);
21202
21203 if (ARM_DOUBLEWORD_ALIGN)
21204 {
21205 /* Ensure SP remains doubleword aligned. */
21206 if (offsets->outgoing_args & 7)
21207 offsets->outgoing_args += 4;
21208 gcc_assert (!(offsets->outgoing_args & 7));
21209 }
21210 }
21211
21212
21213 /* Calculate the relative offsets for the different stack pointers. Positive
21214 offsets are in the direction of stack growth. */
21215
21216 HOST_WIDE_INT
21217 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21218 {
21219 arm_stack_offsets *offsets;
21220
21221 offsets = arm_get_frame_offsets ();
21222
21223 /* OK, now we have enough information to compute the distances.
21224 There must be an entry in these switch tables for each pair
21225 of registers in ELIMINABLE_REGS, even if some of the entries
21226 seem to be redundant or useless. */
21227 switch (from)
21228 {
21229 case ARG_POINTER_REGNUM:
21230 switch (to)
21231 {
21232 case THUMB_HARD_FRAME_POINTER_REGNUM:
21233 return 0;
21234
21235 case FRAME_POINTER_REGNUM:
21236 /* This is the reverse of the soft frame pointer
21237 to hard frame pointer elimination below. */
21238 return offsets->soft_frame - offsets->saved_args;
21239
21240 case ARM_HARD_FRAME_POINTER_REGNUM:
21241 /* This is only non-zero in the case where the static chain register
21242 is stored above the frame. */
21243 return offsets->frame - offsets->saved_args - 4;
21244
21245 case STACK_POINTER_REGNUM:
21246 /* If nothing has been pushed on the stack at all
21247 then this will return -4. This *is* correct! */
21248 return offsets->outgoing_args - (offsets->saved_args + 4);
21249
21250 default:
21251 gcc_unreachable ();
21252 }
21253 gcc_unreachable ();
21254
21255 case FRAME_POINTER_REGNUM:
21256 switch (to)
21257 {
21258 case THUMB_HARD_FRAME_POINTER_REGNUM:
21259 return 0;
21260
21261 case ARM_HARD_FRAME_POINTER_REGNUM:
21262 /* The hard frame pointer points to the top entry in the
21263 stack frame. The soft frame pointer points to the bottom entry
21264 in the stack frame. If there is no stack frame at all,
21265 then they are identical. */
21266
21267 return offsets->frame - offsets->soft_frame;
21268
21269 case STACK_POINTER_REGNUM:
21270 return offsets->outgoing_args - offsets->soft_frame;
21271
21272 default:
21273 gcc_unreachable ();
21274 }
21275 gcc_unreachable ();
21276
21277 default:
21278 /* You cannot eliminate from the stack pointer.
21279 In theory you could eliminate from the hard frame
21280 pointer to the stack pointer, but this will never
21281 happen, since if a stack frame is not needed the
21282 hard frame pointer will never be used. */
21283 gcc_unreachable ();
21284 }
21285 }
21286
21287 /* Given FROM and TO register numbers, say whether this elimination is
21288 allowed. Frame pointer elimination is automatically handled.
21289
21290 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21291 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21292 pointer, we must eliminate FRAME_POINTER_REGNUM into
21293 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21294 ARG_POINTER_REGNUM. */
21295
21296 bool
21297 arm_can_eliminate (const int from, const int to)
21298 {
21299 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21300 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21301 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21302 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21303 true);
21304 }
21305
21306 /* Emit RTL to save coprocessor registers on function entry. Returns the
21307 number of bytes pushed. */
21308
21309 static int
21310 arm_save_coproc_regs (void)
21311 {
21312 int saved_size = 0;
21313 unsigned reg;
21314 unsigned start_reg;
21315 rtx insn;
21316
21317 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21318 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21319 {
21320 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21321 insn = gen_rtx_MEM (V2SImode, insn);
21322 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21323 RTX_FRAME_RELATED_P (insn) = 1;
21324 saved_size += 8;
21325 }
21326
21327 if (TARGET_HARD_FLOAT)
21328 {
21329 start_reg = FIRST_VFP_REGNUM;
21330
21331 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21332 {
21333 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21334 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21335 {
21336 if (start_reg != reg)
21337 saved_size += vfp_emit_fstmd (start_reg,
21338 (reg - start_reg) / 2);
21339 start_reg = reg + 2;
21340 }
21341 }
21342 if (start_reg != reg)
21343 saved_size += vfp_emit_fstmd (start_reg,
21344 (reg - start_reg) / 2);
21345 }
21346 return saved_size;
21347 }
21348
21349
21350 /* Set the Thumb frame pointer from the stack pointer. */
21351
21352 static void
21353 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21354 {
21355 HOST_WIDE_INT amount;
21356 rtx insn, dwarf;
21357
21358 amount = offsets->outgoing_args - offsets->locals_base;
21359 if (amount < 1024)
21360 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21361 stack_pointer_rtx, GEN_INT (amount)));
21362 else
21363 {
21364 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21365 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21366 expects the first two operands to be the same. */
21367 if (TARGET_THUMB2)
21368 {
21369 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21370 stack_pointer_rtx,
21371 hard_frame_pointer_rtx));
21372 }
21373 else
21374 {
21375 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21376 hard_frame_pointer_rtx,
21377 stack_pointer_rtx));
21378 }
21379 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21380 plus_constant (Pmode, stack_pointer_rtx, amount));
21381 RTX_FRAME_RELATED_P (dwarf) = 1;
21382 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21383 }
21384
21385 RTX_FRAME_RELATED_P (insn) = 1;
21386 }
21387
21388 struct scratch_reg {
21389 rtx reg;
21390 bool saved;
21391 };
21392
21393 /* Return a short-lived scratch register for use as a 2nd scratch register on
21394 function entry after the registers are saved in the prologue. This register
21395 must be released by means of release_scratch_register_on_entry. IP is not
21396 considered since it is always used as the 1st scratch register if available.
21397
21398 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21399 mask of live registers. */
21400
21401 static void
21402 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21403 unsigned long live_regs)
21404 {
21405 int regno = -1;
21406
21407 sr->saved = false;
21408
21409 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21410 regno = LR_REGNUM;
21411 else
21412 {
21413 unsigned int i;
21414
21415 for (i = 4; i < 11; i++)
21416 if (regno1 != i && (live_regs & (1 << i)) != 0)
21417 {
21418 regno = i;
21419 break;
21420 }
21421
21422 if (regno < 0)
21423 {
21424 /* If IP is used as the 1st scratch register for a nested function,
21425 then either r3 wasn't available or is used to preserve IP. */
21426 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21427 regno1 = 3;
21428 regno = (regno1 == 3 ? 2 : 3);
21429 sr->saved
21430 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21431 regno);
21432 }
21433 }
21434
21435 sr->reg = gen_rtx_REG (SImode, regno);
21436 if (sr->saved)
21437 {
21438 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21439 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21440 rtx x = gen_rtx_SET (stack_pointer_rtx,
21441 plus_constant (Pmode, stack_pointer_rtx, -4));
21442 RTX_FRAME_RELATED_P (insn) = 1;
21443 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21444 }
21445 }
21446
21447 /* Release a scratch register obtained from the preceding function. */
21448
21449 static void
21450 release_scratch_register_on_entry (struct scratch_reg *sr)
21451 {
21452 if (sr->saved)
21453 {
21454 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21455 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21456 rtx x = gen_rtx_SET (stack_pointer_rtx,
21457 plus_constant (Pmode, stack_pointer_rtx, 4));
21458 RTX_FRAME_RELATED_P (insn) = 1;
21459 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21460 }
21461 }
21462
21463 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21464
21465 #if PROBE_INTERVAL > 4096
21466 #error Cannot use indexed addressing mode for stack probing
21467 #endif
21468
21469 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21470 inclusive. These are offsets from the current stack pointer. REGNO1
21471 is the index number of the 1st scratch register and LIVE_REGS is the
21472 mask of live registers. */
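/* As a worked example (assuming FIRST = 4096, SIZE = 1024 and
   PROBE_INTERVAL = 4096): the "small size" case below computes
   REG1 = SP - (4096 + 4096) and probes at REG1 + (4096 - 1024),
   i.e. at SP - 5120 = SP - (FIRST + SIZE), so a single probe covers
   the whole range.  */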
21473
21474 static void
21475 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21476 unsigned int regno1, unsigned long live_regs)
21477 {
21478 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21479
21480 /* See if we have a constant small number of probes to generate. If so,
21481 that's the easy case. */
21482 if (size <= PROBE_INTERVAL)
21483 {
21484 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21485 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21486 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21487 }
21488
21489 /* The run-time loop is made up of 10 insns in the generic case while the
21490 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21491 else if (size <= 5 * PROBE_INTERVAL)
21492 {
21493 HOST_WIDE_INT i, rem;
21494
21495 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21496 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21497 emit_stack_probe (reg1);
21498
21499 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21500 N * PROBE_INTERVAL exceeds SIZE. If only two probes are needed, this will not
21501 generate any code. Then probe at FIRST + SIZE. */
21502 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21503 {
21504 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21505 emit_stack_probe (reg1);
21506 }
21507
21508 rem = size - (i - PROBE_INTERVAL);
21509 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21510 {
21511 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21512 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21513 }
21514 else
21515 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21516 }
21517
21518 /* Otherwise, do the same as above, but in a loop. Note that we must be
21519 extra careful with variables wrapping around because we might be at
21520 the very top (or the very bottom) of the address space and we have
21521 to be able to handle this case properly; in particular, we use an
21522 equality test for the loop condition. */
21523 else
21524 {
21525 HOST_WIDE_INT rounded_size;
21526 struct scratch_reg sr;
21527
21528 get_scratch_register_on_entry (&sr, regno1, live_regs);
21529
21530 emit_move_insn (reg1, GEN_INT (first));
21531
21532
21533 /* Step 1: round SIZE to the previous multiple of the interval. */
21534
21535 rounded_size = size & -PROBE_INTERVAL;
21536 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21537
21538
21539 /* Step 2: compute initial and final value of the loop counter. */
21540
21541 /* TEST_ADDR = SP + FIRST. */
21542 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21543
21544 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21545 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21546
21547
21548 /* Step 3: the loop
21549
21550 do
21551 {
21552 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21553 probe at TEST_ADDR
21554 }
21555 while (TEST_ADDR != LAST_ADDR)
21556
21557 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21558 until N * PROBE_INTERVAL is equal to ROUNDED_SIZE. */
21559
21560 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21561
21562
21563 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21564 that SIZE is equal to ROUNDED_SIZE. */
21565
21566 if (size != rounded_size)
21567 {
21568 HOST_WIDE_INT rem = size - rounded_size;
21569
21570 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21571 {
21572 emit_set_insn (sr.reg,
21573 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21574 emit_stack_probe (plus_constant (Pmode, sr.reg,
21575 PROBE_INTERVAL - rem));
21576 }
21577 else
21578 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21579 }
21580
21581 release_scratch_register_on_entry (&sr);
21582 }
21583
21584 /* Make sure nothing is scheduled before we are done. */
21585 emit_insn (gen_blockage ());
21586 }
21587
21588 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21589 absolute addresses. */
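/* As a sketch, assuming the default 4 kB probe interval, the loop emitted
   below looks like this (register numbers and the label are illustrative):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
*/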
21590
21591 const char *
21592 output_probe_stack_range (rtx reg1, rtx reg2)
21593 {
21594 static int labelno = 0;
21595 char loop_lab[32];
21596 rtx xops[2];
21597
21598 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21599
21600 /* Loop. */
21601 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21602
21603 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21604 xops[0] = reg1;
21605 xops[1] = GEN_INT (PROBE_INTERVAL);
21606 output_asm_insn ("sub\t%0, %0, %1", xops);
21607
21608 /* Probe at TEST_ADDR. */
21609 output_asm_insn ("str\tr0, [%0, #0]", xops);
21610
21611 /* Test if TEST_ADDR == LAST_ADDR. */
21612 xops[1] = reg2;
21613 output_asm_insn ("cmp\t%0, %1", xops);
21614
21615 /* Branch. */
21616 fputs ("\tbne\t", asm_out_file);
21617 assemble_name_raw (asm_out_file, loop_lab);
21618 fputc ('\n', asm_out_file);
21619
21620 return "";
21621 }
21622
21623 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21624 function. */
21625 void
21626 arm_expand_prologue (void)
21627 {
21628 rtx amount;
21629 rtx insn;
21630 rtx ip_rtx;
21631 unsigned long live_regs_mask;
21632 unsigned long func_type;
21633 int fp_offset = 0;
21634 int saved_pretend_args = 0;
21635 int saved_regs = 0;
21636 unsigned HOST_WIDE_INT args_to_push;
21637 HOST_WIDE_INT size;
21638 arm_stack_offsets *offsets;
21639 bool clobber_ip;
21640
21641 func_type = arm_current_func_type ();
21642
21643 /* Naked functions don't have prologues. */
21644 if (IS_NAKED (func_type))
21645 {
21646 if (flag_stack_usage_info)
21647 current_function_static_stack_size = 0;
21648 return;
21649 }
21650
21651 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21652 args_to_push = crtl->args.pretend_args_size;
21653
21654 /* Compute which registers we will have to save onto the stack. */
21655 offsets = arm_get_frame_offsets ();
21656 live_regs_mask = offsets->saved_regs_mask;
21657
21658 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21659
21660 if (IS_STACKALIGN (func_type))
21661 {
21662 rtx r0, r1;
21663
21664 /* Handle a word-aligned stack pointer. We generate the following:
21665
21666 mov r0, sp
21667 bic r1, r0, #7
21668 mov sp, r1
21669 <save and restore r0 in normal prologue/epilogue>
21670 mov sp, r0
21671 bx lr
21672
21673 The unwinder doesn't need to know about the stack realignment.
21674 Just tell it we saved SP in r0. */
21675 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21676
21677 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21678 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21679
21680 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21681 RTX_FRAME_RELATED_P (insn) = 1;
21682 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21683
21684 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21685
21686 /* ??? The CFA changes here, which may cause GDB to conclude that it
21687 has entered a different function. That said, the unwind info is
21688 correct, individually, before and after this instruction because
21689 we've described the save of SP, which will override the default
21690 handling of SP as restoring from the CFA. */
21691 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21692 }
21693
21694 /* Let's compute the static_chain_stack_bytes required and store it. Right
21695 now the value must be -1 as stored by arm_init_machine_status (). */
21696 cfun->machine->static_chain_stack_bytes
21697 = arm_compute_static_chain_stack_bytes ();
21698
21699 /* The static chain register is the same as the IP register. If it is
21700 clobbered when creating the frame, we need to save and restore it. */
21701 clobber_ip = IS_NESTED (func_type)
21702 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21703 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21704 || flag_stack_clash_protection)
21705 && !df_regs_ever_live_p (LR_REGNUM)
21706 && arm_r3_live_at_start_p ()));
21707
21708 /* Find somewhere to store IP whilst the frame is being created.
21709 We try the following places in order:
21710
21711 1. The last argument register r3 if it is available.
21712 2. A slot on the stack above the frame if there are no
21713 arguments to push onto the stack.
21714 3. Register r3 again, after pushing the argument registers
21715 onto the stack, if this is a varargs function.
21716 4. The last slot on the stack created for the arguments to
21717 push, if this isn't a varargs function.
21718
21719 Note - we only need to tell the dwarf2 backend about the SP
21720 adjustment in the second variant; the static chain register
21721 doesn't need to be unwound, as it doesn't contain a value
21722 inherited from the caller. */
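/* As an illustration of the second case above, the store emitted is a
   pre-decrement "str ip, [sp, #-4]!" whose unwind note only records the
   4-byte SP adjustment (a sketch of the code below, not extra behaviour).  */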
21723 if (clobber_ip)
21724 {
21725 if (!arm_r3_live_at_start_p ())
21726 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21727 else if (args_to_push == 0)
21728 {
21729 rtx addr, dwarf;
21730
21731 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21732 saved_regs += 4;
21733
21734 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21735 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21736 fp_offset = 4;
21737
21738 /* Just tell the dwarf backend that we adjusted SP. */
21739 dwarf = gen_rtx_SET (stack_pointer_rtx,
21740 plus_constant (Pmode, stack_pointer_rtx,
21741 -fp_offset));
21742 RTX_FRAME_RELATED_P (insn) = 1;
21743 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21744 }
21745 else
21746 {
21747 /* Store the args on the stack. */
21748 if (cfun->machine->uses_anonymous_args)
21749 {
21750 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21751 (0xf0 >> (args_to_push / 4)) & 0xf);
21752 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21753 saved_pretend_args = 1;
21754 }
21755 else
21756 {
21757 rtx addr, dwarf;
21758
21759 if (args_to_push == 4)
21760 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21761 else
21762 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21763 plus_constant (Pmode,
21764 stack_pointer_rtx,
21765 -args_to_push));
21766
21767 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21768
21769 /* Just tell the dwarf backend that we adjusted SP. */
21770 dwarf = gen_rtx_SET (stack_pointer_rtx,
21771 plus_constant (Pmode, stack_pointer_rtx,
21772 -args_to_push));
21773 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21774 }
21775
21776 RTX_FRAME_RELATED_P (insn) = 1;
21777 fp_offset = args_to_push;
21778 args_to_push = 0;
21779 }
21780 }
21781
21782 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21783 {
21784 if (IS_INTERRUPT (func_type))
21785 {
21786 /* Interrupt functions must not corrupt any registers.
21787 Creating a frame pointer, however, corrupts the IP
21788 register, so we must push it first. */
21789 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21790
21791 /* Do not set RTX_FRAME_RELATED_P on this insn.
21792 The dwarf stack unwinding code only wants to see one
21793 stack decrement per function, and this is not it. If
21794 this instruction is labeled as being part of the frame
21795 creation sequence then dwarf2out_frame_debug_expr will
21796 die when it encounters the assignment of IP to FP
21797 later on, since the use of SP here establishes SP as
21798 the CFA register and not IP.
21799
21800 Anyway this instruction is not really part of the stack
21801 frame creation although it is part of the prologue. */
21802 }
21803
21804 insn = emit_set_insn (ip_rtx,
21805 plus_constant (Pmode, stack_pointer_rtx,
21806 fp_offset));
21807 RTX_FRAME_RELATED_P (insn) = 1;
21808 }
21809
21810 if (args_to_push)
21811 {
21812 /* Push the argument registers, or reserve space for them. */
21813 if (cfun->machine->uses_anonymous_args)
21814 insn = emit_multi_reg_push
21815 ((0xf0 >> (args_to_push / 4)) & 0xf,
21816 (0xf0 >> (args_to_push / 4)) & 0xf);
21817 else
21818 insn = emit_insn
21819 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21820 GEN_INT (- args_to_push)));
21821 RTX_FRAME_RELATED_P (insn) = 1;
21822 }
21823
21824 /* If this is an interrupt service routine, the link register is
21825 going to be pushed, and we're not generating the extra push of IP
21826 (needed when the frame pointer is needed and the frame layout is APCS),
21827 then subtracting four from LR now will mean that the function return
21828 can be done with a single instruction. */
21829 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21830 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21831 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21832 && TARGET_ARM)
21833 {
21834 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21835
21836 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21837 }
21838
21839 if (live_regs_mask)
21840 {
21841 unsigned long dwarf_regs_mask = live_regs_mask;
21842
21843 saved_regs += bit_count (live_regs_mask) * 4;
21844 if (optimize_size && !frame_pointer_needed
21845 && saved_regs == offsets->saved_regs - offsets->saved_args)
21846 {
21847 /* If no coprocessor registers are being pushed and we don't have
21848 to worry about a frame pointer then push extra registers to
21849 create the stack frame. This is done in a way that does not
21850 alter the frame layout, so is independent of the epilogue. */
21851 int n;
21852 int frame;
21853 n = 0;
21854 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21855 n++;
21856 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21857 if (frame && n * 4 >= frame)
21858 {
21859 n = frame / 4;
21860 live_regs_mask |= (1 << n) - 1;
21861 saved_regs += frame;
21862 }
21863 }
21864
21865 if (TARGET_LDRD
21866 && current_tune->prefer_ldrd_strd
21867 && !optimize_function_for_size_p (cfun))
21868 {
21869 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21870 if (TARGET_THUMB2)
21871 thumb2_emit_strd_push (live_regs_mask);
21872 else if (TARGET_ARM
21873 && !TARGET_APCS_FRAME
21874 && !IS_INTERRUPT (func_type))
21875 arm_emit_strd_push (live_regs_mask);
21876 else
21877 {
21878 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21879 RTX_FRAME_RELATED_P (insn) = 1;
21880 }
21881 }
21882 else
21883 {
21884 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21885 RTX_FRAME_RELATED_P (insn) = 1;
21886 }
21887 }
21888
21889 if (! IS_VOLATILE (func_type))
21890 saved_regs += arm_save_coproc_regs ();
21891
21892 if (frame_pointer_needed && TARGET_ARM)
21893 {
21894 /* Create the new frame pointer. */
21895 if (TARGET_APCS_FRAME)
21896 {
21897 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21898 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21899 RTX_FRAME_RELATED_P (insn) = 1;
21900 }
21901 else
21902 {
21903 insn = GEN_INT (saved_regs - (4 + fp_offset));
21904 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21905 stack_pointer_rtx, insn));
21906 RTX_FRAME_RELATED_P (insn) = 1;
21907 }
21908 }
21909
21910 size = offsets->outgoing_args - offsets->saved_args;
21911 if (flag_stack_usage_info)
21912 current_function_static_stack_size = size;
21913
21914 /* If this isn't an interrupt service routine and we have a frame, then do
21915 stack checking. We use IP as the first scratch register, except for the
21916 non-APCS nested functions when LR or r3 is available (see clobber_ip). */
21917 if (!IS_INTERRUPT (func_type)
21918 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21919 || flag_stack_clash_protection))
21920 {
21921 unsigned int regno;
21922
21923 if (!IS_NESTED (func_type) || clobber_ip)
21924 regno = IP_REGNUM;
21925 else if (df_regs_ever_live_p (LR_REGNUM))
21926 regno = LR_REGNUM;
21927 else
21928 regno = 3;
21929
21930 if (crtl->is_leaf && !cfun->calls_alloca)
21931 {
21932 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21933 arm_emit_probe_stack_range (get_stack_check_protect (),
21934 size - get_stack_check_protect (),
21935 regno, live_regs_mask);
21936 }
21937 else if (size > 0)
21938 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21939 regno, live_regs_mask);
21940 }
21941
21942 /* Recover the static chain register. */
21943 if (clobber_ip)
21944 {
21945 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21946 insn = gen_rtx_REG (SImode, 3);
21947 else
21948 {
21949 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21950 insn = gen_frame_mem (SImode, insn);
21951 }
21952 emit_set_insn (ip_rtx, insn);
21953 emit_insn (gen_force_register_use (ip_rtx));
21954 }
21955
21956 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21957 {
21958 /* This add can produce multiple insns for a large constant, so we
21959 need to get tricky. */
21960 rtx_insn *last = get_last_insn ();
21961
21962 amount = GEN_INT (offsets->saved_args + saved_regs
21963 - offsets->outgoing_args);
21964
21965 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21966 amount));
21967 do
21968 {
21969 last = last ? NEXT_INSN (last) : get_insns ();
21970 RTX_FRAME_RELATED_P (last) = 1;
21971 }
21972 while (last != insn);
21973
21974 /* If the frame pointer is needed, emit a special barrier that
21975 will prevent the scheduler from moving stores to the frame
21976 before the stack adjustment. */
21977 if (frame_pointer_needed)
21978 emit_insn (gen_stack_tie (stack_pointer_rtx,
21979 hard_frame_pointer_rtx));
21980 }
21981
21982
21983 if (frame_pointer_needed && TARGET_THUMB2)
21984 thumb_set_frame_pointer (offsets);
21985
21986 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21987 {
21988 unsigned long mask;
21989
21990 mask = live_regs_mask;
21991 mask &= THUMB2_WORK_REGS;
21992 if (!IS_NESTED (func_type))
21993 mask |= (1 << IP_REGNUM);
21994 arm_load_pic_register (mask);
21995 }
21996
21997 /* If we are profiling, make sure no instructions are scheduled before
21998 the call to mcount. Similarly if the user has requested no
21999 scheduling in the prolog. Similarly if we want non-call exceptions
22000 using the EABI unwinder, to prevent faulting instructions from being
22001 swapped with a stack adjustment. */
22002 if (crtl->profile || !TARGET_SCHED_PROLOG
22003 || (arm_except_unwind_info (&global_options) == UI_TARGET
22004 && cfun->can_throw_non_call_exceptions))
22005 emit_insn (gen_blockage ());
22006
22007 /* If the link register is being kept alive, with the return address in it,
22008 then make sure that it does not get reused by the ce2 pass. */
22009 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22010 cfun->machine->lr_save_eliminated = 1;
22011 }
22012 \f
22013 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22014 static void
22015 arm_print_condition (FILE *stream)
22016 {
22017 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22018 {
22019 /* Branch conversion is not implemented for Thumb-2. */
22020 if (TARGET_THUMB)
22021 {
22022 output_operand_lossage ("predicated Thumb instruction");
22023 return;
22024 }
22025 if (current_insn_predicate != NULL)
22026 {
22027 output_operand_lossage
22028 ("predicated instruction in conditional sequence");
22029 return;
22030 }
22031
22032 fputs (arm_condition_codes[arm_current_cc], stream);
22033 }
22034 else if (current_insn_predicate)
22035 {
22036 enum arm_cond_code code;
22037
22038 if (TARGET_THUMB1)
22039 {
22040 output_operand_lossage ("predicated Thumb instruction");
22041 return;
22042 }
22043
22044 code = get_arm_condition_code (current_insn_predicate);
22045 fputs (arm_condition_codes[code], stream);
22046 }
22047 }
22048
22049
22050 /* Globally reserved letters: acln
22051 Punctuation letters currently used: @_|?().!#
22052 Lower case letters currently used: bcdefhimpqtvwxyz
22053 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22054 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22055
22056 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22057
22058 If CODE is 'd', then X is a condition operand and the instruction
22059 should only be executed if the condition is true.
22060 If CODE is 'D', then X is a condition operand and the instruction
22061 should only be executed if the condition is false: however, if the mode
22062 of the comparison is CCFPEmode, then always execute the instruction -- we
22063 do this because in these circumstances !GE does not necessarily imply LT;
22064 in these cases the instruction pattern will take care to make sure that
22065 an instruction containing %d will follow, thereby undoing the effects of
22066 doing this instruction unconditionally.
22067 If CODE is 'N' then X is a floating point operand that must be negated
22068 before output.
22069 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22070 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
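/* For instance, %B applied to (const_int 5) prints "-6" (the bitwise
   inverse, sign-extended), and %M applied to a DImode value held in r0
   prints "{r0-r1}".  */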
22071 static void
22072 arm_print_operand (FILE *stream, rtx x, int code)
22073 {
22074 switch (code)
22075 {
22076 case '@':
22077 fputs (ASM_COMMENT_START, stream);
22078 return;
22079
22080 case '_':
22081 fputs (user_label_prefix, stream);
22082 return;
22083
22084 case '|':
22085 fputs (REGISTER_PREFIX, stream);
22086 return;
22087
22088 case '?':
22089 arm_print_condition (stream);
22090 return;
22091
22092 case '.':
22093 /* The current condition code for a condition code setting instruction.
22094 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22095 fputc ('s', stream);
22096 arm_print_condition (stream);
22097 return;
22098
22099 case '!':
22100 /* If the instruction is conditionally executed then print
22101 the current condition code, otherwise print 's'. */
22102 gcc_assert (TARGET_THUMB2);
22103 if (current_insn_predicate)
22104 arm_print_condition (stream);
22105 else
22106 fputc ('s', stream);
22107 break;
22108
22109 /* %# is a "break" sequence. It doesn't output anything, but is used to
22110 separate e.g. operand numbers from following text, if that text consists
22111 of further digits which we don't want to be part of the operand
22112 number. */
22113 case '#':
22114 return;
22115
22116 case 'N':
22117 {
22118 REAL_VALUE_TYPE r;
22119 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22120 fprintf (stream, "%s", fp_const_from_val (&r));
22121 }
22122 return;
22123
22124 /* An integer or symbol address without a preceding # sign. */
22125 case 'c':
22126 switch (GET_CODE (x))
22127 {
22128 case CONST_INT:
22129 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22130 break;
22131
22132 case SYMBOL_REF:
22133 output_addr_const (stream, x);
22134 break;
22135
22136 case CONST:
22137 if (GET_CODE (XEXP (x, 0)) == PLUS
22138 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22139 {
22140 output_addr_const (stream, x);
22141 break;
22142 }
22143 /* Fall through. */
22144
22145 default:
22146 output_operand_lossage ("Unsupported operand for code '%c'", code);
22147 }
22148 return;
22149
22150 /* An integer that we want to print in HEX. */
22151 case 'x':
22152 switch (GET_CODE (x))
22153 {
22154 case CONST_INT:
22155 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22156 break;
22157
22158 default:
22159 output_operand_lossage ("Unsupported operand for code '%c'", code);
22160 }
22161 return;
22162
22163 case 'B':
22164 if (CONST_INT_P (x))
22165 {
22166 HOST_WIDE_INT val;
22167 val = ARM_SIGN_EXTEND (~INTVAL (x));
22168 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22169 }
22170 else
22171 {
22172 putc ('~', stream);
22173 output_addr_const (stream, x);
22174 }
22175 return;
22176
22177 case 'b':
22178 /* Print the log2 of a CONST_INT. */
22179 {
22180 HOST_WIDE_INT val;
22181
22182 if (!CONST_INT_P (x)
22183 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22184 output_operand_lossage ("Unsupported operand for code '%c'", code);
22185 else
22186 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22187 }
22188 return;
22189
22190 case 'L':
22191 /* The low 16 bits of an immediate constant. */
22192 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
22193 return;
22194
22195 case 'i':
22196 fprintf (stream, "%s", arithmetic_instr (x, 1));
22197 return;
22198
22199 case 'I':
22200 fprintf (stream, "%s", arithmetic_instr (x, 0));
22201 return;
22202
22203 case 'S':
22204 {
22205 HOST_WIDE_INT val;
22206 const char *shift;
22207
22208 shift = shift_op (x, &val);
22209
22210 if (shift)
22211 {
22212 fprintf (stream, ", %s ", shift);
22213 if (val == -1)
22214 arm_print_operand (stream, XEXP (x, 1), 0);
22215 else
22216 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22217 }
22218 }
22219 return;
22220
22221 /* An explanation of the 'Q', 'R' and 'H' register operands:
22222
22223 In a pair of registers containing a DI or DF value the 'Q'
22224 operand returns the register number of the register containing
22225 the least significant part of the value. The 'R' operand returns
22226 the register number of the register containing the most
22227 significant part of the value.
22228
22229 The 'H' operand returns the higher of the two register numbers.
22230 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22231 same as the 'Q' operand, since the most significant part of the
22232 value is held in the lower number register. The reverse is true
22233 on systems where WORDS_BIG_ENDIAN is false.
22234
22235 The purpose of these operands is to distinguish between cases
22236 where the endian-ness of the values is important (for example
22237 when they are added together), and cases where the endian-ness
22238 is irrelevant, but the order of register operations is important.
22239 For example when loading a value from memory into a register
22240 pair, the endian-ness does not matter. Provided that the value
22241 from the lower memory address is put into the lower numbered
22242 register, and the value from the higher address is put into the
22243 higher numbered register, the load will work regardless of whether
22244 the value being loaded is big-wordian or little-wordian. The
22245 order of the two register loads can matter however, if the address
22246 of the memory location is actually held in one of the registers
22247 being overwritten by the load.
22248
22249 The 'Q' and 'R' constraints are also available for 64-bit
22250 constants. */
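/* As an illustration, a 64-bit addition can be written in a pattern as
   "adds\t%Q0, %Q1, %Q2\;adc\t%R0, %R1, %R2" so that the carry always
   flows from the low word to the high word, whatever the word order
   (an illustrative template, not a quotation from arm.md).  */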
22251 case 'Q':
22252 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22253 {
22254 rtx part = gen_lowpart (SImode, x);
22255 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22256 return;
22257 }
22258
22259 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22260 {
22261 output_operand_lossage ("invalid operand for code '%c'", code);
22262 return;
22263 }
22264
22265 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22266 return;
22267
22268 case 'R':
22269 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22270 {
22271 machine_mode mode = GET_MODE (x);
22272 rtx part;
22273
22274 if (mode == VOIDmode)
22275 mode = DImode;
22276 part = gen_highpart_mode (SImode, mode, x);
22277 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22278 return;
22279 }
22280
22281 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22282 {
22283 output_operand_lossage ("invalid operand for code '%c'", code);
22284 return;
22285 }
22286
22287 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22288 return;
22289
22290 case 'H':
22291 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22292 {
22293 output_operand_lossage ("invalid operand for code '%c'", code);
22294 return;
22295 }
22296
22297 asm_fprintf (stream, "%r", REGNO (x) + 1);
22298 return;
22299
22300 case 'J':
22301 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22302 {
22303 output_operand_lossage ("invalid operand for code '%c'", code);
22304 return;
22305 }
22306
22307 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22308 return;
22309
22310 case 'K':
22311 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22312 {
22313 output_operand_lossage ("invalid operand for code '%c'", code);
22314 return;
22315 }
22316
22317 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22318 return;
22319
22320 case 'm':
22321 asm_fprintf (stream, "%r",
22322 REG_P (XEXP (x, 0))
22323 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22324 return;
22325
22326 case 'M':
22327 asm_fprintf (stream, "{%r-%r}",
22328 REGNO (x),
22329 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22330 return;
22331
22332 /* Like 'M', but writing doubleword vector registers, for use by Neon
22333 insns. */
22334 case 'h':
22335 {
22336 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22337 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22338 if (numregs == 1)
22339 asm_fprintf (stream, "{d%d}", regno);
22340 else
22341 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22342 }
22343 return;
22344
22345 case 'd':
22346 /* CONST_TRUE_RTX means always -- that's the default. */
22347 if (x == const_true_rtx)
22348 return;
22349
22350 if (!COMPARISON_P (x))
22351 {
22352 output_operand_lossage ("invalid operand for code '%c'", code);
22353 return;
22354 }
22355
22356 fputs (arm_condition_codes[get_arm_condition_code (x)],
22357 stream);
22358 return;
22359
22360 case 'D':
22361 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22362 want to do that. */
22363 if (x == const_true_rtx)
22364 {
22365 output_operand_lossage ("instruction never executed");
22366 return;
22367 }
22368 if (!COMPARISON_P (x))
22369 {
22370 output_operand_lossage ("invalid operand for code '%c'", code);
22371 return;
22372 }
22373
22374 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22375 (get_arm_condition_code (x))],
22376 stream);
22377 return;
22378
22379 case 's':
22380 case 'V':
22381 case 'W':
22382 case 'X':
22383 case 'Y':
22384 case 'Z':
22385 /* Former Maverick support, removed after GCC-4.7. */
22386 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22387 return;
22388
22389 case 'U':
22390 if (!REG_P (x)
22391 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22392 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22393 /* Bad value for wCG register number. */
22394 {
22395 output_operand_lossage ("invalid operand for code '%c'", code);
22396 return;
22397 }
22398
22399 else
22400 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22401 return;
22402
22403 /* Print an iWMMXt control register name. */
22404 case 'w':
22405 if (!CONST_INT_P (x)
22406 || INTVAL (x) < 0
22407 || INTVAL (x) >= 16)
22408 /* Bad value for wC register number. */
22409 {
22410 output_operand_lossage ("invalid operand for code '%c'", code);
22411 return;
22412 }
22413
22414 else
22415 {
22416 static const char * wc_reg_names [16] =
22417 {
22418 "wCID", "wCon", "wCSSF", "wCASF",
22419 "wC4", "wC5", "wC6", "wC7",
22420 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22421 "wC12", "wC13", "wC14", "wC15"
22422 };
22423
22424 fputs (wc_reg_names [INTVAL (x)], stream);
22425 }
22426 return;
22427
22428 /* Print the high single-precision register of a VFP double-precision
22429 register. */
22430 case 'p':
22431 {
22432 machine_mode mode = GET_MODE (x);
22433 int regno;
22434
22435 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22436 {
22437 output_operand_lossage ("invalid operand for code '%c'", code);
22438 return;
22439 }
22440
22441 regno = REGNO (x);
22442 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22443 {
22444 output_operand_lossage ("invalid operand for code '%c'", code);
22445 return;
22446 }
22447
22448 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22449 }
22450 return;
22451
22452 /* Print a VFP/Neon double precision or quad precision register name. */
22453 case 'P':
22454 case 'q':
22455 {
22456 machine_mode mode = GET_MODE (x);
22457 int is_quad = (code == 'q');
22458 int regno;
22459
22460 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22461 {
22462 output_operand_lossage ("invalid operand for code '%c'", code);
22463 return;
22464 }
22465
22466 if (!REG_P (x)
22467 || !IS_VFP_REGNUM (REGNO (x)))
22468 {
22469 output_operand_lossage ("invalid operand for code '%c'", code);
22470 return;
22471 }
22472
22473 regno = REGNO (x);
22474 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22475 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22476 {
22477 output_operand_lossage ("invalid operand for code '%c'", code);
22478 return;
22479 }
22480
22481 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22482 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22483 }
22484 return;
22485
22486 /* These two codes print the low/high doubleword register of a Neon quad
22487 register, respectively. For pair-structure types, can also print
22488 low/high quadword registers. */
22489 case 'e':
22490 case 'f':
22491 {
22492 machine_mode mode = GET_MODE (x);
22493 int regno;
22494
22495 if ((GET_MODE_SIZE (mode) != 16
22496 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22497 {
22498 output_operand_lossage ("invalid operand for code '%c'", code);
22499 return;
22500 }
22501
22502 regno = REGNO (x);
22503 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22504 {
22505 output_operand_lossage ("invalid operand for code '%c'", code);
22506 return;
22507 }
22508
22509 if (GET_MODE_SIZE (mode) == 16)
22510 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22511 + (code == 'f' ? 1 : 0));
22512 else
22513 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22514 + (code == 'f' ? 1 : 0));
22515 }
22516 return;
22517
22518 /* Print a VFPv3 floating-point constant, represented as an integer
22519 index. */
22520 case 'G':
22521 {
22522 int index = vfp3_const_double_index (x);
22523 gcc_assert (index != -1);
22524 fprintf (stream, "%d", index);
22525 }
22526 return;
22527
22528 /* Print bits representing opcode features for Neon.
22529
22530 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22531 and polynomials as unsigned.
22532
22533 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22534
22535 Bit 2 is 1 for rounding functions, 0 otherwise. */
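/* A worked example of the encoding above: bits == 3 (signed, float)
   makes %T, %t and %F all print 'f'; bits == 0 prints 'u' for %T and
   'i' for %F; additionally setting bit 2 makes %O print "r", as used
   for the rounding variants of an instruction (values illustrative).  */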
22536
22537 /* Identify the type as 's', 'u', 'p' or 'f'. */
22538 case 'T':
22539 {
22540 HOST_WIDE_INT bits = INTVAL (x);
22541 fputc ("uspf"[bits & 3], stream);
22542 }
22543 return;
22544
22545 /* Likewise, but signed and unsigned integers are both 'i'. */
22546 case 'F':
22547 {
22548 HOST_WIDE_INT bits = INTVAL (x);
22549 fputc ("iipf"[bits & 3], stream);
22550 }
22551 return;
22552
22553 /* As for 'T', but emit 'u' instead of 'p'. */
22554 case 't':
22555 {
22556 HOST_WIDE_INT bits = INTVAL (x);
22557 fputc ("usuf"[bits & 3], stream);
22558 }
22559 return;
22560
22561 /* Bit 2: rounding (vs none). */
22562 case 'O':
22563 {
22564 HOST_WIDE_INT bits = INTVAL (x);
22565 fputs ((bits & 4) != 0 ? "r" : "", stream);
22566 }
22567 return;
22568
22569 /* Memory operand for vld1/vst1 instruction. */
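/* For example, a 16-byte access whose address in r0 is known to be
   16-byte aligned is printed as "[r0:128]", with "!" or ", rM"
   appended for the post-increment forms (register names illustrative).  */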
22570 case 'A':
22571 {
22572 rtx addr;
22573 bool postinc = FALSE;
22574 rtx postinc_reg = NULL;
22575 unsigned align, memsize, align_bits;
22576
22577 gcc_assert (MEM_P (x));
22578 addr = XEXP (x, 0);
22579 if (GET_CODE (addr) == POST_INC)
22580 {
22581 postinc = 1;
22582 addr = XEXP (addr, 0);
22583 }
22584 if (GET_CODE (addr) == POST_MODIFY)
22585 {
22586 postinc_reg = XEXP (XEXP (addr, 1), 1);
22587 addr = XEXP (addr, 0);
22588 }
22589 asm_fprintf (stream, "[%r", REGNO (addr));
22590
22591 /* We know the alignment of this access, so we can emit a hint in the
22592 instruction (for some alignments) as an aid to the memory subsystem
22593 of the target. */
22594 align = MEM_ALIGN (x) >> 3;
22595 memsize = MEM_SIZE (x);
22596
22597 /* Only certain alignment specifiers are supported by the hardware. */
22598 if (memsize == 32 && (align % 32) == 0)
22599 align_bits = 256;
22600 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22601 align_bits = 128;
22602 else if (memsize >= 8 && (align % 8) == 0)
22603 align_bits = 64;
22604 else
22605 align_bits = 0;
22606
22607 if (align_bits != 0)
22608 asm_fprintf (stream, ":%d", align_bits);
22609
22610 asm_fprintf (stream, "]");
22611
22612 if (postinc)
22613 fputs ("!", stream);
22614 if (postinc_reg)
22615 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22616 }
22617 return;
22618
22619 case 'C':
22620 {
22621 rtx addr;
22622
22623 gcc_assert (MEM_P (x));
22624 addr = XEXP (x, 0);
22625 gcc_assert (REG_P (addr));
22626 asm_fprintf (stream, "[%r]", REGNO (addr));
22627 }
22628 return;
22629
22630 /* Translate an S register number into a D register number and element index. */
22631 case 'y':
22632 {
22633 machine_mode mode = GET_MODE (x);
22634 int regno;
22635
22636 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22637 {
22638 output_operand_lossage ("invalid operand for code '%c'", code);
22639 return;
22640 }
22641
22642 regno = REGNO (x);
22643 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22644 {
22645 output_operand_lossage ("invalid operand for code '%c'", code);
22646 return;
22647 }
22648
22649 regno = regno - FIRST_VFP_REGNUM;
22650 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22651 }
22652 return;
22653
22654 case 'v':
22655 gcc_assert (CONST_DOUBLE_P (x));
22656 int result;
22657 result = vfp3_const_double_for_fract_bits (x);
22658 if (result == 0)
22659 result = vfp3_const_double_for_bits (x);
22660 fprintf (stream, "#%d", result);
22661 return;
22662
22663 /* Register specifier for vld1.16/vst1.16. Translate the S register
22664 number into a D register number and element index. */
22665 case 'z':
22666 {
22667 machine_mode mode = GET_MODE (x);
22668 int regno;
22669
22670 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22671 {
22672 output_operand_lossage ("invalid operand for code '%c'", code);
22673 return;
22674 }
22675
22676 regno = REGNO (x);
22677 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22678 {
22679 output_operand_lossage ("invalid operand for code '%c'", code);
22680 return;
22681 }
22682
22683 regno = regno - FIRST_VFP_REGNUM;
22684 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22685 }
22686 return;
22687
22688 default:
22689 if (x == 0)
22690 {
22691 output_operand_lossage ("missing operand");
22692 return;
22693 }
22694
22695 switch (GET_CODE (x))
22696 {
22697 case REG:
22698 asm_fprintf (stream, "%r", REGNO (x));
22699 break;
22700
22701 case MEM:
22702 output_address (GET_MODE (x), XEXP (x, 0));
22703 break;
22704
22705 case CONST_DOUBLE:
22706 {
22707 char fpstr[20];
22708 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22709 sizeof (fpstr), 0, 1);
22710 fprintf (stream, "#%s", fpstr);
22711 }
22712 break;
22713
22714 default:
22715 gcc_assert (GET_CODE (x) != NEG);
22716 fputc ('#', stream);
22717 if (GET_CODE (x) == HIGH)
22718 {
22719 fputs (":lower16:", stream);
22720 x = XEXP (x, 0);
22721 }
22722
22723 output_addr_const (stream, x);
22724 break;
22725 }
22726 }
22727 }
22728 \f
22729 /* Target hook for printing a memory address. */
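/* For example, in 32-bit code (reg r0) prints as "[r0]",
   (plus (reg r1) (const_int 8)) as "[r1, #8]", (pre_dec (reg sp)) in
   SImode as "[sp, #-4]!" and (post_inc (reg r2)) as "[r2], #4"
   (register choices are illustrative).  */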
22730 static void
22731 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22732 {
22733 if (TARGET_32BIT)
22734 {
22735 int is_minus = GET_CODE (x) == MINUS;
22736
22737 if (REG_P (x))
22738 asm_fprintf (stream, "[%r]", REGNO (x));
22739 else if (GET_CODE (x) == PLUS || is_minus)
22740 {
22741 rtx base = XEXP (x, 0);
22742 rtx index = XEXP (x, 1);
22743 HOST_WIDE_INT offset = 0;
22744 if (!REG_P (base)
22745 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22746 {
22747 /* Ensure that BASE is a register
22748 (one of them must be).
22749 Also ensure that SP is not used as an index register. */
22750 std::swap (base, index);
22751 }
22752 switch (GET_CODE (index))
22753 {
22754 case CONST_INT:
22755 offset = INTVAL (index);
22756 if (is_minus)
22757 offset = -offset;
22758 asm_fprintf (stream, "[%r, #%wd]",
22759 REGNO (base), offset);
22760 break;
22761
22762 case REG:
22763 asm_fprintf (stream, "[%r, %s%r]",
22764 REGNO (base), is_minus ? "-" : "",
22765 REGNO (index));
22766 break;
22767
22768 case MULT:
22769 case ASHIFTRT:
22770 case LSHIFTRT:
22771 case ASHIFT:
22772 case ROTATERT:
22773 {
22774 asm_fprintf (stream, "[%r, %s%r",
22775 REGNO (base), is_minus ? "-" : "",
22776 REGNO (XEXP (index, 0)));
22777 arm_print_operand (stream, index, 'S');
22778 fputs ("]", stream);
22779 break;
22780 }
22781
22782 default:
22783 gcc_unreachable ();
22784 }
22785 }
22786 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22787 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22788 {
22789 gcc_assert (REG_P (XEXP (x, 0)));
22790
22791 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22792 asm_fprintf (stream, "[%r, #%s%d]!",
22793 REGNO (XEXP (x, 0)),
22794 GET_CODE (x) == PRE_DEC ? "-" : "",
22795 GET_MODE_SIZE (mode));
22796 else
22797 asm_fprintf (stream, "[%r], #%s%d",
22798 REGNO (XEXP (x, 0)),
22799 GET_CODE (x) == POST_DEC ? "-" : "",
22800 GET_MODE_SIZE (mode));
22801 }
22802 else if (GET_CODE (x) == PRE_MODIFY)
22803 {
22804 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22805 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22806 asm_fprintf (stream, "#%wd]!",
22807 INTVAL (XEXP (XEXP (x, 1), 1)));
22808 else
22809 asm_fprintf (stream, "%r]!",
22810 REGNO (XEXP (XEXP (x, 1), 1)));
22811 }
22812 else if (GET_CODE (x) == POST_MODIFY)
22813 {
22814 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22815 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22816 asm_fprintf (stream, "#%wd",
22817 INTVAL (XEXP (XEXP (x, 1), 1)));
22818 else
22819 asm_fprintf (stream, "%r",
22820 REGNO (XEXP (XEXP (x, 1), 1)));
22821 }
22822 else output_addr_const (stream, x);
22823 }
22824 else
22825 {
22826 if (REG_P (x))
22827 asm_fprintf (stream, "[%r]", REGNO (x));
22828 else if (GET_CODE (x) == POST_INC)
22829 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22830 else if (GET_CODE (x) == PLUS)
22831 {
22832 gcc_assert (REG_P (XEXP (x, 0)));
22833 if (CONST_INT_P (XEXP (x, 1)))
22834 asm_fprintf (stream, "[%r, #%wd]",
22835 REGNO (XEXP (x, 0)),
22836 INTVAL (XEXP (x, 1)));
22837 else
22838 asm_fprintf (stream, "[%r, %r]",
22839 REGNO (XEXP (x, 0)),
22840 REGNO (XEXP (x, 1)));
22841 }
22842 else
22843 output_addr_const (stream, x);
22844 }
22845 }
22846 \f
22847 /* Target hook for indicating whether a punctuation character for
22848 TARGET_PRINT_OPERAND is valid. */
22849 static bool
22850 arm_print_operand_punct_valid_p (unsigned char code)
22851 {
22852 return (code == '@' || code == '|' || code == '.'
22853 || code == '(' || code == ')' || code == '#'
22854 || (TARGET_32BIT && (code == '?'))
22855 || (TARGET_THUMB2 && (code == '!'))
22856 || (TARGET_THUMB && (code == '_')));
22857 }
22858 \f
22859 /* Target hook for assembling integer objects. The ARM version needs to
22860 handle word-sized values specially. */
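/* For example, a word-sized SYMBOL_REF in a PIC constant pool is emitted
   as "\t.word\tsym(GOTOFF)" when PIC data is text-relative and the symbol
   binds locally, and as "sym(GOT)" otherwise ("sym" is illustrative).  */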
22861 static bool
22862 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22863 {
22864 machine_mode mode;
22865
22866 if (size == UNITS_PER_WORD && aligned_p)
22867 {
22868 fputs ("\t.word\t", asm_out_file);
22869 output_addr_const (asm_out_file, x);
22870
22871 /* Mark symbols as position independent. We only do this in the
22872 .text segment, not in the .data segment. */
22873 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22874 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22875 {
22876 /* See legitimize_pic_address for an explanation of the
22877 TARGET_VXWORKS_RTP check. */
22878 /* References to weak symbols cannot be resolved locally:
22879 they may be overridden by a non-weak definition at link
22880 time. */
22881 if (!arm_pic_data_is_text_relative
22882 || (GET_CODE (x) == SYMBOL_REF
22883 && (!SYMBOL_REF_LOCAL_P (x)
22884 || (SYMBOL_REF_DECL (x)
22885 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22886 fputs ("(GOT)", asm_out_file);
22887 else
22888 fputs ("(GOTOFF)", asm_out_file);
22889 }
22890 fputc ('\n', asm_out_file);
22891 return true;
22892 }
22893
22894 mode = GET_MODE (x);
22895
22896 if (arm_vector_mode_supported_p (mode))
22897 {
22898 int i, units;
22899
22900 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22901
22902 units = CONST_VECTOR_NUNITS (x);
22903 size = GET_MODE_UNIT_SIZE (mode);
22904
22905 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22906 for (i = 0; i < units; i++)
22907 {
22908 rtx elt = CONST_VECTOR_ELT (x, i);
22909 assemble_integer
22910 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22911 }
22912 else
22913 for (i = 0; i < units; i++)
22914 {
22915 rtx elt = CONST_VECTOR_ELT (x, i);
22916 assemble_real
22917 (*CONST_DOUBLE_REAL_VALUE (elt),
22918 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22919 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22920 }
22921
22922 return true;
22923 }
22924
22925 return default_assemble_integer (x, size, aligned_p);
22926 }
22927
22928 static void
22929 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22930 {
22931 section *s;
22932
22933 if (!TARGET_AAPCS_BASED)
22934 {
22935 (is_ctor ?
22936 default_named_section_asm_out_constructor
22937 : default_named_section_asm_out_destructor) (symbol, priority);
22938 return;
22939 }
22940
22941 /* Put these in the .init_array section, using a special relocation. */
22942 if (priority != DEFAULT_INIT_PRIORITY)
22943 {
22944 char buf[18];
22945 sprintf (buf, "%s.%.5u",
22946 is_ctor ? ".init_array" : ".fini_array",
22947 priority);
22948 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22949 }
22950 else if (is_ctor)
22951 s = ctors_section;
22952 else
22953 s = dtors_section;
22954
22955 switch_to_section (s);
22956 assemble_align (POINTER_SIZE);
22957 fputs ("\t.word\t", asm_out_file);
22958 output_addr_const (asm_out_file, symbol);
22959 fputs ("(target1)\n", asm_out_file);
22960 }
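/* As a sketch of the AAPCS path above, a constructor registered with
   priority 65 switches to a section named ".init_array.00065" and emits
   "\t.word\tfoo(target1)", where "foo" is an illustrative symbol name.  */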
22961
22962 /* Add a function to the list of static constructors. */
22963
22964 static void
22965 arm_elf_asm_constructor (rtx symbol, int priority)
22966 {
22967 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22968 }
22969
22970 /* Add a function to the list of static destructors. */
22971
22972 static void
22973 arm_elf_asm_destructor (rtx symbol, int priority)
22974 {
22975 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22976 }
22977 \f
22978 /* A finite state machine takes care of noticing whether or not instructions
22979 can be conditionally executed, and thus decrease execution time and code
22980 size by deleting branch instructions. The fsm is controlled by
22981 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22982
22983 /* The states of the fsm controlling condition codes are:
22984 0: normal, do nothing special
22985 1: make ASM_OUTPUT_OPCODE not output this instruction
22986 2: make ASM_OUTPUT_OPCODE not output this instruction
22987 3: make instructions conditional
22988 4: make instructions conditional
22989
22990 State transitions (state->state by whom under condition):
22991 0 -> 1 final_prescan_insn if the `target' is a label
22992 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22993 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22994 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22995 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22996 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22997 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22998 (the target insn is arm_target_insn).
22999
23000 If the jump clobbers the conditions then we use states 2 and 4.
23001
23002 A similar thing can be done with conditional return insns.
23003
23004 XXX In case the `target' is an unconditional branch, this conditionalising
23005 of the instructions always reduces code size, but not always execution
23006 time. But then, I want to reduce the code size to somewhere near what
23007 /bin/cc produces. */
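/* As an illustration, in ARM state the sequence

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
    .L1:

   can be collapsed by this machinery into

	cmp	r0, #0
	addne	r1, r1, #1

   (registers, the constant and the label are only illustrative).  */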
23008
23009 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23010 instructions. When a COND_EXEC instruction is seen the subsequent
23011 instructions are scanned so that multiple conditional instructions can be
23012 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23013 specify the length and true/false mask for the IT block. These will be
23014 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
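/* For example, two conditional insns under EQ followed by one under NE
   can be emitted under a single "itte eq" prefix rather than three
   separate IT instructions (an illustrative Thumb-2 sequence).  */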
23015
23016 /* Returns the index of the ARM condition code string in
23017 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23018 COMPARISON should be an rtx like `(eq (...) (...))'. */
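/* For instance, (eq (reg:CC_Z CC_REGNUM) (const_int 0)) maps to ARM_EQ,
   while in CC_Cmode LTU maps to ARM_CS and GEU to ARM_CC (the operands
   shown are only schematic).  */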
23019
23020 enum arm_cond_code
23021 maybe_get_arm_condition_code (rtx comparison)
23022 {
23023 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23024 enum arm_cond_code code;
23025 enum rtx_code comp_code = GET_CODE (comparison);
23026
23027 if (GET_MODE_CLASS (mode) != MODE_CC)
23028 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23029 XEXP (comparison, 1));
23030
23031 switch (mode)
23032 {
23033 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23034 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23035 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23036 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23037 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23038 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23039 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23040 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23041 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23042 case E_CC_DLTUmode: code = ARM_CC;
23043
23044 dominance:
23045 if (comp_code == EQ)
23046 return ARM_INVERSE_CONDITION_CODE (code);
23047 if (comp_code == NE)
23048 return code;
23049 return ARM_NV;
23050
23051 case E_CC_NOOVmode:
23052 switch (comp_code)
23053 {
23054 case NE: return ARM_NE;
23055 case EQ: return ARM_EQ;
23056 case GE: return ARM_PL;
23057 case LT: return ARM_MI;
23058 default: return ARM_NV;
23059 }
23060
23061 case E_CC_Zmode:
23062 switch (comp_code)
23063 {
23064 case NE: return ARM_NE;
23065 case EQ: return ARM_EQ;
23066 default: return ARM_NV;
23067 }
23068
23069 case E_CC_Nmode:
23070 switch (comp_code)
23071 {
23072 case NE: return ARM_MI;
23073 case EQ: return ARM_PL;
23074 default: return ARM_NV;
23075 }
23076
23077 case E_CCFPEmode:
23078 case E_CCFPmode:
23079 /* We can handle all cases except UNEQ and LTGT. */
23080 switch (comp_code)
23081 {
23082 case GE: return ARM_GE;
23083 case GT: return ARM_GT;
23084 case LE: return ARM_LS;
23085 case LT: return ARM_MI;
23086 case NE: return ARM_NE;
23087 case EQ: return ARM_EQ;
23088 case ORDERED: return ARM_VC;
23089 case UNORDERED: return ARM_VS;
23090 case UNLT: return ARM_LT;
23091 case UNLE: return ARM_LE;
23092 case UNGT: return ARM_HI;
23093 case UNGE: return ARM_PL;
23094 /* UNEQ and LTGT do not have a representation. */
23095 case UNEQ: /* Fall through. */
23096 case LTGT: /* Fall through. */
23097 default: return ARM_NV;
23098 }
23099
23100 case E_CC_SWPmode:
23101 switch (comp_code)
23102 {
23103 case NE: return ARM_NE;
23104 case EQ: return ARM_EQ;
23105 case GE: return ARM_LE;
23106 case GT: return ARM_LT;
23107 case LE: return ARM_GE;
23108 case LT: return ARM_GT;
23109 case GEU: return ARM_LS;
23110 case GTU: return ARM_CC;
23111 case LEU: return ARM_CS;
23112 case LTU: return ARM_HI;
23113 default: return ARM_NV;
23114 }
23115
23116 case E_CC_Cmode:
23117 switch (comp_code)
23118 {
23119 case LTU: return ARM_CS;
23120 case GEU: return ARM_CC;
23121 case NE: return ARM_CS;
23122 case EQ: return ARM_CC;
23123 default: return ARM_NV;
23124 }
23125
23126 case E_CC_CZmode:
23127 switch (comp_code)
23128 {
23129 case NE: return ARM_NE;
23130 case EQ: return ARM_EQ;
23131 case GEU: return ARM_CS;
23132 case GTU: return ARM_HI;
23133 case LEU: return ARM_LS;
23134 case LTU: return ARM_CC;
23135 default: return ARM_NV;
23136 }
23137
23138 case E_CC_NCVmode:
23139 switch (comp_code)
23140 {
23141 case GE: return ARM_GE;
23142 case LT: return ARM_LT;
23143 case GEU: return ARM_CS;
23144 case LTU: return ARM_CC;
23145 default: return ARM_NV;
23146 }
23147
23148 case E_CC_Vmode:
23149 switch (comp_code)
23150 {
23151 case NE: return ARM_VS;
23152 case EQ: return ARM_VC;
23153 default: return ARM_NV;
23154 }
23155
23156 case E_CCmode:
23157 switch (comp_code)
23158 {
23159 case NE: return ARM_NE;
23160 case EQ: return ARM_EQ;
23161 case GE: return ARM_GE;
23162 case GT: return ARM_GT;
23163 case LE: return ARM_LE;
23164 case LT: return ARM_LT;
23165 case GEU: return ARM_CS;
23166 case GTU: return ARM_HI;
23167 case LEU: return ARM_LS;
23168 case LTU: return ARM_CC;
23169 default: return ARM_NV;
23170 }
23171
23172 default: gcc_unreachable ();
23173 }
23174 }
23175
23176 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23177 static enum arm_cond_code
23178 get_arm_condition_code (rtx comparison)
23179 {
23180 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23181 gcc_assert (code != ARM_NV);
23182 return code;
23183 }
23184
23185 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23186 code registers when not targeting Thumb1. The VFP condition register
23187 only exists when generating hard-float code. */
23188 static bool
23189 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23190 {
23191 if (!TARGET_32BIT)
23192 return false;
23193
23194 *p1 = CC_REGNUM;
23195 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23196 return true;
23197 }
23198
23199 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23200 instructions. */
23201 void
23202 thumb2_final_prescan_insn (rtx_insn *insn)
23203 {
23204 rtx_insn *first_insn = insn;
23205 rtx body = PATTERN (insn);
23206 rtx predicate;
23207 enum arm_cond_code code;
23208 int n;
23209 int mask;
23210 int max;
23211
23212 /* max_insns_skipped in the tune was already taken into account in the
23213 cost model of the ifcvt pass when generating COND_EXEC insns. At this
23214 stage just emit the IT blocks as they are found; it does not make sense
23215 to split the IT blocks. */
23216 max = MAX_INSN_PER_IT_BLOCK;
23217
23218 /* Remove the previous insn from the count of insns to be output. */
23219 if (arm_condexec_count)
23220 arm_condexec_count--;
23221
23222 /* Nothing to do if we are already inside a conditional block. */
23223 if (arm_condexec_count)
23224 return;
23225
23226 if (GET_CODE (body) != COND_EXEC)
23227 return;
23228
23229 /* Conditional jumps are implemented directly. */
23230 if (JUMP_P (insn))
23231 return;
23232
23233 predicate = COND_EXEC_TEST (body);
23234 arm_current_cc = get_arm_condition_code (predicate);
23235
23236 n = get_attr_ce_count (insn);
23237 arm_condexec_count = 1;
23238 arm_condexec_mask = (1 << n) - 1;
23239 arm_condexec_masklen = n;
23240 /* See if subsequent instructions can be combined into the same block. */
23241 for (;;)
23242 {
23243 insn = next_nonnote_insn (insn);
23244
23245 /* Jumping into the middle of an IT block is illegal, so a label or
23246 barrier terminates the block. */
23247 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23248 break;
23249
23250 body = PATTERN (insn);
23251 /* USE and CLOBBER aren't really insns, so just skip them. */
23252 if (GET_CODE (body) == USE
23253 || GET_CODE (body) == CLOBBER)
23254 continue;
23255
23256 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23257 if (GET_CODE (body) != COND_EXEC)
23258 break;
23259 /* Maximum number of conditionally executed instructions in a block. */
23260 n = get_attr_ce_count (insn);
23261 if (arm_condexec_masklen + n > max)
23262 break;
23263
23264 predicate = COND_EXEC_TEST (body);
23265 code = get_arm_condition_code (predicate);
23266 mask = (1 << n) - 1;
23267 if (arm_current_cc == code)
23268 arm_condexec_mask |= (mask << arm_condexec_masklen);
23269 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23270 break;
23271
23272 arm_condexec_count++;
23273 arm_condexec_masklen += n;
23274
23275 /* A jump must be the last instruction in a conditional block. */
23276 if (JUMP_P (insn))
23277 break;
23278 }
23279 /* Restore recog_data (getting the attributes of other insns can
23280 destroy this array, but final.c assumes that it remains intact
23281 across this call). */
23282 extract_constrain_insn_cached (first_insn);
23283 }
23284
23285 void
23286 arm_final_prescan_insn (rtx_insn *insn)
23287 {
23288 /* BODY will hold the body of INSN. */
23289 rtx body = PATTERN (insn);
23290
23291 /* This will be 1 if trying to repeat the trick, and things need to be
23292 reversed if it appears to fail. */
23293 int reverse = 0;
23294
23295 /* If we start with a return insn, we only succeed if we find another one. */
23296 int seeking_return = 0;
23297 enum rtx_code return_code = UNKNOWN;
23298
23299 /* START_INSN will hold the insn from where we start looking. This is the
23300 first insn after the following code_label if REVERSE is true. */
23301 rtx_insn *start_insn = insn;
23302
23303 /* If in state 4, check if the target branch is reached, in order to
23304 change back to state 0. */
23305 if (arm_ccfsm_state == 4)
23306 {
23307 if (insn == arm_target_insn)
23308 {
23309 arm_target_insn = NULL;
23310 arm_ccfsm_state = 0;
23311 }
23312 return;
23313 }
23314
23315 /* If in state 3, it is possible to repeat the trick, if this insn is an
23316 unconditional branch to a label, and immediately following this branch
23317 is the previous target label which is only used once, and the label this
23318 branch jumps to is not too far off. */
23319 if (arm_ccfsm_state == 3)
23320 {
23321 if (simplejump_p (insn))
23322 {
23323 start_insn = next_nonnote_insn (start_insn);
23324 if (BARRIER_P (start_insn))
23325 {
23326 /* XXX Isn't this always a barrier? */
23327 start_insn = next_nonnote_insn (start_insn);
23328 }
23329 if (LABEL_P (start_insn)
23330 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23331 && LABEL_NUSES (start_insn) == 1)
23332 reverse = TRUE;
23333 else
23334 return;
23335 }
23336 else if (ANY_RETURN_P (body))
23337 {
23338 start_insn = next_nonnote_insn (start_insn);
23339 if (BARRIER_P (start_insn))
23340 start_insn = next_nonnote_insn (start_insn);
23341 if (LABEL_P (start_insn)
23342 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23343 && LABEL_NUSES (start_insn) == 1)
23344 {
23345 reverse = TRUE;
23346 seeking_return = 1;
23347 return_code = GET_CODE (body);
23348 }
23349 else
23350 return;
23351 }
23352 else
23353 return;
23354 }
23355
23356 gcc_assert (!arm_ccfsm_state || reverse);
23357 if (!JUMP_P (insn))
23358 return;
23359
23360 /* This jump might be paralleled with a clobber of the condition codes;
23361 the jump should always come first. */
23362 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23363 body = XVECEXP (body, 0, 0);
23364
23365 if (reverse
23366 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23367 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23368 {
23369 int insns_skipped;
23370 int fail = FALSE, succeed = FALSE;
23371 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23372 int then_not_else = TRUE;
23373 rtx_insn *this_insn = start_insn;
23374 rtx label = 0;
23375
23376 /* Register the insn jumped to. */
23377 if (reverse)
23378 {
23379 if (!seeking_return)
23380 label = XEXP (SET_SRC (body), 0);
23381 }
23382 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23383 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23384 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23385 {
23386 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23387 then_not_else = FALSE;
23388 }
23389 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23390 {
23391 seeking_return = 1;
23392 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23393 }
23394 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23395 {
23396 seeking_return = 1;
23397 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23398 then_not_else = FALSE;
23399 }
23400 else
23401 gcc_unreachable ();
23402
23403 /* See how many insns this branch skips, and what kind of insns. If all
23404 insns are okay, and the label or unconditional branch to the same
23405 label is not too far away, succeed. */
23406 for (insns_skipped = 0;
23407 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23408 {
23409 rtx scanbody;
23410
23411 this_insn = next_nonnote_insn (this_insn);
23412 if (!this_insn)
23413 break;
23414
23415 switch (GET_CODE (this_insn))
23416 {
23417 case CODE_LABEL:
23418 /* Succeed if it is the target label, otherwise fail since
23419 control falls in from somewhere else. */
23420 if (this_insn == label)
23421 {
23422 arm_ccfsm_state = 1;
23423 succeed = TRUE;
23424 }
23425 else
23426 fail = TRUE;
23427 break;
23428
23429 case BARRIER:
23430 /* Succeed if the following insn is the target label.
23431 Otherwise fail.
23432 If return insns are used then the last insn in a function
23433 will be a barrier. */
23434 this_insn = next_nonnote_insn (this_insn);
23435 if (this_insn && this_insn == label)
23436 {
23437 arm_ccfsm_state = 1;
23438 succeed = TRUE;
23439 }
23440 else
23441 fail = TRUE;
23442 break;
23443
23444 case CALL_INSN:
23445 /* The AAPCS says that conditional calls should not be
23446 used since they make interworking inefficient (the
23447 linker can't transform BL<cond> into BLX). That's
23448 only a problem if the machine has BLX. */
23449 if (arm_arch5t)
23450 {
23451 fail = TRUE;
23452 break;
23453 }
23454
23455 /* Succeed if the following insn is the target label, or
23456 if the following two insns are a barrier and the
23457 target label. */
23458 this_insn = next_nonnote_insn (this_insn);
23459 if (this_insn && BARRIER_P (this_insn))
23460 this_insn = next_nonnote_insn (this_insn);
23461
23462 if (this_insn && this_insn == label
23463 && insns_skipped < max_insns_skipped)
23464 {
23465 arm_ccfsm_state = 1;
23466 succeed = TRUE;
23467 }
23468 else
23469 fail = TRUE;
23470 break;
23471
23472 case JUMP_INSN:
23473 /* If this is an unconditional branch to the same label, succeed.
23474 If it is to another label, do nothing. If it is conditional,
23475 fail. */
23476 /* XXX Probably, the tests for SET and the PC are
23477 unnecessary. */
23478
23479 scanbody = PATTERN (this_insn);
23480 if (GET_CODE (scanbody) == SET
23481 && GET_CODE (SET_DEST (scanbody)) == PC)
23482 {
23483 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23484 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23485 {
23486 arm_ccfsm_state = 2;
23487 succeed = TRUE;
23488 }
23489 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23490 fail = TRUE;
23491 }
23492 /* Fail if a conditional return is undesirable (e.g. on a
23493 StrongARM), but still allow this if optimizing for size. */
23494 else if (GET_CODE (scanbody) == return_code
23495 && !use_return_insn (TRUE, NULL)
23496 && !optimize_size)
23497 fail = TRUE;
23498 else if (GET_CODE (scanbody) == return_code)
23499 {
23500 arm_ccfsm_state = 2;
23501 succeed = TRUE;
23502 }
23503 else if (GET_CODE (scanbody) == PARALLEL)
23504 {
23505 switch (get_attr_conds (this_insn))
23506 {
23507 case CONDS_NOCOND:
23508 break;
23509 default:
23510 fail = TRUE;
23511 break;
23512 }
23513 }
23514 else
23515 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23516
23517 break;
23518
23519 case INSN:
23520 /* Instructions using or affecting the condition codes make it
23521 fail. */
23522 scanbody = PATTERN (this_insn);
23523 if (!(GET_CODE (scanbody) == SET
23524 || GET_CODE (scanbody) == PARALLEL)
23525 || get_attr_conds (this_insn) != CONDS_NOCOND)
23526 fail = TRUE;
23527 break;
23528
23529 default:
23530 break;
23531 }
23532 }
23533 if (succeed)
23534 {
23535 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23536 arm_target_label = CODE_LABEL_NUMBER (label);
23537 else
23538 {
23539 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23540
23541 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23542 {
23543 this_insn = next_nonnote_insn (this_insn);
23544 gcc_assert (!this_insn
23545 || (!BARRIER_P (this_insn)
23546 && !LABEL_P (this_insn)));
23547 }
23548 if (!this_insn)
23549 {
23550 /* Oh, dear! We ran off the end; give up. */
23551 extract_constrain_insn_cached (insn);
23552 arm_ccfsm_state = 0;
23553 arm_target_insn = NULL;
23554 return;
23555 }
23556 arm_target_insn = this_insn;
23557 }
23558
23559 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23560 what it was. */
23561 if (!reverse)
23562 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23563
23564 if (reverse || then_not_else)
23565 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23566 }
23567
23568 /* Restore recog_data (getting the attributes of other insns can
23569 destroy this array, but final.c assumes that it remains intact
23570 across this call). */
23571 extract_constrain_insn_cached (insn);
23572 }
23573 }
23574
23575 /* Output IT instructions. */
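/* A sketch of the expected output (illustrative, not taken from the
   sources): with arm_current_cc == ARM_EQ, arm_condexec_masklen == 2 and
   arm_condexec_mask == 0x3, the loop below builds "tt" and emits
   "itt eq"; if the second instruction used the inverse condition the
   mask would be 0x1 and the output would be "ite eq" instead.  */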
23576 void
23577 thumb2_asm_output_opcode (FILE * stream)
23578 {
23579 char buff[5];
23580 int n;
23581
23582 if (arm_condexec_mask)
23583 {
23584 for (n = 0; n < arm_condexec_masklen; n++)
23585 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23586 buff[n] = 0;
23587 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23588 arm_condition_codes[arm_current_cc]);
23589 arm_condexec_mask = 0;
23590 }
23591 }
23592
23593 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23594 UNITS_PER_WORD bytes wide. */
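/* For example (a sketch): ARM_NUM_REGS (DImode) is 2, so a DImode value
   in the core registers occupies a pair such as r0/r1, while the special
   registers handled first below (e.g. CC_REGNUM) always count as a
   single register whatever the mode.  */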
23595 static unsigned int
23596 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23597 {
23598 if (TARGET_32BIT
23599 && regno > PC_REGNUM
23600 && regno != FRAME_POINTER_REGNUM
23601 && regno != ARG_POINTER_REGNUM
23602 && !IS_VFP_REGNUM (regno))
23603 return 1;
23604
23605 return ARM_NUM_REGS (mode);
23606 }
23607
23608 /* Implement TARGET_HARD_REGNO_MODE_OK. */
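/* Illustrative consequences of the checks below (a sketch): in ARM state
   with LDRD available, a DImode value is allowed in r2/r3 but rejected
   in r1 because the pair must start on an even register; Thumb-2 places
   no such restriction; and on VFP, DFmode is only allowed in registers
   that satisfy VFP_REGNO_OK_FOR_DOUBLE.  */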
23609 static bool
23610 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23611 {
23612 if (GET_MODE_CLASS (mode) == MODE_CC)
23613 return (regno == CC_REGNUM
23614 || (TARGET_HARD_FLOAT
23615 && regno == VFPCC_REGNUM));
23616
23617 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23618 return false;
23619
23620 if (TARGET_THUMB1)
23621 /* For the Thumb we only allow values bigger than SImode in
23622 registers 0 - 6, so that there is always a second low
23623 register available to hold the upper part of the value.
23624 We probably ought to ensure that the register is the
23625 start of an even numbered register pair. */
23626 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23627
23628 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23629 {
23630 if (mode == SFmode || mode == SImode)
23631 return VFP_REGNO_OK_FOR_SINGLE (regno);
23632
23633 if (mode == DFmode)
23634 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23635
23636 if (mode == HFmode)
23637 return VFP_REGNO_OK_FOR_SINGLE (regno);
23638
23639 /* VFP registers can hold HImode values. */
23640 if (mode == HImode)
23641 return VFP_REGNO_OK_FOR_SINGLE (regno);
23642
23643 if (TARGET_NEON)
23644 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23645 || (VALID_NEON_QREG_MODE (mode)
23646 && NEON_REGNO_OK_FOR_QUAD (regno))
23647 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23648 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23649 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23650 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23651 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23652
23653 return false;
23654 }
23655
23656 if (TARGET_REALLY_IWMMXT)
23657 {
23658 if (IS_IWMMXT_GR_REGNUM (regno))
23659 return mode == SImode;
23660
23661 if (IS_IWMMXT_REGNUM (regno))
23662 return VALID_IWMMXT_REG_MODE (mode);
23663 }
23664
23665 /* We allow almost any value to be stored in the general registers.
23666 Restrict doubleword quantities to even register pairs in ARM state
23667 so that we can use ldrd. Do not allow very large Neon structure
23668 opaque modes in general registers; they would use too many. */
23669 if (regno <= LAST_ARM_REGNUM)
23670 {
23671 if (ARM_NUM_REGS (mode) > 4)
23672 return false;
23673
23674 if (TARGET_THUMB2)
23675 return true;
23676
23677 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23678 }
23679
23680 if (regno == FRAME_POINTER_REGNUM
23681 || regno == ARG_POINTER_REGNUM)
23682 /* We only allow integers in the fake hard registers. */
23683 return GET_MODE_CLASS (mode) == MODE_INT;
23684
23685 return false;
23686 }
23687
23688 /* Implement TARGET_MODES_TIEABLE_P. */
23689
23690 static bool
23691 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23692 {
23693 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23694 return true;
23695
23696 /* We specifically want to allow elements of "structure" modes to
23697 be tieable to the structure. This more general condition allows
23698 other rarer situations too. */
23699 if (TARGET_NEON
23700 && (VALID_NEON_DREG_MODE (mode1)
23701 || VALID_NEON_QREG_MODE (mode1)
23702 || VALID_NEON_STRUCT_MODE (mode1))
23703 && (VALID_NEON_DREG_MODE (mode2)
23704 || VALID_NEON_QREG_MODE (mode2)
23705 || VALID_NEON_STRUCT_MODE (mode2)))
23706 return true;
23707
23708 return false;
23709 }
23710
23711 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23712 not used in arm mode. */
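/* For instance (a sketch): under Thumb-2, r0-r7 map to LO_REGS and
   r8-r12 to HI_REGS, while in ARM state they all fall into GENERAL_REGS;
   the first VFP bank (d0-d7, i.e. s0-s15) maps to VFP_D0_D7_REGS.  */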
23713
23714 enum reg_class
23715 arm_regno_class (int regno)
23716 {
23717 if (regno == PC_REGNUM)
23718 return NO_REGS;
23719
23720 if (TARGET_THUMB1)
23721 {
23722 if (regno == STACK_POINTER_REGNUM)
23723 return STACK_REG;
23724 if (regno == CC_REGNUM)
23725 return CC_REG;
23726 if (regno < 8)
23727 return LO_REGS;
23728 return HI_REGS;
23729 }
23730
23731 if (TARGET_THUMB2 && regno < 8)
23732 return LO_REGS;
23733
23734 if ( regno <= LAST_ARM_REGNUM
23735 || regno == FRAME_POINTER_REGNUM
23736 || regno == ARG_POINTER_REGNUM)
23737 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23738
23739 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23740 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23741
23742 if (IS_VFP_REGNUM (regno))
23743 {
23744 if (regno <= D7_VFP_REGNUM)
23745 return VFP_D0_D7_REGS;
23746 else if (regno <= LAST_LO_VFP_REGNUM)
23747 return VFP_LO_REGS;
23748 else
23749 return VFP_HI_REGS;
23750 }
23751
23752 if (IS_IWMMXT_REGNUM (regno))
23753 return IWMMXT_REGS;
23754
23755 if (IS_IWMMXT_GR_REGNUM (regno))
23756 return IWMMXT_GR_REGS;
23757
23758 return NO_REGS;
23759 }
23760
23761 /* Handle a special case when computing the offset
23762 of an argument from the frame pointer. */
23763 int
23764 arm_debugger_arg_offset (int value, rtx addr)
23765 {
23766 rtx_insn *insn;
23767
23768 /* We are only interested if dbxout_parms() failed to compute the offset. */
23769 if (value != 0)
23770 return 0;
23771
23772 /* We can only cope with the case where the address is held in a register. */
23773 if (!REG_P (addr))
23774 return 0;
23775
23776 /* If we are using the frame pointer to point at the argument, then
23777 an offset of 0 is correct. */
23778 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23779 return 0;
23780
23781 /* If we are using the stack pointer to point at the
23782 argument, then an offset of 0 is correct. */
23783 /* ??? Check this is consistent with thumb2 frame layout. */
23784 if ((TARGET_THUMB || !frame_pointer_needed)
23785 && REGNO (addr) == SP_REGNUM)
23786 return 0;
23787
23788 /* Oh dear. The argument is pointed to by a register rather
23789 than being held in a register, or being stored at a known
23790 offset from the frame pointer. Since GDB only understands
23791 those two kinds of argument we must translate the address
23792 held in the register into an offset from the frame pointer.
23793 We do this by searching through the insns for the function
23794 looking to see where this register gets its value. If the
23795 register is initialized from the frame pointer plus an offset
23796 then we are in luck and we can continue, otherwise we give up.
23797
23798 This code is exercised by producing debugging information
23799 for a function with arguments like this:
23800
23801 double func (double a, double b, int c, double d) {return d;}
23802
23803 Without this code the stab for parameter 'd' will be set to
23804 an offset of 0 from the frame pointer, rather than 8. */
23805
23806 /* The if() statement says:
23807
23808 If the insn is a normal instruction
23809 and if the insn is setting the value in a register
23810 and if the register being set is the register holding the address of the argument
23811 and if the address is computed by an addition
23812 that involves adding to a register
23813 which is the frame pointer
23814 a constant integer
23815
23816 then... */
23817
23818 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23819 {
23820 if ( NONJUMP_INSN_P (insn)
23821 && GET_CODE (PATTERN (insn)) == SET
23822 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23823 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23824 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23825 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23826 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23827 )
23828 {
23829 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23830
23831 break;
23832 }
23833 }
23834
23835 if (value == 0)
23836 {
23837 debug_rtx (addr);
23838 warning (0, "unable to compute real location of stacked parameter");
23839 value = 8; /* XXX magic hack */
23840 }
23841
23842 return value;
23843 }
23844 \f
23845 /* Implement TARGET_PROMOTED_TYPE. */
23846
23847 static tree
23848 arm_promoted_type (const_tree t)
23849 {
23850 if (SCALAR_FLOAT_TYPE_P (t)
23851 && TYPE_PRECISION (t) == 16
23852 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23853 return float_type_node;
23854 return NULL_TREE;
23855 }
23856
23857 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23858 This simply adds HFmode as a supported mode; even though we don't
23859 implement arithmetic on this type directly, it's supported by
23860 optabs conversions, much the way the double-word arithmetic is
23861 special-cased in the default hook. */
23862
23863 static bool
23864 arm_scalar_mode_supported_p (scalar_mode mode)
23865 {
23866 if (mode == HFmode)
23867 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23868 else if (ALL_FIXED_POINT_MODE_P (mode))
23869 return true;
23870 else
23871 return default_scalar_mode_supported_p (mode);
23872 }
23873
23874 /* Set the value of FLT_EVAL_METHOD.
23875 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23876
23877 0: evaluate all operations and constants, whose semantic type has at
23878 most the range and precision of type float, to the range and
23879 precision of float; evaluate all other operations and constants to
23880 the range and precision of the semantic type;
23881
23882 N, where _FloatN is a supported interchange floating type
23883 evaluate all operations and constants, whose semantic type has at
23884 most the range and precision of _FloatN type, to the range and
23885 precision of the _FloatN type; evaluate all other operations and
23886 constants to the range and precision of the semantic type;
23887
23888 If we have the ARMv8.2-A extensions then we support _Float16 in native
23889 precision, so we should set this to 16. Otherwise, we support the type,
23890 but want to evaluate expressions in float precision, so set this to
23891 0. */
23892
23893 static enum flt_eval_method
23894 arm_excess_precision (enum excess_precision_type type)
23895 {
23896 switch (type)
23897 {
23898 case EXCESS_PRECISION_TYPE_FAST:
23899 case EXCESS_PRECISION_TYPE_STANDARD:
23900 /* We can calculate either in 16-bit range and precision or
23901 32-bit range and precision. Make that decision based on whether
23902 we have native support for the ARMv8.2-A 16-bit floating-point
23903 instructions or not. */
23904 return (TARGET_VFP_FP16INST
23905 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23906 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23907 case EXCESS_PRECISION_TYPE_IMPLICIT:
23908 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23909 default:
23910 gcc_unreachable ();
23911 }
23912 return FLT_EVAL_METHOD_UNPREDICTABLE;
23913 }
23914
23915
23916 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23917 _Float16 if we are using anything other than ieee format for 16-bit
23918 floating point. Otherwise, punt to the default implementation. */
23919 static opt_scalar_float_mode
23920 arm_floatn_mode (int n, bool extended)
23921 {
23922 if (!extended && n == 16)
23923 {
23924 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23925 return HFmode;
23926 return opt_scalar_float_mode ();
23927 }
23928
23929 return default_floatn_mode (n, extended);
23930 }
23931
23932
23933 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23934 not to early-clobber SRC registers in the process.
23935
23936 We assume that the operands described by SRC and DEST represent a
23937 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23938 number of components into which the copy has been decomposed. */
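/* A worked example (a sketch, using D registers for concreteness):
   copying an OImode value from d1-d4 into d2-d5 decomposes into four
   D-register moves.  The ranges overlap and the destination starts above
   the source, so the operand pairs are ordered in reverse
   (d5 <- d4, d4 <- d3, d3 <- d2, d2 <- d1); a forward order would
   overwrite d2 before it had been read as a source.  */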
23939 void
23940 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23941 {
23942 unsigned int i;
23943
23944 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23945 || REGNO (operands[0]) < REGNO (operands[1]))
23946 {
23947 for (i = 0; i < count; i++)
23948 {
23949 operands[2 * i] = dest[i];
23950 operands[2 * i + 1] = src[i];
23951 }
23952 }
23953 else
23954 {
23955 for (i = 0; i < count; i++)
23956 {
23957 operands[2 * i] = dest[count - i - 1];
23958 operands[2 * i + 1] = src[count - i - 1];
23959 }
23960 }
23961 }
23962
23963 /* Split operands into moves from op[1] + op[2] into op[0]. */
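/* For instance (a sketch): if operands[0] is q0 (d0/d1) and operands[1]
   is already d0, only the high half needs a move; if the two halves are
   exactly swapped, a single parallel is emitted so the pattern can use
   VSWP.  */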
23964
23965 void
23966 neon_split_vcombine (rtx operands[3])
23967 {
23968 unsigned int dest = REGNO (operands[0]);
23969 unsigned int src1 = REGNO (operands[1]);
23970 unsigned int src2 = REGNO (operands[2]);
23971 machine_mode halfmode = GET_MODE (operands[1]);
23972 unsigned int halfregs = REG_NREGS (operands[1]);
23973 rtx destlo, desthi;
23974
23975 if (src1 == dest && src2 == dest + halfregs)
23976 {
23977 /* No-op move. Can't split to nothing; emit something. */
23978 emit_note (NOTE_INSN_DELETED);
23979 return;
23980 }
23981
23982 /* Preserve register attributes for variable tracking. */
23983 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23984 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23985 GET_MODE_SIZE (halfmode));
23986
23987 /* Special case of reversed high/low parts. Use VSWP. */
23988 if (src2 == dest && src1 == dest + halfregs)
23989 {
23990 rtx x = gen_rtx_SET (destlo, operands[1]);
23991 rtx y = gen_rtx_SET (desthi, operands[2]);
23992 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23993 return;
23994 }
23995
23996 if (!reg_overlap_mentioned_p (operands[2], destlo))
23997 {
23998 /* Try to avoid unnecessary moves if part of the result
23999 is in the right place already. */
24000 if (src1 != dest)
24001 emit_move_insn (destlo, operands[1]);
24002 if (src2 != dest + halfregs)
24003 emit_move_insn (desthi, operands[2]);
24004 }
24005 else
24006 {
24007 if (src2 != dest + halfregs)
24008 emit_move_insn (desthi, operands[2]);
24009 if (src1 != dest)
24010 emit_move_insn (destlo, operands[1]);
24011 }
24012 }
24013 \f
24014 /* Return the number (counting from 0) of
24015 the least significant set bit in MASK. */
24016
24017 inline static int
24018 number_of_first_bit_set (unsigned mask)
24019 {
24020 return ctz_hwi (mask);
24021 }
24022
24023 /* Like emit_multi_reg_push, but allowing for a different set of
24024 registers to be described as saved. MASK is the set of registers
24025 to be saved; REAL_REGS is the set of registers to be described as
24026 saved. If REAL_REGS is 0, only describe the stack adjustment. */
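/* A sketch of the effect: with MASK = REAL_REGS = {r4, r7, lr} this
   builds RTL that is output as a single "push {r4, r7, lr}" (SP
   decremented by 12) and attaches a REG_FRAME_RELATED_EXPR note
   describing the SP adjustment and the three stores at SP, SP+4 and
   SP+8 for the unwinder.  */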
24027
24028 static rtx_insn *
24029 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24030 {
24031 unsigned long regno;
24032 rtx par[10], tmp, reg;
24033 rtx_insn *insn;
24034 int i, j;
24035
24036 /* Build the parallel of the registers actually being stored. */
24037 for (i = 0; mask; ++i, mask &= mask - 1)
24038 {
24039 regno = ctz_hwi (mask);
24040 reg = gen_rtx_REG (SImode, regno);
24041
24042 if (i == 0)
24043 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24044 else
24045 tmp = gen_rtx_USE (VOIDmode, reg);
24046
24047 par[i] = tmp;
24048 }
24049
24050 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24051 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24052 tmp = gen_frame_mem (BLKmode, tmp);
24053 tmp = gen_rtx_SET (tmp, par[0]);
24054 par[0] = tmp;
24055
24056 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24057 insn = emit_insn (tmp);
24058
24059 /* Always build the stack adjustment note for unwind info. */
24060 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24061 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24062 par[0] = tmp;
24063
24064 /* Build the parallel of the registers recorded as saved for unwind. */
24065 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24066 {
24067 regno = ctz_hwi (real_regs);
24068 reg = gen_rtx_REG (SImode, regno);
24069
24070 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24071 tmp = gen_frame_mem (SImode, tmp);
24072 tmp = gen_rtx_SET (tmp, reg);
24073 RTX_FRAME_RELATED_P (tmp) = 1;
24074 par[j + 1] = tmp;
24075 }
24076
24077 if (j == 0)
24078 tmp = par[0];
24079 else
24080 {
24081 RTX_FRAME_RELATED_P (par[0]) = 1;
24082 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24083 }
24084
24085 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24086
24087 return insn;
24088 }
24089
24090 /* Emit code to push or pop registers to or from the stack. F is the
24091 assembly file. MASK is the registers to pop. */
24092 static void
24093 thumb_pop (FILE *f, unsigned long mask)
24094 {
24095 int regno;
24096 int lo_mask = mask & 0xFF;
24097
24098 gcc_assert (mask);
24099
24100 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24101 {
24102 /* Special case. Do not generate a POP PC statement here, do it in
24103 thumb_exit(). */
24104 thumb_exit (f, -1);
24105 return;
24106 }
24107
24108 fprintf (f, "\tpop\t{");
24109
24110 /* Look at the low registers first. */
24111 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24112 {
24113 if (lo_mask & 1)
24114 {
24115 asm_fprintf (f, "%r", regno);
24116
24117 if ((lo_mask & ~1) != 0)
24118 fprintf (f, ", ");
24119 }
24120 }
24121
24122 if (mask & (1 << PC_REGNUM))
24123 {
24124 /* Catch popping the PC. */
24125 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24126 || IS_CMSE_ENTRY (arm_current_func_type ()))
24127 {
24128 /* The PC is never popped directly; instead
24129 it is popped into r3 and then BX is used. */
24130 fprintf (f, "}\n");
24131
24132 thumb_exit (f, -1);
24133
24134 return;
24135 }
24136 else
24137 {
24138 if (mask & 0xFF)
24139 fprintf (f, ", ");
24140
24141 asm_fprintf (f, "%r", PC_REGNUM);
24142 }
24143 }
24144
24145 fprintf (f, "}\n");
24146 }
24147
24148 /* Generate code to return from a thumb function.
24149 If 'reg_containing_return_addr' is -1, then the return address is
24150 actually on the stack, at the stack pointer.
24151
24152 Note: do not forget to update length attribute of corresponding insn pattern
24153 when changing assembly output (eg. length attribute of epilogue_insns when
24154 updating Armv8-M Baseline Security Extensions register clearing
24155 sequences). */
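/* An illustrative sketch of the output: for a void function whose return
   address is still on the stack, the common non-interworking case
   collapses to a single "pop {pc}"; with interworking the address is
   instead popped into a free argument register and returned through BX,
   e.g. "pop {r1}" followed by "bx r1".  */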
24156 static void
24157 thumb_exit (FILE *f, int reg_containing_return_addr)
24158 {
24159 unsigned regs_available_for_popping;
24160 unsigned regs_to_pop;
24161 int pops_needed;
24162 unsigned available;
24163 unsigned required;
24164 machine_mode mode;
24165 int size;
24166 int restore_a4 = FALSE;
24167
24168 /* Compute the registers we need to pop. */
24169 regs_to_pop = 0;
24170 pops_needed = 0;
24171
24172 if (reg_containing_return_addr == -1)
24173 {
24174 regs_to_pop |= 1 << LR_REGNUM;
24175 ++pops_needed;
24176 }
24177
24178 if (TARGET_BACKTRACE)
24179 {
24180 /* Restore the (ARM) frame pointer and stack pointer. */
24181 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24182 pops_needed += 2;
24183 }
24184
24185 /* If there is nothing to pop then just emit the BX instruction and
24186 return. */
24187 if (pops_needed == 0)
24188 {
24189 if (crtl->calls_eh_return)
24190 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24191
24192 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24193 {
24194 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24195 reg_containing_return_addr);
24196 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24197 }
24198 else
24199 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24200 return;
24201 }
24202 /* Otherwise if we are not supporting interworking and we have not created
24203 a backtrace structure and the function was not entered in ARM mode then
24204 just pop the return address straight into the PC. */
24205 else if (!TARGET_INTERWORK
24206 && !TARGET_BACKTRACE
24207 && !is_called_in_ARM_mode (current_function_decl)
24208 && !crtl->calls_eh_return
24209 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24210 {
24211 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24212 return;
24213 }
24214
24215 /* Find out how many of the (return) argument registers we can corrupt. */
24216 regs_available_for_popping = 0;
24217
24218 /* If returning via __builtin_eh_return, the bottom three registers
24219 all contain information needed for the return. */
24220 if (crtl->calls_eh_return)
24221 size = 12;
24222 else
24223 {
24224 /* Deduce the registers used from the function's
24225 return value. This is more reliable than examining
24226 df_regs_ever_live_p () because that will be set if the register is
24227 ever used in the function, not just if the register is used
24228 to hold a return value. */
24229
24230 if (crtl->return_rtx != 0)
24231 mode = GET_MODE (crtl->return_rtx);
24232 else
24233 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24234
24235 size = GET_MODE_SIZE (mode);
24236
24237 if (size == 0)
24238 {
24239 /* In a void function we can use any argument register.
24240 In a function that returns a structure on the stack
24241 we can use the second and third argument registers. */
24242 if (mode == VOIDmode)
24243 regs_available_for_popping =
24244 (1 << ARG_REGISTER (1))
24245 | (1 << ARG_REGISTER (2))
24246 | (1 << ARG_REGISTER (3));
24247 else
24248 regs_available_for_popping =
24249 (1 << ARG_REGISTER (2))
24250 | (1 << ARG_REGISTER (3));
24251 }
24252 else if (size <= 4)
24253 regs_available_for_popping =
24254 (1 << ARG_REGISTER (2))
24255 | (1 << ARG_REGISTER (3));
24256 else if (size <= 8)
24257 regs_available_for_popping =
24258 (1 << ARG_REGISTER (3));
24259 }
24260
24261 /* Match registers to be popped with registers into which we pop them. */
24262 for (available = regs_available_for_popping,
24263 required = regs_to_pop;
24264 required != 0 && available != 0;
24265 available &= ~(available & - available),
24266 required &= ~(required & - required))
24267 -- pops_needed;
24268
24269 /* If we have any popping registers left over, remove them. */
24270 if (available > 0)
24271 regs_available_for_popping &= ~available;
24272
24273 /* Otherwise if we need another popping register we can use
24274 the fourth argument register. */
24275 else if (pops_needed)
24276 {
24277 /* If we have not found any free argument registers and
24278 reg a4 contains the return address, we must move it. */
24279 if (regs_available_for_popping == 0
24280 && reg_containing_return_addr == LAST_ARG_REGNUM)
24281 {
24282 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24283 reg_containing_return_addr = LR_REGNUM;
24284 }
24285 else if (size > 12)
24286 {
24287 /* Register a4 is being used to hold part of the return value,
24288 but we have dire need of a free, low register. */
24289 restore_a4 = TRUE;
24290
24291 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24292 }
24293
24294 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24295 {
24296 /* The fourth argument register is available. */
24297 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24298
24299 --pops_needed;
24300 }
24301 }
24302
24303 /* Pop as many registers as we can. */
24304 thumb_pop (f, regs_available_for_popping);
24305
24306 /* Process the registers we popped. */
24307 if (reg_containing_return_addr == -1)
24308 {
24309 /* The return address was popped into the lowest numbered register. */
24310 regs_to_pop &= ~(1 << LR_REGNUM);
24311
24312 reg_containing_return_addr =
24313 number_of_first_bit_set (regs_available_for_popping);
24314
24315 /* Remove this register from the mask of available registers, so that
24316 the return address will not be corrupted by further pops. */
24317 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24318 }
24319
24320 /* If we popped other registers then handle them here. */
24321 if (regs_available_for_popping)
24322 {
24323 int frame_pointer;
24324
24325 /* Work out which register currently contains the frame pointer. */
24326 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24327
24328 /* Move it into the correct place. */
24329 asm_fprintf (f, "\tmov\t%r, %r\n",
24330 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24331
24332 /* (Temporarily) remove it from the mask of popped registers. */
24333 regs_available_for_popping &= ~(1 << frame_pointer);
24334 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24335
24336 if (regs_available_for_popping)
24337 {
24338 int stack_pointer;
24339
24340 /* We popped the stack pointer as well,
24341 find the register that contains it. */
24342 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24343
24344 /* Move it into the stack register. */
24345 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24346
24347 /* At this point we have popped all necessary registers, so
24348 do not worry about restoring regs_available_for_popping
24349 to its correct value:
24350
24351 assert (pops_needed == 0)
24352 assert (regs_available_for_popping == (1 << frame_pointer))
24353 assert (regs_to_pop == (1 << STACK_POINTER)) */
24354 }
24355 else
24356 {
24357 /* Since we have just moved the popped value into the frame
24358 pointer, the popping register is available for reuse, and
24359 we know that we still have the stack pointer left to pop. */
24360 regs_available_for_popping |= (1 << frame_pointer);
24361 }
24362 }
24363
24364 /* If we still have registers left on the stack, but we no longer have
24365 any registers into which we can pop them, then we must move the return
24366 address into the link register and make available the register that
24367 contained it. */
24368 if (regs_available_for_popping == 0 && pops_needed > 0)
24369 {
24370 regs_available_for_popping |= 1 << reg_containing_return_addr;
24371
24372 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24373 reg_containing_return_addr);
24374
24375 reg_containing_return_addr = LR_REGNUM;
24376 }
24377
24378 /* If we have registers left on the stack then pop some more.
24379 We know that at most we will want to pop FP and SP. */
24380 if (pops_needed > 0)
24381 {
24382 int popped_into;
24383 int move_to;
24384
24385 thumb_pop (f, regs_available_for_popping);
24386
24387 /* We have popped either FP or SP.
24388 Move whichever one it is into the correct register. */
24389 popped_into = number_of_first_bit_set (regs_available_for_popping);
24390 move_to = number_of_first_bit_set (regs_to_pop);
24391
24392 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24393 --pops_needed;
24394 }
24395
24396 /* If we still have not popped everything then we must have only
24397 had one register available to us and we are now popping the SP. */
24398 if (pops_needed > 0)
24399 {
24400 int popped_into;
24401
24402 thumb_pop (f, regs_available_for_popping);
24403
24404 popped_into = number_of_first_bit_set (regs_available_for_popping);
24405
24406 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24407 /*
24408 assert (regs_to_pop == (1 << STACK_POINTER))
24409 assert (pops_needed == 1)
24410 */
24411 }
24412
24413 /* If necessary restore the a4 register. */
24414 if (restore_a4)
24415 {
24416 if (reg_containing_return_addr != LR_REGNUM)
24417 {
24418 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24419 reg_containing_return_addr = LR_REGNUM;
24420 }
24421
24422 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24423 }
24424
24425 if (crtl->calls_eh_return)
24426 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24427
24428 /* Return to caller. */
24429 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24430 {
24431 /* This is for the cases where LR is not being used to contain the return
24432 address. It may therefore contain information that we might not want
24433 to leak, hence it must be cleared. The value in R0 will never be a
24434 secret at this point, so it is safe to use it, see the clearing code
24435 in 'cmse_nonsecure_entry_clear_before_return'. */
24436 if (reg_containing_return_addr != LR_REGNUM)
24437 asm_fprintf (f, "\tmov\tlr, r0\n");
24438
24439 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24440 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24441 }
24442 else
24443 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24444 }
24445 \f
24446 /* Scan INSN just before assembler is output for it.
24447 For Thumb-1, we track the status of the condition codes; this
24448 information is used in the cbranchsi4_insn pattern. */
24449 void
24450 thumb1_final_prescan_insn (rtx_insn *insn)
24451 {
24452 if (flag_print_asm_name)
24453 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24454 INSN_ADDRESSES (INSN_UID (insn)));
24455 /* Don't overwrite the previous setter when we get to a cbranch. */
24456 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24457 {
24458 enum attr_conds conds;
24459
24460 if (cfun->machine->thumb1_cc_insn)
24461 {
24462 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24463 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24464 CC_STATUS_INIT;
24465 }
24466 conds = get_attr_conds (insn);
24467 if (conds == CONDS_SET)
24468 {
24469 rtx set = single_set (insn);
24470 cfun->machine->thumb1_cc_insn = insn;
24471 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24472 cfun->machine->thumb1_cc_op1 = const0_rtx;
24473 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24474 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24475 {
24476 rtx src1 = XEXP (SET_SRC (set), 1);
24477 if (src1 == const0_rtx)
24478 cfun->machine->thumb1_cc_mode = CCmode;
24479 }
24480 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24481 {
24482 /* Record the src register operand instead of dest because
24483 the cprop_hardreg pass propagates src. */
24484 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24485 }
24486 }
24487 else if (conds != CONDS_NOCOND)
24488 cfun->machine->thumb1_cc_insn = NULL_RTX;
24489 }
24490
24491 /* Check if unexpected far jump is used. */
24492 if (cfun->machine->lr_save_eliminated
24493 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24494 internal_error("Unexpected thumb1 far jump");
24495 }
24496
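/* Return nonzero if all the set bits of VAL fit within a single 8-bit
   window, i.e. VAL is an 8-bit constant shifted left by 0 to 24 bits;
   e.g. 0x3FC (0xFF << 2) qualifies, while 0x101 does not.  (Descriptive
   comment added as a sketch of the check below; presumably such
   constants can be built from an 8-bit immediate plus a shift.)  */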
24497 int
24498 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24499 {
24500 unsigned HOST_WIDE_INT mask = 0xff;
24501 int i;
24502
24503 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24504 if (val == 0) /* XXX */
24505 return 0;
24506
24507 for (i = 0; i < 25; i++)
24508 if ((val & (mask << i)) == val)
24509 return 1;
24510
24511 return 0;
24512 }
24513
24514 /* Returns nonzero if the current function contains,
24515 or might contain a far jump. */
24516 static int
24517 thumb_far_jump_used_p (void)
24518 {
24519 rtx_insn *insn;
24520 bool far_jump = false;
24521 unsigned int func_size = 0;
24522
24523 /* If we have already decided that far jumps may be used,
24524 do not bother checking again, and always return true even if
24525 it turns out that they are not being used. Once we have made
24526 the decision that far jumps are present (and that hence the link
24527 register will be pushed onto the stack) we cannot go back on it. */
24528 if (cfun->machine->far_jump_used)
24529 return 1;
24530
24531 /* If this function is not being called from the prologue/epilogue
24532 generation code then it must be being called from the
24533 INITIAL_ELIMINATION_OFFSET macro. */
24534 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24535 {
24536 /* In this case we know that we are being asked about the elimination
24537 of the arg pointer register. If that register is not being used,
24538 then there are no arguments on the stack, and we do not have to
24539 worry that a far jump might force the prologue to push the link
24540 register, changing the stack offsets. In this case we can just
24541 return false, since the presence of far jumps in the function will
24542 not affect stack offsets.
24543
24544 If the arg pointer is live (or if it was live, but has now been
24545 eliminated and so set to dead) then we do have to test to see if
24546 the function might contain a far jump. This test can lead to some
24547 false negatives, since before reload is completed the length of
24548 branch instructions is not known, so gcc defaults to returning their
24549 longest length, which in turn sets the far jump attribute to true.
24550
24551 A false negative will not result in bad code being generated, but it
24552 will result in a needless push and pop of the link register. We
24553 hope that this does not occur too often.
24554
24555 If we need doubleword stack alignment this could affect the other
24556 elimination offsets so we can't risk getting it wrong. */
24557 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24558 cfun->machine->arg_pointer_live = 1;
24559 else if (!cfun->machine->arg_pointer_live)
24560 return 0;
24561 }
24562
24563 /* We should not change far_jump_used during or after reload, as there is
24564 no chance to change stack frame layout. */
24565 if (reload_in_progress || reload_completed)
24566 return 0;
24567
24568 /* Check to see if the function contains a branch
24569 insn with the far jump attribute set. */
24570 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24571 {
24572 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24573 {
24574 far_jump = true;
24575 }
24576 func_size += get_attr_length (insn);
24577 }
24578
24579 /* Attribute far_jump will always be true for thumb1 before
24580 the shorten_branch pass. So checking the far_jump attribute before
24581 shorten_branch isn't very useful.
24582
24583 The following heuristic tries to estimate more accurately whether a far
24584 jump may finally be used. The heuristic is very conservative, as there is
24585 no chance to roll back the decision not to use a far jump.
24586
24587 Thumb1 long branch offset is -2048 to 2046. The worst case is that each
24588 2-byte insn is associated with a 4-byte constant pool. Using a
24589 function size of 2048/3 as the threshold is conservative enough. */
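/* In other words (a worked example): the test below fires once the
   function body reaches roughly 683 bytes, since 683 * 3 = 2049 >= 2048
   while 682 * 3 = 2046 < 2048.  */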
24590 if (far_jump)
24591 {
24592 if ((func_size * 3) >= 2048)
24593 {
24594 /* Record the fact that we have decided that
24595 the function does use far jumps. */
24596 cfun->machine->far_jump_used = 1;
24597 return 1;
24598 }
24599 }
24600
24601 return 0;
24602 }
24603
24604 /* Return nonzero if FUNC must be entered in ARM mode. */
24605 static bool
24606 is_called_in_ARM_mode (tree func)
24607 {
24608 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24609
24610 /* Ignore the problem about functions whose address is taken. */
24611 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24612 return true;
24613
24614 #ifdef ARM_PE
24615 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24616 #else
24617 return false;
24618 #endif
24619 }
24620
24621 /* Given the stack offsets and register mask in OFFSETS, decide how
24622 many additional registers to push instead of subtracting a constant
24623 from SP. For epilogues the principle is the same except we use pop.
24624 FOR_PROLOGUE indicates which we're generating. */
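/* A worked example (a sketch): a frame needing exactly 512 bytes with
   one suitable free low register returns 1; pushing that one extra
   register leaves a 508-byte adjustment, which fits the
   single-instruction Thumb-1 "sub sp, #imm" limit of 508.  */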
24625 static int
24626 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24627 {
24628 HOST_WIDE_INT amount;
24629 unsigned long live_regs_mask = offsets->saved_regs_mask;
24630 /* Extract a mask of the ones we can give to the Thumb's push/pop
24631 instruction. */
24632 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24633 /* Then count how many other high registers will need to be pushed. */
24634 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24635 int n_free, reg_base, size;
24636
24637 if (!for_prologue && frame_pointer_needed)
24638 amount = offsets->locals_base - offsets->saved_regs;
24639 else
24640 amount = offsets->outgoing_args - offsets->saved_regs;
24641
24642 /* If the stack frame size is 512 exactly, we can save one load
24643 instruction, which should make this a win even when optimizing
24644 for speed. */
24645 if (!optimize_size && amount != 512)
24646 return 0;
24647
24648 /* Can't do this if there are high registers to push. */
24649 if (high_regs_pushed != 0)
24650 return 0;
24651
24652 /* Shouldn't do it in the prologue if no registers would normally
24653 be pushed at all. In the epilogue, also allow it if we'll have
24654 a pop insn for the PC. */
24655 if (l_mask == 0
24656 && (for_prologue
24657 || TARGET_BACKTRACE
24658 || (live_regs_mask & 1 << LR_REGNUM) == 0
24659 || TARGET_INTERWORK
24660 || crtl->args.pretend_args_size != 0))
24661 return 0;
24662
24663 /* Don't do this if thumb_expand_prologue wants to emit instructions
24664 between the push and the stack frame allocation. */
24665 if (for_prologue
24666 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24667 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24668 return 0;
24669
24670 reg_base = 0;
24671 n_free = 0;
24672 if (!for_prologue)
24673 {
24674 size = arm_size_return_regs ();
24675 reg_base = ARM_NUM_INTS (size);
24676 live_regs_mask >>= reg_base;
24677 }
24678
24679 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24680 && (for_prologue || call_used_regs[reg_base + n_free]))
24681 {
24682 live_regs_mask >>= 1;
24683 n_free++;
24684 }
24685
24686 if (n_free == 0)
24687 return 0;
24688 gcc_assert (amount / 4 * 4 == amount);
24689
24690 if (amount >= 512 && (amount - n_free * 4) < 512)
24691 return (amount - 508) / 4;
24692 if (amount <= n_free * 4)
24693 return amount / 4;
24694 return 0;
24695 }
24696
24697 /* The bits which aren't usefully expanded as rtl. */
24698 const char *
24699 thumb1_unexpanded_epilogue (void)
24700 {
24701 arm_stack_offsets *offsets;
24702 int regno;
24703 unsigned long live_regs_mask = 0;
24704 int high_regs_pushed = 0;
24705 int extra_pop;
24706 int had_to_push_lr;
24707 int size;
24708
24709 if (cfun->machine->return_used_this_function != 0)
24710 return "";
24711
24712 if (IS_NAKED (arm_current_func_type ()))
24713 return "";
24714
24715 offsets = arm_get_frame_offsets ();
24716 live_regs_mask = offsets->saved_regs_mask;
24717 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24718
24719 /* Deduce the registers used from the function's return value.
24720 This is more reliable than examining df_regs_ever_live_p () because that
24721 will be set if the register is ever used in the function, not just if
24722 the register is used to hold a return value. */
24723 size = arm_size_return_regs ();
24724
24725 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24726 if (extra_pop > 0)
24727 {
24728 unsigned long extra_mask = (1 << extra_pop) - 1;
24729 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24730 }
24731
24732 /* The prolog may have pushed some high registers to use as
24733 work registers; e.g. the testsuite file:
24734 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24735 compiles to produce:
24736 push {r4, r5, r6, r7, lr}
24737 mov r7, r9
24738 mov r6, r8
24739 push {r6, r7}
24740 as part of the prolog. We have to undo that pushing here. */
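/* (A sketch of the matching restore sequence, continuing the example
   above: pop the saved values into free low registers and move them back
   up, e.g. "pop {r2, r3}; mov r8, r2; mov r9, r3".)  */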
24741
24742 if (high_regs_pushed)
24743 {
24744 unsigned long mask = live_regs_mask & 0xff;
24745 int next_hi_reg;
24746
24747 /* The available low registers depend on the size of the value we are
24748 returning. */
24749 if (size <= 12)
24750 mask |= 1 << 3;
24751 if (size <= 8)
24752 mask |= 1 << 2;
24753
24754 if (mask == 0)
24755 /* Oh dear! We have no low registers into which we can pop
24756 high registers! */
24757 internal_error
24758 ("no low registers available for popping high registers");
24759
24760 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24761 if (live_regs_mask & (1 << next_hi_reg))
24762 break;
24763
24764 while (high_regs_pushed)
24765 {
24766 /* Find lo register(s) into which the high register(s) can
24767 be popped. */
24768 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24769 {
24770 if (mask & (1 << regno))
24771 high_regs_pushed--;
24772 if (high_regs_pushed == 0)
24773 break;
24774 }
24775
24776 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24777
24778 /* Pop the values into the low register(s). */
24779 thumb_pop (asm_out_file, mask);
24780
24781 /* Move the value(s) into the high registers. */
24782 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24783 {
24784 if (mask & (1 << regno))
24785 {
24786 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24787 regno);
24788
24789 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24790 if (live_regs_mask & (1 << next_hi_reg))
24791 break;
24792 }
24793 }
24794 }
24795 live_regs_mask &= ~0x0f00;
24796 }
24797
24798 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24799 live_regs_mask &= 0xff;
24800
24801 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24802 {
24803 /* Pop the return address into the PC. */
24804 if (had_to_push_lr)
24805 live_regs_mask |= 1 << PC_REGNUM;
24806
24807 /* Either no argument registers were pushed or a backtrace
24808 structure was created which includes an adjusted stack
24809 pointer, so just pop everything. */
24810 if (live_regs_mask)
24811 thumb_pop (asm_out_file, live_regs_mask);
24812
24813 /* We have either just popped the return address into the
24814 PC or it was kept in LR for the entire function.
24815 Note that thumb_pop has already called thumb_exit if the
24816 PC was in the list. */
24817 if (!had_to_push_lr)
24818 thumb_exit (asm_out_file, LR_REGNUM);
24819 }
24820 else
24821 {
24822 /* Pop everything but the return address. */
24823 if (live_regs_mask)
24824 thumb_pop (asm_out_file, live_regs_mask);
24825
24826 if (had_to_push_lr)
24827 {
24828 if (size > 12)
24829 {
24830 /* We have no free low regs, so save one. */
24831 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24832 LAST_ARG_REGNUM);
24833 }
24834
24835 /* Get the return address into a temporary register. */
24836 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24837
24838 if (size > 12)
24839 {
24840 /* Move the return address to lr. */
24841 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24842 LAST_ARG_REGNUM);
24843 /* Restore the low register. */
24844 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24845 IP_REGNUM);
24846 regno = LR_REGNUM;
24847 }
24848 else
24849 regno = LAST_ARG_REGNUM;
24850 }
24851 else
24852 regno = LR_REGNUM;
24853
24854 /* Remove the argument registers that were pushed onto the stack. */
24855 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24856 SP_REGNUM, SP_REGNUM,
24857 crtl->args.pretend_args_size);
24858
24859 thumb_exit (asm_out_file, regno);
24860 }
24861
24862 return "";
24863 }
24864
24865 /* Functions to save and restore machine-specific function data. */
24866 static struct machine_function *
24867 arm_init_machine_status (void)
24868 {
24869 struct machine_function *machine;
24870 machine = ggc_cleared_alloc<machine_function> ();
24871
24872 #if ARM_FT_UNKNOWN != 0
24873 machine->func_type = ARM_FT_UNKNOWN;
24874 #endif
24875 machine->static_chain_stack_bytes = -1;
24876 return machine;
24877 }
24878
24879 /* Return an RTX indicating where the return address to the
24880 calling function can be found. */
24881 rtx
24882 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24883 {
24884 if (count != 0)
24885 return NULL_RTX;
24886
24887 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24888 }
24889
24890 /* Do anything needed before RTL is emitted for each function. */
24891 void
24892 arm_init_expanders (void)
24893 {
24894 /* Arrange to initialize and mark the machine per-function status. */
24895 init_machine_status = arm_init_machine_status;
24896
24897 /* This is to stop the combine pass optimizing away the alignment
24898 adjustment of va_arg. */
24899 /* ??? It is claimed that this should not be necessary. */
24900 if (cfun)
24901 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24902 }
24903
24904 /* Check that FUNC is called with a different mode. */
24905
24906 bool
24907 arm_change_mode_p (tree func)
24908 {
24909 if (TREE_CODE (func) != FUNCTION_DECL)
24910 return false;
24911
24912 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24913
24914 if (!callee_tree)
24915 callee_tree = target_option_default_node;
24916
24917 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24918 int flags = callee_opts->x_target_flags;
24919
24920 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24921 }
24922
24923 /* Like arm_compute_initial_elimination offset. Simpler because there
24924 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24925 to point at the base of the local variables after static stack
24926 space for a function has been allocated. */
24927
24928 HOST_WIDE_INT
24929 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24930 {
24931 arm_stack_offsets *offsets;
24932
24933 offsets = arm_get_frame_offsets ();
24934
24935 switch (from)
24936 {
24937 case ARG_POINTER_REGNUM:
24938 switch (to)
24939 {
24940 case STACK_POINTER_REGNUM:
24941 return offsets->outgoing_args - offsets->saved_args;
24942
24943 case FRAME_POINTER_REGNUM:
24944 return offsets->soft_frame - offsets->saved_args;
24945
24946 case ARM_HARD_FRAME_POINTER_REGNUM:
24947 return offsets->saved_regs - offsets->saved_args;
24948
24949 case THUMB_HARD_FRAME_POINTER_REGNUM:
24950 return offsets->locals_base - offsets->saved_args;
24951
24952 default:
24953 gcc_unreachable ();
24954 }
24955 break;
24956
24957 case FRAME_POINTER_REGNUM:
24958 switch (to)
24959 {
24960 case STACK_POINTER_REGNUM:
24961 return offsets->outgoing_args - offsets->soft_frame;
24962
24963 case ARM_HARD_FRAME_POINTER_REGNUM:
24964 return offsets->saved_regs - offsets->soft_frame;
24965
24966 case THUMB_HARD_FRAME_POINTER_REGNUM:
24967 return offsets->locals_base - offsets->soft_frame;
24968
24969 default:
24970 gcc_unreachable ();
24971 }
24972 break;
24973
24974 default:
24975 gcc_unreachable ();
24976 }
24977 }
24978
24979 /* Generate the function's prologue. */
24980
24981 void
24982 thumb1_expand_prologue (void)
24983 {
24984 rtx_insn *insn;
24985
24986 HOST_WIDE_INT amount;
24987 HOST_WIDE_INT size;
24988 arm_stack_offsets *offsets;
24989 unsigned long func_type;
24990 int regno;
24991 unsigned long live_regs_mask;
24992 unsigned long l_mask;
24993 unsigned high_regs_pushed = 0;
24994 bool lr_needs_saving;
24995
24996 func_type = arm_current_func_type ();
24997
24998 /* Naked functions don't have prologues. */
24999 if (IS_NAKED (func_type))
25000 {
25001 if (flag_stack_usage_info)
25002 current_function_static_stack_size = 0;
25003 return;
25004 }
25005
25006 if (IS_INTERRUPT (func_type))
25007 {
25008 error ("interrupt Service Routines cannot be coded in Thumb mode");
25009 return;
25010 }
25011
25012 if (is_called_in_ARM_mode (current_function_decl))
25013 emit_insn (gen_prologue_thumb1_interwork ());
25014
25015 offsets = arm_get_frame_offsets ();
25016 live_regs_mask = offsets->saved_regs_mask;
25017 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25018
25019 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25020 l_mask = live_regs_mask & 0x40ff;
25021 /* Then count how many other high registers will need to be pushed. */
25022 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25023
25024 if (crtl->args.pretend_args_size)
25025 {
25026 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25027
25028 if (cfun->machine->uses_anonymous_args)
25029 {
25030 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25031 unsigned long mask;
25032
25033 mask = 1ul << (LAST_ARG_REGNUM + 1);
25034 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25035
25036 insn = thumb1_emit_multi_reg_push (mask, 0);
25037 }
25038 else
25039 {
25040 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25041 stack_pointer_rtx, x));
25042 }
25043 RTX_FRAME_RELATED_P (insn) = 1;
25044 }
25045
25046 if (TARGET_BACKTRACE)
25047 {
25048 HOST_WIDE_INT offset = 0;
25049 unsigned work_register;
25050 rtx work_reg, x, arm_hfp_rtx;
25051
25052 /* We have been asked to create a stack backtrace structure.
25053 The code looks like this:
25054
25055 0 .align 2
25056 0 func:
25057 0 sub SP, #16 Reserve space for 4 registers.
25058 2 push {R7} Push low registers.
25059 4 add R7, SP, #20 Get the stack pointer before the push.
25060 6 str R7, [SP, #8] Store the stack pointer
25061 (before reserving the space).
25062 8 mov R7, PC Get hold of the start of this code + 12.
25063 10 str R7, [SP, #16] Store it.
25064 12 mov R7, FP Get hold of the current frame pointer.
25065 14 str R7, [SP, #4] Store it.
25066 16 mov R7, LR Get hold of the current return address.
25067 18 str R7, [SP, #12] Store it.
25068 20 add R7, SP, #16 Point at the start of the
25069 backtrace structure.
25070 22 mov FP, R7 Put this value into the frame pointer. */
25071
25072 work_register = thumb_find_work_register (live_regs_mask);
25073 work_reg = gen_rtx_REG (SImode, work_register);
25074 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25075
25076 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25077 stack_pointer_rtx, GEN_INT (-16)));
25078 RTX_FRAME_RELATED_P (insn) = 1;
25079
25080 if (l_mask)
25081 {
25082 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25083 RTX_FRAME_RELATED_P (insn) = 1;
25084 lr_needs_saving = false;
25085
25086 offset = bit_count (l_mask) * UNITS_PER_WORD;
25087 }
25088
25089 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25090 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25091
25092 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25093 x = gen_frame_mem (SImode, x);
25094 emit_move_insn (x, work_reg);
25095
25096 /* Make sure that the instruction fetching the PC is in the right place
25097 to calculate "start of backtrace creation code + 12". */
25098 /* ??? The stores using the common WORK_REG ought to be enough to
25099 prevent the scheduler from doing anything weird. Failing that
25100 we could always move all of the following into an UNSPEC_VOLATILE. */
25101 if (l_mask)
25102 {
25103 x = gen_rtx_REG (SImode, PC_REGNUM);
25104 emit_move_insn (work_reg, x);
25105
25106 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25107 x = gen_frame_mem (SImode, x);
25108 emit_move_insn (x, work_reg);
25109
25110 emit_move_insn (work_reg, arm_hfp_rtx);
25111
25112 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25113 x = gen_frame_mem (SImode, x);
25114 emit_move_insn (x, work_reg);
25115 }
25116 else
25117 {
25118 emit_move_insn (work_reg, arm_hfp_rtx);
25119
25120 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25121 x = gen_frame_mem (SImode, x);
25122 emit_move_insn (x, work_reg);
25123
25124 x = gen_rtx_REG (SImode, PC_REGNUM);
25125 emit_move_insn (work_reg, x);
25126
25127 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25128 x = gen_frame_mem (SImode, x);
25129 emit_move_insn (x, work_reg);
25130 }
25131
25132 x = gen_rtx_REG (SImode, LR_REGNUM);
25133 emit_move_insn (work_reg, x);
25134
25135 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25136 x = gen_frame_mem (SImode, x);
25137 emit_move_insn (x, work_reg);
25138
25139 x = GEN_INT (offset + 12);
25140 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25141
25142 emit_move_insn (arm_hfp_rtx, work_reg);
25143 }
25144 /* Optimization: If we are not pushing any low registers but we are going
25145 to push some high registers then delay our first push. This will just
25146 be a push of LR and we can combine it with the push of the first high
25147 register. */
25148 else if ((l_mask & 0xff) != 0
25149 || (high_regs_pushed == 0 && lr_needs_saving))
25150 {
25151 unsigned long mask = l_mask;
25152 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25153 insn = thumb1_emit_multi_reg_push (mask, mask);
25154 RTX_FRAME_RELATED_P (insn) = 1;
25155 lr_needs_saving = false;
25156 }
25157
25158 if (high_regs_pushed)
25159 {
25160 unsigned pushable_regs;
25161 unsigned next_hi_reg;
25162 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25163 : crtl->args.info.nregs;
25164 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25165
25166 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25167 if (live_regs_mask & (1 << next_hi_reg))
25168 break;
25169
25170 	      /* Here we need to mask out registers used for passing arguments
25171 		 even if they can be pushed.  This is to avoid using them to stash
25172 		 the high registers: such a stash could clobber argument values that are still live.  */
25173 pushable_regs = l_mask & (~arg_regs_mask);
25174 if (lr_needs_saving)
25175 pushable_regs &= ~(1 << LR_REGNUM);
25176
25177 if (pushable_regs == 0)
25178 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
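      /* The loop below stages each live high register through one of the
	 pushable low registers and pushes it from there.  Roughly, with r8
	 and r9 live and only r4 usable, it emits:
		mov	r4, r9
		push	{r4}
		mov	r4, r8
		push	{r4}  */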
25179
25180 while (high_regs_pushed > 0)
25181 {
25182 unsigned long real_regs_mask = 0;
25183 unsigned long push_mask = 0;
25184
25185 for (regno = LR_REGNUM; regno >= 0; regno --)
25186 {
25187 if (pushable_regs & (1 << regno))
25188 {
25189 emit_move_insn (gen_rtx_REG (SImode, regno),
25190 gen_rtx_REG (SImode, next_hi_reg));
25191
25192 high_regs_pushed --;
25193 real_regs_mask |= (1 << next_hi_reg);
25194 push_mask |= (1 << regno);
25195
25196 if (high_regs_pushed)
25197 {
25198 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25199 next_hi_reg --)
25200 if (live_regs_mask & (1 << next_hi_reg))
25201 break;
25202 }
25203 else
25204 break;
25205 }
25206 }
25207
25208 /* If we had to find a work register and we have not yet
25209 saved the LR then add it to the list of regs to push. */
25210 if (lr_needs_saving)
25211 {
25212 push_mask |= 1 << LR_REGNUM;
25213 real_regs_mask |= 1 << LR_REGNUM;
25214 lr_needs_saving = false;
25215 }
25216
25217 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25218 RTX_FRAME_RELATED_P (insn) = 1;
25219 }
25220 }
25221
25222 /* Load the pic register before setting the frame pointer,
25223 so we can use r7 as a temporary work register. */
25224 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25225 arm_load_pic_register (live_regs_mask);
25226
25227 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25228 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25229 stack_pointer_rtx);
25230
25231 size = offsets->outgoing_args - offsets->saved_args;
25232 if (flag_stack_usage_info)
25233 current_function_static_stack_size = size;
25234
25235 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25236 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25237 || flag_stack_clash_protection)
25238 && size)
25239 	    sorry ("%<-fstack-check=specific%> for Thumb-1");
25240
25241 amount = offsets->outgoing_args - offsets->saved_regs;
25242 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25243 if (amount)
25244 {
25245 if (amount < 512)
25246 {
25247 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25248 GEN_INT (- amount)));
25249 RTX_FRAME_RELATED_P (insn) = 1;
25250 }
25251 else
25252 {
25253 rtx reg, dwarf;
25254
25255 /* The stack decrement is too big for an immediate value in a single
25256 insn. In theory we could issue multiple subtracts, but after
25257 	     three of them it becomes more space-efficient to place the full
25258 	     value in the constant pool and load it into a register.  (Also the
25259 ARM debugger really likes to see only one stack decrement per
25260 function). So instead we look for a scratch register into which
25261 we can load the decrement, and then we subtract this from the
25262 	     stack pointer.  Unfortunately, on Thumb the only available
25263 scratch registers are the argument registers, and we cannot use
25264 these as they may hold arguments to the function. Instead we
25265 	     attempt to locate a call-preserved register that is used by this
25266 function. If we can find one, then we know that it will have
25267 been pushed at the start of the prologue and so we can corrupt
25268 it now. */
25269 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25270 if (live_regs_mask & (1 << regno))
25271 break;
25272
25273 	  gcc_assert (regno <= LAST_LO_REGNUM);
25274
25275 reg = gen_rtx_REG (SImode, regno);
25276
25277 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25278
25279 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25280 stack_pointer_rtx, reg));
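	  /* Illustrative expansion, assuming r4 was the call-preserved
	     register found above: the two insns typically assemble to a
	     literal-pool load plus a register add, roughly
		ldr	r4, .Lc		@ .Lc: .word -<amount>
		add	sp, r4  */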
25281
25282 dwarf = gen_rtx_SET (stack_pointer_rtx,
25283 plus_constant (Pmode, stack_pointer_rtx,
25284 -amount));
25285 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25286 RTX_FRAME_RELATED_P (insn) = 1;
25287 }
25288 }
25289
25290 if (frame_pointer_needed)
25291 thumb_set_frame_pointer (offsets);
25292
25293 /* If we are profiling, make sure no instructions are scheduled before
25294 the call to mcount. Similarly if the user has requested no
25295 scheduling in the prolog. Similarly if we want non-call exceptions
25296 using the EABI unwinder, to prevent faulting instructions from being
25297 swapped with a stack adjustment. */
25298 if (crtl->profile || !TARGET_SCHED_PROLOG
25299 || (arm_except_unwind_info (&global_options) == UI_TARGET
25300 && cfun->can_throw_non_call_exceptions))
25301 emit_insn (gen_blockage ());
25302
25303 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25304 if (live_regs_mask & 0xff)
25305 cfun->machine->lr_save_eliminated = 0;
25306 }
25307
25308 /* Clear caller saved registers not used to pass return values and leaked
25309 condition flags before exiting a cmse_nonsecure_entry function. */
25310
25311 void
25312 cmse_nonsecure_entry_clear_before_return (void)
25313 {
25314 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25315 uint32_t padding_bits_to_clear = 0;
25316 auto_sbitmap to_clear_bitmap (maxregno + 1);
25317 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25318 tree result_type;
25319
25320 bitmap_clear (to_clear_bitmap);
25321 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25322 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25323
25324 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25325 registers. */
25326 if (TARGET_HARD_FLOAT)
25327 {
25328 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25329
25330 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25331
25332 /* Make sure we don't clear the two scratch registers used to clear the
25333 relevant FPSCR bits in output_return_instruction. */
25334 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25335 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25336 emit_use (gen_rtx_REG (SImode, 4));
25337 bitmap_clear_bit (to_clear_bitmap, 4);
25338 }
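  /* At this point to_clear_bitmap should hold r0-r3 plus s0-s15 on hard-float
     targets (ip and r4 being kept as the FPSCR-clearing scratch registers),
     or r0-r3 plus ip for soft-float.  The code below adds any user-forced
     caller-saved registers and then drops the registers carrying the return
     value.  */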
25339
25340 /* If the user has defined registers to be caller saved, these are no longer
25341 restored by the function before returning and must thus be cleared for
25342 security purposes. */
25343 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25344 {
25345 /* We do not touch registers that can be used to pass arguments as per
25346 the AAPCS, since these should never be made callee-saved by user
25347 options. */
25348 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25349 continue;
25350 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25351 continue;
25352 if (call_used_regs[regno])
25353 bitmap_set_bit (to_clear_bitmap, regno);
25354 }
25355
25356 /* Make sure we do not clear the registers used to return the result in. */
25357 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25358 if (!VOID_TYPE_P (result_type))
25359 {
25360 uint64_t to_clear_return_mask;
25361 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25362
25363 /* No need to check that we return in registers, because we don't
25364 support returning on stack yet. */
25365 gcc_assert (REG_P (result_rtl));
25366 to_clear_return_mask
25367 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25368 &padding_bits_to_clear);
25369 if (to_clear_return_mask)
25370 {
25371 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25372 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25373 {
25374 if (to_clear_return_mask & (1ULL << regno))
25375 bitmap_clear_bit (to_clear_bitmap, regno);
25376 }
25377 }
25378 }
25379
25380 if (padding_bits_to_clear != 0)
25381 {
25382 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25383 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25384
25385 	      /* Padding_bits_to_clear is nonzero, so we know we are dealing with
25386 		 returning a composite type, which only uses r0.  Let's make sure that
25387 		 r1-r3 are cleared too.  */
25388 bitmap_clear (to_clear_arg_regs_bitmap);
25389 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25390 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25391 }
25392
25393 /* Clear full registers that leak before returning. */
25394 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25395 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25396 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25397 clearing_reg);
25398 }
25399
25400 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25401    single POP instruction can be generated.  LR should be replaced by PC.  All
25402    the checks required are already done by USE_RETURN_INSN ().  Hence, all we
25403    really need to check here is whether a single register or multiple
25404    registers are to be returned.  */
25405 void
25406 thumb2_expand_return (bool simple_return)
25407 {
25408 int i, num_regs;
25409 unsigned long saved_regs_mask;
25410 arm_stack_offsets *offsets;
25411
25412 offsets = arm_get_frame_offsets ();
25413 saved_regs_mask = offsets->saved_regs_mask;
25414
25415 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25416 if (saved_regs_mask & (1 << i))
25417 num_regs++;
25418
25419 if (!simple_return && saved_regs_mask)
25420 {
25421 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25422 	 functions, or adapt the code to handle it according to the ACLE.  This path
25423 	 should not be reachable for cmse_nonsecure_entry functions, though we prefer
25424 to assert it for now to ensure that future code changes do not silently
25425 change this behavior. */
25426 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25427 if (num_regs == 1)
25428 {
25429 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25430 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25431 rtx addr = gen_rtx_MEM (SImode,
25432 gen_rtx_POST_INC (SImode,
25433 stack_pointer_rtx));
25434 set_mem_alias_set (addr, get_frame_alias_set ());
25435 XVECEXP (par, 0, 0) = ret_rtx;
25436 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25437 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25438 emit_jump_insn (par);
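	  /* The PARALLEL above pops the single saved word straight into PC,
	     so it is in effect a lone "pop {pc}" that both restores the stack
	     and returns.  */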
25439 }
25440 else
25441 {
25442 saved_regs_mask &= ~ (1 << LR_REGNUM);
25443 saved_regs_mask |= (1 << PC_REGNUM);
25444 arm_emit_multi_reg_pop (saved_regs_mask);
25445 }
25446 }
25447 else
25448 {
25449 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25450 cmse_nonsecure_entry_clear_before_return ();
25451 emit_jump_insn (simple_return_rtx);
25452 }
25453 }
25454
25455 void
25456 thumb1_expand_epilogue (void)
25457 {
25458 HOST_WIDE_INT amount;
25459 arm_stack_offsets *offsets;
25460 int regno;
25461
25462   /* Naked functions don't have epilogues.  */
25463 if (IS_NAKED (arm_current_func_type ()))
25464 return;
25465
25466 offsets = arm_get_frame_offsets ();
25467 amount = offsets->outgoing_args - offsets->saved_regs;
25468
25469 if (frame_pointer_needed)
25470 {
25471 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25472 amount = offsets->locals_base - offsets->saved_regs;
25473 }
25474 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25475
25476 gcc_assert (amount >= 0);
25477 if (amount)
25478 {
25479 emit_insn (gen_blockage ());
25480
25481 if (amount < 512)
25482 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25483 GEN_INT (amount)));
25484 else
25485 {
25486 /* r3 is always free in the epilogue. */
25487 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25488
25489 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25490 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25491 }
25492 }
25493
25494 /* Emit a USE (stack_pointer_rtx), so that
25495 the stack adjustment will not be deleted. */
25496 emit_insn (gen_force_register_use (stack_pointer_rtx));
25497
25498 if (crtl->profile || !TARGET_SCHED_PROLOG)
25499 emit_insn (gen_blockage ());
25500
25501 /* Emit a clobber for each insn that will be restored in the epilogue,
25502 so that flow2 will get register lifetimes correct. */
25503 for (regno = 0; regno < 13; regno++)
25504 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25505 emit_clobber (gen_rtx_REG (SImode, regno));
25506
25507 if (! df_regs_ever_live_p (LR_REGNUM))
25508 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25509
25510 /* Clear all caller-saved regs that are not used to return. */
25511 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25512 cmse_nonsecure_entry_clear_before_return ();
25513 }
25514
25515 /* Epilogue code for APCS frame. */
25516 static void
25517 arm_expand_epilogue_apcs_frame (bool really_return)
25518 {
25519 unsigned long func_type;
25520 unsigned long saved_regs_mask;
25521 int num_regs = 0;
25522 int i;
25523 int floats_from_frame = 0;
25524 arm_stack_offsets *offsets;
25525
25526 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25527 func_type = arm_current_func_type ();
25528
25529 /* Get frame offsets for ARM. */
25530 offsets = arm_get_frame_offsets ();
25531 saved_regs_mask = offsets->saved_regs_mask;
25532
25533 /* Find the offset of the floating-point save area in the frame. */
25534 floats_from_frame
25535 = (offsets->saved_args
25536 + arm_compute_static_chain_stack_bytes ()
25537 - offsets->frame);
25538
25539   /* Compute how many core registers are saved and how far away the floats are.  */
25540 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25541 if (saved_regs_mask & (1 << i))
25542 {
25543 num_regs++;
25544 floats_from_frame += 4;
25545 }
25546
25547 if (TARGET_HARD_FLOAT)
25548 {
25549 int start_reg;
25550 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25551
25552 /* The offset is from IP_REGNUM. */
25553 int saved_size = arm_get_vfp_saved_size ();
25554 if (saved_size > 0)
25555 {
25556 rtx_insn *insn;
25557 floats_from_frame += saved_size;
25558 insn = emit_insn (gen_addsi3 (ip_rtx,
25559 hard_frame_pointer_rtx,
25560 GEN_INT (-floats_from_frame)));
25561 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25562 ip_rtx, hard_frame_pointer_rtx);
25563 }
25564
25565 /* Generate VFP register multi-pop. */
25566 start_reg = FIRST_VFP_REGNUM;
25567
25568 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25569 /* Look for a case where a reg does not need restoring. */
25570 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25571 && (!df_regs_ever_live_p (i + 1)
25572 || call_used_regs[i + 1]))
25573 {
25574 if (start_reg != i)
25575 arm_emit_vfp_multi_reg_pop (start_reg,
25576 (i - start_reg) / 2,
25577 gen_rtx_REG (SImode,
25578 IP_REGNUM));
25579 start_reg = i + 2;
25580 }
25581
25582 /* Restore the remaining regs that we have discovered (or possibly
25583 even all of them, if the conditional in the for loop never
25584 fired). */
25585 if (start_reg != i)
25586 arm_emit_vfp_multi_reg_pop (start_reg,
25587 (i - start_reg) / 2,
25588 gen_rtx_REG (SImode, IP_REGNUM));
25589 }
25590
25591 if (TARGET_IWMMXT)
25592 {
25593 /* The frame pointer is guaranteed to be non-double-word aligned, as
25594 it is set to double-word-aligned old_stack_pointer - 4. */
25595 rtx_insn *insn;
25596 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25597
25598 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25599 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25600 {
25601 rtx addr = gen_frame_mem (V2SImode,
25602 plus_constant (Pmode, hard_frame_pointer_rtx,
25603 - lrm_count * 4));
25604 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25605 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25606 gen_rtx_REG (V2SImode, i),
25607 NULL_RTX);
25608 lrm_count += 2;
25609 }
25610 }
25611
25612   /* saved_regs_mask should contain IP, which holds the old stack pointer
25613      saved when the activation record was created.  Since SP and IP are adjacent
25614      registers, we can restore the value directly into SP.  */
25615 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25616 saved_regs_mask &= ~(1 << IP_REGNUM);
25617 saved_regs_mask |= (1 << SP_REGNUM);
25618
25619 /* There are two registers left in saved_regs_mask - LR and PC. We
25620 only need to restore LR (the return address), but to
25621 save time we can load it directly into PC, unless we need a
25622 special function exit sequence, or we are not really returning. */
25623 if (really_return
25624 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25625 && !crtl->calls_eh_return)
25626 /* Delete LR from the register mask, so that LR on
25627 the stack is loaded into the PC in the register mask. */
25628 saved_regs_mask &= ~(1 << LR_REGNUM);
25629 else
25630 saved_regs_mask &= ~(1 << PC_REGNUM);
25631
25632 num_regs = bit_count (saved_regs_mask);
25633 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25634 {
25635 rtx_insn *insn;
25636 emit_insn (gen_blockage ());
25637 /* Unwind the stack to just below the saved registers. */
25638 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25639 hard_frame_pointer_rtx,
25640 GEN_INT (- 4 * num_regs)));
25641
25642 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25643 stack_pointer_rtx, hard_frame_pointer_rtx);
25644 }
25645
25646 arm_emit_multi_reg_pop (saved_regs_mask);
25647
25648 if (IS_INTERRUPT (func_type))
25649 {
25650 /* Interrupt handlers will have pushed the
25651 IP onto the stack, so restore it now. */
25652 rtx_insn *insn;
25653 rtx addr = gen_rtx_MEM (SImode,
25654 gen_rtx_POST_INC (SImode,
25655 stack_pointer_rtx));
25656 set_mem_alias_set (addr, get_frame_alias_set ());
25657 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25658 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25659 gen_rtx_REG (SImode, IP_REGNUM),
25660 NULL_RTX);
25661 }
25662
25663 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25664 return;
25665
25666 if (crtl->calls_eh_return)
25667 emit_insn (gen_addsi3 (stack_pointer_rtx,
25668 stack_pointer_rtx,
25669 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25670
25671 if (IS_STACKALIGN (func_type))
25672 /* Restore the original stack pointer. Before prologue, the stack was
25673 realigned and the original stack pointer saved in r0. For details,
25674 see comment in arm_expand_prologue. */
25675 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25676
25677 emit_jump_insn (simple_return_rtx);
25678 }
25679
25680 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25681 function is not a sibcall. */
25682 void
25683 arm_expand_epilogue (bool really_return)
25684 {
25685 unsigned long func_type;
25686 unsigned long saved_regs_mask;
25687 int num_regs = 0;
25688 int i;
25689 int amount;
25690 arm_stack_offsets *offsets;
25691
25692 func_type = arm_current_func_type ();
25693
25694   /* Naked functions don't have epilogues.  Hence, generate a return pattern and
25695      let output_return_instruction take care of any instruction emission.  */
25696 if (IS_NAKED (func_type)
25697 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25698 {
25699 if (really_return)
25700 emit_jump_insn (simple_return_rtx);
25701 return;
25702 }
25703
25704 /* If we are throwing an exception, then we really must be doing a
25705 return, so we can't tail-call. */
25706 gcc_assert (!crtl->calls_eh_return || really_return);
25707
25708 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25709 {
25710 arm_expand_epilogue_apcs_frame (really_return);
25711 return;
25712 }
25713
25714 /* Get frame offsets for ARM. */
25715 offsets = arm_get_frame_offsets ();
25716 saved_regs_mask = offsets->saved_regs_mask;
25717 num_regs = bit_count (saved_regs_mask);
25718
25719 if (frame_pointer_needed)
25720 {
25721 rtx_insn *insn;
25722 /* Restore stack pointer if necessary. */
25723 if (TARGET_ARM)
25724 {
25725 	  /* In ARM mode, the frame pointer points to the first saved register;
25726 	     restore the stack pointer to the last saved register.  */
25727 amount = offsets->frame - offsets->saved_regs;
25728
25729 /* Force out any pending memory operations that reference stacked data
25730 before stack de-allocation occurs. */
25731 emit_insn (gen_blockage ());
25732 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25733 hard_frame_pointer_rtx,
25734 GEN_INT (amount)));
25735 arm_add_cfa_adjust_cfa_note (insn, amount,
25736 stack_pointer_rtx,
25737 hard_frame_pointer_rtx);
25738
25739 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25740 deleted. */
25741 emit_insn (gen_force_register_use (stack_pointer_rtx));
25742 }
25743 else
25744 {
25745 /* In Thumb-2 mode, the frame pointer points to the last saved
25746 register. */
25747 amount = offsets->locals_base - offsets->saved_regs;
25748 if (amount)
25749 {
25750 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25751 hard_frame_pointer_rtx,
25752 GEN_INT (amount)));
25753 arm_add_cfa_adjust_cfa_note (insn, amount,
25754 hard_frame_pointer_rtx,
25755 hard_frame_pointer_rtx);
25756 }
25757
25758 /* Force out any pending memory operations that reference stacked data
25759 before stack de-allocation occurs. */
25760 emit_insn (gen_blockage ());
25761 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25762 hard_frame_pointer_rtx));
25763 arm_add_cfa_adjust_cfa_note (insn, 0,
25764 stack_pointer_rtx,
25765 hard_frame_pointer_rtx);
25766 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25767 deleted. */
25768 emit_insn (gen_force_register_use (stack_pointer_rtx));
25769 }
25770 }
25771 else
25772 {
25773 /* Pop off outgoing args and local frame to adjust stack pointer to
25774 last saved register. */
25775 amount = offsets->outgoing_args - offsets->saved_regs;
25776 if (amount)
25777 {
25778 rtx_insn *tmp;
25779 /* Force out any pending memory operations that reference stacked data
25780 before stack de-allocation occurs. */
25781 emit_insn (gen_blockage ());
25782 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25783 stack_pointer_rtx,
25784 GEN_INT (amount)));
25785 arm_add_cfa_adjust_cfa_note (tmp, amount,
25786 stack_pointer_rtx, stack_pointer_rtx);
25787 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25788 not deleted. */
25789 emit_insn (gen_force_register_use (stack_pointer_rtx));
25790 }
25791 }
25792
25793 if (TARGET_HARD_FLOAT)
25794 {
25795 /* Generate VFP register multi-pop. */
25796 int end_reg = LAST_VFP_REGNUM + 1;
25797
25798 /* Scan the registers in reverse order. We need to match
25799 any groupings made in the prologue and generate matching
25800 	 vldm operations.  Matching the groups is necessary because,
25801 	 unlike POP, VLDM can only restore consecutive registers.  */
25802 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25803 /* Look for a case where a reg does not need restoring. */
25804 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25805 && (!df_regs_ever_live_p (i + 1)
25806 || call_used_regs[i + 1]))
25807 {
25808 /* Restore the regs discovered so far (from reg+2 to
25809 end_reg). */
25810 if (end_reg > i + 2)
25811 arm_emit_vfp_multi_reg_pop (i + 2,
25812 (end_reg - (i + 2)) / 2,
25813 stack_pointer_rtx);
25814 end_reg = i;
25815 }
25816
25817 /* Restore the remaining regs that we have discovered (or possibly
25818 even all of them, if the conditional in the for loop never
25819 fired). */
25820 if (end_reg > i + 2)
25821 arm_emit_vfp_multi_reg_pop (i + 2,
25822 (end_reg - (i + 2)) / 2,
25823 stack_pointer_rtx);
25824 }
25825
25826 if (TARGET_IWMMXT)
25827 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25828 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25829 {
25830 rtx_insn *insn;
25831 rtx addr = gen_rtx_MEM (V2SImode,
25832 gen_rtx_POST_INC (SImode,
25833 stack_pointer_rtx));
25834 set_mem_alias_set (addr, get_frame_alias_set ());
25835 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25836 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25837 gen_rtx_REG (V2SImode, i),
25838 NULL_RTX);
25839 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25840 stack_pointer_rtx, stack_pointer_rtx);
25841 }
25842
25843 if (saved_regs_mask)
25844 {
25845 rtx insn;
25846 bool return_in_pc = false;
25847
25848 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25849 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25850 && !IS_CMSE_ENTRY (func_type)
25851 && !IS_STACKALIGN (func_type)
25852 && really_return
25853 && crtl->args.pretend_args_size == 0
25854 && saved_regs_mask & (1 << LR_REGNUM)
25855 && !crtl->calls_eh_return)
25856 {
25857 saved_regs_mask &= ~(1 << LR_REGNUM);
25858 saved_regs_mask |= (1 << PC_REGNUM);
25859 return_in_pc = true;
25860 }
25861
25862 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25863 {
25864 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25865 if (saved_regs_mask & (1 << i))
25866 {
25867 rtx addr = gen_rtx_MEM (SImode,
25868 gen_rtx_POST_INC (SImode,
25869 stack_pointer_rtx));
25870 set_mem_alias_set (addr, get_frame_alias_set ());
25871
25872 if (i == PC_REGNUM)
25873 {
25874 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25875 XVECEXP (insn, 0, 0) = ret_rtx;
25876 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25877 addr);
25878 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25879 insn = emit_jump_insn (insn);
25880 }
25881 else
25882 {
25883 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25884 addr));
25885 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25886 gen_rtx_REG (SImode, i),
25887 NULL_RTX);
25888 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25889 stack_pointer_rtx,
25890 stack_pointer_rtx);
25891 }
25892 }
25893 }
25894 else
25895 {
25896 if (TARGET_LDRD
25897 && current_tune->prefer_ldrd_strd
25898 && !optimize_function_for_size_p (cfun))
25899 {
25900 if (TARGET_THUMB2)
25901 thumb2_emit_ldrd_pop (saved_regs_mask);
25902 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25903 arm_emit_ldrd_pop (saved_regs_mask);
25904 else
25905 arm_emit_multi_reg_pop (saved_regs_mask);
25906 }
25907 else
25908 arm_emit_multi_reg_pop (saved_regs_mask);
25909 }
25910
25911 if (return_in_pc)
25912 return;
25913 }
25914
25915 amount
25916 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25917 if (amount)
25918 {
25919 int i, j;
25920 rtx dwarf = NULL_RTX;
25921 rtx_insn *tmp =
25922 emit_insn (gen_addsi3 (stack_pointer_rtx,
25923 stack_pointer_rtx,
25924 GEN_INT (amount)));
25925
25926 RTX_FRAME_RELATED_P (tmp) = 1;
25927
25928 if (cfun->machine->uses_anonymous_args)
25929 {
25930 	  /* Restore the pretend args.  See arm_expand_prologue for how the
25931 	     pretend args are saved on the stack.  */
25932 int num_regs = crtl->args.pretend_args_size / 4;
25933 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
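	  /* For example, pretend_args_size == 8 gives num_regs == 2 and
	     saved_regs_mask == (0xf0 >> 2) & 0xf == 0xc, i.e. {r2, r3}, the
	     same registers the prologue pushed for the anonymous args.  */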
25934 for (j = 0, i = 0; j < num_regs; i++)
25935 if (saved_regs_mask & (1 << i))
25936 {
25937 rtx reg = gen_rtx_REG (SImode, i);
25938 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25939 j++;
25940 }
25941 REG_NOTES (tmp) = dwarf;
25942 }
25943 arm_add_cfa_adjust_cfa_note (tmp, amount,
25944 stack_pointer_rtx, stack_pointer_rtx);
25945 }
25946
25947 /* Clear all caller-saved regs that are not used to return. */
25948 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25949 {
25950 /* CMSE_ENTRY always returns. */
25951 gcc_assert (really_return);
25952 cmse_nonsecure_entry_clear_before_return ();
25953 }
25954
25955 if (!really_return)
25956 return;
25957
25958 if (crtl->calls_eh_return)
25959 emit_insn (gen_addsi3 (stack_pointer_rtx,
25960 stack_pointer_rtx,
25961 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25962
25963 if (IS_STACKALIGN (func_type))
25964 /* Restore the original stack pointer. Before prologue, the stack was
25965 realigned and the original stack pointer saved in r0. For details,
25966 see comment in arm_expand_prologue. */
25967 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25968
25969 emit_jump_insn (simple_return_rtx);
25970 }
25971
25972 /* Implementation of insn prologue_thumb1_interwork. This is the first
25973    "instruction" of a function called in ARM mode.  Switch to Thumb mode.  */
25974
25975 const char *
25976 thumb1_output_interwork (void)
25977 {
25978 const char * name;
25979 FILE *f = asm_out_file;
25980
25981 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25982 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25983 == SYMBOL_REF);
25984 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25985
25986 /* Generate code sequence to switch us into Thumb mode. */
25987 /* The .code 32 directive has already been emitted by
25988 ASM_DECLARE_FUNCTION_NAME. */
25989 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25990 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
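  /* Executed as ARM code, the two instructions above compute the address of
     the Thumb code that follows (PC reads as the orr's address + 8), set the
     Thumb bit, and branch there via BX.  */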
25991
25992 /* Generate a label, so that the debugger will notice the
25993 change in instruction sets. This label is also used by
25994 the assembler to bypass the ARM code when this function
25995 is called from a Thumb encoded function elsewhere in the
25996 same file. Hence the definition of STUB_NAME here must
25997 agree with the definition in gas/config/tc-arm.c. */
25998
25999 #define STUB_NAME ".real_start_of"
26000
26001 fprintf (f, "\t.code\t16\n");
26002 #ifdef ARM_PE
26003 if (arm_dllexport_name_p (name))
26004 name = arm_strip_name_encoding (name);
26005 #endif
26006 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26007 fprintf (f, "\t.thumb_func\n");
26008 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26009
26010 return "";
26011 }
26012
26013 /* Handle the case of a double word load into a low register from
26014 a computed memory address. The computed address may involve a
26015 register which is overwritten by the load. */
26016 const char *
26017 thumb_load_double_from_address (rtx *operands)
26018 {
26019 rtx addr;
26020 rtx base;
26021 rtx offset;
26022 rtx arg1;
26023 rtx arg2;
26024
26025 gcc_assert (REG_P (operands[0]));
26026 gcc_assert (MEM_P (operands[1]));
26027
26028 /* Get the memory address. */
26029 addr = XEXP (operands[1], 0);
26030
26031 /* Work out how the memory address is computed. */
26032 switch (GET_CODE (addr))
26033 {
26034 case REG:
26035 operands[2] = adjust_address (operands[1], SImode, 4);
26036
26037 if (REGNO (operands[0]) == REGNO (addr))
26038 {
26039 output_asm_insn ("ldr\t%H0, %2", operands);
26040 output_asm_insn ("ldr\t%0, %1", operands);
26041 }
26042 else
26043 {
26044 output_asm_insn ("ldr\t%0, %1", operands);
26045 output_asm_insn ("ldr\t%H0, %2", operands);
26046 }
26047 break;
26048
26049 case CONST:
26050 /* Compute <address> + 4 for the high order load. */
26051 operands[2] = adjust_address (operands[1], SImode, 4);
26052
26053 output_asm_insn ("ldr\t%0, %1", operands);
26054 output_asm_insn ("ldr\t%H0, %2", operands);
26055 break;
26056
26057 case PLUS:
26058 arg1 = XEXP (addr, 0);
26059 arg2 = XEXP (addr, 1);
26060
26061 if (CONSTANT_P (arg1))
26062 base = arg2, offset = arg1;
26063 else
26064 base = arg1, offset = arg2;
26065
26066 gcc_assert (REG_P (base));
26067
26068 /* Catch the case of <address> = <reg> + <reg> */
26069 if (REG_P (offset))
26070 {
26071 int reg_offset = REGNO (offset);
26072 int reg_base = REGNO (base);
26073 int reg_dest = REGNO (operands[0]);
26074
26075 /* Add the base and offset registers together into the
26076 higher destination register. */
26077 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26078 reg_dest + 1, reg_base, reg_offset);
26079
26080 /* Load the lower destination register from the address in
26081 the higher destination register. */
26082 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26083 reg_dest, reg_dest + 1);
26084
26085 /* Load the higher destination register from its own address
26086 plus 4. */
26087 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26088 reg_dest + 1, reg_dest + 1);
26089 }
26090 else
26091 {
26092 /* Compute <address> + 4 for the high order load. */
26093 operands[2] = adjust_address (operands[1], SImode, 4);
26094
26095 /* If the computed address is held in the low order register
26096 then load the high order register first, otherwise always
26097 load the low order register first. */
26098 if (REGNO (operands[0]) == REGNO (base))
26099 {
26100 output_asm_insn ("ldr\t%H0, %2", operands);
26101 output_asm_insn ("ldr\t%0, %1", operands);
26102 }
26103 else
26104 {
26105 output_asm_insn ("ldr\t%0, %1", operands);
26106 output_asm_insn ("ldr\t%H0, %2", operands);
26107 }
26108 }
26109 break;
26110
26111 case LABEL_REF:
26112 /* With no registers to worry about we can just load the value
26113 directly. */
26114 operands[2] = adjust_address (operands[1], SImode, 4);
26115
26116 output_asm_insn ("ldr\t%H0, %2", operands);
26117 output_asm_insn ("ldr\t%0, %1", operands);
26118 break;
26119
26120 default:
26121 gcc_unreachable ();
26122 }
26123
26124 return "";
26125 }
26126
26127 const char *
26128 thumb_output_move_mem_multiple (int n, rtx *operands)
26129 {
26130 switch (n)
26131 {
26132 case 2:
26133 if (REGNO (operands[4]) > REGNO (operands[5]))
26134 std::swap (operands[4], operands[5]);
26135
26136 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26137 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26138 break;
26139
26140 case 3:
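      /* Sort the three scratch registers into ascending order first, so that
	 the register lists printed below are in ascending register number;
	 assemblers typically warn about out-of-order LDM/STM lists.  */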
26141 if (REGNO (operands[4]) > REGNO (operands[5]))
26142 std::swap (operands[4], operands[5]);
26143 if (REGNO (operands[5]) > REGNO (operands[6]))
26144 std::swap (operands[5], operands[6]);
26145 if (REGNO (operands[4]) > REGNO (operands[5]))
26146 std::swap (operands[4], operands[5]);
26147
26148 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26149 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26150 break;
26151
26152 default:
26153 gcc_unreachable ();
26154 }
26155
26156 return "";
26157 }
26158
26159 /* Output a call-via instruction for thumb state. */
26160 const char *
26161 thumb_call_via_reg (rtx reg)
26162 {
26163 int regno = REGNO (reg);
26164 rtx *labelp;
26165
26166 gcc_assert (regno < LR_REGNUM);
26167
26168 /* If we are in the normal text section we can use a single instance
26169 per compilation unit. If we are doing function sections, then we need
26170 an entry per section, since we can't rely on reachability. */
26171 if (in_section == text_section)
26172 {
26173 thumb_call_reg_needed = 1;
26174
26175 if (thumb_call_via_label[regno] == NULL)
26176 thumb_call_via_label[regno] = gen_label_rtx ();
26177 labelp = thumb_call_via_label + regno;
26178 }
26179 else
26180 {
26181 if (cfun->machine->call_via[regno] == NULL)
26182 cfun->machine->call_via[regno] = gen_label_rtx ();
26183 labelp = cfun->machine->call_via + regno;
26184 }
26185
26186 output_asm_insn ("bl\t%a0", labelp);
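  /* The matching "<label>: bx rN" stub is emitted later; see arm_file_end
     below for the text-section case.  This gives Thumb-1 an indirect call
     through rN without needing BLX.  */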
26187 return "";
26188 }
26189
26190 /* Routines for generating rtl. */
26191 void
26192 thumb_expand_movmemqi (rtx *operands)
26193 {
26194 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26195 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26196 HOST_WIDE_INT len = INTVAL (operands[2]);
26197 HOST_WIDE_INT offset = 0;
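  /* For example, a 23-byte copy is emitted as one 12-byte block move, one
     8-byte block move, then a halfword and a byte copy for the 3-byte tail.  */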
26198
26199 while (len >= 12)
26200 {
26201 emit_insn (gen_movmem12b (out, in, out, in));
26202 len -= 12;
26203 }
26204
26205 if (len >= 8)
26206 {
26207 emit_insn (gen_movmem8b (out, in, out, in));
26208 len -= 8;
26209 }
26210
26211 if (len >= 4)
26212 {
26213 rtx reg = gen_reg_rtx (SImode);
26214 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26215 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26216 len -= 4;
26217 offset += 4;
26218 }
26219
26220 if (len >= 2)
26221 {
26222 rtx reg = gen_reg_rtx (HImode);
26223 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26224 plus_constant (Pmode, in,
26225 offset))));
26226 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26227 offset)),
26228 reg));
26229 len -= 2;
26230 offset += 2;
26231 }
26232
26233 if (len)
26234 {
26235 rtx reg = gen_reg_rtx (QImode);
26236 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26237 plus_constant (Pmode, in,
26238 offset))));
26239 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26240 offset)),
26241 reg));
26242 }
26243 }
26244
26245 void
26246 thumb_reload_out_hi (rtx *operands)
26247 {
26248 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26249 }
26250
26251 /* Return the length of a function name prefix
26252    that starts with the character C.  */
26253 static int
26254 arm_get_strip_length (int c)
26255 {
26256 switch (c)
26257 {
26258 ARM_NAME_ENCODING_LENGTHS
26259 default: return 0;
26260 }
26261 }
26262
26263 /* Return a pointer to a function's name with any
26264 and all prefix encodings stripped from it. */
26265 const char *
26266 arm_strip_name_encoding (const char *name)
26267 {
26268 int skip;
26269
26270 while ((skip = arm_get_strip_length (* name)))
26271 name += skip;
26272
26273 return name;
26274 }
26275
26276 /* If there is a '*' anywhere in the name's prefix, then
26277 emit the stripped name verbatim, otherwise prepend an
26278 underscore if leading underscores are being used. */
26279 void
26280 arm_asm_output_labelref (FILE *stream, const char *name)
26281 {
26282 int skip;
26283 int verbatim = 0;
26284
26285 while ((skip = arm_get_strip_length (* name)))
26286 {
26287 verbatim |= (*name == '*');
26288 name += skip;
26289 }
26290
26291 if (verbatim)
26292 fputs (name, stream);
26293 else
26294 asm_fprintf (stream, "%U%s", name);
26295 }
26296
26297 /* This function is used to emit an EABI tag and its associated value.
26298 We emit the numerical value of the tag in case the assembler does not
26299    support textual tags (e.g. gas prior to 2.20).  If requested we include
26300 the tag name in a comment so that anyone reading the assembler output
26301 will know which tag is being set.
26302
26303 This function is not static because arm-c.c needs it too. */
26304
26305 void
26306 arm_emit_eabi_attribute (const char *name, int num, int val)
26307 {
26308 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26309 if (flag_verbose_asm || flag_debug_asm)
26310 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26311 asm_fprintf (asm_out_file, "\n");
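  /* For instance, with -fverbose-asm the optimization-goals attribute at -O2
     comes out roughly as:
	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals  */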
26312 }
26313
26314 /* This function is used to print CPU tuning information as comment
26315 in assembler file. Pointers are not printed for now. */
26316
26317 void
26318 arm_print_tune_info (void)
26319 {
26320 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26321 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26322 current_tune->constant_limit);
26323 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26324 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26325 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26326 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26327 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26328 "prefetch.l1_cache_size:\t%d\n",
26329 current_tune->prefetch.l1_cache_size);
26330 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26331 "prefetch.l1_cache_line_size:\t%d\n",
26332 current_tune->prefetch.l1_cache_line_size);
26333 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26334 "prefer_constant_pool:\t%d\n",
26335 (int) current_tune->prefer_constant_pool);
26336 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26337 "branch_cost:\t(s:speed, p:predictable)\n");
26338 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26339 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26340 current_tune->branch_cost (false, false));
26341 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26342 current_tune->branch_cost (false, true));
26343 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26344 current_tune->branch_cost (true, false));
26345 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26346 current_tune->branch_cost (true, true));
26347 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26348 "prefer_ldrd_strd:\t%d\n",
26349 (int) current_tune->prefer_ldrd_strd);
26350 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26351 "logical_op_non_short_circuit:\t[%d,%d]\n",
26352 (int) current_tune->logical_op_non_short_circuit_thumb,
26353 (int) current_tune->logical_op_non_short_circuit_arm);
26354 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26355 "prefer_neon_for_64bits:\t%d\n",
26356 (int) current_tune->prefer_neon_for_64bits);
26357 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26358 "disparage_flag_setting_t16_encodings:\t%d\n",
26359 (int) current_tune->disparage_flag_setting_t16_encodings);
26360 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26361 "string_ops_prefer_neon:\t%d\n",
26362 (int) current_tune->string_ops_prefer_neon);
26363 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26364 "max_insns_inline_memset:\t%d\n",
26365 current_tune->max_insns_inline_memset);
26366 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26367 current_tune->fusible_ops);
26368 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26369 (int) current_tune->sched_autopref);
26370 }
26371
26372 /* Print .arch and .arch_extension directives corresponding to the
26373 current architecture configuration. */
26374 static void
26375 arm_print_asm_arch_directives ()
26376 {
26377 const arch_option *arch
26378 = arm_parse_arch_option_name (all_architectures, "-march",
26379 arm_active_target.arch_name);
26380 auto_sbitmap opt_bits (isa_num_bits);
26381
26382 gcc_assert (arch);
26383
26384 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26385 arm_last_printed_arch_string = arm_active_target.arch_name;
26386 if (!arch->common.extensions)
26387 return;
26388
26389 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26390 opt->name != NULL;
26391 opt++)
26392 {
26393 if (!opt->remove)
26394 {
26395 arm_initialize_isa (opt_bits, opt->isa_bits);
26396
26397 /* If every feature bit of this option is set in the target
26398 ISA specification, print out the option name. However,
26399 don't print anything if all the bits are part of the
26400 FPU specification. */
26401 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26402 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26403 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26404 }
26405 }
26406 }
26407
26408 static void
26409 arm_file_start (void)
26410 {
26411 int val;
26412
26413 if (TARGET_BPABI)
26414 {
26415 /* We don't have a specified CPU. Use the architecture to
26416 generate the tags.
26417
26418 Note: it might be better to do this unconditionally, then the
26419 assembler would not need to know about all new CPU names as
26420 they are added. */
26421 if (!arm_active_target.core_name)
26422 {
26423 /* armv7ve doesn't support any extensions. */
26424 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26425 {
26426 		  /* Keep backward compatibility for assemblers
26427 which don't support armv7ve. */
26428 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26429 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26430 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26431 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26432 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26433 arm_last_printed_arch_string = "armv7ve";
26434 }
26435 else
26436 arm_print_asm_arch_directives ();
26437 }
26438 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26439 {
26440 asm_fprintf (asm_out_file, "\t.arch %s\n",
26441 arm_active_target.core_name + 8);
26442 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26443 }
26444 else
26445 {
26446 const char* truncated_name
26447 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26448 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26449 }
26450
26451 if (print_tune_info)
26452 arm_print_tune_info ();
26453
26454 if (! TARGET_SOFT_FLOAT)
26455 {
26456 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26457 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26458
26459 if (TARGET_HARD_FLOAT_ABI)
26460 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26461 }
26462
26463 /* Some of these attributes only apply when the corresponding features
26464 are used. However we don't have any easy way of figuring this out.
26465 Conservatively record the setting that would have been used. */
26466
26467 if (flag_rounding_math)
26468 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26469
26470 if (!flag_unsafe_math_optimizations)
26471 {
26472 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26473 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26474 }
26475 if (flag_signaling_nans)
26476 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26477
26478 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26479 flag_finite_math_only ? 1 : 3);
26480
26481 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26482 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26483 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26484 flag_short_enums ? 1 : 2);
26485
26486 /* Tag_ABI_optimization_goals. */
26487 if (optimize_size)
26488 val = 4;
26489 else if (optimize >= 2)
26490 val = 2;
26491 else if (optimize)
26492 val = 1;
26493 else
26494 val = 6;
26495 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26496
26497 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26498 unaligned_access);
26499
26500 if (arm_fp16_format)
26501 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26502 (int) arm_fp16_format);
26503
26504 if (arm_lang_output_object_attributes_hook)
26505 arm_lang_output_object_attributes_hook();
26506 }
26507
26508 default_file_start ();
26509 }
26510
26511 static void
26512 arm_file_end (void)
26513 {
26514 int regno;
26515
26516 if (NEED_INDICATE_EXEC_STACK)
26517 /* Add .note.GNU-stack. */
26518 file_end_indicate_exec_stack ();
26519
26520 if (! thumb_call_reg_needed)
26521 return;
26522
26523 switch_to_section (text_section);
26524 asm_fprintf (asm_out_file, "\t.code 16\n");
26525 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26526
26527 for (regno = 0; regno < LR_REGNUM; regno++)
26528 {
26529 rtx label = thumb_call_via_label[regno];
26530
26531 if (label != 0)
26532 {
26533 targetm.asm_out.internal_label (asm_out_file, "L",
26534 CODE_LABEL_NUMBER (label));
26535 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26536 }
26537 }
26538 }
26539
26540 #ifndef ARM_PE
26541 /* Symbols in the text segment can be accessed without indirecting via the
26542 constant pool; it may take an extra binary operation, but this is still
26543 faster than indirecting via memory. Don't do this when not optimizing,
26544    since we won't be calculating all of the offsets necessary to do this
26545 simplification. */
26546
26547 static void
26548 arm_encode_section_info (tree decl, rtx rtl, int first)
26549 {
26550 if (optimize > 0 && TREE_CONSTANT (decl))
26551 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26552
26553 default_encode_section_info (decl, rtl, first);
26554 }
26555 #endif /* !ARM_PE */
26556
26557 static void
26558 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26559 {
26560 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26561 && !strcmp (prefix, "L"))
26562 {
26563 arm_ccfsm_state = 0;
26564 arm_target_insn = NULL;
26565 }
26566 default_internal_label (stream, prefix, labelno);
26567 }
26568
26569 /* Output code to add DELTA to the first argument, and then jump
26570 to FUNCTION. Used for C++ multiple inheritance. */
26571
26572 static void
26573 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26574 HOST_WIDE_INT, tree function)
26575 {
26576 static int thunk_label = 0;
26577 char label[256];
26578 char labelpc[256];
26579 int mi_delta = delta;
26580 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26581 int shift = 0;
26582 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26583 ? 1 : 0);
26584 if (mi_delta < 0)
26585 mi_delta = - mi_delta;
26586
26587 final_start_function (emit_barrier (), file, 1);
26588
26589 if (TARGET_THUMB1)
26590 {
26591 int labelno = thunk_label++;
26592 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26593       /* Thunks are entered in ARM mode when available.  */
26594 if (TARGET_THUMB1_ONLY)
26595 {
26596 /* push r3 so we can use it as a temporary. */
26597 /* TODO: Omit this save if r3 is not used. */
26598 fputs ("\tpush {r3}\n", file);
26599 fputs ("\tldr\tr3, ", file);
26600 }
26601 else
26602 {
26603 fputs ("\tldr\tr12, ", file);
26604 }
26605 assemble_name (file, label);
26606 fputc ('\n', file);
26607 if (flag_pic)
26608 {
26609 /* If we are generating PIC, the ldr instruction below loads
26610 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26611 the address of the add + 8, so we have:
26612
26613 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26614 = target + 1.
26615
26616 Note that we have "+ 1" because some versions of GNU ld
26617 don't set the low bit of the result for R_ARM_REL32
26618 relocations against thumb function symbols.
26619 On ARMv6M this is +4, not +8. */
26620 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26621 assemble_name (file, labelpc);
26622 fputs (":\n", file);
26623 if (TARGET_THUMB1_ONLY)
26624 {
26625 /* This is 2 insns after the start of the thunk, so we know it
26626 is 4-byte aligned. */
26627 fputs ("\tadd\tr3, pc, r3\n", file);
26628 fputs ("\tmov r12, r3\n", file);
26629 }
26630 else
26631 fputs ("\tadd\tr12, pc, r12\n", file);
26632 }
26633 else if (TARGET_THUMB1_ONLY)
26634 fputs ("\tmov r12, r3\n", file);
26635 }
26636 if (TARGET_THUMB1_ONLY)
26637 {
26638 if (mi_delta > 255)
26639 {
26640 fputs ("\tldr\tr3, ", file);
26641 assemble_name (file, label);
26642 fputs ("+4\n", file);
26643 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26644 mi_op, this_regno, this_regno);
26645 }
26646 else if (mi_delta != 0)
26647 {
26648 /* Thumb1 unified syntax requires s suffix in instruction name when
26649 one of the operands is immediate. */
26650 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26651 mi_op, this_regno, this_regno,
26652 mi_delta);
26653 }
26654 }
26655 else
26656 {
26657 /* TODO: Use movw/movt for large constants when available. */
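      /* The loop below peels DELTA into chunks that each fit an ARM rotated
	 8-bit immediate; e.g. a delta of 0x1234 would be added roughly as
		add	rN, rN, #0x234
		add	rN, rN, #0x1000  */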
26658 while (mi_delta != 0)
26659 {
26660 if ((mi_delta & (3 << shift)) == 0)
26661 shift += 2;
26662 else
26663 {
26664 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26665 mi_op, this_regno, this_regno,
26666 mi_delta & (0xff << shift));
26667 mi_delta &= ~(0xff << shift);
26668 shift += 8;
26669 }
26670 }
26671 }
26672 if (TARGET_THUMB1)
26673 {
26674 if (TARGET_THUMB1_ONLY)
26675 fputs ("\tpop\t{r3}\n", file);
26676
26677 fprintf (file, "\tbx\tr12\n");
26678 ASM_OUTPUT_ALIGN (file, 2);
26679 assemble_name (file, label);
26680 fputs (":\n", file);
26681 if (flag_pic)
26682 {
26683 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26684 rtx tem = XEXP (DECL_RTL (function), 0);
26685 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26686 pipeline offset is four rather than eight. Adjust the offset
26687 accordingly. */
26688 tem = plus_constant (GET_MODE (tem), tem,
26689 TARGET_THUMB1_ONLY ? -3 : -7);
26690 tem = gen_rtx_MINUS (GET_MODE (tem),
26691 tem,
26692 gen_rtx_SYMBOL_REF (Pmode,
26693 ggc_strdup (labelpc)));
26694 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26695 }
26696 else
26697 /* Output ".word .LTHUNKn". */
26698 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26699
26700 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26701 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26702 }
26703 else
26704 {
26705 fputs ("\tb\t", file);
26706 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26707 if (NEED_PLT_RELOC)
26708 fputs ("(PLT)", file);
26709 fputc ('\n', file);
26710 }
26711
26712 final_end_function ();
26713 }
26714
26715 /* MI thunk handling for TARGET_32BIT. */
26716
26717 static void
26718 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26719 HOST_WIDE_INT vcall_offset, tree function)
26720 {
26721 /* On ARM, this_regno is R0 or R1 depending on
26722 whether the function returns an aggregate or not.
26723 */
26724 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26725 function)
26726 ? R1_REGNUM : R0_REGNUM);
26727
26728 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26729 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26730 reload_completed = 1;
26731 emit_note (NOTE_INSN_PROLOGUE_END);
26732
26733 /* Add DELTA to THIS_RTX. */
26734 if (delta != 0)
26735 arm_split_constant (PLUS, Pmode, NULL_RTX,
26736 delta, this_rtx, this_rtx, false);
26737
26738 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26739 if (vcall_offset != 0)
26740 {
26741 /* Load *THIS_RTX. */
26742 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26743 /* Compute *THIS_RTX + VCALL_OFFSET. */
26744 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26745 false);
26746 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26747 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26748 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26749 }
26750
26751 /* Generate a tail call to the target function. */
26752 if (!TREE_USED (function))
26753 {
26754 assemble_external (function);
26755 TREE_USED (function) = 1;
26756 }
26757 rtx funexp = XEXP (DECL_RTL (function), 0);
26758 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26759 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26760 SIBLING_CALL_P (insn) = 1;
26761
26762 insn = get_insns ();
26763 shorten_branches (insn);
26764 final_start_function (insn, file, 1);
26765 final (insn, file, 1);
26766 final_end_function ();
26767
26768 /* Stop pretending this is a post-reload pass. */
26769 reload_completed = 0;
26770 }
26771
26772 /* Output code to add DELTA to the first argument, and then jump
26773 to FUNCTION. Used for C++ multiple inheritance. */
26774
26775 static void
26776 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26777 HOST_WIDE_INT vcall_offset, tree function)
26778 {
26779 if (TARGET_32BIT)
26780 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26781 else
26782 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26783 }
26784
26785 int
26786 arm_emit_vector_const (FILE *file, rtx x)
26787 {
26788 int i;
26789 const char * pattern;
26790
26791 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26792
26793 switch (GET_MODE (x))
26794 {
26795 case E_V2SImode: pattern = "%08x"; break;
26796 case E_V4HImode: pattern = "%04x"; break;
26797 case E_V8QImode: pattern = "%02x"; break;
26798 default: gcc_unreachable ();
26799 }
26800
26801 fprintf (file, "0x");
26802 for (i = CONST_VECTOR_NUNITS (x); i--;)
26803 {
26804 rtx element;
26805
26806 element = CONST_VECTOR_ELT (x, i);
26807 fprintf (file, pattern, INTVAL (element));
26808 }
26809
26810 return 1;
26811 }
26812
26813 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26814 HFmode constant pool entries are actually loaded with ldr. */
26815 void
26816 arm_emit_fp16_const (rtx c)
26817 {
26818 long bits;
26819
26820 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26821 if (WORDS_BIG_ENDIAN)
26822 assemble_zeros (2);
26823 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26824 if (!WORDS_BIG_ENDIAN)
26825 assemble_zeros (2);
26826 }
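/* For example (illustrative): the HFmode constant 1.0 has the IEEE
   half-precision encoding 0x3c00, so on a little-endian target the pool
   entry emitted above is the two bytes 0x00 0x3c followed by two bytes
   of zero padding, giving a full word that ldr can load.  */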
26827
26828 const char *
26829 arm_output_load_gr (rtx *operands)
26830 {
26831 rtx reg;
26832 rtx offset;
26833 rtx wcgr;
26834 rtx sum;
26835
26836 if (!MEM_P (operands [1])
26837 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26838 || !REG_P (reg = XEXP (sum, 0))
26839 || !CONST_INT_P (offset = XEXP (sum, 1))
26840 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26841 return "wldrw%?\t%0, %1";
26842
26843 /* Fix up an out-of-range load of a GR register. */
26844 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26845 wcgr = operands[0];
26846 operands[0] = reg;
26847 output_asm_insn ("ldr%?\t%0, %1", operands);
26848
26849 operands[0] = wcgr;
26850 operands[1] = reg;
26851 output_asm_insn ("tmcr%?\t%0, %1", operands);
26852 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26853
26854 return "";
26855 }
26856
26857 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26858
26859 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26860 named arg and all anonymous args onto the stack.
26861 XXX I know the prologue shouldn't be pushing registers, but it is faster
26862 that way. */
26863
26864 static void
26865 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26866 machine_mode mode,
26867 tree type,
26868 int *pretend_size,
26869 int second_time ATTRIBUTE_UNUSED)
26870 {
26871 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26872 int nregs;
26873
26874 cfun->machine->uses_anonymous_args = 1;
26875 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26876 {
26877 nregs = pcum->aapcs_ncrn;
26878 if (nregs & 1)
26879 {
26880 int res = arm_needs_doubleword_align (mode, type);
26881 if (res < 0 && warn_psabi)
26882 inform (input_location, "parameter passing for argument of "
26883 "type %qT changed in GCC 7.1", type);
26884 else if (res > 0)
26885 nregs++;
26886 }
26887 }
26888 else
26889 nregs = pcum->nregs;
26890
26891 if (nregs < NUM_ARG_REGS)
26892 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26893 }
26894
26895 /* We can't rely on the caller doing the proper promotion when
26896 using APCS or ATPCS. */
26897
26898 static bool
26899 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26900 {
26901 return !TARGET_AAPCS_BASED;
26902 }
26903
26904 static machine_mode
26905 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26906 machine_mode mode,
26907 int *punsignedp ATTRIBUTE_UNUSED,
26908 const_tree fntype ATTRIBUTE_UNUSED,
26909 int for_return ATTRIBUTE_UNUSED)
26910 {
26911 if (GET_MODE_CLASS (mode) == MODE_INT
26912 && GET_MODE_SIZE (mode) < 4)
26913 return SImode;
26914
26915 return mode;
26916 }
26917
26918
26919 static bool
26920 arm_default_short_enums (void)
26921 {
26922 return ARM_DEFAULT_SHORT_ENUMS;
26923 }
26924
26925
26926 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26927
26928 static bool
26929 arm_align_anon_bitfield (void)
26930 {
26931 return TARGET_AAPCS_BASED;
26932 }
26933
26934
26935 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26936
26937 static tree
26938 arm_cxx_guard_type (void)
26939 {
26940 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26941 }
26942
26943
26944 /* The EABI says test the least significant bit of a guard variable. */
26945
26946 static bool
26947 arm_cxx_guard_mask_bit (void)
26948 {
26949 return TARGET_AAPCS_BASED;
26950 }
26951
26952
26953 /* The EABI specifies that all array cookies are 8 bytes long. */
26954
26955 static tree
26956 arm_get_cookie_size (tree type)
26957 {
26958 tree size;
26959
26960 if (!TARGET_AAPCS_BASED)
26961 return default_cxx_get_cookie_size (type);
26962
26963 size = build_int_cst (sizetype, 8);
26964 return size;
26965 }
26966
26967
26968 /* The EABI says that array cookies should also contain the element size. */
26969
26970 static bool
26971 arm_cookie_has_size (void)
26972 {
26973 return TARGET_AAPCS_BASED;
26974 }
26975
26976
26977 /* The EABI says constructors and destructors should return a pointer to
26978 the object constructed/destroyed. */
26979
26980 static bool
26981 arm_cxx_cdtor_returns_this (void)
26982 {
26983 return TARGET_AAPCS_BASED;
26984 }
26985
26986 /* The EABI says that an inline function may never be the key
26987 method. */
26988
26989 static bool
26990 arm_cxx_key_method_may_be_inline (void)
26991 {
26992 return !TARGET_AAPCS_BASED;
26993 }
26994
26995 static void
26996 arm_cxx_determine_class_data_visibility (tree decl)
26997 {
26998 if (!TARGET_AAPCS_BASED
26999 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27000 return;
27001
27002 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27003 is exported. However, on systems without dynamic vague linkage,
27004 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27005 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27006 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27007 else
27008 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27009 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27010 }
27011
27012 static bool
27013 arm_cxx_class_data_always_comdat (void)
27014 {
27015 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27016 vague linkage if the class has no key function. */
27017 return !TARGET_AAPCS_BASED;
27018 }
27019
27020
27021 /* The EABI says __aeabi_atexit should be used to register static
27022 destructors. */
27023
27024 static bool
27025 arm_cxx_use_aeabi_atexit (void)
27026 {
27027 return TARGET_AAPCS_BASED;
27028 }
27029
27030
27031 void
27032 arm_set_return_address (rtx source, rtx scratch)
27033 {
27034 arm_stack_offsets *offsets;
27035 HOST_WIDE_INT delta;
27036 rtx addr, mem;
27037 unsigned long saved_regs;
27038
27039 offsets = arm_get_frame_offsets ();
27040 saved_regs = offsets->saved_regs_mask;
27041
27042 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27043 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27044 else
27045 {
27046 if (frame_pointer_needed)
27047 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27048 else
27049 {
27050 /* LR will be the first saved register. */
27051 delta = offsets->outgoing_args - (offsets->frame + 4);
27052
27053
27054 if (delta >= 4096)
27055 {
27056 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27057 GEN_INT (delta & ~4095)));
27058 addr = scratch;
27059 delta &= 4095;
27060 }
27061 else
27062 addr = stack_pointer_rtx;
27063
27064 addr = plus_constant (Pmode, addr, delta);
27065 }
27066
27067 /* The store needs to be marked to prevent DSE from deleting
27068 it as dead if it is based on fp. */
27069 mem = gen_frame_mem (Pmode, addr);
27070 MEM_VOLATILE_P (mem) = true;
27071 emit_move_insn (mem, source);
27072 }
27073 }
27074
27075
27076 void
27077 thumb_set_return_address (rtx source, rtx scratch)
27078 {
27079 arm_stack_offsets *offsets;
27080 HOST_WIDE_INT delta;
27081 HOST_WIDE_INT limit;
27082 int reg;
27083 rtx addr, mem;
27084 unsigned long mask;
27085
27086 emit_use (source);
27087
27088 offsets = arm_get_frame_offsets ();
27089 mask = offsets->saved_regs_mask;
27090 if (mask & (1 << LR_REGNUM))
27091 {
27092 limit = 1024;
27093 /* Find the saved regs. */
27094 if (frame_pointer_needed)
27095 {
27096 delta = offsets->soft_frame - offsets->saved_args;
27097 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27098 if (TARGET_THUMB1)
27099 limit = 128;
27100 }
27101 else
27102 {
27103 delta = offsets->outgoing_args - offsets->saved_args;
27104 reg = SP_REGNUM;
27105 }
27106 /* Allow for the stack frame. */
27107 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27108 delta -= 16;
27109 /* The link register is always the first saved register. */
27110 delta -= 4;
27111
27112 /* Construct the address. */
27113 addr = gen_rtx_REG (SImode, reg);
27114 if (delta > limit)
27115 {
27116 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27117 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27118 addr = scratch;
27119 }
27120 else
27121 addr = plus_constant (Pmode, addr, delta);
27122
27123 /* The store needs to be marked to prevent DSE from deleting
27124 it as dead if it is based on fp. */
27125 mem = gen_frame_mem (Pmode, addr);
27126 MEM_VOLATILE_P (mem) = true;
27127 emit_move_insn (mem, source);
27128 }
27129 else
27130 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27131 }
27132
27133 /* Implements target hook vector_mode_supported_p. */
27134 bool
27135 arm_vector_mode_supported_p (machine_mode mode)
27136 {
27137 /* Neon also supports V2SImode, etc. listed in the clause below. */
27138 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27139 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27140 || mode == V2DImode || mode == V8HFmode))
27141 return true;
27142
27143 if ((TARGET_NEON || TARGET_IWMMXT)
27144 && ((mode == V2SImode)
27145 || (mode == V4HImode)
27146 || (mode == V8QImode)))
27147 return true;
27148
27149 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27150 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27151 || mode == V2HAmode))
27152 return true;
27153
27154 return false;
27155 }
27156
27157 /* Implements target hook array_mode_supported_p. */
27158
27159 static bool
27160 arm_array_mode_supported_p (machine_mode mode,
27161 unsigned HOST_WIDE_INT nelems)
27162 {
27163 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27164 for now, as the lane-swapping logic needs to be extended in the expanders.
27165 See PR target/82518. */
27166 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27167 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27168 && (nelems >= 2 && nelems <= 4))
27169 return true;
27170
27171 return false;
27172 }
27173
27174 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27175 registers when autovectorizing for Neon, at least until multiple vector
27176 widths are supported properly by the middle-end. */
27177
27178 static machine_mode
27179 arm_preferred_simd_mode (scalar_mode mode)
27180 {
27181 if (TARGET_NEON)
27182 switch (mode)
27183 {
27184 case E_SFmode:
27185 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27186 case E_SImode:
27187 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27188 case E_HImode:
27189 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27190 case E_QImode:
27191 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27192 case E_DImode:
27193 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27194 return V2DImode;
27195 break;
27196
27197 default:;
27198 }
27199
27200 if (TARGET_REALLY_IWMMXT)
27201 switch (mode)
27202 {
27203 case E_SImode:
27204 return V2SImode;
27205 case E_HImode:
27206 return V4HImode;
27207 case E_QImode:
27208 return V8QImode;
27209
27210 default:;
27211 }
27212
27213 return word_mode;
27214 }
27215
27216 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27217
27218 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27219 using r0-r4 for function arguments, r7 for the stack frame, and not having
27220 enough left over to do doubleword arithmetic.  For Thumb-2 all the
27221 potentially problematic instructions accept high registers so this is not
27222 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27223 that require many low registers. */
27224 static bool
27225 arm_class_likely_spilled_p (reg_class_t rclass)
27226 {
27227 if ((TARGET_THUMB1 && rclass == LO_REGS)
27228 || rclass == CC_REG)
27229 return true;
27230
27231 return false;
27232 }
27233
27234 /* Implements target hook small_register_classes_for_mode_p. */
27235 bool
27236 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27237 {
27238 return TARGET_THUMB1;
27239 }
27240
27241 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27242 ARM insns and therefore guarantee that the shift count is modulo 256.
27243 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27244 guarantee no particular behavior for out-of-range counts. */
27245
27246 static unsigned HOST_WIDE_INT
27247 arm_shift_truncation_mask (machine_mode mode)
27248 {
27249 return mode == SImode ? 255 : 0;
27250 }
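/* Illustrative example: for SImode a register-specified shift such as
   "mov r0, r1, lsl r2" only uses the bottom byte of r2, so a count of
   260 behaves like a count of 4; the mask of 255 above lets the
   middle-end rely on that.  DImode shifts go through library calls or
   multi-insn expansions, so no truncation is guaranteed and the mask
   is 0.  */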
27251
27252
27253 /* Map internal gcc register numbers to DWARF2 register numbers. */
27254
27255 unsigned int
27256 arm_dbx_register_number (unsigned int regno)
27257 {
27258 if (regno < 16)
27259 return regno;
27260
27261 if (IS_VFP_REGNUM (regno))
27262 {
27263 /* See comment in arm_dwarf_register_span. */
27264 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27265 return 64 + regno - FIRST_VFP_REGNUM;
27266 else
27267 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27268 }
27269
27270 if (IS_IWMMXT_GR_REGNUM (regno))
27271 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27272
27273 if (IS_IWMMXT_REGNUM (regno))
27274 return 112 + regno - FIRST_IWMMXT_REGNUM;
27275
27276 return DWARF_FRAME_REGISTERS;
27277 }
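/* Worked examples (illustrative, assuming the usual register layout):
   r7 maps to DWARF register 7, s3 (a VFP register usable as a single)
   maps to 64 + 3 = 67, and d16 maps to 256 + 16 = 272.  Registers with
   no DWARF equivalent fall through to DWARF_FRAME_REGISTERS.  */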
27278
27279 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27280 GCC models them as 64 32-bit registers, so we need to describe this to
27281 the DWARF generation code. Other registers can use the default. */
27282 static rtx
27283 arm_dwarf_register_span (rtx rtl)
27284 {
27285 machine_mode mode;
27286 unsigned regno;
27287 rtx parts[16];
27288 int nregs;
27289 int i;
27290
27291 regno = REGNO (rtl);
27292 if (!IS_VFP_REGNUM (regno))
27293 return NULL_RTX;
27294
27295 /* XXX FIXME: The EABI defines two VFP register ranges:
27296 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27297 256-287: D0-D31
27298 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27299 corresponding D register. Until GDB supports this, we shall use the
27300 legacy encodings. We also use these encodings for D0-D15 for
27301 compatibility with older debuggers. */
27302 mode = GET_MODE (rtl);
27303 if (GET_MODE_SIZE (mode) < 8)
27304 return NULL_RTX;
27305
27306 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27307 {
27308 nregs = GET_MODE_SIZE (mode) / 4;
27309 for (i = 0; i < nregs; i += 2)
27310 if (TARGET_BIG_END)
27311 {
27312 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27313 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27314 }
27315 else
27316 {
27317 parts[i] = gen_rtx_REG (SImode, regno + i);
27318 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27319 }
27320 }
27321 else
27322 {
27323 nregs = GET_MODE_SIZE (mode) / 8;
27324 for (i = 0; i < nregs; i++)
27325 parts[i] = gen_rtx_REG (DImode, regno + i);
27326 }
27327
27328 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27329 }
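/* Example (illustrative): a DFmode value living in d0 is described to
   the DWARF code as the pair (s0, s1), that is, two SImode pieces using
   the legacy 64-95 numbering, whereas a DFmode value in d16, which has
   no single-precision aliases, is described as a single DImode piece
   using the 256-287 D-register numbering.  */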
27330
27331 #if ARM_UNWIND_INFO
27332 /* Emit unwind directives for a store-multiple instruction or stack pointer
27333 push during alignment.
27334 These should only ever be generated by the function prologue code, so
27335 expect them to have a particular form.
27336 The store-multiple instruction sometimes pushes pc as the last register,
27337 although it should not be tracked in the unwind information, or for -Os
27338 sometimes pushes some dummy registers before the first register that needs
27339 to be tracked in the unwind information; such dummy registers are there just
27340 to avoid a separate stack adjustment, and will not be restored in the
27341 epilogue. */
27342
27343 static void
27344 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27345 {
27346 int i;
27347 HOST_WIDE_INT offset;
27348 HOST_WIDE_INT nregs;
27349 int reg_size;
27350 unsigned reg;
27351 unsigned lastreg;
27352 unsigned padfirst = 0, padlast = 0;
27353 rtx e;
27354
27355 e = XVECEXP (p, 0, 0);
27356 gcc_assert (GET_CODE (e) == SET);
27357
27358 /* First insn will adjust the stack pointer. */
27359 gcc_assert (GET_CODE (e) == SET
27360 && REG_P (SET_DEST (e))
27361 && REGNO (SET_DEST (e)) == SP_REGNUM
27362 && GET_CODE (SET_SRC (e)) == PLUS);
27363
27364 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27365 nregs = XVECLEN (p, 0) - 1;
27366 gcc_assert (nregs);
27367
27368 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27369 if (reg < 16)
27370 {
27371 /* For -Os dummy registers can be pushed at the beginning to
27372 avoid separate stack pointer adjustment. */
27373 e = XVECEXP (p, 0, 1);
27374 e = XEXP (SET_DEST (e), 0);
27375 if (GET_CODE (e) == PLUS)
27376 padfirst = INTVAL (XEXP (e, 1));
27377 gcc_assert (padfirst == 0 || optimize_size);
27378 /* The function prologue may also push pc, but not annotate it as it is
27379 never restored. We turn this into a stack pointer adjustment. */
27380 e = XVECEXP (p, 0, nregs);
27381 e = XEXP (SET_DEST (e), 0);
27382 if (GET_CODE (e) == PLUS)
27383 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27384 else
27385 padlast = offset - 4;
27386 gcc_assert (padlast == 0 || padlast == 4);
27387 if (padlast == 4)
27388 fprintf (asm_out_file, "\t.pad #4\n");
27389 reg_size = 4;
27390 fprintf (asm_out_file, "\t.save {");
27391 }
27392 else if (IS_VFP_REGNUM (reg))
27393 {
27394 reg_size = 8;
27395 fprintf (asm_out_file, "\t.vsave {");
27396 }
27397 else
27398 /* Unknown register type. */
27399 gcc_unreachable ();
27400
27401 /* If the stack increment doesn't match the size of the saved registers,
27402 something has gone horribly wrong. */
27403 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27404
27405 offset = padfirst;
27406 lastreg = 0;
27407 /* The remaining insns will describe the stores. */
27408 for (i = 1; i <= nregs; i++)
27409 {
27410 /* Expect (set (mem <addr>) (reg)).
27411 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27412 e = XVECEXP (p, 0, i);
27413 gcc_assert (GET_CODE (e) == SET
27414 && MEM_P (SET_DEST (e))
27415 && REG_P (SET_SRC (e)));
27416
27417 reg = REGNO (SET_SRC (e));
27418 gcc_assert (reg >= lastreg);
27419
27420 if (i != 1)
27421 fprintf (asm_out_file, ", ");
27422 /* We can't use %r for vfp because we need to use the
27423 double precision register names. */
27424 if (IS_VFP_REGNUM (reg))
27425 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27426 else
27427 asm_fprintf (asm_out_file, "%r", reg);
27428
27429 if (flag_checking)
27430 {
27431 /* Check that the addresses are consecutive. */
27432 e = XEXP (SET_DEST (e), 0);
27433 if (GET_CODE (e) == PLUS)
27434 gcc_assert (REG_P (XEXP (e, 0))
27435 && REGNO (XEXP (e, 0)) == SP_REGNUM
27436 && CONST_INT_P (XEXP (e, 1))
27437 && offset == INTVAL (XEXP (e, 1)));
27438 else
27439 gcc_assert (i == 1
27440 && REG_P (e)
27441 && REGNO (e) == SP_REGNUM);
27442 offset += reg_size;
27443 }
27444 }
27445 fprintf (asm_out_file, "}\n");
27446 if (padfirst)
27447 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27448 }
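/* Example of the directives produced (illustrative): a prologue
   store-multiple that pushes {r4, r5, lr} results in
   "\t.save {r4, r5, lr}", while a vpush of {d8, d9} results in
   "\t.vsave {d8, d9}", with a preceding or trailing ".pad" directive
   when pc or dummy-register padding is involved.  */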
27449
27450 /* Emit unwind directives for a SET. */
27451
27452 static void
27453 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27454 {
27455 rtx e0;
27456 rtx e1;
27457 unsigned reg;
27458
27459 e0 = XEXP (p, 0);
27460 e1 = XEXP (p, 1);
27461 switch (GET_CODE (e0))
27462 {
27463 case MEM:
27464 /* Pushing a single register. */
27465 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27466 || !REG_P (XEXP (XEXP (e0, 0), 0))
27467 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27468 abort ();
27469
27470 asm_fprintf (asm_out_file, "\t.save ");
27471 if (IS_VFP_REGNUM (REGNO (e1)))
27472 asm_fprintf(asm_out_file, "{d%d}\n",
27473 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27474 else
27475 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27476 break;
27477
27478 case REG:
27479 if (REGNO (e0) == SP_REGNUM)
27480 {
27481 /* A stack increment. */
27482 if (GET_CODE (e1) != PLUS
27483 || !REG_P (XEXP (e1, 0))
27484 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27485 || !CONST_INT_P (XEXP (e1, 1)))
27486 abort ();
27487
27488 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27489 -INTVAL (XEXP (e1, 1)));
27490 }
27491 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27492 {
27493 HOST_WIDE_INT offset;
27494
27495 if (GET_CODE (e1) == PLUS)
27496 {
27497 if (!REG_P (XEXP (e1, 0))
27498 || !CONST_INT_P (XEXP (e1, 1)))
27499 abort ();
27500 reg = REGNO (XEXP (e1, 0));
27501 offset = INTVAL (XEXP (e1, 1));
27502 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27503 HARD_FRAME_POINTER_REGNUM, reg,
27504 offset);
27505 }
27506 else if (REG_P (e1))
27507 {
27508 reg = REGNO (e1);
27509 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27510 HARD_FRAME_POINTER_REGNUM, reg);
27511 }
27512 else
27513 abort ();
27514 }
27515 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27516 {
27517 /* Move from sp to reg. */
27518 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27519 }
27520 else if (GET_CODE (e1) == PLUS
27521 && REG_P (XEXP (e1, 0))
27522 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27523 && CONST_INT_P (XEXP (e1, 1)))
27524 {
27525 /* Set reg to offset from sp. */
27526 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27527 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27528 }
27529 else
27530 abort ();
27531 break;
27532
27533 default:
27534 abort ();
27535 }
27536 }
27537
27538
27539 /* Emit unwind directives for the given insn. */
27540
27541 static void
27542 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27543 {
27544 rtx note, pat;
27545 bool handled_one = false;
27546
27547 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27548 return;
27549
27550 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27551 && (TREE_NOTHROW (current_function_decl)
27552 || crtl->all_throwers_are_sibcalls))
27553 return;
27554
27555 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27556 return;
27557
27558 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27559 {
27560 switch (REG_NOTE_KIND (note))
27561 {
27562 case REG_FRAME_RELATED_EXPR:
27563 pat = XEXP (note, 0);
27564 goto found;
27565
27566 case REG_CFA_REGISTER:
27567 pat = XEXP (note, 0);
27568 if (pat == NULL)
27569 {
27570 pat = PATTERN (insn);
27571 if (GET_CODE (pat) == PARALLEL)
27572 pat = XVECEXP (pat, 0, 0);
27573 }
27574
27575 /* Only emitted for IS_STACKALIGN re-alignment. */
27576 {
27577 rtx dest, src;
27578 unsigned reg;
27579
27580 src = SET_SRC (pat);
27581 dest = SET_DEST (pat);
27582
27583 gcc_assert (src == stack_pointer_rtx);
27584 reg = REGNO (dest);
27585 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27586 reg + 0x90, reg);
27587 }
27588 handled_one = true;
27589 break;
27590
27591 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27592 to get correct dwarf information for shrink-wrapping.  We should not
27593 emit unwind information for it because such insns are used either for
27594 pretend arguments or to adjust sp and restore registers from the
27595 stack. */
27596 case REG_CFA_DEF_CFA:
27597 case REG_CFA_ADJUST_CFA:
27598 case REG_CFA_RESTORE:
27599 return;
27600
27601 case REG_CFA_EXPRESSION:
27602 case REG_CFA_OFFSET:
27603 /* ??? Only handling here what we actually emit. */
27604 gcc_unreachable ();
27605
27606 default:
27607 break;
27608 }
27609 }
27610 if (handled_one)
27611 return;
27612 pat = PATTERN (insn);
27613 found:
27614
27615 switch (GET_CODE (pat))
27616 {
27617 case SET:
27618 arm_unwind_emit_set (asm_out_file, pat);
27619 break;
27620
27621 case SEQUENCE:
27622 /* Store multiple. */
27623 arm_unwind_emit_sequence (asm_out_file, pat);
27624 break;
27625
27626 default:
27627 abort();
27628 }
27629 }
27630
27631
27632 /* Output a reference from a function exception table to the type_info
27633 object X. The EABI specifies that the symbol should be relocated by
27634 an R_ARM_TARGET2 relocation. */
27635
27636 static bool
27637 arm_output_ttype (rtx x)
27638 {
27639 fputs ("\t.word\t", asm_out_file);
27640 output_addr_const (asm_out_file, x);
27641 /* Use special relocations for symbol references. */
27642 if (!CONST_INT_P (x))
27643 fputs ("(TARGET2)", asm_out_file);
27644 fputc ('\n', asm_out_file);
27645
27646 return TRUE;
27647 }
27648
27649 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27650
27651 static void
27652 arm_asm_emit_except_personality (rtx personality)
27653 {
27654 fputs ("\t.personality\t", asm_out_file);
27655 output_addr_const (asm_out_file, personality);
27656 fputc ('\n', asm_out_file);
27657 }
27658 #endif /* ARM_UNWIND_INFO */
27659
27660 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27661
27662 static void
27663 arm_asm_init_sections (void)
27664 {
27665 #if ARM_UNWIND_INFO
27666 exception_section = get_unnamed_section (0, output_section_asm_op,
27667 "\t.handlerdata");
27668 #endif /* ARM_UNWIND_INFO */
27669
27670 #ifdef OBJECT_FORMAT_ELF
27671 if (target_pure_code)
27672 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27673 #endif
27674 }
27675
27676 /* Output unwind directives for the start/end of a function. */
27677
27678 void
27679 arm_output_fn_unwind (FILE * f, bool prologue)
27680 {
27681 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27682 return;
27683
27684 if (prologue)
27685 fputs ("\t.fnstart\n", f);
27686 else
27687 {
27688 /* If this function will never be unwound, then mark it as such.
27689 The same condition is used in arm_unwind_emit to suppress
27690 the frame annotations. */
27691 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27692 && (TREE_NOTHROW (current_function_decl)
27693 || crtl->all_throwers_are_sibcalls))
27694 fputs("\t.cantunwind\n", f);
27695
27696 fputs ("\t.fnend\n", f);
27697 }
27698 }
27699
27700 static bool
27701 arm_emit_tls_decoration (FILE *fp, rtx x)
27702 {
27703 enum tls_reloc reloc;
27704 rtx val;
27705
27706 val = XVECEXP (x, 0, 0);
27707 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27708
27709 output_addr_const (fp, val);
27710
27711 switch (reloc)
27712 {
27713 case TLS_GD32:
27714 fputs ("(tlsgd)", fp);
27715 break;
27716 case TLS_LDM32:
27717 fputs ("(tlsldm)", fp);
27718 break;
27719 case TLS_LDO32:
27720 fputs ("(tlsldo)", fp);
27721 break;
27722 case TLS_IE32:
27723 fputs ("(gottpoff)", fp);
27724 break;
27725 case TLS_LE32:
27726 fputs ("(tpoff)", fp);
27727 break;
27728 case TLS_DESCSEQ:
27729 fputs ("(tlsdesc)", fp);
27730 break;
27731 default:
27732 gcc_unreachable ();
27733 }
27734
27735 switch (reloc)
27736 {
27737 case TLS_GD32:
27738 case TLS_LDM32:
27739 case TLS_IE32:
27740 case TLS_DESCSEQ:
27741 fputs (" + (. - ", fp);
27742 output_addr_const (fp, XVECEXP (x, 0, 2));
27743 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
27744 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27745 output_addr_const (fp, XVECEXP (x, 0, 3));
27746 fputc (')', fp);
27747 break;
27748 default:
27749 break;
27750 }
27751
27752 return TRUE;
27753 }
27754
27755 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27756
27757 static void
27758 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27759 {
27760 gcc_assert (size == 4);
27761 fputs ("\t.word\t", file);
27762 output_addr_const (file, x);
27763 fputs ("(tlsldo)", file);
27764 }
27765
27766 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27767
27768 static bool
27769 arm_output_addr_const_extra (FILE *fp, rtx x)
27770 {
27771 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27772 return arm_emit_tls_decoration (fp, x);
27773 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27774 {
27775 char label[256];
27776 int labelno = INTVAL (XVECEXP (x, 0, 0));
27777
27778 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27779 assemble_name_raw (fp, label);
27780
27781 return TRUE;
27782 }
27783 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27784 {
27785 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27786 if (GOT_PCREL)
27787 fputs ("+.", fp);
27788 fputs ("-(", fp);
27789 output_addr_const (fp, XVECEXP (x, 0, 0));
27790 fputc (')', fp);
27791 return TRUE;
27792 }
27793 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27794 {
27795 output_addr_const (fp, XVECEXP (x, 0, 0));
27796 if (GOT_PCREL)
27797 fputs ("+.", fp);
27798 fputs ("-(", fp);
27799 output_addr_const (fp, XVECEXP (x, 0, 1));
27800 fputc (')', fp);
27801 return TRUE;
27802 }
27803 else if (GET_CODE (x) == CONST_VECTOR)
27804 return arm_emit_vector_const (fp, x);
27805
27806 return FALSE;
27807 }
27808
27809 /* Output assembly for a shift instruction.
27810 SET_FLAGS determines how the instruction modifies the condition codes.
27811 0 - Do not set condition codes.
27812 1 - Set condition codes.
27813 2 - Use smallest instruction. */
27814 const char *
27815 arm_output_shift(rtx * operands, int set_flags)
27816 {
27817 char pattern[100];
27818 static const char flag_chars[3] = {'?', '.', '!'};
27819 const char *shift;
27820 HOST_WIDE_INT val;
27821 char c;
27822
27823 c = flag_chars[set_flags];
27824 shift = shift_op(operands[3], &val);
27825 if (shift)
27826 {
27827 if (val != -1)
27828 operands[2] = GEN_INT(val);
27829 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27830 }
27831 else
27832 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27833
27834 output_asm_insn (pattern, operands);
27835 return "";
27836 }
27837
27838 /* Output assembly for a WMMX immediate shift instruction. */
27839 const char *
27840 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27841 {
27842 int shift = INTVAL (operands[2]);
27843 char templ[50];
27844 machine_mode opmode = GET_MODE (operands[0]);
27845
27846 gcc_assert (shift >= 0);
27847
27848 /* Handle shift values that exceed the range of the register versions:
27849 greater than 63 for the D qualifier, 31 for the W qualifier or 15 for the H qualifier. */
27850 if (((opmode == V4HImode) && (shift > 15))
27851 || ((opmode == V2SImode) && (shift > 31))
27852 || ((opmode == DImode) && (shift > 63)))
27853 {
27854 if (wror_or_wsra)
27855 {
27856 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27857 output_asm_insn (templ, operands);
27858 if (opmode == DImode)
27859 {
27860 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27861 output_asm_insn (templ, operands);
27862 }
27863 }
27864 else
27865 {
27866 /* The destination register will contain all zeros. */
27867 sprintf (templ, "wzero\t%%0");
27868 output_asm_insn (templ, operands);
27869 }
27870 return "";
27871 }
27872
27873 if ((opmode == DImode) && (shift > 32))
27874 {
27875 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27876 output_asm_insn (templ, operands);
27877 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27878 output_asm_insn (templ, operands);
27879 }
27880 else
27881 {
27882 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27883 output_asm_insn (templ, operands);
27884 }
27885 return "";
27886 }
27887
27888 /* Output assembly for a WMMX tinsr instruction. */
27889 const char *
27890 arm_output_iwmmxt_tinsr (rtx *operands)
27891 {
27892 int mask = INTVAL (operands[3]);
27893 int i;
27894 char templ[50];
27895 int units = mode_nunits[GET_MODE (operands[0])];
27896 gcc_assert ((mask & (mask - 1)) == 0);
27897 for (i = 0; i < units; ++i)
27898 {
27899 if ((mask & 0x01) == 1)
27900 {
27901 break;
27902 }
27903 mask >>= 1;
27904 }
27905 gcc_assert (i < units);
27906 {
27907 switch (GET_MODE (operands[0]))
27908 {
27909 case E_V8QImode:
27910 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27911 break;
27912 case E_V4HImode:
27913 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27914 break;
27915 case E_V2SImode:
27916 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27917 break;
27918 default:
27919 gcc_unreachable ();
27920 break;
27921 }
27922 output_asm_insn (templ, operands);
27923 }
27924 return "";
27925 }
27926
27927 /* Output a Thumb-1 casesi dispatch sequence. */
27928 const char *
27929 thumb1_output_casesi (rtx *operands)
27930 {
27931 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27932
27933 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27934
27935 switch (GET_MODE(diff_vec))
27936 {
27937 case E_QImode:
27938 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27939 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27940 case E_HImode:
27941 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27942 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27943 case E_SImode:
27944 return "bl\t%___gnu_thumb1_case_si";
27945 default:
27946 gcc_unreachable ();
27947 }
27948 }
27949
27950 /* Output a Thumb-2 casesi instruction. */
27951 const char *
27952 thumb2_output_casesi (rtx *operands)
27953 {
27954 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27955
27956 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27957
27958 output_asm_insn ("cmp\t%0, %1", operands);
27959 output_asm_insn ("bhi\t%l3", operands);
27960 switch (GET_MODE(diff_vec))
27961 {
27962 case E_QImode:
27963 return "tbb\t[%|pc, %0]";
27964 case E_HImode:
27965 return "tbh\t[%|pc, %0, lsl #1]";
27966 case E_SImode:
27967 if (flag_pic)
27968 {
27969 output_asm_insn ("adr\t%4, %l2", operands);
27970 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27971 output_asm_insn ("add\t%4, %4, %5", operands);
27972 return "bx\t%4";
27973 }
27974 else
27975 {
27976 output_asm_insn ("adr\t%4, %l2", operands);
27977 return "ldr\t%|pc, [%4, %0, lsl #2]";
27978 }
27979 default:
27980 gcc_unreachable ();
27981 }
27982 }
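/* Illustrative only: for a QImode dispatch table the full sequence
   emitted above is of the form

       cmp     r0, #<max index>
       bhi     .Ldefault
       tbb     [pc, r0]

   with tbh and a one-bit left shift used for HImode tables, and an
   adr/ldr based sequence used for SImode tables.  */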
27983
27984 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27985 per-core tuning structs. */
27986 static int
27987 arm_issue_rate (void)
27988 {
27989 return current_tune->issue_rate;
27990 }
27991
27992 /* Return how many instructions the scheduler should look ahead to choose
27993 the best one. */
27994 static int
27995 arm_first_cycle_multipass_dfa_lookahead (void)
27996 {
27997 int issue_rate = arm_issue_rate ();
27998
27999 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28000 }
28001
28002 /* Enable modeling of L2 auto-prefetcher. */
28003 static int
28004 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28005 {
28006 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28007 }
28008
28009 const char *
28010 arm_mangle_type (const_tree type)
28011 {
28012 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28013 has to be mangled as if it is in the "std" namespace. */
28014 if (TARGET_AAPCS_BASED
28015 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28016 return "St9__va_list";
28017
28018 /* Half-precision float. */
28019 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28020 return "Dh";
28021
28022 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28023 builtin type. */
28024 if (TYPE_NAME (type) != NULL)
28025 return arm_mangle_builtin_type (type);
28026
28027 /* Use the default mangling. */
28028 return NULL;
28029 }
28030
28031 /* Order of allocation of core registers for Thumb: this allocation is
28032 written over the corresponding initial entries of the array
28033 initialized with REG_ALLOC_ORDER. We allocate all low registers
28034 first. Saving and restoring a low register is usually cheaper than
28035 using a call-clobbered high register. */
28036
28037 static const int thumb_core_reg_alloc_order[] =
28038 {
28039 3, 2, 1, 0, 4, 5, 6, 7,
28040 12, 14, 8, 9, 10, 11
28041 };
28042
28043 /* Adjust register allocation order when compiling for Thumb. */
28044
28045 void
28046 arm_order_regs_for_local_alloc (void)
28047 {
28048 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28049 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28050 if (TARGET_THUMB)
28051 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28052 sizeof (thumb_core_reg_alloc_order));
28053 }
28054
28055 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28056
28057 bool
28058 arm_frame_pointer_required (void)
28059 {
28060 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28061 return true;
28062
28063 /* If the function receives nonlocal gotos, it needs to save the frame
28064 pointer in the nonlocal_goto_save_area object. */
28065 if (cfun->has_nonlocal_label)
28066 return true;
28067
28068 /* The frame pointer is required for non-leaf APCS frames. */
28069 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28070 return true;
28071
28072 /* If we are probing the stack in the prologue, we will have a faulting
28073 instruction prior to the stack adjustment and this requires a frame
28074 pointer if we want to catch the exception using the EABI unwinder. */
28075 if (!IS_INTERRUPT (arm_current_func_type ())
28076 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28077 || flag_stack_clash_protection)
28078 && arm_except_unwind_info (&global_options) == UI_TARGET
28079 && cfun->can_throw_non_call_exceptions)
28080 {
28081 HOST_WIDE_INT size = get_frame_size ();
28082
28083 /* That's irrelevant if there is no stack adjustment. */
28084 if (size <= 0)
28085 return false;
28086
28087 /* That's relevant only if there is a stack probe. */
28088 if (crtl->is_leaf && !cfun->calls_alloca)
28089 {
28090 /* We don't have the final size of the frame so adjust. */
28091 size += 32 * UNITS_PER_WORD;
28092 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28093 return true;
28094 }
28095 else
28096 return true;
28097 }
28098
28099 return false;
28100 }
28101
28102 /* Thumb-1 is the only target that cannot support conditional execution,
28103 so return true if the target is not Thumb-1. */
28104 static bool
28105 arm_have_conditional_execution (void)
28106 {
28107 return !TARGET_THUMB1;
28108 }
28109
28110 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28111 static HOST_WIDE_INT
28112 arm_vector_alignment (const_tree type)
28113 {
28114 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28115
28116 if (TARGET_AAPCS_BASED)
28117 align = MIN (align, 64);
28118
28119 return align;
28120 }
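/* For instance (illustrative): a 128-bit NEON vector type such as
   int32x4_t has a 128-bit size, but under AAPCS its alignment is capped
   at 64 bits by the code above; non-AAPCS targets keep the full 128-bit
   alignment.  */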
28121
28122 static void
28123 arm_autovectorize_vector_sizes (vector_sizes *sizes)
28124 {
28125 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28126 {
28127 sizes->safe_push (16);
28128 sizes->safe_push (8);
28129 }
28130 }
28131
28132 static bool
28133 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28134 {
28135 /* Vectors which aren't in packed structures will not be less aligned than
28136 the natural alignment of their element type, so this is safe. */
28137 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28138 return !is_packed;
28139
28140 return default_builtin_vector_alignment_reachable (type, is_packed);
28141 }
28142
28143 static bool
28144 arm_builtin_support_vector_misalignment (machine_mode mode,
28145 const_tree type, int misalignment,
28146 bool is_packed)
28147 {
28148 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28149 {
28150 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28151
28152 if (is_packed)
28153 return align == 1;
28154
28155 /* If the misalignment is unknown, we should be able to handle the access
28156 so long as it is not to a member of a packed data structure. */
28157 if (misalignment == -1)
28158 return true;
28159
28160 /* Return true if the misalignment is a multiple of the natural alignment
28161 of the vector's element type. This is probably always going to be
28162 true in practice, since we've already established that this isn't a
28163 packed access. */
28164 return ((misalignment % align) == 0);
28165 }
28166
28167 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28168 is_packed);
28169 }
28170
28171 static void
28172 arm_conditional_register_usage (void)
28173 {
28174 int regno;
28175
28176 if (TARGET_THUMB1 && optimize_size)
28177 {
28178 /* When optimizing for size on Thumb-1, it's better not
28179 to use the HI regs, because of the overhead of
28180 stacking them. */
28181 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28182 fixed_regs[regno] = call_used_regs[regno] = 1;
28183 }
28184
28185 /* The link register can be clobbered by any branch insn,
28186 but we have no way to track that at present, so mark
28187 it as unavailable. */
28188 if (TARGET_THUMB1)
28189 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28190
28191 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28192 {
28193 /* VFPv3 registers are disabled when earlier VFP
28194 versions are selected due to the definition of
28195 LAST_VFP_REGNUM. */
28196 for (regno = FIRST_VFP_REGNUM;
28197 regno <= LAST_VFP_REGNUM; ++ regno)
28198 {
28199 fixed_regs[regno] = 0;
28200 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28201 || regno >= FIRST_VFP_REGNUM + 32;
28202 }
28203 }
28204
28205 if (TARGET_REALLY_IWMMXT)
28206 {
28207 regno = FIRST_IWMMXT_GR_REGNUM;
28208 /* The 2002/10/09 revision of the XScale ABI has wCG0
28209 and wCG1 as call-preserved registers. The 2002/11/21
28210 revision changed this so that all wCG registers are
28211 scratch registers. */
28212 for (regno = FIRST_IWMMXT_GR_REGNUM;
28213 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28214 fixed_regs[regno] = 0;
28215 /* The XScale ABI has wR0 - wR9 as scratch registers,
28216 the rest as call-preserved registers. */
28217 for (regno = FIRST_IWMMXT_REGNUM;
28218 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28219 {
28220 fixed_regs[regno] = 0;
28221 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28222 }
28223 }
28224
28225 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28226 {
28227 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28228 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28229 }
28230 else if (TARGET_APCS_STACK)
28231 {
28232 fixed_regs[10] = 1;
28233 call_used_regs[10] = 1;
28234 }
28235 /* -mcaller-super-interworking reserves r11 for calls to
28236 _interwork_r11_call_via_rN(). Making the register global
28237 is an easy way of ensuring that it remains valid for all
28238 calls. */
28239 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28240 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28241 {
28242 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28243 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28244 if (TARGET_CALLER_INTERWORKING)
28245 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28246 }
28247 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28248 }
28249
28250 static reg_class_t
28251 arm_preferred_rename_class (reg_class_t rclass)
28252 {
28253 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28254 using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS,
28255 so code size can be reduced. */
28256 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28257 return LO_REGS;
28258 else
28259 return NO_REGS;
28260 }
28261
28262 /* Compute the attribute "length" of insn "*push_multi".
28263 So this function MUST be kept in sync with that insn pattern. */
28264 int
28265 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28266 {
28267 int i, regno, hi_reg;
28268 int num_saves = XVECLEN (parallel_op, 0);
28269
28270 /* ARM mode. */
28271 if (TARGET_ARM)
28272 return 4;
28273 /* Thumb1 mode. */
28274 if (TARGET_THUMB1)
28275 return 2;
28276
28277 /* Thumb2 mode. */
28278 regno = REGNO (first_op);
28279 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28280 list is 8-bit.  Normally this means all registers in the list must be
28281 LO_REGS, that is (R0-R7).  If any HI_REGS are used, then we must use 32-bit
28282 encodings.  The one exception is PUSH, where LR (a HI_REG) can be used
28283 with the 16-bit encoding. */
28284 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28285 for (i = 1; i < num_saves && !hi_reg; i++)
28286 {
28287 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28288 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28289 }
28290
28291 if (!hi_reg)
28292 return 2;
28293 return 4;
28294 }
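/* For example (illustrative, Thumb-2): "push {r0-r7, lr}" fits the
   16-bit encoding and gets length 2, whereas "push {r0, r8}" or
   "stmdb sp!, {r4, r9}" involve a high register other than LR and
   therefore get length 4.  */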
28295
28296 /* Compute the attribute "length" of an insn.  Currently, this function is used
28297 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28298 "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
28299 rtx, RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
28300 true if OPERANDS contains an insn which explicitly updates the base register. */
28301
28302 int
28303 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28304 {
28305 /* ARM mode. */
28306 if (TARGET_ARM)
28307 return 4;
28308 /* Thumb1 mode. */
28309 if (TARGET_THUMB1)
28310 return 2;
28311
28312 rtx parallel_op = operands[0];
28313 /* Initialize to the number of elements in the PARALLEL. */
28314 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28315 /* Initialize to the base register number. */
28316 unsigned regno = REGNO (operands[1]);
28317 /* Skip the return and write-back patterns.
28318 We only need the register pop patterns for later analysis. */
28319 unsigned first_indx = 0;
28320 first_indx += return_pc ? 1 : 0;
28321 first_indx += write_back_p ? 1 : 0;
28322
28323 /* A pop operation can be done through LDM or POP.  If the base register is SP
28324 and write back is used, then LDM is an alias of POP. */
28325 bool pop_p = (regno == SP_REGNUM && write_back_p);
28326 bool ldm_p = !pop_p;
28327
28328 /* Check base register for LDM. */
28329 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28330 return 4;
28331
28332 /* Check each register in the list. */
28333 for (; indx >= first_indx; indx--)
28334 {
28335 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28336 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28337 comment in arm_attr_length_push_multi. */
28338 if (REGNO_REG_CLASS (regno) == HI_REGS
28339 && (regno != PC_REGNUM || ldm_p))
28340 return 4;
28341 }
28342
28343 return 2;
28344 }
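/* For example (illustrative, Thumb-2): "pop {r0-r7, pc}" uses the
   16-bit encoding (PC being the documented exception), so the length is
   2, while "ldmia r8!, {r0-r3}" needs the 32-bit encoding because the
   base register is a high register, so the length is 4.  */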
28345
28346 /* Compute the number of instructions emitted by output_move_double. */
28347 int
28348 arm_count_output_move_double_insns (rtx *operands)
28349 {
28350 int count;
28351 rtx ops[2];
28352 /* output_move_double may modify the operands array, so call it
28353 here on a copy of the array. */
28354 ops[0] = operands[0];
28355 ops[1] = operands[1];
28356 output_move_double (ops, false, &count);
28357 return count;
28358 }
28359
28360 int
28361 vfp3_const_double_for_fract_bits (rtx operand)
28362 {
28363 REAL_VALUE_TYPE r0;
28364
28365 if (!CONST_DOUBLE_P (operand))
28366 return 0;
28367
28368 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28369 if (exact_real_inverse (DFmode, &r0)
28370 && !REAL_VALUE_NEGATIVE (r0))
28371 {
28372 if (exact_real_truncate (DFmode, &r0))
28373 {
28374 HOST_WIDE_INT value = real_to_integer (&r0);
28375 value = value & 0xffffffff;
28376 if ((value != 0) && ( (value & (value - 1)) == 0))
28377 {
28378 int ret = exact_log2 (value);
28379 gcc_assert (IN_RANGE (ret, 0, 31));
28380 return ret;
28381 }
28382 }
28383 }
28384 return 0;
28385 }
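/* Example (illustrative): for the constant 0.25 the exact reciprocal is
   4.0, which truncates exactly to the integer 4 = 1 << 2, so the
   function returns 2, the number of fraction bits for a fixed-point
   vcvt.  A constant whose reciprocal is not an exact power of two, such
   as 0.3, yields 0.  */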
28386
28387 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28388 log2 is in [1, 32], return that log2. Otherwise return -1.
28389 This is used in the patterns for vcvt.s32.f32 floating-point to
28390 fixed-point conversions. */
28391
28392 int
28393 vfp3_const_double_for_bits (rtx x)
28394 {
28395 const REAL_VALUE_TYPE *r;
28396
28397 if (!CONST_DOUBLE_P (x))
28398 return -1;
28399
28400 r = CONST_DOUBLE_REAL_VALUE (x);
28401
28402 if (REAL_VALUE_NEGATIVE (*r)
28403 || REAL_VALUE_ISNAN (*r)
28404 || REAL_VALUE_ISINF (*r)
28405 || !real_isinteger (r, SFmode))
28406 return -1;
28407
28408 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28409
28410 /* The exact_log2 above will have returned -1 if this is
28411 not an exact log2. */
28412 if (!IN_RANGE (hwint, 1, 32))
28413 return -1;
28414
28415 return hwint;
28416 }
28417
28418 \f
28419 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28420
28421 static void
28422 arm_pre_atomic_barrier (enum memmodel model)
28423 {
28424 if (need_atomic_barrier_p (model, true))
28425 emit_insn (gen_memory_barrier ());
28426 }
28427
28428 static void
28429 arm_post_atomic_barrier (enum memmodel model)
28430 {
28431 if (need_atomic_barrier_p (model, false))
28432 emit_insn (gen_memory_barrier ());
28433 }
28434
28435 /* Emit the load-exclusive and store-exclusive instructions.
28436 Use acquire and release versions if necessary. */
28437
28438 static void
28439 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28440 {
28441 rtx (*gen) (rtx, rtx);
28442
28443 if (acq)
28444 {
28445 switch (mode)
28446 {
28447 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28448 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28449 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28450 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28451 default:
28452 gcc_unreachable ();
28453 }
28454 }
28455 else
28456 {
28457 switch (mode)
28458 {
28459 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28460 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28461 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28462 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28463 default:
28464 gcc_unreachable ();
28465 }
28466 }
28467
28468 emit_insn (gen (rval, mem));
28469 }
28470
28471 static void
28472 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28473 rtx mem, bool rel)
28474 {
28475 rtx (*gen) (rtx, rtx, rtx);
28476
28477 if (rel)
28478 {
28479 switch (mode)
28480 {
28481 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28482 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28483 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28484 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28485 default:
28486 gcc_unreachable ();
28487 }
28488 }
28489 else
28490 {
28491 switch (mode)
28492 {
28493 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28494 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28495 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28496 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28497 default:
28498 gcc_unreachable ();
28499 }
28500 }
28501
28502 emit_insn (gen (bval, rval, mem));
28503 }
28504
28505 /* Mark the previous jump instruction as unlikely. */
28506
28507 static void
28508 emit_unlikely_jump (rtx insn)
28509 {
28510 rtx_insn *jump = emit_jump_insn (insn);
28511 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28512 }
28513
28514 /* Expand a compare and swap pattern. */
28515
28516 void
28517 arm_expand_compare_and_swap (rtx operands[])
28518 {
28519 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28520 machine_mode mode;
28521 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28522
28523 bval = operands[0];
28524 rval = operands[1];
28525 mem = operands[2];
28526 oldval = operands[3];
28527 newval = operands[4];
28528 is_weak = operands[5];
28529 mod_s = operands[6];
28530 mod_f = operands[7];
28531 mode = GET_MODE (mem);
28532
28533 /* Normally the succ memory model must be stronger than fail, but in the
28534 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28535 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
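/* A hypothetical call that reaches this case:
   __atomic_compare_exchange_n (p, &expected, desired, 0,
   __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)
   arrives here with mod_s == RELEASE and mod_f == ACQUIRE and is
   promoted to ACQ_REL below.  */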
28536
28537 if (TARGET_HAVE_LDACQ
28538 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28539 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28540 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28541
28542 switch (mode)
28543 {
28544 case E_QImode:
28545 case E_HImode:
28546 /* For narrow modes, we're going to perform the comparison in SImode,
28547 so do the zero-extension now. */
28548 rval = gen_reg_rtx (SImode);
28549 oldval = convert_modes (SImode, mode, oldval, true);
28550 /* FALLTHRU */
28551
28552 case E_SImode:
28553 /* Force the value into a register if needed. We waited until after
28554 the zero-extension above to do this properly. */
28555 if (!arm_add_operand (oldval, SImode))
28556 oldval = force_reg (SImode, oldval);
28557 break;
28558
28559 case E_DImode:
28560 if (!cmpdi_operand (oldval, mode))
28561 oldval = force_reg (mode, oldval);
28562 break;
28563
28564 default:
28565 gcc_unreachable ();
28566 }
28567
28568 if (TARGET_THUMB1)
28569 {
28570 switch (mode)
28571 {
28572 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28573 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28574 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28575 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28576 default:
28577 gcc_unreachable ();
28578 }
28579 }
28580 else
28581 {
28582 switch (mode)
28583 {
28584 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28585 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28586 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28587 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28588 default:
28589 gcc_unreachable ();
28590 }
28591 }
28592
28593 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28594 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28595
28596 if (mode == QImode || mode == HImode)
28597 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28598
28599 /* In all cases, we arrange for success to be signaled by Z set.
28600 This arrangement allows for the boolean result to be used directly
28601 in a subsequent branch, post optimization. For Thumb-1 targets, the
28602 boolean negation of the result is also stored in bval because the Thumb-1
28603 backend lacks dependency tracking for the CC flag, as flag-setting is not
28604 represented at the RTL level. */
28605 if (TARGET_THUMB1)
28606 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28607 else
28608 {
28609 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28610 emit_insn (gen_rtx_SET (bval, x));
28611 }
28612 }
28613
28614 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28615 another memory store between the load-exclusive and store-exclusive can
28616 reset the monitor from Exclusive to Open state. This means we must wait
28617 until after reload to split the pattern, lest we get a register spill in
28618 the middle of the atomic sequence. Success of the compare and swap is
28619 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
28620 for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28621 atomic_compare_and_swapmode standard pattern in operand 0). */
28622
28623 void
28624 arm_split_compare_and_swap (rtx operands[])
28625 {
28626 rtx rval, mem, oldval, newval, neg_bval;
28627 machine_mode mode;
28628 enum memmodel mod_s, mod_f;
28629 bool is_weak;
28630 rtx_code_label *label1, *label2;
28631 rtx x, cond;
28632
28633 rval = operands[1];
28634 mem = operands[2];
28635 oldval = operands[3];
28636 newval = operands[4];
28637 is_weak = (operands[5] != const0_rtx);
28638 mod_s = memmodel_from_int (INTVAL (operands[6]));
28639 mod_f = memmodel_from_int (INTVAL (operands[7]));
28640 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28641 mode = GET_MODE (mem);
28642
28643 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28644
28645 bool use_acquire = TARGET_HAVE_LDACQ
28646 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28647 || is_mm_release (mod_s));
28648
28649 bool use_release = TARGET_HAVE_LDACQ
28650 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28651 || is_mm_acquire (mod_s));
28652
28653 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28654 a full barrier is emitted after the store-release. */
28655 if (is_armv8_sync)
28656 use_acquire = false;
28657
28658 /* Checks whether a barrier is needed and emits one accordingly. */
28659 if (!(use_acquire || use_release))
28660 arm_pre_atomic_barrier (mod_s);
28661
28662 label1 = NULL;
28663 if (!is_weak)
28664 {
28665 label1 = gen_label_rtx ();
28666 emit_label (label1);
28667 }
28668 label2 = gen_label_rtx ();
28669
28670 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28671
28672 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28673 as required to communicate with arm_expand_compare_and_swap. */
28674 if (TARGET_32BIT)
28675 {
28676 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28677 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28678 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28679 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28680 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28681 }
28682 else
28683 {
28684 emit_move_insn (neg_bval, const1_rtx);
28685 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28686 if (thumb1_cmpneg_operand (oldval, SImode))
28687 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28688 label2, cond));
28689 else
28690 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28691 }
28692
28693 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28694
28695 /* Weak or strong, we want EQ to be true for success, so that we
28696 match the flags that we got from the compare above. */
28697 if (TARGET_32BIT)
28698 {
28699 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28700 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28701 emit_insn (gen_rtx_SET (cond, x));
28702 }
28703
28704 if (!is_weak)
28705 {
28706 /* Z is set to boolean value of !neg_bval, as required to communicate
28707 with arm_expand_compare_and_swap. */
28708 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28709 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28710 }
28711
28712 if (!is_mm_relaxed (mod_f))
28713 emit_label (label2);
28714
28715 /* Checks whether a barrier is needed and emits one accordingly. */
28716 if (is_armv8_sync
28717 || !(use_acquire || use_release))
28718 arm_post_atomic_barrier (mod_s);
28719
28720 if (is_mm_relaxed (mod_f))
28721 emit_label (label2);
28722 }
28723
28724 /* Split an atomic operation pattern. Operation is given by CODE and is one
28725 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28726 operation). Operation is performed on the content at MEM and on VALUE
28727 following the memory model MODEL_RTX. The content at MEM before and after
28728 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28729 success of the operation is returned in COND. Using a scratch register or
28730 an operand register for these determines what result is returned for that
28731 pattern. */
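/* For illustration only: an SImode atomic fetch-and-add on an ARM-mode
   target splits into a loop of roughly this shape (register names are
   hypothetical, and barriers or acquire/release forms may surround it
   depending on MODEL_RTX):

	.Lretry:
	ldrex	r0, [r3]	@ old_out = *mem
	add	r1, r0, r2	@ new_out = old_out + value
	strex	ip, r1, [r3]	@ ip = 0 iff new_out was stored
	cmp	ip, #0
	bne	.Lretry		@ reservation lost, try again
*/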
28732
28733 void
28734 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28735 rtx value, rtx model_rtx, rtx cond)
28736 {
28737 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28738 machine_mode mode = GET_MODE (mem);
28739 machine_mode wmode = (mode == DImode ? DImode : SImode);
28740 rtx_code_label *label;
28741 bool all_low_regs, bind_old_new;
28742 rtx x;
28743
28744 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28745
28746 bool use_acquire = TARGET_HAVE_LDACQ
28747 && !(is_mm_relaxed (model) || is_mm_consume (model)
28748 || is_mm_release (model));
28749
28750 bool use_release = TARGET_HAVE_LDACQ
28751 && !(is_mm_relaxed (model) || is_mm_consume (model)
28752 || is_mm_acquire (model));
28753
28754 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28755 a full barrier is emitted after the store-release. */
28756 if (is_armv8_sync)
28757 use_acquire = false;
28758
28759 /* Checks whether a barrier is needed and emits one accordingly. */
28760 if (!(use_acquire || use_release))
28761 arm_pre_atomic_barrier (model);
28762
28763 label = gen_label_rtx ();
28764 emit_label (label);
28765
28766 if (new_out)
28767 new_out = gen_lowpart (wmode, new_out);
28768 if (old_out)
28769 old_out = gen_lowpart (wmode, old_out);
28770 else
28771 old_out = new_out;
28772 value = simplify_gen_subreg (wmode, value, mode, 0);
28773
28774 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28775
28776 /* Does the operation require the destination and first operand to use the
28777 same register? This is decided by the register constraints of the relevant
28778 insn patterns in thumb1.md. */
28779 gcc_assert (!new_out || REG_P (new_out));
28780 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28781 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28782 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28783 bind_old_new =
28784 (TARGET_THUMB1
28785 && code != SET
28786 && code != MINUS
28787 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28788
28789 /* We want to return the old value while putting the result of the operation
28790 in the same register as the old value, so copy the old value over to the
28791 destination register and use that register for the operation. */
28792 if (old_out && bind_old_new)
28793 {
28794 emit_move_insn (new_out, old_out);
28795 old_out = new_out;
28796 }
28797
28798 switch (code)
28799 {
28800 case SET:
28801 new_out = value;
28802 break;
28803
28804 case NOT:
28805 x = gen_rtx_AND (wmode, old_out, value);
28806 emit_insn (gen_rtx_SET (new_out, x));
28807 x = gen_rtx_NOT (wmode, new_out);
28808 emit_insn (gen_rtx_SET (new_out, x));
28809 break;
28810
28811 case MINUS:
28812 if (CONST_INT_P (value))
28813 {
28814 value = GEN_INT (-INTVAL (value));
28815 code = PLUS;
28816 }
28817 /* FALLTHRU */
28818
28819 case PLUS:
28820 if (mode == DImode)
28821 {
28822 /* DImode plus/minus need to clobber flags. */
28823 /* The adddi3 and subdi3 patterns are incorrectly written so that
28824 they require matching operands, even when we could easily support
28825 three operands. Thankfully, this can be fixed up post-splitting,
28826 as the individual add+adc patterns do accept three operands and
28827 post-reload cprop can make these moves go away. */
28828 emit_move_insn (new_out, old_out);
28829 if (code == PLUS)
28830 x = gen_adddi3 (new_out, new_out, value);
28831 else
28832 x = gen_subdi3 (new_out, new_out, value);
28833 emit_insn (x);
28834 break;
28835 }
28836 /* FALLTHRU */
28837
28838 default:
28839 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28840 emit_insn (gen_rtx_SET (new_out, x));
28841 break;
28842 }
28843
28844 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28845 use_release);
28846
28847 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28848 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28849
28850 /* Checks whether a barrier is needed and emits one accordingly. */
28851 if (is_armv8_sync
28852 || !(use_acquire || use_release))
28853 arm_post_atomic_barrier (model);
28854 }
28855 \f
28856 #define MAX_VECT_LEN 16
28857
28858 struct expand_vec_perm_d
28859 {
28860 rtx target, op0, op1;
28861 vec_perm_indices perm;
28862 machine_mode vmode;
28863 bool one_vector_p;
28864 bool testing_p;
28865 };
28866
28867 /* Generate a variable permutation. */
28868
28869 static void
28870 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28871 {
28872 machine_mode vmode = GET_MODE (target);
28873 bool one_vector_p = rtx_equal_p (op0, op1);
28874
28875 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28876 gcc_checking_assert (GET_MODE (op0) == vmode);
28877 gcc_checking_assert (GET_MODE (op1) == vmode);
28878 gcc_checking_assert (GET_MODE (sel) == vmode);
28879 gcc_checking_assert (TARGET_NEON);
28880
28881 if (one_vector_p)
28882 {
28883 if (vmode == V8QImode)
28884 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28885 else
28886 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28887 }
28888 else
28889 {
28890 rtx pair;
28891
28892 if (vmode == V8QImode)
28893 {
28894 pair = gen_reg_rtx (V16QImode);
28895 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28896 pair = gen_lowpart (TImode, pair);
28897 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28898 }
28899 else
28900 {
28901 pair = gen_reg_rtx (OImode);
28902 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28903 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28904 }
28905 }
28906 }
28907
28908 void
28909 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28910 {
28911 machine_mode vmode = GET_MODE (target);
28912 unsigned int nelt = GET_MODE_NUNITS (vmode);
28913 bool one_vector_p = rtx_equal_p (op0, op1);
28914 rtx mask;
28915
28916 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28917 numbering of elements for big-endian, we must reverse the order. */
28918 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28919
28920 /* The VTBL instruction does not use a modulo index, so we must take care
28921 of that ourselves. */
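/* For example, with a single V8QImode input (one_vector_p), a selector
   element of 9 is reduced to 9 & 7 == 1 by the AND below, giving the
   modulo semantics VEC_PERM_EXPR requires; an unmasked out-of-range
   VTBL index would instead produce zero. (Illustrative only.) */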
28922 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28923 mask = gen_const_vec_duplicate (vmode, mask);
28924 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28925
28926 arm_expand_vec_perm_1 (target, op0, op1, sel);
28927 }
28928
28929 /* Map lane ordering between architectural lane order, and GCC lane order,
28930 taking into account ABI. See comment above output_move_neon for details. */
28931
28932 static int
28933 neon_endian_lane_map (machine_mode mode, int lane)
28934 {
28935 if (BYTES_BIG_ENDIAN)
28936 {
28937 int nelems = GET_MODE_NUNITS (mode);
28938 /* Reverse lane order. */
28939 lane = (nelems - 1 - lane);
28940 /* Reverse D register order, to match ABI. */
28941 if (GET_MODE_SIZE (mode) == 16)
28942 lane = lane ^ (nelems / 2);
28943 }
28944 return lane;
28945 }
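/* As an illustration: for V4SImode on a big-endian target, nelems == 4 and
   the mode is 16 bytes wide, so lanes 0, 1, 2, 3 map to 1, 0, 3, 2
   (reverse to 3, 2, 1, 0, then XOR with nelems / 2 == 2 to swap the two
   D registers). On little-endian targets the mapping is the identity. */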
28946
28947 /* Some permutations index into pairs of vectors, this is a helper function
28948 to map indexes into those pairs of vectors. */
28949
28950 static int
28951 neon_pair_endian_lane_map (machine_mode mode, int lane)
28952 {
28953 int nelem = GET_MODE_NUNITS (mode);
28954 if (BYTES_BIG_ENDIAN)
28955 lane =
28956 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28957 return lane;
28958 }
28959
28960 /* Generate or test for an insn that supports a constant permutation. */
28961
28962 /* Recognize patterns for the VUZP insns. */
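/* For illustration (little-endian): for V8HImode with two input vectors,
   the selector {0, 2, 4, 6, 8, 10, 12, 14} (odd == 0) or
   {1, 3, 5, 7, 9, 11, 13, 15} (odd == 1) matches VUZP, which
   de-interleaves the even or odd lanes of the concatenated inputs. */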
28963
28964 static bool
28965 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28966 {
28967 unsigned int i, odd, mask, nelt = d->perm.length ();
28968 rtx out0, out1, in0, in1;
28969 rtx (*gen)(rtx, rtx, rtx, rtx);
28970 int first_elem;
28971 int swap_nelt;
28972
28973 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28974 return false;
28975
28976 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28977 big-endian pattern on 64-bit vectors, so we correct for that. */
28978 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28979 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28980
28981 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28982
28983 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28984 odd = 0;
28985 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28986 odd = 1;
28987 else
28988 return false;
28989 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28990
28991 for (i = 0; i < nelt; i++)
28992 {
28993 unsigned elt =
28994 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28995 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28996 return false;
28997 }
28998
28999 /* Success! */
29000 if (d->testing_p)
29001 return true;
29002
29003 switch (d->vmode)
29004 {
29005 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29006 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29007 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29008 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29009 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
29010 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
29011 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29012 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29013 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29014 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29015 default:
29016 gcc_unreachable ();
29017 }
29018
29019 in0 = d->op0;
29020 in1 = d->op1;
29021 if (swap_nelt != 0)
29022 std::swap (in0, in1);
29023
29024 out0 = d->target;
29025 out1 = gen_reg_rtx (d->vmode);
29026 if (odd)
29027 std::swap (out0, out1);
29028
29029 emit_insn (gen (out0, in0, in1, out1));
29030 return true;
29031 }
29032
29033 /* Recognize patterns for the VZIP insns. */
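/* For illustration (little-endian): for V8HImode with two input vectors,
   the selector {0, 8, 1, 9, 2, 10, 3, 11} (high == 0) or
   {4, 12, 5, 13, 6, 14, 7, 15} (high == nelt / 2) matches VZIP, which
   interleaves the corresponding halves of the two inputs. */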
29034
29035 static bool
29036 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29037 {
29038 unsigned int i, high, mask, nelt = d->perm.length ();
29039 rtx out0, out1, in0, in1;
29040 rtx (*gen)(rtx, rtx, rtx, rtx);
29041 int first_elem;
29042 bool is_swapped;
29043
29044 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29045 return false;
29046
29047 is_swapped = BYTES_BIG_ENDIAN;
29048
29049 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29050
29051 high = nelt / 2;
29052 if (first_elem == neon_endian_lane_map (d->vmode, high))
29053 ;
29054 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29055 high = 0;
29056 else
29057 return false;
29058 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29059
29060 for (i = 0; i < nelt / 2; i++)
29061 {
29062 unsigned elt =
29063 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29064 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29065 != elt)
29066 return false;
29067 elt =
29068 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29069 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29070 != elt)
29071 return false;
29072 }
29073
29074 /* Success! */
29075 if (d->testing_p)
29076 return true;
29077
29078 switch (d->vmode)
29079 {
29080 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29081 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29082 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29083 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29084 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
29085 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
29086 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
29087 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
29088 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29089 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29090 default:
29091 gcc_unreachable ();
29092 }
29093
29094 in0 = d->op0;
29095 in1 = d->op1;
29096 if (is_swapped)
29097 std::swap (in0, in1);
29098
29099 out0 = d->target;
29100 out1 = gen_reg_rtx (d->vmode);
29101 if (high)
29102 std::swap (out0, out1);
29103
29104 emit_insn (gen (out0, in0, in1, out1));
29105 return true;
29106 }
29107
29108 /* Recognize patterns for the VREV insns. */
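/* For illustration: with a single V16QImode input, the selector
   {3, 2, 1, 0, 7, 6, 5, 4, ...} (diff == 3) reverses the bytes within
   each 32-bit word and maps to VREV32.8; diff == 7 maps to VREV64.8 and
   diff == 1 to VREV16.8. */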
29109
29110 static bool
29111 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29112 {
29113 unsigned int i, j, diff, nelt = d->perm.length ();
29114 rtx (*gen)(rtx, rtx);
29115
29116 if (!d->one_vector_p)
29117 return false;
29118
29119 diff = d->perm[0];
29120 switch (diff)
29121 {
29122 case 7:
29123 switch (d->vmode)
29124 {
29125 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29126 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29127 default:
29128 return false;
29129 }
29130 break;
29131 case 3:
29132 switch (d->vmode)
29133 {
29134 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29135 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29136 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29137 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29138 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29139 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29140 default:
29141 return false;
29142 }
29143 break;
29144 case 1:
29145 switch (d->vmode)
29146 {
29147 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29148 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29149 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29150 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29151 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29152 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29153 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29154 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29155 default:
29156 return false;
29157 }
29158 break;
29159 default:
29160 return false;
29161 }
29162
29163 for (i = 0; i < nelt ; i += diff + 1)
29164 for (j = 0; j <= diff; j += 1)
29165 {
29166 /* This is guaranteed to be true as the value of diff
29167 is 7, 3 or 1, and we should have enough elements in the
29168 queue to generate this. Getting a vector mask with a
29169 value of diff other than these implies that something
29170 has gone wrong by the time we get here. */
29171 gcc_assert (i + j < nelt);
29172 if (d->perm[i + j] != i + diff - j)
29173 return false;
29174 }
29175
29176 /* Success! */
29177 if (d->testing_p)
29178 return true;
29179
29180 emit_insn (gen (d->target, d->op0));
29181 return true;
29182 }
29183
29184 /* Recognize patterns for the VTRN insns. */
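/* For illustration (little-endian): for V4SImode with two input vectors,
   the selector {0, 4, 2, 6} (odd == 0) or {1, 5, 3, 7} (odd == 1)
   matches VTRN, which transposes pairs of corresponding lanes between
   the two inputs. */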
29185
29186 static bool
29187 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29188 {
29189 unsigned int i, odd, mask, nelt = d->perm.length ();
29190 rtx out0, out1, in0, in1;
29191 rtx (*gen)(rtx, rtx, rtx, rtx);
29192
29193 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29194 return false;
29195
29196 /* Note that these are little-endian tests. Adjust for big-endian later. */
29197 if (d->perm[0] == 0)
29198 odd = 0;
29199 else if (d->perm[0] == 1)
29200 odd = 1;
29201 else
29202 return false;
29203 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29204
29205 for (i = 0; i < nelt; i += 2)
29206 {
29207 if (d->perm[i] != i + odd)
29208 return false;
29209 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29210 return false;
29211 }
29212
29213 /* Success! */
29214 if (d->testing_p)
29215 return true;
29216
29217 switch (d->vmode)
29218 {
29219 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29220 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29221 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29222 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29223 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29224 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29225 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29226 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29227 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29228 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29229 default:
29230 gcc_unreachable ();
29231 }
29232
29233 in0 = d->op0;
29234 in1 = d->op1;
29235 if (BYTES_BIG_ENDIAN)
29236 {
29237 std::swap (in0, in1);
29238 odd = !odd;
29239 }
29240
29241 out0 = d->target;
29242 out1 = gen_reg_rtx (d->vmode);
29243 if (odd)
29244 std::swap (out0, out1);
29245
29246 emit_insn (gen (out0, in0, in1, out1));
29247 return true;
29248 }
29249
29250 /* Recognize patterns for the VEXT insns. */
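/* For illustration (little-endian): for V8QImode with two input vectors,
   the selector {3, 4, 5, 6, 7, 8, 9, 10} consists of indexes increasing
   by one from 3, so it matches VEXT with an offset of 3, extracting a
   window that spans the boundary between the two inputs. */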
29251
29252 static bool
29253 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29254 {
29255 unsigned int i, nelt = d->perm.length ();
29256 rtx (*gen) (rtx, rtx, rtx, rtx);
29257 rtx offset;
29258
29259 unsigned int location;
29260
29261 unsigned int next = d->perm[0] + 1;
29262
29263 /* TODO: Handle GCC's numbering of elements for big-endian. */
29264 if (BYTES_BIG_ENDIAN)
29265 return false;
29266
29267 /* Check if the extracted indexes are increasing by one. */
29268 for (i = 1; i < nelt; next++, i++)
29269 {
29270 /* If we hit the most significant element of the 2nd vector in
29271 the previous iteration, no need to test further. */
29272 if (next == 2 * nelt)
29273 return false;
29274
29275 /* If we are operating on only one vector: it could be a
29276 rotation. If there are only two elements of size < 64, let
29277 arm_evpc_neon_vrev catch it. */
29278 if (d->one_vector_p && (next == nelt))
29279 {
29280 if ((nelt == 2) && (d->vmode != V2DImode))
29281 return false;
29282 else
29283 next = 0;
29284 }
29285
29286 if (d->perm[i] != next)
29287 return false;
29288 }
29289
29290 location = d->perm[0];
29291
29292 switch (d->vmode)
29293 {
29294 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29295 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29296 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29297 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29298 case E_V2SImode: gen = gen_neon_vextv2si; break;
29299 case E_V4SImode: gen = gen_neon_vextv4si; break;
29300 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29301 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29302 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29303 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29304 case E_V2DImode: gen = gen_neon_vextv2di; break;
29305 default:
29306 return false;
29307 }
29308
29309 /* Success! */
29310 if (d->testing_p)
29311 return true;
29312
29313 offset = GEN_INT (location);
29314 emit_insn (gen (d->target, d->op0, d->op1, offset));
29315 return true;
29316 }
29317
29318 /* The NEON VTBL instruction is a fully variable permutation that's even
29319 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29320 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29321 can do slightly better by expanding this as a constant where we don't
29322 have to apply a mask. */
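/* For example, a V8QImode selector such as {0, 7, 1, 6, 2, 5, 3, 4}, which
   matches none of the structured VUZP/VZIP/VREV/VTRN/VEXT patterns above,
   is loaded as a constant vector into a register and handled with a single
   VTBL lookup. (Illustrative only.) */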
29323
29324 static bool
29325 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29326 {
29327 rtx rperm[MAX_VECT_LEN], sel;
29328 machine_mode vmode = d->vmode;
29329 unsigned int i, nelt = d->perm.length ();
29330
29331 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29332 numbering of elements for big-endian, we must reverse the order. */
29333 if (BYTES_BIG_ENDIAN)
29334 return false;
29335
29336 if (d->testing_p)
29337 return true;
29338
29339 /* Generic code will try constant permutation twice: once with the
29340 original mode and again with the elements lowered to QImode.
29341 So wait and don't do the selector expansion ourselves. */
29342 if (vmode != V8QImode && vmode != V16QImode)
29343 return false;
29344
29345 for (i = 0; i < nelt; ++i)
29346 rperm[i] = GEN_INT (d->perm[i]);
29347 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29348 sel = force_reg (vmode, sel);
29349
29350 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29351 return true;
29352 }
29353
29354 static bool
29355 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29356 {
29357 /* Check if the input mask matches vext before reordering the
29358 operands. */
29359 if (TARGET_NEON)
29360 if (arm_evpc_neon_vext (d))
29361 return true;
29362
29363 /* The pattern matching functions above are written to look for a small
29364 number to begin the sequence (0, 1, N/2). If we begin with an index
29365 from the second operand, we can swap the operands. */
29366 unsigned int nelt = d->perm.length ();
29367 if (d->perm[0] >= nelt)
29368 {
29369 d->perm.rotate_inputs (1);
29370 std::swap (d->op0, d->op1);
29371 }
29372
29373 if (TARGET_NEON)
29374 {
29375 if (arm_evpc_neon_vuzp (d))
29376 return true;
29377 if (arm_evpc_neon_vzip (d))
29378 return true;
29379 if (arm_evpc_neon_vrev (d))
29380 return true;
29381 if (arm_evpc_neon_vtrn (d))
29382 return true;
29383 return arm_evpc_neon_vtbl (d);
29384 }
29385 return false;
29386 }
29387
29388 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29389
29390 static bool
29391 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29392 const vec_perm_indices &sel)
29393 {
29394 struct expand_vec_perm_d d;
29395 int i, nelt, which;
29396
29397 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29398 return false;
29399
29400 d.target = target;
29401 d.op0 = op0;
29402 d.op1 = op1;
29403
29404 d.vmode = vmode;
29405 gcc_assert (VECTOR_MODE_P (d.vmode));
29406 d.testing_p = !target;
29407
29408 nelt = GET_MODE_NUNITS (d.vmode);
29409 for (i = which = 0; i < nelt; ++i)
29410 {
29411 int ei = sel[i] & (2 * nelt - 1);
29412 which |= (ei < nelt ? 1 : 2);
29413 }
29414
29415 switch (which)
29416 {
29417 default:
29418 gcc_unreachable();
29419
29420 case 3:
29421 d.one_vector_p = false;
29422 if (d.testing_p || !rtx_equal_p (op0, op1))
29423 break;
29424
29425 /* The elements of PERM do not suggest that only the first operand
29426 is used, but both operands are identical. Allow easier matching
29427 of the permutation by folding the permutation into the single
29428 input vector. */
29429 /* FALLTHRU */
29430 case 2:
29431 d.op0 = op1;
29432 d.one_vector_p = true;
29433 break;
29434
29435 case 1:
29436 d.op1 = op0;
29437 d.one_vector_p = true;
29438 break;
29439 }
29440
29441 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29442
29443 if (!d.testing_p)
29444 return arm_expand_vec_perm_const_1 (&d);
29445
29446 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29447 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29448 if (!d.one_vector_p)
29449 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29450
29451 start_sequence ();
29452 bool ret = arm_expand_vec_perm_const_1 (&d);
29453 end_sequence ();
29454
29455 return ret;
29456 }
29457
29458 bool
29459 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29460 {
29461 /* If we are soft float and either we have ldrd or the mode fits in a
29462 single word, then all auto-increment forms are ok. */
29463 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29464 return true;
29465
29466 switch (code)
29467 {
29468 /* Post-increment and pre-decrement are supported for all
29469 instruction forms except for vector forms. */
29470 case ARM_POST_INC:
29471 case ARM_PRE_DEC:
29472 if (VECTOR_MODE_P (mode))
29473 {
29474 if (code != ARM_PRE_DEC)
29475 return true;
29476 else
29477 return false;
29478 }
29479
29480 return true;
29481
29482 case ARM_POST_DEC:
29483 case ARM_PRE_INC:
29484 /* Without LDRD, and with a mode size greater than the
29485 word size, there is no point in auto-incrementing
29486 because ldm and stm will not have these forms. */
29487 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29488 return false;
29489
29490 /* Vector and floating point modes do not support
29491 these auto increment forms. */
29492 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29493 return false;
29494
29495 return true;
29496
29497 default:
29498 return false;
29499
29500 }
29501
29502 return false;
29503 }
29504
29505 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29506 on ARM, since we know that shifts by negative amounts are no-ops.
29507 Additionally, the default expansion code is not available or suitable
29508 for post-reload insn splits (this can occur when the register allocator
29509 chooses not to do a shift in NEON).
29510
29511 This function is used in both initial expand and post-reload splits, and
29512 handles all kinds of 64-bit shifts.
29513
29514 Input requirements:
29515 - It is safe for the input and output to be the same register, but
29516 early-clobber rules apply for the shift amount and scratch registers.
29517 - Shift by register requires both scratch registers. In all other cases
29518 the scratch registers may be NULL.
29519 - Ashiftrt by a register also clobbers the CC register. */
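/* As an illustration, a constant left shift by 5 expands to roughly:

	out_low  = in_low << 5;
	out_high = (in_high << 5) | (in_low >> 27);

   while a constant shift amount of 64 or more yields zero (or a sign-fill
   from the high input word for arithmetic right shifts). */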
29520 void
29521 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29522 rtx amount, rtx scratch1, rtx scratch2)
29523 {
29524 rtx out_high = gen_highpart (SImode, out);
29525 rtx out_low = gen_lowpart (SImode, out);
29526 rtx in_high = gen_highpart (SImode, in);
29527 rtx in_low = gen_lowpart (SImode, in);
29528
29529 /* Terminology:
29530 in = the register pair containing the input value.
29531 out = the destination register pair.
29532 up = the high- or low-part of each pair.
29533 down = the opposite part to "up".
29534 In a shift, we can consider bits to shift from "up"-stream to
29535 "down"-stream, so in a left-shift "up" is the low-part and "down"
29536 is the high-part of each register pair. */
29537
29538 rtx out_up = code == ASHIFT ? out_low : out_high;
29539 rtx out_down = code == ASHIFT ? out_high : out_low;
29540 rtx in_up = code == ASHIFT ? in_low : in_high;
29541 rtx in_down = code == ASHIFT ? in_high : in_low;
29542
29543 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29544 gcc_assert (out
29545 && (REG_P (out) || GET_CODE (out) == SUBREG)
29546 && GET_MODE (out) == DImode);
29547 gcc_assert (in
29548 && (REG_P (in) || GET_CODE (in) == SUBREG)
29549 && GET_MODE (in) == DImode);
29550 gcc_assert (amount
29551 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29552 && GET_MODE (amount) == SImode)
29553 || CONST_INT_P (amount)));
29554 gcc_assert (scratch1 == NULL
29555 || (GET_CODE (scratch1) == SCRATCH)
29556 || (GET_MODE (scratch1) == SImode
29557 && REG_P (scratch1)));
29558 gcc_assert (scratch2 == NULL
29559 || (GET_CODE (scratch2) == SCRATCH)
29560 || (GET_MODE (scratch2) == SImode
29561 && REG_P (scratch2)));
29562 gcc_assert (!REG_P (out) || !REG_P (amount)
29563 || !HARD_REGISTER_P (out)
29564 || (REGNO (out) != REGNO (amount)
29565 && REGNO (out) + 1 != REGNO (amount)));
29566
29567 /* Macros to make following code more readable. */
29568 #define SUB_32(DEST,SRC) \
29569 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29570 #define RSB_32(DEST,SRC) \
29571 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29572 #define SUB_S_32(DEST,SRC) \
29573 gen_addsi3_compare0 ((DEST), (SRC), \
29574 GEN_INT (-32))
29575 #define SET(DEST,SRC) \
29576 gen_rtx_SET ((DEST), (SRC))
29577 #define SHIFT(CODE,SRC,AMOUNT) \
29578 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29579 #define LSHIFT(CODE,SRC,AMOUNT) \
29580 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29581 SImode, (SRC), (AMOUNT))
29582 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29583 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29584 SImode, (SRC), (AMOUNT))
29585 #define ORR(A,B) \
29586 gen_rtx_IOR (SImode, (A), (B))
29587 #define BRANCH(COND,LABEL) \
29588 gen_arm_cond_branch ((LABEL), \
29589 gen_rtx_ ## COND (CCmode, cc_reg, \
29590 const0_rtx), \
29591 cc_reg)
29592
29593 /* Shifts by register and shifts by constant are handled separately. */
29594 if (CONST_INT_P (amount))
29595 {
29596 /* We have a shift-by-constant. */
29597
29598 /* First, handle out-of-range shift amounts.
29599 In both cases we try to match the result that an ARM instruction in a
29600 shift-by-register would give. This helps reduce execution
29601 differences between optimization levels, but it won't stop other
29602 parts of the compiler doing different things. This is "undefined
29603 behavior", in any case. */
29604 if (INTVAL (amount) <= 0)
29605 emit_insn (gen_movdi (out, in));
29606 else if (INTVAL (amount) >= 64)
29607 {
29608 if (code == ASHIFTRT)
29609 {
29610 rtx const31_rtx = GEN_INT (31);
29611 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29612 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29613 }
29614 else
29615 emit_insn (gen_movdi (out, const0_rtx));
29616 }
29617
29618 /* Now handle valid shifts. */
29619 else if (INTVAL (amount) < 32)
29620 {
29621 /* Shifts by a constant less than 32. */
29622 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29623
29624 /* Clearing the out register in DImode first avoids lots
29625 of spilling and results in less stack usage.
29626 Later this redundant insn is completely removed.
29627 Do that only if "in" and "out" are different registers. */
29628 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29629 emit_insn (SET (out, const0_rtx));
29630 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29631 emit_insn (SET (out_down,
29632 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29633 out_down)));
29634 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29635 }
29636 else
29637 {
29638 /* Shifts by a constant greater than 31. */
29639 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29640
29641 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29642 emit_insn (SET (out, const0_rtx));
29643 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29644 if (code == ASHIFTRT)
29645 emit_insn (gen_ashrsi3 (out_up, in_up,
29646 GEN_INT (31)));
29647 else
29648 emit_insn (SET (out_up, const0_rtx));
29649 }
29650 }
29651 else
29652 {
29653 /* We have a shift-by-register. */
29654 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29655
29656 /* This alternative requires the scratch registers. */
29657 gcc_assert (scratch1 && REG_P (scratch1));
29658 gcc_assert (scratch2 && REG_P (scratch2));
29659
29660 /* We will need the values "amount-32" and "32-amount" later.
29661 Swapping them around now allows the later code to be more general. */
29662 switch (code)
29663 {
29664 case ASHIFT:
29665 emit_insn (SUB_32 (scratch1, amount));
29666 emit_insn (RSB_32 (scratch2, amount));
29667 break;
29668 case ASHIFTRT:
29669 emit_insn (RSB_32 (scratch1, amount));
29670 /* Also set CC = amount > 32. */
29671 emit_insn (SUB_S_32 (scratch2, amount));
29672 break;
29673 case LSHIFTRT:
29674 emit_insn (RSB_32 (scratch1, amount));
29675 emit_insn (SUB_32 (scratch2, amount));
29676 break;
29677 default:
29678 gcc_unreachable ();
29679 }
29680
29681 /* Emit code like this:
29682
29683 arithmetic-left:
29684 out_down = in_down << amount;
29685 out_down = (in_up << (amount - 32)) | out_down;
29686 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29687 out_up = in_up << amount;
29688
29689 arithmetic-right:
29690 out_down = in_down >> amount;
29691 out_down = (in_up << (32 - amount)) | out_down;
29692 if (amount >= 32)
29693 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29694 out_up = (signed)in_up >> amount;
29695 
29696 logical-right:
29697 out_down = in_down >> amount;
29698 out_down = (in_up << (32 - amount)) | out_down;
29699 if (amount >= 32)
29700 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29701 out_up = (unsigned)in_up >> amount;
29702
29703 The ARM and Thumb2 variants are the same but implemented slightly
29704 differently. If this were only called during expand we could just
29705 use the Thumb2 case and let combine do the right thing, but this
29706 can also be called from post-reload splitters. */
29707
29708 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29709
29710 if (!TARGET_THUMB2)
29711 {
29712 /* Emit code for ARM mode. */
29713 emit_insn (SET (out_down,
29714 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29715 if (code == ASHIFTRT)
29716 {
29717 rtx_code_label *done_label = gen_label_rtx ();
29718 emit_jump_insn (BRANCH (LT, done_label));
29719 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29720 out_down)));
29721 emit_label (done_label);
29722 }
29723 else
29724 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29725 out_down)));
29726 }
29727 else
29728 {
29729 /* Emit code for Thumb2 mode.
29730 Thumb2 can't do shift and or in one insn. */
29731 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29732 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29733
29734 if (code == ASHIFTRT)
29735 {
29736 rtx_code_label *done_label = gen_label_rtx ();
29737 emit_jump_insn (BRANCH (LT, done_label));
29738 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29739 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29740 emit_label (done_label);
29741 }
29742 else
29743 {
29744 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29745 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29746 }
29747 }
29748
29749 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29750 }
29751
29752 #undef SUB_32
29753 #undef RSB_32
29754 #undef SUB_S_32
29755 #undef SET
29756 #undef SHIFT
29757 #undef LSHIFT
29758 #undef REV_LSHIFT
29759 #undef ORR
29760 #undef BRANCH
29761 }
29762
29763 /* Returns true if the pattern is a valid symbolic address, which is either a
29764 symbol_ref or (symbol_ref + addend).
29765
29766 According to the ARM ELF ABI, the initial addend of REL-type relocations
29767 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29768 literal field of the instruction as a 16-bit signed value in the range
29769 -32768 <= A < 32768. */
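/* For example, (symbol_ref "foo") and (const (plus (symbol_ref "foo")
   (const_int 16))) are accepted, while (const (plus (symbol_ref "foo")
   (const_int 65536))) is rejected because the addend does not fit in the
   signed 16-bit range. */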
29770
29771 bool
29772 arm_valid_symbolic_address_p (rtx addr)
29773 {
29774 rtx xop0, xop1 = NULL_RTX;
29775 rtx tmp = addr;
29776
29777 if (target_word_relocations)
29778 return false;
29779
29780 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29781 return true;
29782
29783 /* (const (plus: symbol_ref const_int)) */
29784 if (GET_CODE (addr) == CONST)
29785 tmp = XEXP (addr, 0);
29786
29787 if (GET_CODE (tmp) == PLUS)
29788 {
29789 xop0 = XEXP (tmp, 0);
29790 xop1 = XEXP (tmp, 1);
29791
29792 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29793 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29794 }
29795
29796 return false;
29797 }
29798
29799 /* Returns true if *COMPARISON is a valid comparison operation, and puts
29800 the operands into a form that is valid. */
29801 bool
29802 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29803 {
29804 enum rtx_code code = GET_CODE (*comparison);
29805 int code_int;
29806 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29807 ? GET_MODE (*op2) : GET_MODE (*op1);
29808
29809 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29810
29811 if (code == UNEQ || code == LTGT)
29812 return false;
29813
29814 code_int = (int)code;
29815 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29816 PUT_CODE (*comparison, (enum rtx_code)code_int);
29817
29818 switch (mode)
29819 {
29820 case E_SImode:
29821 if (!arm_add_operand (*op1, mode))
29822 *op1 = force_reg (mode, *op1);
29823 if (!arm_add_operand (*op2, mode))
29824 *op2 = force_reg (mode, *op2);
29825 return true;
29826
29827 case E_DImode:
29828 if (!cmpdi_operand (*op1, mode))
29829 *op1 = force_reg (mode, *op1);
29830 if (!cmpdi_operand (*op2, mode))
29831 *op2 = force_reg (mode, *op2);
29832 return true;
29833
29834 case E_HFmode:
29835 if (!TARGET_VFP_FP16INST)
29836 break;
29837 /* FP16 comparisons are done in SF mode. */
29838 mode = SFmode;
29839 *op1 = convert_to_mode (mode, *op1, 1);
29840 *op2 = convert_to_mode (mode, *op2, 1);
29841 /* Fall through. */
29842 case E_SFmode:
29843 case E_DFmode:
29844 if (!vfp_compare_operand (*op1, mode))
29845 *op1 = force_reg (mode, *op1);
29846 if (!vfp_compare_operand (*op2, mode))
29847 *op2 = force_reg (mode, *op2);
29848 return true;
29849 default:
29850 break;
29851 }
29852
29853 return false;
29854
29855 }
29856
29857 /* Maximum number of instructions to set block of memory. */
29858 static int
29859 arm_block_set_max_insns (void)
29860 {
29861 if (optimize_function_for_size_p (cfun))
29862 return 4;
29863 else
29864 return current_tune->max_insns_inline_memset;
29865 }
29866
29867 /* Return TRUE if it's profitable to set block of memory for
29868 non-vectorized case. VAL is the value to set the memory
29869 with. LENGTH is the number of bytes to set. ALIGN is the
29870 alignment of the destination memory in bytes. UNALIGNED_P
29871 is TRUE if we can only set the memory with instructions
29872 meeting alignment requirements. USE_STRD_P is TRUE if we
29873 can use strd to set the memory. */
29874 static bool
29875 arm_block_set_non_vect_profit_p (rtx val,
29876 unsigned HOST_WIDE_INT length,
29877 unsigned HOST_WIDE_INT align,
29878 bool unaligned_p, bool use_strd_p)
29879 {
29880 int num = 0;
29881 /* For a leftover of 0-7 bytes, we can set the memory block using
29882 strb/strh/str with a minimal number of instructions. */
29883 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
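/* E.g. leftover[5] == 2 because a 5-byte tail can be written with one STR
   plus one STRB, and leftover[7] == 3 (STR + STRH + STRB). */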
29884
29885 if (unaligned_p)
29886 {
29887 num = arm_const_inline_cost (SET, val);
29888 num += length / align + length % align;
29889 }
29890 else if (use_strd_p)
29891 {
29892 num = arm_const_double_inline_cost (val);
29893 num += (length >> 3) + leftover[length & 7];
29894 }
29895 else
29896 {
29897 num = arm_const_inline_cost (SET, val);
29898 num += (length >> 2) + leftover[length & 3];
29899 }
29900
29901 /* We may be able to combine the last STRH/STRB pair into a single STR
29902 by shifting one byte back. */
29903 if (unaligned_access && length > 3 && (length & 3) == 3)
29904 num--;
29905
29906 return (num <= arm_block_set_max_insns ());
29907 }
29908
29909 /* Return TRUE if it's profitable to set block of memory for
29910 vectorized case. LENGTH is the number of bytes to set.
29911 ALIGN is the alignment of destination memory in bytes.
29912 MODE is the vector mode used to set the memory. */
29913 static bool
29914 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29915 unsigned HOST_WIDE_INT align,
29916 machine_mode mode)
29917 {
29918 int num;
29919 bool unaligned_p = ((align & 3) != 0);
29920 unsigned int nelt = GET_MODE_NUNITS (mode);
29921
29922 /* Instruction loading constant value. */
29923 num = 1;
29924 /* Instructions storing the memory. */
29925 num += (length + nelt - 1) / nelt;
29926 /* Instructions adjusting the address expression. We only need to
29927 adjust the address expression if it's 4-byte aligned and the leftover
29928 bytes can only be stored by a misaligned store instruction. */
29929 if (!unaligned_p && (length & 3) != 0)
29930 num++;
29931
29932 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29933 if (!unaligned_p && mode == V16QImode)
29934 num--;
29935
29936 return (num <= arm_block_set_max_insns ());
29937 }
29938
29939 /* Set a block of memory using vectorization instructions for the
29940 unaligned case. We fill the first LENGTH bytes of the memory
29941 area starting from DSTBASE with byte constant VALUE. ALIGN is
29942 the alignment requirement of memory. Return TRUE if succeeded. */
29943 static bool
29944 arm_block_set_unaligned_vect (rtx dstbase,
29945 unsigned HOST_WIDE_INT length,
29946 unsigned HOST_WIDE_INT value,
29947 unsigned HOST_WIDE_INT align)
29948 {
29949 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29950 rtx dst, mem;
29951 rtx val_vec, reg;
29952 rtx (*gen_func) (rtx, rtx);
29953 machine_mode mode;
29954 unsigned HOST_WIDE_INT v = value;
29955 unsigned int offset = 0;
29956 gcc_assert ((align & 0x3) != 0);
29957 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29958 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29959 if (length >= nelt_v16)
29960 {
29961 mode = V16QImode;
29962 gen_func = gen_movmisalignv16qi;
29963 }
29964 else
29965 {
29966 mode = V8QImode;
29967 gen_func = gen_movmisalignv8qi;
29968 }
29969 nelt_mode = GET_MODE_NUNITS (mode);
29970 gcc_assert (length >= nelt_mode);
29971 /* Skip if it isn't profitable. */
29972 if (!arm_block_set_vect_profit_p (length, align, mode))
29973 return false;
29974
29975 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29976 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29977
29978 v = sext_hwi (v, BITS_PER_WORD);
29979
29980 reg = gen_reg_rtx (mode);
29981 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29982 /* Emit instruction loading the constant value. */
29983 emit_move_insn (reg, val_vec);
29984
29985 /* Handle nelt_mode bytes in a vector. */
29986 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29987 {
29988 emit_insn ((*gen_func) (mem, reg));
29989 if (i + 2 * nelt_mode <= length)
29990 {
29991 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29992 offset += nelt_mode;
29993 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29994 }
29995 }
29996
29997 /* If at least nelt_v8 bytes are left over, we must be in
29998 V16QImode. */
29999 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30000
30001 /* Handle (8, 16) bytes leftover. */
30002 if (i + nelt_v8 < length)
30003 {
30004 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30005 offset += length - i;
30006 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30007
30008 /* We are shifting bytes back, set the alignment accordingly. */
30009 if ((length & 1) != 0 && align >= 2)
30010 set_mem_align (mem, BITS_PER_UNIT);
30011
30012 emit_insn (gen_movmisalignv16qi (mem, reg));
30013 }
30014 /* Handle (0, 8] bytes leftover. */
30015 else if (i < length && i + nelt_v8 >= length)
30016 {
30017 if (mode == V16QImode)
30018 reg = gen_lowpart (V8QImode, reg);
30019
30020 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30021 + (nelt_mode - nelt_v8))));
30022 offset += (length - i) + (nelt_mode - nelt_v8);
30023 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30024
30025 /* We are shifting bytes back, set the alignment accordingly. */
30026 if ((length & 1) != 0 && align >= 2)
30027 set_mem_align (mem, BITS_PER_UNIT);
30028
30029 emit_insn (gen_movmisalignv8qi (mem, reg));
30030 }
30031
30032 return true;
30033 }
30034
30035 /* Set a block of memory using vectorization instructions for the
30036 aligned case. We fill the first LENGTH bytes of the memory area
30037 starting from DSTBASE with byte constant VALUE. ALIGN is the
30038 alignment requirement of memory. Return TRUE if succeeded. */
30039 static bool
30040 arm_block_set_aligned_vect (rtx dstbase,
30041 unsigned HOST_WIDE_INT length,
30042 unsigned HOST_WIDE_INT value,
30043 unsigned HOST_WIDE_INT align)
30044 {
30045 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30046 rtx dst, addr, mem;
30047 rtx val_vec, reg;
30048 machine_mode mode;
30049 unsigned HOST_WIDE_INT v = value;
30050 unsigned int offset = 0;
30051
30052 gcc_assert ((align & 0x3) == 0);
30053 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30054 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30055 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30056 mode = V16QImode;
30057 else
30058 mode = V8QImode;
30059
30060 nelt_mode = GET_MODE_NUNITS (mode);
30061 gcc_assert (length >= nelt_mode);
30062 /* Skip if it isn't profitable. */
30063 if (!arm_block_set_vect_profit_p (length, align, mode))
30064 return false;
30065
30066 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30067
30068 v = sext_hwi (v, BITS_PER_WORD);
30069
30070 reg = gen_reg_rtx (mode);
30071 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30072 /* Emit instruction loading the constant value. */
30073 emit_move_insn (reg, val_vec);
30074
30075 i = 0;
30076 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30077 if (mode == V16QImode)
30078 {
30079 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30080 emit_insn (gen_movmisalignv16qi (mem, reg));
30081 i += nelt_mode;
30082 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30083 if (i + nelt_v8 < length && i + nelt_v16 > length)
30084 {
30085 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30086 offset += length - nelt_mode;
30087 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30088 /* We are shifting bytes back, set the alignment accordingly. */
30089 if ((length & 0x3) == 0)
30090 set_mem_align (mem, BITS_PER_UNIT * 4);
30091 else if ((length & 0x1) == 0)
30092 set_mem_align (mem, BITS_PER_UNIT * 2);
30093 else
30094 set_mem_align (mem, BITS_PER_UNIT);
30095
30096 emit_insn (gen_movmisalignv16qi (mem, reg));
30097 return true;
30098 }
30099 /* Fall through for bytes leftover. */
30100 mode = V8QImode;
30101 nelt_mode = GET_MODE_NUNITS (mode);
30102 reg = gen_lowpart (V8QImode, reg);
30103 }
30104
30105 /* Handle 8 bytes in a vector. */
30106 for (; (i + nelt_mode <= length); i += nelt_mode)
30107 {
30108 addr = plus_constant (Pmode, dst, i);
30109 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30110 emit_move_insn (mem, reg);
30111 }
30112
30113 /* Handle single word leftover by shifting 4 bytes back. We can
30114 use aligned access for this case. */
30115 if (i + UNITS_PER_WORD == length)
30116 {
30117 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30118 offset += i - UNITS_PER_WORD;
30119 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30120 /* We are shifting 4 bytes back, set the alignment accordingly. */
30121 if (align > UNITS_PER_WORD)
30122 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30123
30124 emit_move_insn (mem, reg);
30125 }
30126 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30127 We have to use unaligned access for this case. */
30128 else if (i < length)
30129 {
30130 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30131 offset += length - nelt_mode;
30132 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30133 /* We are shifting bytes back, set the alignment accordingly. */
30134 if ((length & 1) == 0)
30135 set_mem_align (mem, BITS_PER_UNIT * 2);
30136 else
30137 set_mem_align (mem, BITS_PER_UNIT);
30138
30139 emit_insn (gen_movmisalignv8qi (mem, reg));
30140 }
30141
30142 return true;
30143 }
30144
30145 /* Set a block of memory using plain strh/strb instructions, using only
30146 instructions allowed by ALIGN on the processor. We fill the
30147 first LENGTH bytes of the memory area starting from DSTBASE
30148 with byte constant VALUE. ALIGN is the alignment requirement
30149 of memory. */
30150 static bool
30151 arm_block_set_unaligned_non_vect (rtx dstbase,
30152 unsigned HOST_WIDE_INT length,
30153 unsigned HOST_WIDE_INT value,
30154 unsigned HOST_WIDE_INT align)
30155 {
30156 unsigned int i;
30157 rtx dst, addr, mem;
30158 rtx val_exp, val_reg, reg;
30159 machine_mode mode;
30160 HOST_WIDE_INT v = value;
30161
30162 gcc_assert (align == 1 || align == 2);
30163
30164 if (align == 2)
30165 v |= (value << BITS_PER_UNIT);
30166
30167 v = sext_hwi (v, BITS_PER_WORD);
30168 val_exp = GEN_INT (v);
30169 /* Skip if it isn't profitable. */
30170 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30171 align, true, false))
30172 return false;
30173
30174 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30175 mode = (align == 2 ? HImode : QImode);
30176 val_reg = force_reg (SImode, val_exp);
30177 reg = gen_lowpart (mode, val_reg);
30178
30179 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30180 {
30181 addr = plus_constant (Pmode, dst, i);
30182 mem = adjust_automodify_address (dstbase, mode, addr, i);
30183 emit_move_insn (mem, reg);
30184 }
30185
30186 /* Handle single byte leftover. */
30187 if (i + 1 == length)
30188 {
30189 reg = gen_lowpart (QImode, val_reg);
30190 addr = plus_constant (Pmode, dst, i);
30191 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30192 emit_move_insn (mem, reg);
30193 i++;
30194 }
30195
30196 gcc_assert (i == length);
30197 return true;
30198 }
30199
30200 /* Set a block of memory using plain strd/str/strh/strb instructions,
30201 to permit unaligned copies on processors which support unaligned
30202 semantics for those instructions. We fill the first LENGTH bytes
30203 of the memory area starting from DSTBASE with byte constant VALUE.
30204 ALIGN is the alignment requirement of memory. */
30205 static bool
30206 arm_block_set_aligned_non_vect (rtx dstbase,
30207 unsigned HOST_WIDE_INT length,
30208 unsigned HOST_WIDE_INT value,
30209 unsigned HOST_WIDE_INT align)
30210 {
30211 unsigned int i;
30212 rtx dst, addr, mem;
30213 rtx val_exp, val_reg, reg;
30214 unsigned HOST_WIDE_INT v;
30215 bool use_strd_p;
30216
30217 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30218 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30219
30220 v = (value | (value << 8) | (value << 16) | (value << 24));
30221 if (length < UNITS_PER_WORD)
30222 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30223
30224 if (use_strd_p)
30225 v |= (v << BITS_PER_WORD);
30226 else
30227 v = sext_hwi (v, BITS_PER_WORD);
30228
30229 val_exp = GEN_INT (v);
30230 /* Skip if it isn't profitable. */
30231 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30232 align, false, use_strd_p))
30233 {
30234 if (!use_strd_p)
30235 return false;
30236
30237 /* Try without strd. */
30238 v = (v >> BITS_PER_WORD);
30239 v = sext_hwi (v, BITS_PER_WORD);
30240 val_exp = GEN_INT (v);
30241 use_strd_p = false;
30242 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30243 align, false, use_strd_p))
30244 return false;
30245 }
30246
30247 i = 0;
30248 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30249 /* Handle double words using strd if possible. */
30250 if (use_strd_p)
30251 {
30252 val_reg = force_reg (DImode, val_exp);
30253 reg = val_reg;
30254 for (; (i + 8 <= length); i += 8)
30255 {
30256 addr = plus_constant (Pmode, dst, i);
30257 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30258 emit_move_insn (mem, reg);
30259 }
30260 }
30261 else
30262 val_reg = force_reg (SImode, val_exp);
30263
30264 /* Handle words. */
30265 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30266 for (; (i + 4 <= length); i += 4)
30267 {
30268 addr = plus_constant (Pmode, dst, i);
30269 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30270 if ((align & 3) == 0)
30271 emit_move_insn (mem, reg);
30272 else
30273 emit_insn (gen_unaligned_storesi (mem, reg));
30274 }
30275
30276 /* Merge last pair of STRH and STRB into a STR if possible. */
30277 if (unaligned_access && i > 0 && (i + 3) == length)
30278 {
30279 addr = plus_constant (Pmode, dst, i - 1);
30280 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30281 /* We are shifting one byte back, set the alignment accordingly. */
30282 if ((align & 1) == 0)
30283 set_mem_align (mem, BITS_PER_UNIT);
30284
30285 /* Most likely this is an unaligned access, and we can't tell at
30286 compilation time. */
30287 emit_insn (gen_unaligned_storesi (mem, reg));
30288 return true;
30289 }
30290
30291 /* Handle half word leftover. */
30292 if (i + 2 <= length)
30293 {
30294 reg = gen_lowpart (HImode, val_reg);
30295 addr = plus_constant (Pmode, dst, i);
30296 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30297 if ((align & 1) == 0)
30298 emit_move_insn (mem, reg);
30299 else
30300 emit_insn (gen_unaligned_storehi (mem, reg));
30301
30302 i += 2;
30303 }
30304
30305 /* Handle single byte leftover. */
30306 if (i + 1 == length)
30307 {
30308 reg = gen_lowpart (QImode, val_reg);
30309 addr = plus_constant (Pmode, dst, i);
30310 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30311 emit_move_insn (mem, reg);
30312 }
30313
30314 return true;
30315 }
30316
30317 /* Set a block of memory using vectorization instructions for both
30318 aligned and unaligned cases. We fill the first LENGTH bytes of
30319 the memory area starting from DSTBASE with byte constant VALUE.
30320 ALIGN is the alignment requirement of memory. */
30321 static bool
30322 arm_block_set_vect (rtx dstbase,
30323 unsigned HOST_WIDE_INT length,
30324 unsigned HOST_WIDE_INT value,
30325 unsigned HOST_WIDE_INT align)
30326 {
30327 /* Check whether we need to use unaligned store instruction. */
30328 if (((align & 3) != 0 || (length & 3) != 0)
30329 /* Check whether unaligned store instruction is available. */
30330 && (!unaligned_access || BYTES_BIG_ENDIAN))
30331 return false;
30332
30333 if ((align & 3) == 0)
30334 return arm_block_set_aligned_vect (dstbase, length, value, align);
30335 else
30336 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30337 }
30338
30339 /* Expand a string store (memset) operation. First we try to do it using
30340 vectorization instructions, then with ARM unaligned access and a
30341 double-word store if profitable. OPERANDS[0] is the destination,
30342 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30343 initialize the memory with, and OPERANDS[3] is the known alignment of
30344 the destination. */
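/* Illustrative example (not the only possible expansion): for a 16-byte
   memset of value 0 on a word-aligned destination, with NEON preferred by
   the tuning and unaligned access available, arm_block_set_vect can emit a
   single vector constant load plus one 16-byte store; otherwise the
   non-vector path falls back to word (and strd) stores plus a small tail. */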
30345 bool
30346 arm_gen_setmem (rtx *operands)
30347 {
30348 rtx dstbase = operands[0];
30349 unsigned HOST_WIDE_INT length;
30350 unsigned HOST_WIDE_INT value;
30351 unsigned HOST_WIDE_INT align;
30352
30353 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30354 return false;
30355
30356 length = UINTVAL (operands[1]);
30357 if (length > 64)
30358 return false;
30359
30360 value = (UINTVAL (operands[2]) & 0xFF);
30361 align = UINTVAL (operands[3]);
30362 if (TARGET_NEON && length >= 8
30363 && current_tune->string_ops_prefer_neon
30364 && arm_block_set_vect (dstbase, length, value, align))
30365 return true;
30366
30367 if (!unaligned_access && (align & 3) != 0)
30368 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30369
30370 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30371 }
30372
30373
30374 static bool
30375 arm_macro_fusion_p (void)
30376 {
30377 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30378 }
30379
30380 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30381 for MOVW / MOVT macro fusion. */
30382
30383 static bool
30384 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30385 {
30386 /* We are trying to fuse
30387 movw imm / movt imm
30388 instructions as a group that gets scheduled together. */
30389
30390 rtx set_dest = SET_DEST (curr_set);
30391
30392 if (GET_MODE (set_dest) != SImode)
30393 return false;
30394
30395 /* We are trying to match:
30396 prev (movw) == (set (reg r0) (const_int imm16))
30397 curr (movt) == (set (zero_extract (reg r0)
30398 (const_int 16)
30399 (const_int 16))
30400 (const_int imm16_1))
30401 or
30402 prev (movw) == (set (reg r1)
30403 (high (symbol_ref ("SYM"))))
30404 curr (movt) == (set (reg r0)
30405 (lo_sum (reg r1)
30406 (symbol_ref ("SYM")))) */
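/* I.e. the fusible pair corresponds to assembly along the lines of
   (illustrative operands):
	movw	r0, #:lower16:SYM
	movt	r0, #:upper16:SYM
   or a movw/movt pair building a 32-bit immediate in the same register. */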
30407
30408 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30409 {
30410 if (CONST_INT_P (SET_SRC (curr_set))
30411 && CONST_INT_P (SET_SRC (prev_set))
30412 && REG_P (XEXP (set_dest, 0))
30413 && REG_P (SET_DEST (prev_set))
30414 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30415 return true;
30416
30417 }
30418 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30419 && REG_P (SET_DEST (curr_set))
30420 && REG_P (SET_DEST (prev_set))
30421 && GET_CODE (SET_SRC (prev_set)) == HIGH
30422 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30423 return true;
30424
30425 return false;
30426 }
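/* Hedged example of the pair being matched: loading the constant
   0x12345678 into r0 is emitted as

     movw  r0, #0x5678   @ (set (reg r0) (const_int 0x5678))
     movt  r0, #0x1234   @ (set (zero_extract (reg r0) (const_int 16)
                         @                    (const_int 16))
                         @      (const_int 0x1234))

   which satisfies the first pattern above, so the two instructions can be
   kept back to back on cores that fuse MOVW/MOVT.  */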
30427
30428 static bool
30429 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30430 {
30431 rtx prev_set = single_set (prev);
30432 rtx curr_set = single_set (curr);
30433
30434 if (!prev_set
30435 || !curr_set)
30436 return false;
30437
30438 if (any_condjump_p (curr))
30439 return false;
30440
30441 if (!arm_macro_fusion_p ())
30442 return false;
30443
30444 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30445 && aarch_crypto_can_dual_issue (prev, curr))
30446 return true;
30447
30448 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30449 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30450 return true;
30451
30452 return false;
30453 }
30454
30455 /* Return true iff the instruction fusion described by OP is enabled. */
30456 bool
30457 arm_fusion_enabled_p (tune_params::fuse_ops op)
30458 {
30459 return current_tune->fusible_ops & op;
30460 }
30461
30462 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30463 scheduled for speculative execution. Reject the long-running division
30464 and square-root instructions. */
30465
30466 static bool
30467 arm_sched_can_speculate_insn (rtx_insn *insn)
30468 {
30469 switch (get_attr_type (insn))
30470 {
30471 case TYPE_SDIV:
30472 case TYPE_UDIV:
30473 case TYPE_FDIVS:
30474 case TYPE_FDIVD:
30475 case TYPE_FSQRTS:
30476 case TYPE_FSQRTD:
30477 case TYPE_NEON_FP_SQRT_S:
30478 case TYPE_NEON_FP_SQRT_D:
30479 case TYPE_NEON_FP_SQRT_S_Q:
30480 case TYPE_NEON_FP_SQRT_D_Q:
30481 case TYPE_NEON_FP_DIV_S:
30482 case TYPE_NEON_FP_DIV_D:
30483 case TYPE_NEON_FP_DIV_S_Q:
30484 case TYPE_NEON_FP_DIV_D_Q:
30485 return false;
30486 default:
30487 return true;
30488 }
30489 }
30490
30491 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30492
30493 static unsigned HOST_WIDE_INT
30494 arm_asan_shadow_offset (void)
30495 {
30496 return HOST_WIDE_INT_1U << 29;
30497 }
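/* Worked example (assuming the generic ASan mapping
   shadow = (addr >> 3) + offset): with this offset of 1 << 29
   (0x20000000), the shadow byte for address 0x40000000 lives at
   (0x40000000 >> 3) + 0x20000000 == 0x28000000.  */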
30498
30499
30500 /* This is a temporary fix for PR60655. Ideally we need
30501 to handle most of these cases in the generic part but
30502 currently we reject minus (..) (sym_ref). We try to
30503 ameliorate the case with minus (sym_ref1) (sym_ref2)
30504 where they are in the same section. */
30505
30506 static bool
30507 arm_const_not_ok_for_debug_p (rtx p)
30508 {
30509 tree decl_op0 = NULL;
30510 tree decl_op1 = NULL;
30511
30512 if (GET_CODE (p) == UNSPEC)
30513 return true;
30514 if (GET_CODE (p) == MINUS)
30515 {
30516 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30517 {
30518 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30519 if (decl_op1
30520 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30521 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30522 {
30523 if ((VAR_P (decl_op1)
30524 || TREE_CODE (decl_op1) == CONST_DECL)
30525 && (VAR_P (decl_op0)
30526 || TREE_CODE (decl_op0) == CONST_DECL))
30527 return (get_variable_section (decl_op1, false)
30528 != get_variable_section (decl_op0, false));
30529
30530 if (TREE_CODE (decl_op1) == LABEL_DECL
30531 && TREE_CODE (decl_op0) == LABEL_DECL)
30532 return (DECL_CONTEXT (decl_op1)
30533 != DECL_CONTEXT (decl_op0));
30534 }
30535
30536 return true;
30537 }
30538 }
30539
30540 return false;
30541 }
30542
30543 /* Return TRUE if X is a reference to a value in a constant pool.  */
30544 extern bool
30545 arm_is_constant_pool_ref (rtx x)
30546 {
30547 return (MEM_P (x)
30548 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30549 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30550 }
30551
30552 /* Remember the last target of arm_set_current_function. */
30553 static GTY(()) tree arm_previous_fndecl;
30554
30555 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30556
30557 void
30558 save_restore_target_globals (tree new_tree)
30559 {
30560 /* If we have a previous state, use it. */
30561 if (TREE_TARGET_GLOBALS (new_tree))
30562 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30563 else if (new_tree == target_option_default_node)
30564 restore_target_globals (&default_target_globals);
30565 else
30566 {
30567 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30568 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30569 }
30570
30571 arm_option_params_internal ();
30572 }
30573
30574 /* Invalidate arm_previous_fndecl. */
30575
30576 void
30577 arm_reset_previous_fndecl (void)
30578 {
30579 arm_previous_fndecl = NULL_TREE;
30580 }
30581
30582 /* Establish appropriate back-end context for processing the function
30583 FNDECL. The argument might be NULL to indicate processing at top
30584 level, outside of any function scope. */
30585
30586 static void
30587 arm_set_current_function (tree fndecl)
30588 {
30589 if (!fndecl || fndecl == arm_previous_fndecl)
30590 return;
30591
30592 tree old_tree = (arm_previous_fndecl
30593 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30594 : NULL_TREE);
30595
30596 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30597
30598 /* If the current function has no attributes but the previous one did,
30599 use the default node.  */
30600 if (! new_tree && old_tree)
30601 new_tree = target_option_default_node;
30602
30603 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30604 the default have been handled by save_restore_target_globals from
30605 arm_pragma_target_parse. */
30606 if (old_tree == new_tree)
30607 return;
30608
30609 arm_previous_fndecl = fndecl;
30610
30611 /* First set the target options. */
30612 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30613
30614 save_restore_target_globals (new_tree);
30615 }
30616
30617 /* Implement TARGET_OPTION_PRINT. */
30618
30619 static void
30620 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30621 {
30622 int flags = ptr->x_target_flags;
30623 const char *fpu_name;
30624
30625 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30626 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30627
30628 fprintf (file, "%*sselected isa %s\n", indent, "",
30629 TARGET_THUMB2_P (flags) ? "thumb2" :
30630 TARGET_THUMB_P (flags) ? "thumb1" :
30631 "arm");
30632
30633 if (ptr->x_arm_arch_string)
30634 fprintf (file, "%*sselected architecture %s\n", indent, "",
30635 ptr->x_arm_arch_string);
30636
30637 if (ptr->x_arm_cpu_string)
30638 fprintf (file, "%*sselected CPU %s\n", indent, "",
30639 ptr->x_arm_cpu_string);
30640
30641 if (ptr->x_arm_tune_string)
30642 fprintf (file, "%*sselected tune %s\n", indent, "",
30643 ptr->x_arm_tune_string);
30644
30645 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30646 }
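/* Illustrative output of the fprintf calls above (values are made up):

     selected isa thumb2
     selected architecture armv7-a
     selected CPU cortex-a9
     selected tune cortex-a9
     selected fpu neon

   The architecture, CPU and tune lines are only printed when the
   corresponding option string is set.  */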
30647
30648 /* Hook to determine if one function can safely inline another. */
30649
30650 static bool
30651 arm_can_inline_p (tree caller, tree callee)
30652 {
30653 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30654 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30655 bool can_inline = true;
30656
30657 struct cl_target_option *caller_opts
30658 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30659 : target_option_default_node);
30660
30661 struct cl_target_option *callee_opts
30662 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30663 : target_option_default_node);
30664
30665 if (callee_opts == caller_opts)
30666 return true;
30667
30668 /* Callee's ISA features should be a subset of the caller's. */
30669 struct arm_build_target caller_target;
30670 struct arm_build_target callee_target;
30671 caller_target.isa = sbitmap_alloc (isa_num_bits);
30672 callee_target.isa = sbitmap_alloc (isa_num_bits);
30673
30674 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30675 false);
30676 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30677 false);
30678 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30679 can_inline = false;
30680
30681 sbitmap_free (caller_target.isa);
30682 sbitmap_free (callee_target.isa);
30683
30684 /* OK to inline between different modes.
30685 Functions with mode-specific instructions, e.g. those using asm,
30686 must be explicitly protected with noinline.  */
30687 return can_inline;
30688 }
30689
30690 /* Hook to fix function's alignment affected by target attribute. */
30691
30692 static void
30693 arm_relayout_function (tree fndecl)
30694 {
30695 if (DECL_USER_ALIGN (fndecl))
30696 return;
30697
30698 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30699
30700 if (!callee_tree)
30701 callee_tree = target_option_default_node;
30702
30703 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30704 SET_DECL_ALIGN
30705 (fndecl,
30706 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30707 }
30708
30709 /* Inner function to process the attribute((target(...))); take an argument
30710 and set the current options from that argument.  If we have a list,
30711 recursively go over the list.  */
30712
30713 static bool
30714 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30715 {
30716 if (TREE_CODE (args) == TREE_LIST)
30717 {
30718 bool ret = true;
30719
30720 for (; args; args = TREE_CHAIN (args))
30721 if (TREE_VALUE (args)
30722 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30723 ret = false;
30724 return ret;
30725 }
30726
30727 else if (TREE_CODE (args) != STRING_CST)
30728 {
30729 error ("attribute %<target%> argument not a string");
30730 return false;
30731 }
30732
30733 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30734 char *q;
30735
30736 while ((q = strtok (argstr, ",")) != NULL)
30737 {
30738 while (ISSPACE (*q)) ++q;
30739
30740 argstr = NULL;
30741 if (!strncmp (q, "thumb", 5))
30742 opts->x_target_flags |= MASK_THUMB;
30743
30744 else if (!strncmp (q, "arm", 3))
30745 opts->x_target_flags &= ~MASK_THUMB;
30746
30747 else if (!strncmp (q, "fpu=", 4))
30748 {
30749 int fpu_index;
30750 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30751 &fpu_index, CL_TARGET))
30752 {
30753 error ("invalid fpu for target attribute or pragma %qs", q);
30754 return false;
30755 }
30756 if (fpu_index == TARGET_FPU_auto)
30757 {
30758 /* This doesn't really make sense until we support
30759 general dynamic selection of the architecture and all
30760 sub-features. */
30761 sorry ("auto fpu selection not currently permitted here");
30762 return false;
30763 }
30764 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30765 }
30766 else if (!strncmp (q, "arch=", 5))
30767 {
30768 char* arch = q+5;
30769 const arch_option *arm_selected_arch
30770 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30771
30772 if (!arm_selected_arch)
30773 {
30774 error ("invalid architecture for target attribute or pragma %qs",
30775 q);
30776 return false;
30777 }
30778
30779 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30780 }
30781 else if (q[0] == '+')
30782 {
30783 opts->x_arm_arch_string
30784 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30785 }
30786 else
30787 {
30788 error ("unknown target attribute or pragma %qs", q);
30789 return false;
30790 }
30791 }
30792
30793 return true;
30794 }
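/* Hedged usage examples (illustrative only) of strings accepted by the
   parser above; tokens are comma-separated:

     __attribute__ ((target ("thumb,fpu=neon")))
     __attribute__ ((target ("arch=armv7-a")))

   "thumb"/"arm" flip MASK_THUMB, "fpu=" selects a named FPU, "arch="
   selects an architecture, and a token starting with '+' (e.g. "+crc")
   is appended to the current architecture string.  */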
30795
30796 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30797
30798 tree
30799 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30800 struct gcc_options *opts_set)
30801 {
30802 struct cl_target_option cl_opts;
30803
30804 if (!arm_valid_target_attribute_rec (args, opts))
30805 return NULL_TREE;
30806
30807 cl_target_option_save (&cl_opts, opts);
30808 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30809 arm_option_check_internal (opts);
30810 /* Do any overrides, such as global options arch=xxx.
30811 We do this since arm_active_target was overridden. */
30812 arm_option_reconfigure_globals ();
30813 arm_options_perform_arch_sanity_checks ();
30814 arm_option_override_internal (opts, opts_set);
30815
30816 return build_target_option_node (opts);
30817 }
30818
30819 static void
30820 add_attribute (const char * mode, tree *attributes)
30821 {
30822 size_t len = strlen (mode);
30823 tree value = build_string (len, mode);
30824
30825 TREE_TYPE (value) = build_array_type (char_type_node,
30826 build_index_type (size_int (len)));
30827
30828 *attributes = tree_cons (get_identifier ("target"),
30829 build_tree_list (NULL_TREE, value),
30830 *attributes);
30831 }
30832
30833 /* For testing.  Insert thumb or arm modes alternately on functions.  */
30834
30835 static void
30836 arm_insert_attributes (tree fndecl, tree * attributes)
30837 {
30838 const char *mode;
30839
30840 if (! TARGET_FLIP_THUMB)
30841 return;
30842
30843 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30844 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30845 return;
30846
30847 /* Nested definitions must inherit mode. */
30848 if (current_function_decl)
30849 {
30850 mode = TARGET_THUMB ? "thumb" : "arm";
30851 add_attribute (mode, attributes);
30852 return;
30853 }
30854
30855 /* If there is already a setting don't change it. */
30856 if (lookup_attribute ("target", *attributes) != NULL)
30857 return;
30858
30859 mode = thumb_flipper ? "thumb" : "arm";
30860 add_attribute (mode, attributes);
30861
30862 thumb_flipper = !thumb_flipper;
30863 }
30864
30865 /* Hook to validate attribute((target("string"))). */
30866
30867 static bool
30868 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30869 tree args, int ARG_UNUSED (flags))
30870 {
30871 bool ret = true;
30872 struct gcc_options func_options;
30873 tree cur_tree, new_optimize;
30874 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30875
30876 /* Get the optimization options of the current function. */
30877 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30878
30879 /* If the function changed the optimization levels as well as setting target
30880 options, start with the optimizations specified. */
30881 if (!func_optimize)
30882 func_optimize = optimization_default_node;
30883
30884 /* Init func_options. */
30885 memset (&func_options, 0, sizeof (func_options));
30886 init_options_struct (&func_options, NULL);
30887 lang_hooks.init_options_struct (&func_options);
30888
30889 /* Initialize func_options to the defaults. */
30890 cl_optimization_restore (&func_options,
30891 TREE_OPTIMIZATION (func_optimize));
30892
30893 cl_target_option_restore (&func_options,
30894 TREE_TARGET_OPTION (target_option_default_node));
30895
30896 /* Set func_options flags with new target mode. */
30897 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30898 &global_options_set);
30899
30900 if (cur_tree == NULL_TREE)
30901 ret = false;
30902
30903 new_optimize = build_optimization_node (&func_options);
30904
30905 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30906
30907 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30908
30909 finalize_options_struct (&func_options);
30910
30911 return ret;
30912 }
30913
30914 /* Match an ISA feature bitmap to a named FPU. We always use the
30915 first entry that exactly matches the feature set, so that we
30916 effectively canonicalize the FPU name for the assembler. */
30917 static const char*
30918 arm_identify_fpu_from_isa (sbitmap isa)
30919 {
30920 auto_sbitmap fpubits (isa_num_bits);
30921 auto_sbitmap cand_fpubits (isa_num_bits);
30922
30923 bitmap_and (fpubits, isa, isa_all_fpubits);
30924
30925 /* If there are no ISA feature bits relating to the FPU, we must be
30926 doing soft-float. */
30927 if (bitmap_empty_p (fpubits))
30928 return "softvfp";
30929
30930 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30931 {
30932 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30933 if (bitmap_equal_p (fpubits, cand_fpubits))
30934 return all_fpus[i].name;
30935 }
30936 /* We must find an entry, or things have gone wrong. */
30937 gcc_unreachable ();
30938 }
30939
30940 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30941 by the function fndecl. */
30942 void
30943 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30944 {
30945 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30946
30947 struct cl_target_option *targ_options;
30948 if (target_parts)
30949 targ_options = TREE_TARGET_OPTION (target_parts);
30950 else
30951 targ_options = TREE_TARGET_OPTION (target_option_current_node);
30952 gcc_assert (targ_options);
30953
30954 /* Only update the assembler .arch string if it is distinct from the last
30955 such string we printed.  arch_to_print is set conditionally in case
30956 targ_options->x_arm_arch_string is NULL, which can be the case
30957 when cc1 is invoked directly without passing the -march option.  */
30958 std::string arch_to_print;
30959 if (targ_options->x_arm_arch_string)
30960 arch_to_print = targ_options->x_arm_arch_string;
30961
30962 if (arch_to_print != arm_last_printed_arch_string)
30963 {
30964 std::string arch_name
30965 = arch_to_print.substr (0, arch_to_print.find ("+"));
30966 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
30967 const arch_option *arch
30968 = arm_parse_arch_option_name (all_architectures, "-march",
30969 targ_options->x_arm_arch_string);
30970 auto_sbitmap opt_bits (isa_num_bits);
30971
30972 gcc_assert (arch);
30973 if (arch->common.extensions)
30974 {
30975 for (const struct cpu_arch_extension *opt = arch->common.extensions;
30976 opt->name != NULL;
30977 opt++)
30978 {
30979 if (!opt->remove)
30980 {
30981 arm_initialize_isa (opt_bits, opt->isa_bits);
30982 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
30983 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
30984 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
30985 opt->name);
30986 }
30987 }
30988 }
30989
30990 arm_last_printed_arch_string = arch_to_print;
30991 }
30992
30993 fprintf (stream, "\t.syntax unified\n");
30994
30995 if (TARGET_THUMB)
30996 {
30997 if (is_called_in_ARM_mode (decl)
30998 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30999 && cfun->is_thunk))
31000 fprintf (stream, "\t.code 32\n");
31001 else if (TARGET_THUMB1)
31002 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31003 else
31004 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31005 }
31006 else
31007 fprintf (stream, "\t.arm\n");
31008
31009 std::string fpu_to_print
31010 = TARGET_SOFT_FLOAT
31011 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31012
31013 if (fpu_to_print != arm_last_printed_fpu_string)
31014 {
31015 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31016 arm_last_printed_fpu_string = fpu_to_print;
31017 }
31018
31019 if (TARGET_POKE_FUNCTION_NAME)
31020 arm_poke_function_name (stream, (const char *) name);
31021 }
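/* Illustrative output (directives depend on the active target): for a
   Thumb-2 function on an armv7-a NEON target this hook emits something
   like

     .arch armv7-a
     .syntax unified
     .thumb
     .thumb_func
     .fpu neon

   together with .arch_extension lines (printed right after .arch) for any
   enabled extensions; the .arch and .fpu directives are skipped when they
   match the last ones printed.  */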
31022
31023 /* If MEM is in the form of [base+offset], extract the two parts
31024 of the address and store them in BASE and OFFSET; otherwise return false
31025 after clearing BASE and OFFSET.  */
31026
31027 static bool
31028 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31029 {
31030 rtx addr;
31031
31032 gcc_assert (MEM_P (mem));
31033
31034 addr = XEXP (mem, 0);
31035
31036 /* Strip off const from addresses like (const (addr)). */
31037 if (GET_CODE (addr) == CONST)
31038 addr = XEXP (addr, 0);
31039
31040 if (GET_CODE (addr) == REG)
31041 {
31042 *base = addr;
31043 *offset = const0_rtx;
31044 return true;
31045 }
31046
31047 if (GET_CODE (addr) == PLUS
31048 && GET_CODE (XEXP (addr, 0)) == REG
31049 && CONST_INT_P (XEXP (addr, 1)))
31050 {
31051 *base = XEXP (addr, 0);
31052 *offset = XEXP (addr, 1);
31053 return true;
31054 }
31055
31056 *base = NULL_RTX;
31057 *offset = NULL_RTX;
31058
31059 return false;
31060 }
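/* Examples of the address forms accepted above (illustrative RTL):

     (mem (reg r1))                       -> BASE = r1, OFFSET = 0
     (mem (plus (reg r1) (const_int 8)))  -> BASE = r1, OFFSET = 8

   Anything else, e.g. a post-increment address, clears BASE and OFFSET
   and returns false.  */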
31061
31062 /* If INSN is a load or store whose address is in the form [base+offset],
31063 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
31064 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
31065 otherwise return FALSE.  */
31066
31067 static bool
31068 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31069 {
31070 rtx x, dest, src;
31071
31072 gcc_assert (INSN_P (insn));
31073 x = PATTERN (insn);
31074 if (GET_CODE (x) != SET)
31075 return false;
31076
31077 src = SET_SRC (x);
31078 dest = SET_DEST (x);
31079 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31080 {
31081 *is_load = false;
31082 extract_base_offset_in_addr (dest, base, offset);
31083 }
31084 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31085 {
31086 *is_load = true;
31087 extract_base_offset_in_addr (src, base, offset);
31088 }
31089 else
31090 return false;
31091
31092 return (*base != NULL_RTX && *offset != NULL_RTX);
31093 }
31094
31095 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31096
31097 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31098 and PRI are only calculated for these instructions.  For other instructions,
31099 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
31100 of instruction fusion can be supported by returning different priorities.
31101
31102 It is important that irrelevant instructions get the largest FUSION_PRI.  */
31103
31104 static void
31105 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31106 int *fusion_pri, int *pri)
31107 {
31108 int tmp, off_val;
31109 bool is_load;
31110 rtx base, offset;
31111
31112 gcc_assert (INSN_P (insn));
31113
31114 tmp = max_pri - 1;
31115 if (!fusion_load_store (insn, &base, &offset, &is_load))
31116 {
31117 *pri = tmp;
31118 *fusion_pri = tmp;
31119 return;
31120 }
31121
31122 /* Load goes first. */
31123 if (is_load)
31124 *fusion_pri = tmp - 1;
31125 else
31126 *fusion_pri = tmp - 2;
31127
31128 tmp /= 2;
31129
31130 /* INSN with smaller base register goes first. */
31131 tmp -= ((REGNO (base) & 0xff) << 20);
31132
31133 /* INSN with smaller offset goes first. */
31134 off_val = (int)(INTVAL (offset));
31135 if (off_val >= 0)
31136 tmp -= (off_val & 0xfffff);
31137 else
31138 tmp += ((- off_val) & 0xfffff);
31139
31140 *pri = tmp;
31141 return;
31142 }
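/* Worked example (MAX_PRI is whatever the scheduler passes in): for a
   load  ldr rX, [r1, #8]  the code above computes

     FUSION_PRI = MAX_PRI - 2                                (loads first)
     PRI        = (MAX_PRI - 1) / 2
                  - ((REGNO (r1) & 0xff) << 20) - 8

   so loads and stores sharing a base register sort next to each other in
   increasing offset order.  */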
31143
31144
31145 /* Construct and return a PARALLEL RTX vector with elements numbering the
31146 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31147 the vector - from the perspective of the architecture. This does not
31148 line up with GCC's perspective on lane numbers, so we end up with
31149 different masks depending on our target endian-ness. The diagram
31150 below may help. We must draw the distinction when building masks
31151 which select one half of the vector. An instruction selecting
31152 architectural low-lanes for a big-endian target must be described using
31153 a mask selecting GCC high-lanes.
31154
31155 Big-Endian Little-Endian
31156
31157 GCC 0 1 2 3 3 2 1 0
31158 | x | x | x | x | | x | x | x | x |
31159 Architecture 3 2 1 0 3 2 1 0
31160
31161 Low Mask: { 2, 3 } { 0, 1 }
31162 High Mask: { 0, 1 } { 2, 3 }
31163 */
31164
31165 rtx
31166 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31167 {
31168 int nunits = GET_MODE_NUNITS (mode);
31169 rtvec v = rtvec_alloc (nunits / 2);
31170 int high_base = nunits / 2;
31171 int low_base = 0;
31172 int base;
31173 rtx t1;
31174 int i;
31175
31176 if (BYTES_BIG_ENDIAN)
31177 base = high ? low_base : high_base;
31178 else
31179 base = high ? high_base : low_base;
31180
31181 for (i = 0; i < nunits / 2; i++)
31182 RTVEC_ELT (v, i) = GEN_INT (base + i);
31183
31184 t1 = gen_rtx_PARALLEL (mode, v);
31185 return t1;
31186 }
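/* Concrete example: for V4SImode with HIGH == true this returns

     (parallel [(const_int 2) (const_int 3)])   on little-endian
     (parallel [(const_int 0) (const_int 1)])   on big-endian

   matching the "High Mask" row of the diagram above.  */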
31187
31188 /* Check OP for validity as a PARALLEL RTX vector with elements
31189 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31190 from the perspective of the architecture. See the diagram above
31191 arm_simd_vect_par_cnst_half for more details.  */
31192
31193 bool
31194 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31195 bool high)
31196 {
31197 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31198 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31199 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31200 int i = 0;
31201
31202 if (!VECTOR_MODE_P (mode))
31203 return false;
31204
31205 if (count_op != count_ideal)
31206 return false;
31207
31208 for (i = 0; i < count_ideal; i++)
31209 {
31210 rtx elt_op = XVECEXP (op, 0, i);
31211 rtx elt_ideal = XVECEXP (ideal, 0, i);
31212
31213 if (!CONST_INT_P (elt_op)
31214 || INTVAL (elt_ideal) != INTVAL (elt_op))
31215 return false;
31216 }
31217 return true;
31218 }
31219
31220 /* We can output an mi_thunk for all cases except for a non-zero vcall_offset
31221 in Thumb1.  */
31222 static bool
31223 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31224 const_tree)
31225 {
31226 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
31227 if (vcall_offset && TARGET_THUMB1)
31228 return false;
31229
31230 /* Otherwise ok. */
31231 return true;
31232 }
31233
31234 /* Generate RTL for a conditional branch with rtx comparison CODE in
31235 mode CC_MODE. The destination of the unlikely conditional branch
31236 is LABEL_REF. */
31237
31238 void
31239 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31240 rtx label_ref)
31241 {
31242 rtx x;
31243 x = gen_rtx_fmt_ee (code, VOIDmode,
31244 gen_rtx_REG (cc_mode, CC_REGNUM),
31245 const0_rtx);
31246
31247 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31248 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31249 pc_rtx);
31250 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31251 }
31252
31253 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31254
31255 For pure-code sections there is no letter code for this attribute, so
31256 output all the section flags numerically when this is needed. */
31257
31258 static bool
31259 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31260 {
31261
31262 if (flags & SECTION_ARM_PURECODE)
31263 {
31264 *num = 0x20000000;
31265
31266 if (!(flags & SECTION_DEBUG))
31267 *num |= 0x2;
31268 if (flags & SECTION_EXCLUDE)
31269 *num |= 0x80000000;
31270 if (flags & SECTION_WRITE)
31271 *num |= 0x1;
31272 if (flags & SECTION_CODE)
31273 *num |= 0x4;
31274 if (flags & SECTION_MERGE)
31275 *num |= 0x10;
31276 if (flags & SECTION_STRINGS)
31277 *num |= 0x20;
31278 if (flags & SECTION_TLS)
31279 *num |= 0x400;
31280 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31281 *num |= 0x200;
31282
31283 return true;
31284 }
31285
31286 return false;
31287 }
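/* Worked example: a pure-code text section with SECTION_CODE set and the
   write, debug, merge, TLS and linkonce flags clear gets

     0x20000000 | 0x2 | 0x4 == 0x20000006

   i.e. SHF_ARM_PURECODE | SHF_ALLOC | SHF_EXECINSTR.  */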
31288
31289 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31290
31291 If pure-code is passed as an option, make sure all functions are in
31292 sections that have the SHF_ARM_PURECODE attribute. */
31293
31294 static section *
31295 arm_function_section (tree decl, enum node_frequency freq,
31296 bool startup, bool exit)
31297 {
31298 const char * section_name;
31299 section * sec;
31300
31301 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31302 return default_function_section (decl, freq, startup, exit);
31303
31304 if (!target_pure_code)
31305 return default_function_section (decl, freq, startup, exit);
31306
31307
31308 section_name = DECL_SECTION_NAME (decl);
31309
31310 /* If a function is not in a named section then it falls under the 'default'
31311 text section, also known as '.text'. We can preserve previous behavior as
31312 the default text section already has the SHF_ARM_PURECODE section
31313 attribute. */
31314 if (!section_name)
31315 {
31316 section *default_sec = default_function_section (decl, freq, startup,
31317 exit);
31318
31319 /* If default_sec is not null, then it must be a special section like for
31320 example .text.startup. We set the pure-code attribute and return the
31321 same section to preserve existing behavior. */
31322 if (default_sec)
31323 default_sec->common.flags |= SECTION_ARM_PURECODE;
31324 return default_sec;
31325 }
31326
31327 /* Otherwise look whether a section has already been created with
31328 'section_name'. */
31329 sec = get_named_section (decl, section_name, 0);
31330 if (!sec)
31331 /* If that is not the case passing NULL as the section's name to
31332 'get_named_section' will create a section with the declaration's
31333 section name. */
31334 sec = get_named_section (decl, NULL, 0);
31335
31336 /* Set the SHF_ARM_PURECODE attribute. */
31337 sec->common.flags |= SECTION_ARM_PURECODE;
31338
31339 return sec;
31340 }
31341
31342 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31343
31344 If DECL is a function declaration and pure-code is passed as an option
31345 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31346 section's name and RELOC indicates whether the declaration's initializer may
31347 contain runtime relocations.  */
31348
31349 static unsigned int
31350 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31351 {
31352 unsigned int flags = default_section_type_flags (decl, name, reloc);
31353
31354 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31355 flags |= SECTION_ARM_PURECODE;
31356
31357 return flags;
31358 }
31359
31360 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31361
31362 static void
31363 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31364 rtx op0, rtx op1,
31365 rtx *quot_p, rtx *rem_p)
31366 {
31367 if (mode == SImode)
31368 gcc_assert (!TARGET_IDIV);
31369
31370 scalar_int_mode libval_mode
31371 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31372
31373 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31374 libval_mode,
31375 op0, GET_MODE (op0),
31376 op1, GET_MODE (op1));
31377
31378 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31379 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31380 GET_MODE_SIZE (mode));
31381
31382 gcc_assert (quotient);
31383 gcc_assert (remainder);
31384
31385 *quot_p = quotient;
31386 *rem_p = remainder;
31387 }
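/* Illustrative SImode call (hedged; names follow the AEABI runtime):
   LIBVAL_MODE becomes DImode and LIBFUNC is e.g. __aeabi_idivmod, whose
   quotient/remainder pair comes back packed in a double-width value; the
   two simplify_gen_subreg calls above split it at byte offsets 0 and
   GET_MODE_SIZE (SImode) == 4.  */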
31388
31389 /* This function checks for the availability of the coprocessor builtin passed
31390 in BUILTIN for the current target. Returns true if it is available and
31391 false otherwise.  If a BUILTIN is passed for which this function has not
31392 been implemented, it will cause an internal compiler error.  */
31393
31394 bool
31395 arm_coproc_builtin_available (enum unspecv builtin)
31396 {
31397 /* None of these builtins are available in Thumb mode if the target only
31398 supports Thumb-1. */
31399 if (TARGET_THUMB1)
31400 return false;
31401
31402 switch (builtin)
31403 {
31404 case VUNSPEC_CDP:
31405 case VUNSPEC_LDC:
31406 case VUNSPEC_LDCL:
31407 case VUNSPEC_STC:
31408 case VUNSPEC_STCL:
31409 case VUNSPEC_MCR:
31410 case VUNSPEC_MRC:
31411 if (arm_arch4)
31412 return true;
31413 break;
31414 case VUNSPEC_CDP2:
31415 case VUNSPEC_LDC2:
31416 case VUNSPEC_LDC2L:
31417 case VUNSPEC_STC2:
31418 case VUNSPEC_STC2L:
31419 case VUNSPEC_MCR2:
31420 case VUNSPEC_MRC2:
31421 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31422 ARMv8-{A,M}. */
31423 if (arm_arch5t)
31424 return true;
31425 break;
31426 case VUNSPEC_MCRR:
31427 case VUNSPEC_MRRC:
31428 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31429 ARMv8-{A,M}. */
31430 if (arm_arch6 || arm_arch5te)
31431 return true;
31432 break;
31433 case VUNSPEC_MCRR2:
31434 case VUNSPEC_MRRC2:
31435 if (arm_arch6)
31436 return true;
31437 break;
31438 default:
31439 gcc_unreachable ();
31440 }
31441 return false;
31442 }
31443
31444 /* This function returns true if OP is a valid memory operand for the ldc and
31445 stc coprocessor instructions and false otherwise. */
31446
31447 bool
31448 arm_coproc_ldc_stc_legitimate_address (rtx op)
31449 {
31450 HOST_WIDE_INT range;
31451 /* Has to be a memory operand. */
31452 if (!MEM_P (op))
31453 return false;
31454
31455 op = XEXP (op, 0);
31456
31457 /* We accept registers. */
31458 if (REG_P (op))
31459 return true;
31460
31461 switch (GET_CODE (op))
31462 {
31463 case PLUS:
31464 {
31465 /* Or registers with an offset. */
31466 if (!REG_P (XEXP (op, 0)))
31467 return false;
31468
31469 op = XEXP (op, 1);
31470
31471 /* The offset must be an immediate though. */
31472 if (!CONST_INT_P (op))
31473 return false;
31474
31475 range = INTVAL (op);
31476
31477 /* Within the range of [-1020,1020]. */
31478 if (!IN_RANGE (range, -1020, 1020))
31479 return false;
31480
31481 /* And a multiple of 4. */
31482 return (range % 4) == 0;
31483 }
31484 case PRE_INC:
31485 case POST_INC:
31486 case PRE_DEC:
31487 case POST_DEC:
31488 return REG_P (XEXP (op, 0));
31489 default:
31490 gcc_unreachable ();
31491 }
31492 return false;
31493 }
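/* Illustrative addresses: [r0], [r1, #8] and [r2, #-1020] are accepted,
   as are the pre/post-modify forms with a register base; [r3, #2] is
   rejected (not a multiple of 4) and [r3, #1024] is rejected (outside
   the range [-1020, 1020]).  */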
31494
31495 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31496
31497 In VFPv1, VFP registers could only be accessed in the mode they were
31498 set, so subregs would be invalid there. However, we don't support
31499 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31500
31501 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31502 VFP registers in little-endian order. We can't describe that accurately to
31503 GCC, so avoid taking subregs of such values.
31504
31505 The only exception is going from a 128-bit to a 64-bit type. In that
31506 case the data layout happens to be consistent for big-endian, so we
31507 explicitly allow that case. */
31508
31509 static bool
31510 arm_can_change_mode_class (machine_mode from, machine_mode to,
31511 reg_class_t rclass)
31512 {
31513 if (TARGET_BIG_END
31514 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31515 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31516 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31517 && reg_classes_intersect_p (VFP_REGS, rclass))
31518 return false;
31519 return true;
31520 }
31521
31522 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31523 strcpy from constants will be faster. */
31524
31525 static HOST_WIDE_INT
31526 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31527 {
31528 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31529 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31530 return MAX (align, BITS_PER_WORD * factor);
31531 return align;
31532 }
31533
31534 #if CHECKING_P
31535 namespace selftest {
31536
31537 /* Scan the static data tables generated by parsecpu.awk looking for
31538 potential issues with the data. We primarily check for
31539 inconsistencies in the option extensions at present (extensions
31540 that duplicate others but aren't marked as aliases). Furthermore,
31541 for correct canonicalization later options must never be a subset
31542 of an earlier option. Any extension should also only specify other
31543 feature bits and never an architecture bit. The architecture is inferred
31544 from the declaration of the extension. */
31545 static void
31546 arm_test_cpu_arch_data (void)
31547 {
31548 const arch_option *arch;
31549 const cpu_option *cpu;
31550 auto_sbitmap target_isa (isa_num_bits);
31551 auto_sbitmap isa1 (isa_num_bits);
31552 auto_sbitmap isa2 (isa_num_bits);
31553
31554 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31555 {
31556 const cpu_arch_extension *ext1, *ext2;
31557
31558 if (arch->common.extensions == NULL)
31559 continue;
31560
31561 arm_initialize_isa (target_isa, arch->common.isa_bits);
31562
31563 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31564 {
31565 if (ext1->alias)
31566 continue;
31567
31568 arm_initialize_isa (isa1, ext1->isa_bits);
31569 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31570 {
31571 if (ext2->alias || ext1->remove != ext2->remove)
31572 continue;
31573
31574 arm_initialize_isa (isa2, ext2->isa_bits);
31575 /* If the option is a subset of the parent option, it doesn't
31576 add anything and so isn't useful. */
31577 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31578
31579 /* If the extension specifies any architectural bits then
31580 disallow it. Extensions should only specify feature bits. */
31581 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31582 }
31583 }
31584 }
31585
31586 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31587 {
31588 const cpu_arch_extension *ext1, *ext2;
31589
31590 if (cpu->common.extensions == NULL)
31591 continue;
31592
31593 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31594
31595 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31596 {
31597 if (ext1->alias)
31598 continue;
31599
31600 arm_initialize_isa (isa1, ext1->isa_bits);
31601 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31602 {
31603 if (ext2->alias || ext1->remove != ext2->remove)
31604 continue;
31605
31606 arm_initialize_isa (isa2, ext2->isa_bits);
31607 /* If the option is a subset of the parent option, it doesn't
31608 add anything and so isn't useful. */
31609 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31610
31611 /* If the extension specifies any architectural bits then
31612 disallow it. Extensions should only specify feature bits. */
31613 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31614 }
31615 }
31616 }
31617 }
31618
31619 /* Scan the static data tables generated by parsecpu.awk looking for
31620 potential issues with the data. Here we check for consistency between the
31621 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31622 a feature bit that is not defined by any FPU flag. */
31623 static void
31624 arm_test_fpu_data (void)
31625 {
31626 auto_sbitmap isa_all_fpubits (isa_num_bits);
31627 auto_sbitmap fpubits (isa_num_bits);
31628 auto_sbitmap tmpset (isa_num_bits);
31629
31630 static const enum isa_feature fpu_bitlist[]
31631 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31632 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31633
31634 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31635 {
31636 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31637 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31638 bitmap_clear (isa_all_fpubits);
31639 bitmap_copy (isa_all_fpubits, tmpset);
31640 }
31641
31642 if (!bitmap_empty_p (isa_all_fpubits))
31643 {
31644 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31645 " group that are not defined by any FPU.\n"
31646 " Check your arm-cpus.in.\n");
31647 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31648 }
31649 }
31650
31651 static void
31652 arm_run_selftests (void)
31653 {
31654 arm_test_cpu_arch_data ();
31655 arm_test_fpu_data ();
31656 }
31657 } /* Namespace selftest. */
31658
31659 #undef TARGET_RUN_TARGET_SELFTESTS
31660 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31661 #endif /* CHECKING_P */
31662
31663 struct gcc_target targetm = TARGET_INITIALIZER;
31664
31665 #include "gt-arm.h"