1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "gimple-iterator.h"
73 #include "selftest.h"
74 #include "tree-vectorizer.h"
75 #include "opts.h"
76 #include "aarch-common.h"
77 #include "aarch-common-protos.h"
78
79 /* This file should be included last. */
80 #include "target-def.h"
81
82 /* Forward definitions of types. */
83 typedef struct minipool_node Mnode;
84 typedef struct minipool_fixup Mfix;
85
86 void (*arm_lang_output_object_attributes_hook)(void);
87
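/* Holds the (up to four) immediate values that optimal_immediate_sequence
   and optimal_immediate_sequence_1 below return when splitting a constant
   into a short instruction sequence.  */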
88 struct four_ints
89 {
90 int i[4];
91 };
92
93 /* Forward function declarations. */
94 static bool arm_const_not_ok_for_debug_p (rtx);
95 static int arm_needs_doubleword_align (machine_mode, const_tree);
96 static int arm_compute_static_chain_stack_bytes (void);
97 static arm_stack_offsets *arm_get_frame_offsets (void);
98 static void arm_compute_frame_layout (void);
99 static void arm_add_gc_roots (void);
100 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
101 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
102 static unsigned bit_count (unsigned long);
103 static unsigned bitmap_popcount (const sbitmap);
104 static int arm_address_register_rtx_p (rtx, int);
105 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
106 static bool is_called_in_ARM_mode (tree);
107 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
108 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
109 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
110 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
111 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
112 inline static int thumb1_index_register_rtx_p (rtx, int);
113 static int thumb_far_jump_used_p (void);
114 static bool thumb_force_lr_save (void);
115 static unsigned arm_size_return_regs (void);
116 static bool arm_assemble_integer (rtx, unsigned int, int);
117 static void arm_print_operand (FILE *, rtx, int);
118 static void arm_print_operand_address (FILE *, machine_mode, rtx);
119 static bool arm_print_operand_punct_valid_p (unsigned char code);
120 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
121 static arm_cc get_arm_condition_code (rtx);
122 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
123 static const char *output_multi_immediate (rtx *, const char *, const char *,
124 int, HOST_WIDE_INT);
125 static const char *shift_op (rtx, HOST_WIDE_INT *);
126 static struct machine_function *arm_init_machine_status (void);
127 static void thumb_exit (FILE *, int);
128 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
129 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_forward_ref (Mfix *);
131 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
132 static Mnode *add_minipool_backward_ref (Mfix *);
133 static void assign_minipool_offsets (Mfix *);
134 static void arm_print_value (FILE *, rtx);
135 static void dump_minipool (rtx_insn *);
136 static int arm_barrier_cost (rtx_insn *);
137 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
138 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
139 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
140 machine_mode, rtx);
141 static void arm_reorg (void);
142 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
143 static unsigned long arm_compute_save_reg0_reg12_mask (void);
144 static unsigned long arm_compute_save_core_reg_mask (void);
145 static unsigned long arm_isr_value (tree);
146 static unsigned long arm_compute_func_type (void);
147 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
149 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
151 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
152 #endif
153 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
154 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
155 static void arm_output_function_epilogue (FILE *);
156 static void arm_output_function_prologue (FILE *);
157 static int arm_comp_type_attributes (const_tree, const_tree);
158 static void arm_set_default_type_attributes (tree);
159 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence);
164 static int optimal_immediate_sequence_1 (enum rtx_code code,
165 unsigned HOST_WIDE_INT val,
166 struct four_ints *return_sequence,
167 int i);
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree, tree);
170 static machine_mode arm_promote_function_mode (const_tree,
171 machine_mode, int *,
172 const_tree, int);
173 static bool arm_return_in_memory (const_tree, const_tree);
174 static rtx arm_function_value (const_tree, const_tree, bool);
175 static rtx arm_libcall_value_1 (machine_mode);
176 static rtx arm_libcall_value (machine_mode, const_rtx);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
180 tree);
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode, rtx);
183 static bool arm_legitimate_constant_p (machine_mode, rtx);
184 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
185 static int arm_insn_cost (rtx_insn *, bool);
186 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
187 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
188 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
189 static void emit_constant_insn (rtx cond, rtx pattern);
190 static rtx_insn *emit_set_insn (rtx, rtx);
191 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
192 static rtx emit_multi_reg_push (unsigned long, unsigned long);
193 static void arm_emit_multi_reg_pop (unsigned long);
194 static int vfp_emit_fstmd (int, int);
195 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
196 static int arm_arg_partial_bytes (cumulative_args_t,
197 const function_arg_info &);
198 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
199 static void arm_function_arg_advance (cumulative_args_t,
200 const function_arg_info &);
201 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
202 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
203 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
204 const_tree);
205 static rtx aapcs_libcall_value (machine_mode);
206 static int aapcs_select_return_coproc (const_tree, const_tree);
207
208 #ifdef OBJECT_FORMAT_ELF
209 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
210 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
211 #endif
212 #ifndef ARM_PE
213 static void arm_encode_section_info (tree, rtx, int);
214 #endif
215
216 static void arm_file_end (void);
217 static void arm_file_start (void);
218 static void arm_insert_attributes (tree, tree *);
219
220 static void arm_setup_incoming_varargs (cumulative_args_t,
221 const function_arg_info &, int *, int);
222 static bool arm_pass_by_reference (cumulative_args_t,
223 const function_arg_info &);
224 static bool arm_promote_prototypes (const_tree);
225 static bool arm_default_short_enums (void);
226 static bool arm_align_anon_bitfield (void);
227 static bool arm_return_in_msb (const_tree);
228 static bool arm_must_pass_in_stack (const function_arg_info &);
229 static bool arm_return_in_memory (const_tree, const_tree);
230 #if ARM_UNWIND_INFO
231 static void arm_unwind_emit (FILE *, rtx_insn *);
232 static bool arm_output_ttype (rtx);
233 static void arm_asm_emit_except_personality (rtx);
234 #endif
235 static void arm_asm_init_sections (void);
236 static rtx arm_dwarf_register_span (rtx);
237
238 static tree arm_cxx_guard_type (void);
239 static bool arm_cxx_guard_mask_bit (void);
240 static tree arm_get_cookie_size (tree);
241 static bool arm_cookie_has_size (void);
242 static bool arm_cxx_cdtor_returns_this (void);
243 static bool arm_cxx_key_method_may_be_inline (void);
244 static void arm_cxx_determine_class_data_visibility (tree);
245 static bool arm_cxx_class_data_always_comdat (void);
246 static bool arm_cxx_use_aeabi_atexit (void);
247 static void arm_init_libfuncs (void);
248 static tree arm_build_builtin_va_list (void);
249 static void arm_expand_builtin_va_start (tree, rtx);
250 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
251 static void arm_option_override (void);
252 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
253 struct cl_target_option *);
254 static void arm_override_options_after_change (void);
255 static void arm_option_print (FILE *, int, struct cl_target_option *);
256 static void arm_set_current_function (tree);
257 static bool arm_can_inline_p (tree, tree);
258 static void arm_relayout_function (tree);
259 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
260 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
261 static bool arm_sched_can_speculate_insn (rtx_insn *);
262 static bool arm_macro_fusion_p (void);
263 static bool arm_cannot_copy_insn_p (rtx_insn *);
264 static int arm_issue_rate (void);
265 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
266 static int arm_first_cycle_multipass_dfa_lookahead (void);
267 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
268 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
269 static bool arm_output_addr_const_extra (FILE *, rtx);
270 static bool arm_allocate_stack_slots_for_args (void);
271 static bool arm_warn_func_return (tree);
272 static tree arm_promoted_type (const_tree t);
273 static bool arm_scalar_mode_supported_p (scalar_mode);
274 static bool arm_frame_pointer_required (void);
275 static bool arm_can_eliminate (const int, const int);
276 static void arm_asm_trampoline_template (FILE *);
277 static void arm_trampoline_init (rtx, tree, rtx);
278 static rtx arm_trampoline_adjust_address (rtx);
279 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
280 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
282 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
283 static bool arm_array_mode_supported_p (machine_mode,
284 unsigned HOST_WIDE_INT);
285 static machine_mode arm_preferred_simd_mode (scalar_mode);
286 static bool arm_class_likely_spilled_p (reg_class_t);
287 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
288 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
289 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
290 const_tree type,
291 int misalignment,
292 bool is_packed);
293 static void arm_conditional_register_usage (void);
294 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
295 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
296 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
297 static int arm_default_branch_cost (bool, bool);
298 static int arm_cortex_a5_branch_cost (bool, bool);
299 static int arm_cortex_m_branch_cost (bool, bool);
300 static int arm_cortex_m7_branch_cost (bool, bool);
301
302 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
303 rtx, const vec_perm_indices &);
304
305 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
306
307 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
308 tree vectype,
309 int misalign ATTRIBUTE_UNUSED);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
329 vec<machine_mode> &,
330 vec<const char *> &, vec<rtx> &,
331 vec<rtx> &, HARD_REG_SET &, location_t);
332 static const char *arm_identify_fpu_from_isa (sbitmap);
333 \f
334 /* Table of machine attributes. */
335 static const attribute_spec arm_gnu_attributes[] =
336 {
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
338 affects_type_identity, handler, exclude } */
339 /* Function calls made to this symbol must be done indirectly, because
340 it may lie outside of the 26 bit addressing range of a normal function
341 call. */
342 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
343 /* Whereas these functions are always known to reside within the 26 bit
344 addressing range. */
345 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
346 /* Specify the procedure call conventions for a function. */
347 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
348 NULL },
349 /* Interrupt Service Routines have special prologue and epilogue requirements. */
350 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
351 NULL },
352 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
353 NULL },
354 { "naked", 0, 0, true, false, false, false,
355 arm_handle_fndecl_attribute, NULL },
356 #ifdef ARM_PE
357 /* ARM/PE has three new attributes:
358 interfacearm - ?
359 dllexport - for exporting a function/variable that will live in a dll
360 dllimport - for importing a function/variable from a dll
361
362 Microsoft allows multiple declspecs in one __declspec, separating
363 them with spaces. We do NOT support this. Instead, use __declspec
364 multiple times.
365 */
366 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
367 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
368 { "interfacearm", 0, 0, true, false, false, false,
369 arm_handle_fndecl_attribute, NULL },
370 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
371 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
372 NULL },
373 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
374 NULL },
375 { "notshared", 0, 0, false, true, false, false,
376 arm_handle_notshared_attribute, NULL },
377 #endif
378 /* ARMv8-M Security Extensions support. */
379 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
380 arm_handle_cmse_nonsecure_entry, NULL },
381 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
382 arm_handle_cmse_nonsecure_call, NULL },
383 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
384 };
385
386 static const scoped_attribute_specs arm_gnu_attribute_table =
387 {
388 "gnu", { arm_gnu_attributes }
389 };
390
391 static const scoped_attribute_specs *const arm_attribute_table[] =
392 {
393 &arm_gnu_attribute_table
394 };
395 \f
396 /* Initialize the GCC target structure. */
397 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
398 #undef TARGET_MERGE_DECL_ATTRIBUTES
399 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
400 #endif
401
402 #undef TARGET_CHECK_BUILTIN_CALL
403 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
404
405 #undef TARGET_LEGITIMIZE_ADDRESS
406 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
407
408 #undef TARGET_ATTRIBUTE_TABLE
409 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
410
411 #undef TARGET_INSERT_ATTRIBUTES
412 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
413
414 #undef TARGET_ASM_FILE_START
415 #define TARGET_ASM_FILE_START arm_file_start
416 #undef TARGET_ASM_FILE_END
417 #define TARGET_ASM_FILE_END arm_file_end
418
419 #undef TARGET_ASM_ALIGNED_SI_OP
420 #define TARGET_ASM_ALIGNED_SI_OP NULL
421 #undef TARGET_ASM_INTEGER
422 #define TARGET_ASM_INTEGER arm_assemble_integer
423
424 #undef TARGET_PRINT_OPERAND
425 #define TARGET_PRINT_OPERAND arm_print_operand
426 #undef TARGET_PRINT_OPERAND_ADDRESS
427 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
428 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
429 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
430
431 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
432 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
433
434 #undef TARGET_ASM_FUNCTION_PROLOGUE
435 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
436
437 #undef TARGET_ASM_FUNCTION_EPILOGUE
438 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
439
440 #undef TARGET_CAN_INLINE_P
441 #define TARGET_CAN_INLINE_P arm_can_inline_p
442
443 #undef TARGET_RELAYOUT_FUNCTION
444 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
445
446 #undef TARGET_OPTION_OVERRIDE
447 #define TARGET_OPTION_OVERRIDE arm_option_override
448
449 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
450 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
451
452 #undef TARGET_OPTION_RESTORE
453 #define TARGET_OPTION_RESTORE arm_option_restore
454
455 #undef TARGET_OPTION_PRINT
456 #define TARGET_OPTION_PRINT arm_option_print
457
458 #undef TARGET_COMP_TYPE_ATTRIBUTES
459 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
460
461 #undef TARGET_SCHED_CAN_SPECULATE_INSN
462 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
463
464 #undef TARGET_SCHED_MACRO_FUSION_P
465 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
466
467 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
468 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
469
470 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
471 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
472
473 #undef TARGET_SCHED_ADJUST_COST
474 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
475
476 #undef TARGET_SET_CURRENT_FUNCTION
477 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
478
479 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
480 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
481
482 #undef TARGET_SCHED_REORDER
483 #define TARGET_SCHED_REORDER arm_sched_reorder
484
485 #undef TARGET_REGISTER_MOVE_COST
486 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
487
488 #undef TARGET_MEMORY_MOVE_COST
489 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
490
491 #undef TARGET_ENCODE_SECTION_INFO
492 #ifdef ARM_PE
493 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
494 #else
495 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
496 #endif
497
498 #undef TARGET_STRIP_NAME_ENCODING
499 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
500
501 #undef TARGET_ASM_INTERNAL_LABEL
502 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
503
504 #undef TARGET_FLOATN_MODE
505 #define TARGET_FLOATN_MODE arm_floatn_mode
506
507 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
508 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
509
510 #undef TARGET_FUNCTION_VALUE
511 #define TARGET_FUNCTION_VALUE arm_function_value
512
513 #undef TARGET_LIBCALL_VALUE
514 #define TARGET_LIBCALL_VALUE arm_libcall_value
515
516 #undef TARGET_FUNCTION_VALUE_REGNO_P
517 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
518
519 #undef TARGET_GIMPLE_FOLD_BUILTIN
520 #define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin
521
522 #undef TARGET_ASM_OUTPUT_MI_THUNK
523 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
524 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
525 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
526
527 #undef TARGET_RTX_COSTS
528 #define TARGET_RTX_COSTS arm_rtx_costs
529 #undef TARGET_ADDRESS_COST
530 #define TARGET_ADDRESS_COST arm_address_cost
531 #undef TARGET_INSN_COST
532 #define TARGET_INSN_COST arm_insn_cost
533
534 #undef TARGET_SHIFT_TRUNCATION_MASK
535 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
536 #undef TARGET_VECTOR_MODE_SUPPORTED_P
537 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
538 #undef TARGET_ARRAY_MODE_SUPPORTED_P
539 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
540 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
541 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
542 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
543 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
544 arm_autovectorize_vector_modes
545
546 #undef TARGET_MACHINE_DEPENDENT_REORG
547 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
548
549 #undef TARGET_INIT_BUILTINS
550 #define TARGET_INIT_BUILTINS arm_init_builtins
551 #undef TARGET_EXPAND_BUILTIN
552 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
553 #undef TARGET_BUILTIN_DECL
554 #define TARGET_BUILTIN_DECL arm_builtin_decl
555
556 #undef TARGET_INIT_LIBFUNCS
557 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
558
559 #undef TARGET_PROMOTE_FUNCTION_MODE
560 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
561 #undef TARGET_PROMOTE_PROTOTYPES
562 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
563 #undef TARGET_PASS_BY_REFERENCE
564 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
565 #undef TARGET_ARG_PARTIAL_BYTES
566 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
567 #undef TARGET_FUNCTION_ARG
568 #define TARGET_FUNCTION_ARG arm_function_arg
569 #undef TARGET_FUNCTION_ARG_ADVANCE
570 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
571 #undef TARGET_FUNCTION_ARG_PADDING
572 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
573 #undef TARGET_FUNCTION_ARG_BOUNDARY
574 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
575
576 #undef TARGET_SETUP_INCOMING_VARARGS
577 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
578
579 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
580 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
581
582 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
583 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
584 #undef TARGET_TRAMPOLINE_INIT
585 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
586 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
587 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
588
589 #undef TARGET_WARN_FUNC_RETURN
590 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
591
592 #undef TARGET_DEFAULT_SHORT_ENUMS
593 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
594
595 #undef TARGET_ALIGN_ANON_BITFIELD
596 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
597
598 #undef TARGET_NARROW_VOLATILE_BITFIELD
599 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
600
601 #undef TARGET_CXX_GUARD_TYPE
602 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
603
604 #undef TARGET_CXX_GUARD_MASK_BIT
605 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
606
607 #undef TARGET_CXX_GET_COOKIE_SIZE
608 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
609
610 #undef TARGET_CXX_COOKIE_HAS_SIZE
611 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
612
613 #undef TARGET_CXX_CDTOR_RETURNS_THIS
614 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
615
616 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
617 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
618
619 #undef TARGET_CXX_USE_AEABI_ATEXIT
620 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
621
622 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
623 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
624 arm_cxx_determine_class_data_visibility
625
626 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
627 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
628
629 #undef TARGET_RETURN_IN_MSB
630 #define TARGET_RETURN_IN_MSB arm_return_in_msb
631
632 #undef TARGET_RETURN_IN_MEMORY
633 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
634
635 #undef TARGET_MUST_PASS_IN_STACK
636 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
637
638 #if ARM_UNWIND_INFO
639 #undef TARGET_ASM_UNWIND_EMIT
640 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
641
642 /* EABI unwinding tables use a different format for the typeinfo tables. */
643 #undef TARGET_ASM_TTYPE
644 #define TARGET_ASM_TTYPE arm_output_ttype
645
646 #undef TARGET_ARM_EABI_UNWINDER
647 #define TARGET_ARM_EABI_UNWINDER true
648
649 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
650 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
651
652 #endif /* ARM_UNWIND_INFO */
653
654 #undef TARGET_ASM_INIT_SECTIONS
655 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
656
657 #undef TARGET_DWARF_REGISTER_SPAN
658 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
659
660 #undef TARGET_CANNOT_COPY_INSN_P
661 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
662
663 #ifdef HAVE_AS_TLS
664 #undef TARGET_HAVE_TLS
665 #define TARGET_HAVE_TLS true
666 #endif
667
668 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
669 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
670
671 #undef TARGET_LEGITIMATE_CONSTANT_P
672 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
673
674 #undef TARGET_CANNOT_FORCE_CONST_MEM
675 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
676
677 #undef TARGET_MAX_ANCHOR_OFFSET
678 #define TARGET_MAX_ANCHOR_OFFSET 4095
679
680 /* The minimum is set such that the total size of the block
681 for a particular anchor is -4088 + 1 + 4095 bytes, which is
682 divisible by eight, ensuring natural spacing of anchors. */
683 #undef TARGET_MIN_ANCHOR_OFFSET
684 #define TARGET_MIN_ANCHOR_OFFSET -4088
685
686 #undef TARGET_SCHED_ISSUE_RATE
687 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
688
689 #undef TARGET_SCHED_VARIABLE_ISSUE
690 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
691
692 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
693 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
694 arm_first_cycle_multipass_dfa_lookahead
695
696 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
697 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
698 arm_first_cycle_multipass_dfa_lookahead_guard
699
700 #undef TARGET_MANGLE_TYPE
701 #define TARGET_MANGLE_TYPE arm_mangle_type
702
703 #undef TARGET_INVALID_CONVERSION
704 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
705
706 #undef TARGET_INVALID_UNARY_OP
707 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
708
709 #undef TARGET_INVALID_BINARY_OP
710 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
711
712 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
713 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
714
715 #undef TARGET_BUILD_BUILTIN_VA_LIST
716 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
717 #undef TARGET_EXPAND_BUILTIN_VA_START
718 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
719 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
720 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
721
722 #ifdef HAVE_AS_TLS
723 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
724 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
725 #endif
726
727 #undef TARGET_LEGITIMATE_ADDRESS_P
728 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
729
730 #undef TARGET_PREFERRED_RELOAD_CLASS
731 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
732
733 #undef TARGET_PROMOTED_TYPE
734 #define TARGET_PROMOTED_TYPE arm_promoted_type
735
736 #undef TARGET_SCALAR_MODE_SUPPORTED_P
737 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
738
739 #undef TARGET_COMPUTE_FRAME_LAYOUT
740 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
741
742 #undef TARGET_FRAME_POINTER_REQUIRED
743 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
744
745 #undef TARGET_CAN_ELIMINATE
746 #define TARGET_CAN_ELIMINATE arm_can_eliminate
747
748 #undef TARGET_CONDITIONAL_REGISTER_USAGE
749 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
750
751 #undef TARGET_CLASS_LIKELY_SPILLED_P
752 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
753
754 #undef TARGET_VECTORIZE_BUILTINS
755 #define TARGET_VECTORIZE_BUILTINS
756
757 #undef TARGET_VECTOR_ALIGNMENT
758 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
759
760 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
761 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
762 arm_vector_alignment_reachable
763
764 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
765 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
766 arm_builtin_support_vector_misalignment
767
768 #undef TARGET_PREFERRED_RENAME_CLASS
769 #define TARGET_PREFERRED_RENAME_CLASS \
770 arm_preferred_rename_class
771
772 #undef TARGET_VECTORIZE_VEC_PERM_CONST
773 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
774
775 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
776 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
777 arm_builtin_vectorization_cost
778
779 #undef TARGET_CANONICALIZE_COMPARISON
780 #define TARGET_CANONICALIZE_COMPARISON \
781 arm_canonicalize_comparison
782
783 #undef TARGET_ASAN_SHADOW_OFFSET
784 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
785
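/* Maximum number of instructions to place in an IT block.  ARMv8
   deprecates IT blocks containing more than one instruction, so with
   -mrestrict-it only a single instruction is allowed; otherwise up to
   four.  */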
786 #undef MAX_INSN_PER_IT_BLOCK
787 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
788
789 #undef TARGET_CAN_USE_DOLOOP_P
790 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
791
792 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
793 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
794
795 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
796 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
797
798 #undef TARGET_SCHED_FUSION_PRIORITY
799 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
800
801 #undef TARGET_ASM_FUNCTION_SECTION
802 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
803
804 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
805 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
806
807 #undef TARGET_SECTION_TYPE_FLAGS
808 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
809
810 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
811 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
812
813 #undef TARGET_C_EXCESS_PRECISION
814 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
815
816 /* Although the architecture reserves bits 0 and 1, only the former is
817 used for ARM/Thumb ISA selection in v7 and earlier versions. */
818 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
819 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
820
821 #undef TARGET_FIXED_CONDITION_CODE_REGS
822 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
823
824 #undef TARGET_HARD_REGNO_NREGS
825 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
826 #undef TARGET_HARD_REGNO_MODE_OK
827 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
828
829 #undef TARGET_MODES_TIEABLE_P
830 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
831
832 #undef TARGET_CAN_CHANGE_MODE_CLASS
833 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
834
835 #undef TARGET_CONSTANT_ALIGNMENT
836 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
837
838 #undef TARGET_INVALID_WITHIN_DOLOOP
839 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
840
841 #undef TARGET_MD_ASM_ADJUST
842 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
843
844 #undef TARGET_STACK_PROTECT_GUARD
845 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
846
847 #undef TARGET_VECTORIZE_GET_MASK_MODE
848 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
849 \f
850 /* Obstack for minipool constant handling. */
851 static struct obstack minipool_obstack;
852 static char * minipool_startobj;
853
854 /* The maximum number of insns skipped over a branch that will be
855 conditionalised if possible. */
856 static int max_insns_skipped = 5;
857
858 /* True if we are currently building a constant table. */
859 int making_const_table;
860
861 /* The processor for which instructions should be scheduled. */
862 enum processor_type arm_tune = TARGET_CPU_arm_none;
863
864 /* The current tuning set. */
865 const struct tune_params *current_tune;
866
867 /* Which floating point hardware to schedule for. */
868 int arm_fpu_attr;
869
870 /* Used for Thumb call_via trampolines. */
871 rtx thumb_call_via_label[14];
872 static int thumb_call_reg_needed;
873
874 /* The bits in this mask specify which instruction scheduling options should
875 be used. */
876 unsigned int tune_flags = 0;
877
878 /* The highest ARM architecture version supported by the
879 target. */
880 enum base_architecture arm_base_arch = BASE_ARCH_0;
881
882 /* Active target architecture and tuning. */
883
884 struct arm_build_target arm_active_target;
885
886 /* The following are used in the arm.md file as equivalents to bits
887 in the above two flag variables. */
888
889 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
890 int arm_arch4 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
893 int arm_arch4t = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
896 int arm_arch5t = 0;
897
898 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
899 int arm_arch5te = 0;
900
901 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
902 int arm_arch6 = 0;
903
904 /* Nonzero if this chip supports the ARM 6K extensions. */
905 int arm_arch6k = 0;
906
907 /* Nonzero if this chip supports the ARM 6KZ extensions. */
908 int arm_arch6kz = 0;
909
910 /* Nonzero if instructions present in ARMv6-M can be used. */
911 int arm_arch6m = 0;
912
913 /* Nonzero if this chip supports the ARM 7 extensions. */
914 int arm_arch7 = 0;
915
916 /* Nonzero if this chip supports the Large Physical Address Extension. */
917 int arm_arch_lpae = 0;
918
919 /* Nonzero if instructions not present in the 'M' profile can be used. */
920 int arm_arch_notm = 0;
921
922 /* Nonzero if instructions present in ARMv7E-M can be used. */
923 int arm_arch7em = 0;
924
925 /* Nonzero if instructions present in ARMv8 can be used. */
926 int arm_arch8 = 0;
927
928 /* Nonzero if this chip supports the ARMv8.1 extensions. */
929 int arm_arch8_1 = 0;
930
931 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
932 int arm_arch8_2 = 0;
933
934 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
935 int arm_arch8_3 = 0;
936
937 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
938 int arm_arch8_4 = 0;
939
940 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
941 extensions. */
942 int arm_arch8m_main = 0;
943
944 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
945 extensions. */
946 int arm_arch8_1m_main = 0;
947
948 /* Nonzero if this chip supports the FP16 instructions extension of ARM
949 Architecture 8.2. */
950 int arm_fp16_inst = 0;
951
952 /* Nonzero if this chip can benefit from load scheduling. */
953 int arm_ld_sched = 0;
954
955 /* Nonzero if this chip is a StrongARM. */
956 int arm_tune_strongarm = 0;
957
958 /* Nonzero if this chip supports Intel Wireless MMX technology. */
959 int arm_arch_iwmmxt = 0;
960
961 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
962 int arm_arch_iwmmxt2 = 0;
963
964 /* Nonzero if this chip is an XScale. */
965 int arm_arch_xscale = 0;
966
967 /* Nonzero if tuning for XScale. */
968 int arm_tune_xscale = 0;
969
970 /* Nonzero if we want to tune for stores that access the write-buffer.
971 This typically means an ARM6 or ARM7 with MMU or MPU. */
972 int arm_tune_wbuf = 0;
973
974 /* Nonzero if tuning for Cortex-A9. */
975 int arm_tune_cortex_a9 = 0;
976
977 /* Nonzero if we should define __THUMB_INTERWORK__ in the
978 preprocessor.
979 XXX This is a bit of a hack, it's intended to help work around
980 problems in GLD which doesn't understand that armv5t code is
981 interworking clean. */
982 int arm_cpp_interwork = 0;
983
984 /* Nonzero if chip supports Thumb 1. */
985 int arm_arch_thumb1;
986
987 /* Nonzero if chip supports Thumb 2. */
988 int arm_arch_thumb2;
989
990 /* Nonzero if chip supports integer division instruction. */
991 int arm_arch_arm_hwdiv;
992 int arm_arch_thumb_hwdiv;
993
994 /* Nonzero if chip disallows volatile memory access in IT block. */
995 int arm_arch_no_volatile_ce;
996
997 /* Nonzero if we shouldn't use literal pools. */
998 bool arm_disable_literal_pool = false;
999
1000 /* The register number to be used for the PIC offset register. */
1001 unsigned arm_pic_register = INVALID_REGNUM;
1002
1003 enum arm_pcs arm_pcs_default;
1004
1005 /* For an explanation of these variables, see final_prescan_insn below. */
1006 int arm_ccfsm_state;
1007 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
1008 enum arm_cond_code arm_current_cc;
1009
1010 rtx arm_target_insn;
1011 int arm_target_label;
1012 /* The number of conditionally executed insns, including the current insn. */
1013 int arm_condexec_count = 0;
1014 /* A bitmask specifying the patterns for the IT block.
1015 Zero means do not output an IT block before this insn. */
1016 int arm_condexec_mask = 0;
1017 /* The number of bits used in arm_condexec_mask. */
1018 int arm_condexec_masklen = 0;
1019
1020 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1021 int arm_arch_crc = 0;
1022
1023 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1024 int arm_arch_dotprod = 0;
1025
1026 /* Nonzero if chip supports the ARMv8-M security extensions. */
1027 int arm_arch_cmse = 0;
1028
1029 /* Nonzero if the core has a very small, high-latency multiply unit. */
1030 int arm_m_profile_small_mul = 0;
1031
1032 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1033 int arm_arch_i8mm = 0;
1034
1035 /* Nonzero if chip supports the BFloat16 instructions. */
1036 int arm_arch_bf16 = 0;
1037
1038 /* Nonzero if chip supports the Custom Datapath Extension. */
1039 int arm_arch_cde = 0;
1040 int arm_arch_cde_coproc = 0;
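/* Bit mask for each of the eight possible CDE coprocessors, indexed by
   coprocessor number.  */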
1041 const int arm_arch_cde_coproc_bits[] = {
1042 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1043 };
1044
1045 /* The condition codes of the ARM, and the inverse function. */
1046 static const char * const arm_condition_codes[] =
1047 {
1048 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1049 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1050 };
1051
1052 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1053 int arm_regs_in_sequence[] =
1054 {
1055 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1056 };
1057
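/* Printable names of the FP system registers, built by stringizing each
   entry of FP_SYSREGS via the temporary DEF_FP_SYSREG X-macro.  */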
1058 #define DEF_FP_SYSREG(reg) #reg,
1059 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1060 FP_SYSREGS
1061 };
1062 #undef DEF_FP_SYSREG
1063
1064 #define ARM_LSL_NAME "lsl"
1065 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1066
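/* Mask of the low registers (r0-r7) that remain available as work
   registers in Thumb-2 code: the Thumb hard frame pointer, SP, PC and,
   when one is in use, the PIC register are excluded.  */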
1067 #define THUMB2_WORK_REGS \
1068 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1069 | (1 << SP_REGNUM) \
1070 | (1 << PC_REGNUM) \
1071 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1072 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1073 : 0)))
1074 \f
1075 /* Initialization code. */
1076
1077 struct cpu_tune
1078 {
1079 enum processor_type scheduler;
1080 unsigned int tune_flags;
1081 const struct tune_params *tune;
1082 };
1083
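/* Initializers for the prefetch-related tuning parameters: the number of
   prefetch slots, the L1 cache size and the L1 cache line size.
   ARM_PREFETCH_NOT_BENEFICIAL requests no prefetch slots and leaves the
   cache parameters unspecified (-1).  */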
1084 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1085 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1086 { \
1087 num_slots, \
1088 l1_size, \
1089 l1_line_size \
1090 }
1091
1092 /* arm generic vectorizer costs. */
1093 static const
1094 struct cpu_vec_costs arm_default_vec_cost = {
1095 1, /* scalar_stmt_cost. */
1096 1, /* scalar load_cost. */
1097 1, /* scalar_store_cost. */
1098 1, /* vec_stmt_cost. */
1099 1, /* vec_to_scalar_cost. */
1100 1, /* scalar_to_vec_cost. */
1101 1, /* vec_align_load_cost. */
1102 1, /* vec_unalign_load_cost. */
1103 1, /* vec_unalign_store_cost. */
1104 1, /* vec_store_cost. */
1105 3, /* cond_taken_branch_cost. */
1106 1, /* cond_not_taken_branch_cost. */
1107 };
1108
1109 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h. */
1110 #include "aarch-cost-tables.h"
1111
1112
1113
1114 const struct cpu_cost_table cortexa9_extra_costs =
1115 {
1116 /* ALU */
1117 {
1118 0, /* arith. */
1119 0, /* logical. */
1120 0, /* shift. */
1121 COSTS_N_INSNS (1), /* shift_reg. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 COSTS_N_INSNS (2), /* arith_shift_reg. */
1124 0, /* log_shift. */
1125 COSTS_N_INSNS (1), /* log_shift_reg. */
1126 COSTS_N_INSNS (1), /* extend. */
1127 COSTS_N_INSNS (2), /* extend_arith. */
1128 COSTS_N_INSNS (1), /* bfi. */
1129 COSTS_N_INSNS (1), /* bfx. */
1130 0, /* clz. */
1131 0, /* rev. */
1132 0, /* non_exec. */
1133 true /* non_exec_costs_exec. */
1134 },
1135 {
1136 /* MULT SImode */
1137 {
1138 COSTS_N_INSNS (3), /* simple. */
1139 COSTS_N_INSNS (3), /* flag_setting. */
1140 COSTS_N_INSNS (2), /* extend. */
1141 COSTS_N_INSNS (3), /* add. */
1142 COSTS_N_INSNS (2), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1144 },
1145 /* MULT DImode */
1146 {
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (4), /* extend. */
1150 0, /* add (N/A). */
1151 COSTS_N_INSNS (4), /* extend_add. */
1152 0 /* idiv (N/A). */
1153 }
1154 },
1155 /* LD/ST */
1156 {
1157 COSTS_N_INSNS (2), /* load. */
1158 COSTS_N_INSNS (2), /* load_sign_extend. */
1159 COSTS_N_INSNS (2), /* ldrd. */
1160 COSTS_N_INSNS (2), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (5), /* loadf. */
1164 COSTS_N_INSNS (5), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (2), /* store. */
1167 COSTS_N_INSNS (2), /* strd. */
1168 COSTS_N_INSNS (2), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1176 },
1177 {
1178 /* FP SFmode */
1179 {
1180 COSTS_N_INSNS (14), /* div. */
1181 COSTS_N_INSNS (4), /* mult. */
1182 COSTS_N_INSNS (7), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (3), /* addsub. */
1185 COSTS_N_INSNS (1), /* fpconst. */
1186 COSTS_N_INSNS (1), /* neg. */
1187 COSTS_N_INSNS (3), /* compare. */
1188 COSTS_N_INSNS (3), /* widen. */
1189 COSTS_N_INSNS (3), /* narrow. */
1190 COSTS_N_INSNS (3), /* toint. */
1191 COSTS_N_INSNS (3), /* fromint. */
1192 COSTS_N_INSNS (3) /* roundint. */
1193 },
1194 /* FP DFmode */
1195 {
1196 COSTS_N_INSNS (24), /* div. */
1197 COSTS_N_INSNS (5), /* mult. */
1198 COSTS_N_INSNS (8), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (3), /* addsub. */
1201 COSTS_N_INSNS (1), /* fpconst. */
1202 COSTS_N_INSNS (1), /* neg. */
1203 COSTS_N_INSNS (3), /* compare. */
1204 COSTS_N_INSNS (3), /* widen. */
1205 COSTS_N_INSNS (3), /* narrow. */
1206 COSTS_N_INSNS (3), /* toint. */
1207 COSTS_N_INSNS (3), /* fromint. */
1208 COSTS_N_INSNS (3) /* roundint. */
1209 }
1210 },
1211 /* Vector */
1212 {
1213 COSTS_N_INSNS (1), /* alu. */
1214 COSTS_N_INSNS (4), /* mult. */
1215 COSTS_N_INSNS (1), /* movi. */
1216 COSTS_N_INSNS (2), /* dup. */
1217 COSTS_N_INSNS (2) /* extract. */
1218 }
1219 };
1220
1221 const struct cpu_cost_table cortexa8_extra_costs =
1222 {
1223 /* ALU */
1224 {
1225 0, /* arith. */
1226 0, /* logical. */
1227 COSTS_N_INSNS (1), /* shift. */
1228 0, /* shift_reg. */
1229 COSTS_N_INSNS (1), /* arith_shift. */
1230 0, /* arith_shift_reg. */
1231 COSTS_N_INSNS (1), /* log_shift. */
1232 0, /* log_shift_reg. */
1233 0, /* extend. */
1234 0, /* extend_arith. */
1235 0, /* bfi. */
1236 0, /* bfx. */
1237 0, /* clz. */
1238 0, /* rev. */
1239 0, /* non_exec. */
1240 true /* non_exec_costs_exec. */
1241 },
1242 {
1243 /* MULT SImode */
1244 {
1245 COSTS_N_INSNS (1), /* simple. */
1246 COSTS_N_INSNS (1), /* flag_setting. */
1247 COSTS_N_INSNS (1), /* extend. */
1248 COSTS_N_INSNS (1), /* add. */
1249 COSTS_N_INSNS (1), /* extend_add. */
1250 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1251 },
1252 /* MULT DImode */
1253 {
1254 0, /* simple (N/A). */
1255 0, /* flag_setting (N/A). */
1256 COSTS_N_INSNS (2), /* extend. */
1257 0, /* add (N/A). */
1258 COSTS_N_INSNS (2), /* extend_add. */
1259 0 /* idiv (N/A). */
1260 }
1261 },
1262 /* LD/ST */
1263 {
1264 COSTS_N_INSNS (1), /* load. */
1265 COSTS_N_INSNS (1), /* load_sign_extend. */
1266 COSTS_N_INSNS (1), /* ldrd. */
1267 COSTS_N_INSNS (1), /* ldm_1st. */
1268 1, /* ldm_regs_per_insn_1st. */
1269 2, /* ldm_regs_per_insn_subsequent. */
1270 COSTS_N_INSNS (1), /* loadf. */
1271 COSTS_N_INSNS (1), /* loadd. */
1272 COSTS_N_INSNS (1), /* load_unaligned. */
1273 COSTS_N_INSNS (1), /* store. */
1274 COSTS_N_INSNS (1), /* strd. */
1275 COSTS_N_INSNS (1), /* stm_1st. */
1276 1, /* stm_regs_per_insn_1st. */
1277 2, /* stm_regs_per_insn_subsequent. */
1278 COSTS_N_INSNS (1), /* storef. */
1279 COSTS_N_INSNS (1), /* stored. */
1280 COSTS_N_INSNS (1), /* store_unaligned. */
1281 COSTS_N_INSNS (1), /* loadv. */
1282 COSTS_N_INSNS (1) /* storev. */
1283 },
1284 {
1285 /* FP SFmode */
1286 {
1287 COSTS_N_INSNS (36), /* div. */
1288 COSTS_N_INSNS (11), /* mult. */
1289 COSTS_N_INSNS (20), /* mult_addsub. */
1290 COSTS_N_INSNS (30), /* fma. */
1291 COSTS_N_INSNS (9), /* addsub. */
1292 COSTS_N_INSNS (3), /* fpconst. */
1293 COSTS_N_INSNS (3), /* neg. */
1294 COSTS_N_INSNS (6), /* compare. */
1295 COSTS_N_INSNS (4), /* widen. */
1296 COSTS_N_INSNS (4), /* narrow. */
1297 COSTS_N_INSNS (8), /* toint. */
1298 COSTS_N_INSNS (8), /* fromint. */
1299 COSTS_N_INSNS (8) /* roundint. */
1300 },
1301 /* FP DFmode */
1302 {
1303 COSTS_N_INSNS (64), /* div. */
1304 COSTS_N_INSNS (16), /* mult. */
1305 COSTS_N_INSNS (25), /* mult_addsub. */
1306 COSTS_N_INSNS (30), /* fma. */
1307 COSTS_N_INSNS (9), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (6), /* compare. */
1311 COSTS_N_INSNS (6), /* widen. */
1312 COSTS_N_INSNS (6), /* narrow. */
1313 COSTS_N_INSNS (8), /* toint. */
1314 COSTS_N_INSNS (8), /* fromint. */
1315 COSTS_N_INSNS (8) /* roundint. */
1316 }
1317 },
1318 /* Vector */
1319 {
1320 COSTS_N_INSNS (1), /* alu. */
1321 COSTS_N_INSNS (4), /* mult. */
1322 COSTS_N_INSNS (1), /* movi. */
1323 COSTS_N_INSNS (2), /* dup. */
1324 COSTS_N_INSNS (2) /* extract. */
1325 }
1326 };
1327
1328 const struct cpu_cost_table cortexa5_extra_costs =
1329 {
1330 /* ALU */
1331 {
1332 0, /* arith. */
1333 0, /* logical. */
1334 COSTS_N_INSNS (1), /* shift. */
1335 COSTS_N_INSNS (1), /* shift_reg. */
1336 COSTS_N_INSNS (1), /* arith_shift. */
1337 COSTS_N_INSNS (1), /* arith_shift_reg. */
1338 COSTS_N_INSNS (1), /* log_shift. */
1339 COSTS_N_INSNS (1), /* log_shift_reg. */
1340 COSTS_N_INSNS (1), /* extend. */
1341 COSTS_N_INSNS (1), /* extend_arith. */
1342 COSTS_N_INSNS (1), /* bfi. */
1343 COSTS_N_INSNS (1), /* bfx. */
1344 COSTS_N_INSNS (1), /* clz. */
1345 COSTS_N_INSNS (1), /* rev. */
1346 0, /* non_exec. */
1347 true /* non_exec_costs_exec. */
1348 },
1349
1350 {
1351 /* MULT SImode */
1352 {
1353 0, /* simple. */
1354 COSTS_N_INSNS (1), /* flag_setting. */
1355 COSTS_N_INSNS (1), /* extend. */
1356 COSTS_N_INSNS (1), /* add. */
1357 COSTS_N_INSNS (1), /* extend_add. */
1358 COSTS_N_INSNS (7) /* idiv. */
1359 },
1360 /* MULT DImode */
1361 {
1362 0, /* simple (N/A). */
1363 0, /* flag_setting (N/A). */
1364 COSTS_N_INSNS (1), /* extend. */
1365 0, /* add. */
1366 COSTS_N_INSNS (2), /* extend_add. */
1367 0 /* idiv (N/A). */
1368 }
1369 },
1370 /* LD/ST */
1371 {
1372 COSTS_N_INSNS (1), /* load. */
1373 COSTS_N_INSNS (1), /* load_sign_extend. */
1374 COSTS_N_INSNS (6), /* ldrd. */
1375 COSTS_N_INSNS (1), /* ldm_1st. */
1376 1, /* ldm_regs_per_insn_1st. */
1377 2, /* ldm_regs_per_insn_subsequent. */
1378 COSTS_N_INSNS (2), /* loadf. */
1379 COSTS_N_INSNS (4), /* loadd. */
1380 COSTS_N_INSNS (1), /* load_unaligned. */
1381 COSTS_N_INSNS (1), /* store. */
1382 COSTS_N_INSNS (3), /* strd. */
1383 COSTS_N_INSNS (1), /* stm_1st. */
1384 1, /* stm_regs_per_insn_1st. */
1385 2, /* stm_regs_per_insn_subsequent. */
1386 COSTS_N_INSNS (2), /* storef. */
1387 COSTS_N_INSNS (2), /* stored. */
1388 COSTS_N_INSNS (1), /* store_unaligned. */
1389 COSTS_N_INSNS (1), /* loadv. */
1390 COSTS_N_INSNS (1) /* storev. */
1391 },
1392 {
1393 /* FP SFmode */
1394 {
1395 COSTS_N_INSNS (15), /* div. */
1396 COSTS_N_INSNS (3), /* mult. */
1397 COSTS_N_INSNS (7), /* mult_addsub. */
1398 COSTS_N_INSNS (7), /* fma. */
1399 COSTS_N_INSNS (3), /* addsub. */
1400 COSTS_N_INSNS (3), /* fpconst. */
1401 COSTS_N_INSNS (3), /* neg. */
1402 COSTS_N_INSNS (3), /* compare. */
1403 COSTS_N_INSNS (3), /* widen. */
1404 COSTS_N_INSNS (3), /* narrow. */
1405 COSTS_N_INSNS (3), /* toint. */
1406 COSTS_N_INSNS (3), /* fromint. */
1407 COSTS_N_INSNS (3) /* roundint. */
1408 },
1409 /* FP DFmode */
1410 {
1411 COSTS_N_INSNS (30), /* div. */
1412 COSTS_N_INSNS (6), /* mult. */
1413 COSTS_N_INSNS (10), /* mult_addsub. */
1414 COSTS_N_INSNS (7), /* fma. */
1415 COSTS_N_INSNS (3), /* addsub. */
1416 COSTS_N_INSNS (3), /* fpconst. */
1417 COSTS_N_INSNS (3), /* neg. */
1418 COSTS_N_INSNS (3), /* compare. */
1419 COSTS_N_INSNS (3), /* widen. */
1420 COSTS_N_INSNS (3), /* narrow. */
1421 COSTS_N_INSNS (3), /* toint. */
1422 COSTS_N_INSNS (3), /* fromint. */
1423 COSTS_N_INSNS (3) /* roundint. */
1424 }
1425 },
1426 /* Vector */
1427 {
1428 COSTS_N_INSNS (1), /* alu. */
1429 COSTS_N_INSNS (4), /* mult. */
1430 COSTS_N_INSNS (1), /* movi. */
1431 COSTS_N_INSNS (2), /* dup. */
1432 COSTS_N_INSNS (2) /* extract. */
1433 }
1434 };
1435
1436
1437 const struct cpu_cost_table cortexa7_extra_costs =
1438 {
1439 /* ALU */
1440 {
1441 0, /* arith. */
1442 0, /* logical. */
1443 COSTS_N_INSNS (1), /* shift. */
1444 COSTS_N_INSNS (1), /* shift_reg. */
1445 COSTS_N_INSNS (1), /* arith_shift. */
1446 COSTS_N_INSNS (1), /* arith_shift_reg. */
1447 COSTS_N_INSNS (1), /* log_shift. */
1448 COSTS_N_INSNS (1), /* log_shift_reg. */
1449 COSTS_N_INSNS (1), /* extend. */
1450 COSTS_N_INSNS (1), /* extend_arith. */
1451 COSTS_N_INSNS (1), /* bfi. */
1452 COSTS_N_INSNS (1), /* bfx. */
1453 COSTS_N_INSNS (1), /* clz. */
1454 COSTS_N_INSNS (1), /* rev. */
1455 0, /* non_exec. */
1456 true /* non_exec_costs_exec. */
1457 },
1458
1459 {
1460 /* MULT SImode */
1461 {
1462 0, /* simple. */
1463 COSTS_N_INSNS (1), /* flag_setting. */
1464 COSTS_N_INSNS (1), /* extend. */
1465 COSTS_N_INSNS (1), /* add. */
1466 COSTS_N_INSNS (1), /* extend_add. */
1467 COSTS_N_INSNS (7) /* idiv. */
1468 },
1469 /* MULT DImode */
1470 {
1471 0, /* simple (N/A). */
1472 0, /* flag_setting (N/A). */
1473 COSTS_N_INSNS (1), /* extend. */
1474 0, /* add. */
1475 COSTS_N_INSNS (2), /* extend_add. */
1476 0 /* idiv (N/A). */
1477 }
1478 },
1479 /* LD/ST */
1480 {
1481 COSTS_N_INSNS (1), /* load. */
1482 COSTS_N_INSNS (1), /* load_sign_extend. */
1483 COSTS_N_INSNS (3), /* ldrd. */
1484 COSTS_N_INSNS (1), /* ldm_1st. */
1485 1, /* ldm_regs_per_insn_1st. */
1486 2, /* ldm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (2), /* loadf. */
1488 COSTS_N_INSNS (2), /* loadd. */
1489 COSTS_N_INSNS (1), /* load_unaligned. */
1490 COSTS_N_INSNS (1), /* store. */
1491 COSTS_N_INSNS (3), /* strd. */
1492 COSTS_N_INSNS (1), /* stm_1st. */
1493 1, /* stm_regs_per_insn_1st. */
1494 2, /* stm_regs_per_insn_subsequent. */
1495 COSTS_N_INSNS (2), /* storef. */
1496 COSTS_N_INSNS (2), /* stored. */
1497 COSTS_N_INSNS (1), /* store_unaligned. */
1498 COSTS_N_INSNS (1), /* loadv. */
1499 COSTS_N_INSNS (1) /* storev. */
1500 },
1501 {
1502 /* FP SFmode */
1503 {
1504 COSTS_N_INSNS (15), /* div. */
1505 COSTS_N_INSNS (3), /* mult. */
1506 COSTS_N_INSNS (7), /* mult_addsub. */
1507 COSTS_N_INSNS (7), /* fma. */
1508 COSTS_N_INSNS (3), /* addsub. */
1509 COSTS_N_INSNS (3), /* fpconst. */
1510 COSTS_N_INSNS (3), /* neg. */
1511 COSTS_N_INSNS (3), /* compare. */
1512 COSTS_N_INSNS (3), /* widen. */
1513 COSTS_N_INSNS (3), /* narrow. */
1514 COSTS_N_INSNS (3), /* toint. */
1515 COSTS_N_INSNS (3), /* fromint. */
1516 COSTS_N_INSNS (3) /* roundint. */
1517 },
1518 /* FP DFmode */
1519 {
1520 COSTS_N_INSNS (30), /* div. */
1521 COSTS_N_INSNS (6), /* mult. */
1522 COSTS_N_INSNS (10), /* mult_addsub. */
1523 COSTS_N_INSNS (7), /* fma. */
1524 COSTS_N_INSNS (3), /* addsub. */
1525 COSTS_N_INSNS (3), /* fpconst. */
1526 COSTS_N_INSNS (3), /* neg. */
1527 COSTS_N_INSNS (3), /* compare. */
1528 COSTS_N_INSNS (3), /* widen. */
1529 COSTS_N_INSNS (3), /* narrow. */
1530 COSTS_N_INSNS (3), /* toint. */
1531 COSTS_N_INSNS (3), /* fromint. */
1532 COSTS_N_INSNS (3) /* roundint. */
1533 }
1534 },
1535 /* Vector */
1536 {
1537 COSTS_N_INSNS (1), /* alu. */
1538 COSTS_N_INSNS (4), /* mult. */
1539 COSTS_N_INSNS (1), /* movi. */
1540 COSTS_N_INSNS (2), /* dup. */
1541 COSTS_N_INSNS (2) /* extract. */
1542 }
1543 };
1544
1545 const struct cpu_cost_table cortexa12_extra_costs =
1546 {
1547 /* ALU */
1548 {
1549 0, /* arith. */
1550 0, /* logical. */
1551 0, /* shift. */
1552 COSTS_N_INSNS (1), /* shift_reg. */
1553 COSTS_N_INSNS (1), /* arith_shift. */
1554 COSTS_N_INSNS (1), /* arith_shift_reg. */
1555 COSTS_N_INSNS (1), /* log_shift. */
1556 COSTS_N_INSNS (1), /* log_shift_reg. */
1557 0, /* extend. */
1558 COSTS_N_INSNS (1), /* extend_arith. */
1559 0, /* bfi. */
1560 COSTS_N_INSNS (1), /* bfx. */
1561 COSTS_N_INSNS (1), /* clz. */
1562 COSTS_N_INSNS (1), /* rev. */
1563 0, /* non_exec. */
1564 true /* non_exec_costs_exec. */
1565 },
1566 /* MULT SImode */
1567 {
1568 {
1569 COSTS_N_INSNS (2), /* simple. */
1570 COSTS_N_INSNS (3), /* flag_setting. */
1571 COSTS_N_INSNS (2), /* extend. */
1572 COSTS_N_INSNS (3), /* add. */
1573 COSTS_N_INSNS (2), /* extend_add. */
1574 COSTS_N_INSNS (18) /* idiv. */
1575 },
1576 /* MULT DImode */
1577 {
1578 0, /* simple (N/A). */
1579 0, /* flag_setting (N/A). */
1580 COSTS_N_INSNS (3), /* extend. */
1581 0, /* add (N/A). */
1582 COSTS_N_INSNS (3), /* extend_add. */
1583 0 /* idiv (N/A). */
1584 }
1585 },
1586 /* LD/ST */
1587 {
1588 COSTS_N_INSNS (3), /* load. */
1589 COSTS_N_INSNS (3), /* load_sign_extend. */
1590 COSTS_N_INSNS (3), /* ldrd. */
1591 COSTS_N_INSNS (3), /* ldm_1st. */
1592 1, /* ldm_regs_per_insn_1st. */
1593 2, /* ldm_regs_per_insn_subsequent. */
1594 COSTS_N_INSNS (3), /* loadf. */
1595 COSTS_N_INSNS (3), /* loadd. */
1596 0, /* load_unaligned. */
1597 0, /* store. */
1598 0, /* strd. */
1599 0, /* stm_1st. */
1600 1, /* stm_regs_per_insn_1st. */
1601 2, /* stm_regs_per_insn_subsequent. */
1602 COSTS_N_INSNS (2), /* storef. */
1603 COSTS_N_INSNS (2), /* stored. */
1604 0, /* store_unaligned. */
1605 COSTS_N_INSNS (1), /* loadv. */
1606 COSTS_N_INSNS (1) /* storev. */
1607 },
1608 {
1609 /* FP SFmode */
1610 {
1611 COSTS_N_INSNS (17), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1624 },
1625 /* FP DFmode */
1626 {
1627 COSTS_N_INSNS (31), /* div. */
1628 COSTS_N_INSNS (4), /* mult. */
1629 COSTS_N_INSNS (8), /* mult_addsub. */
1630 COSTS_N_INSNS (8), /* fma. */
1631 COSTS_N_INSNS (4), /* addsub. */
1632 COSTS_N_INSNS (2), /* fpconst. */
1633 COSTS_N_INSNS (2), /* neg. */
1634 COSTS_N_INSNS (2), /* compare. */
1635 COSTS_N_INSNS (4), /* widen. */
1636 COSTS_N_INSNS (4), /* narrow. */
1637 COSTS_N_INSNS (4), /* toint. */
1638 COSTS_N_INSNS (4), /* fromint. */
1639 COSTS_N_INSNS (4) /* roundint. */
1640 }
1641 },
1642 /* Vector */
1643 {
1644 COSTS_N_INSNS (1), /* alu. */
1645 COSTS_N_INSNS (4), /* mult. */
1646 COSTS_N_INSNS (1), /* movi. */
1647 COSTS_N_INSNS (2), /* dup. */
1648 COSTS_N_INSNS (2) /* extract. */
1649 }
1650 };
1651
1652 const struct cpu_cost_table cortexa15_extra_costs =
1653 {
1654 /* ALU */
1655 {
1656 0, /* arith. */
1657 0, /* logical. */
1658 0, /* shift. */
1659 0, /* shift_reg. */
1660 COSTS_N_INSNS (1), /* arith_shift. */
1661 COSTS_N_INSNS (1), /* arith_shift_reg. */
1662 COSTS_N_INSNS (1), /* log_shift. */
1663 COSTS_N_INSNS (1), /* log_shift_reg. */
1664 0, /* extend. */
1665 COSTS_N_INSNS (1), /* extend_arith. */
1666 COSTS_N_INSNS (1), /* bfi. */
1667 0, /* bfx. */
1668 0, /* clz. */
1669 0, /* rev. */
1670 0, /* non_exec. */
1671 true /* non_exec_costs_exec. */
1672 },
1673 /* MULT SImode */
1674 {
1675 {
1676 COSTS_N_INSNS (2), /* simple. */
1677 COSTS_N_INSNS (3), /* flag_setting. */
1678 COSTS_N_INSNS (2), /* extend. */
1679 COSTS_N_INSNS (2), /* add. */
1680 COSTS_N_INSNS (2), /* extend_add. */
1681 COSTS_N_INSNS (18) /* idiv. */
1682 },
1683 /* MULT DImode */
1684 {
1685 0, /* simple (N/A). */
1686 0, /* flag_setting (N/A). */
1687 COSTS_N_INSNS (3), /* extend. */
1688 0, /* add (N/A). */
1689 COSTS_N_INSNS (3), /* extend_add. */
1690 0 /* idiv (N/A). */
1691 }
1692 },
1693 /* LD/ST */
1694 {
1695 COSTS_N_INSNS (3), /* load. */
1696 COSTS_N_INSNS (3), /* load_sign_extend. */
1697 COSTS_N_INSNS (3), /* ldrd. */
1698 COSTS_N_INSNS (4), /* ldm_1st. */
1699 1, /* ldm_regs_per_insn_1st. */
1700 2, /* ldm_regs_per_insn_subsequent. */
1701 COSTS_N_INSNS (4), /* loadf. */
1702 COSTS_N_INSNS (4), /* loadd. */
1703 0, /* load_unaligned. */
1704 0, /* store. */
1705 0, /* strd. */
1706 COSTS_N_INSNS (1), /* stm_1st. */
1707 1, /* stm_regs_per_insn_1st. */
1708 2, /* stm_regs_per_insn_subsequent. */
1709 0, /* storef. */
1710 0, /* stored. */
1711 0, /* store_unaligned. */
1712 COSTS_N_INSNS (1), /* loadv. */
1713 COSTS_N_INSNS (1) /* storev. */
1714 },
1715 {
1716 /* FP SFmode */
1717 {
1718 COSTS_N_INSNS (17), /* div. */
1719 COSTS_N_INSNS (4), /* mult. */
1720 COSTS_N_INSNS (8), /* mult_addsub. */
1721 COSTS_N_INSNS (8), /* fma. */
1722 COSTS_N_INSNS (4), /* addsub. */
1723 COSTS_N_INSNS (2), /* fpconst. */
1724 COSTS_N_INSNS (2), /* neg. */
1725 COSTS_N_INSNS (5), /* compare. */
1726 COSTS_N_INSNS (4), /* widen. */
1727 COSTS_N_INSNS (4), /* narrow. */
1728 COSTS_N_INSNS (4), /* toint. */
1729 COSTS_N_INSNS (4), /* fromint. */
1730 COSTS_N_INSNS (4) /* roundint. */
1731 },
1732 /* FP DFmode */
1733 {
1734 COSTS_N_INSNS (31), /* div. */
1735 COSTS_N_INSNS (4), /* mult. */
1736 COSTS_N_INSNS (8), /* mult_addsub. */
1737 COSTS_N_INSNS (8), /* fma. */
1738 COSTS_N_INSNS (4), /* addsub. */
1739 COSTS_N_INSNS (2), /* fpconst. */
1740 COSTS_N_INSNS (2), /* neg. */
1741 COSTS_N_INSNS (2), /* compare. */
1742 COSTS_N_INSNS (4), /* widen. */
1743 COSTS_N_INSNS (4), /* narrow. */
1744 COSTS_N_INSNS (4), /* toint. */
1745 COSTS_N_INSNS (4), /* fromint. */
1746 COSTS_N_INSNS (4) /* roundint. */
1747 }
1748 },
1749 /* Vector */
1750 {
1751 COSTS_N_INSNS (1), /* alu. */
1752 COSTS_N_INSNS (4), /* mult. */
1753 COSTS_N_INSNS (1), /* movi. */
1754 COSTS_N_INSNS (2), /* dup. */
1755 COSTS_N_INSNS (2) /* extract. */
1756 }
1757 };
1758
1759 const struct cpu_cost_table v7m_extra_costs =
1760 {
1761 /* ALU */
1762 {
1763 0, /* arith. */
1764 0, /* logical. */
1765 0, /* shift. */
1766 0, /* shift_reg. */
1767 0, /* arith_shift. */
1768 COSTS_N_INSNS (1), /* arith_shift_reg. */
1769 0, /* log_shift. */
1770 COSTS_N_INSNS (1), /* log_shift_reg. */
1771 0, /* extend. */
1772 COSTS_N_INSNS (1), /* extend_arith. */
1773 0, /* bfi. */
1774 0, /* bfx. */
1775 0, /* clz. */
1776 0, /* rev. */
1777 COSTS_N_INSNS (1), /* non_exec. */
1778 false /* non_exec_costs_exec. */
1779 },
1780 {
1781 /* MULT SImode */
1782 {
1783 COSTS_N_INSNS (1), /* simple. */
1784 COSTS_N_INSNS (1), /* flag_setting. */
1785 COSTS_N_INSNS (2), /* extend. */
1786 COSTS_N_INSNS (1), /* add. */
1787 COSTS_N_INSNS (3), /* extend_add. */
1788 COSTS_N_INSNS (8) /* idiv. */
1789 },
1790 /* MULT DImode */
1791 {
1792 0, /* simple (N/A). */
1793 0, /* flag_setting (N/A). */
1794 COSTS_N_INSNS (2), /* extend. */
1795 0, /* add (N/A). */
1796 COSTS_N_INSNS (3), /* extend_add. */
1797 0 /* idiv (N/A). */
1798 }
1799 },
1800 /* LD/ST */
1801 {
1802 COSTS_N_INSNS (2), /* load. */
1803 0, /* load_sign_extend. */
1804 COSTS_N_INSNS (3), /* ldrd. */
1805 COSTS_N_INSNS (2), /* ldm_1st. */
1806 1, /* ldm_regs_per_insn_1st. */
1807 1, /* ldm_regs_per_insn_subsequent. */
1808 COSTS_N_INSNS (2), /* loadf. */
1809 COSTS_N_INSNS (3), /* loadd. */
1810 COSTS_N_INSNS (1), /* load_unaligned. */
1811 COSTS_N_INSNS (2), /* store. */
1812 COSTS_N_INSNS (3), /* strd. */
1813 COSTS_N_INSNS (2), /* stm_1st. */
1814 1, /* stm_regs_per_insn_1st. */
1815 1, /* stm_regs_per_insn_subsequent. */
1816 COSTS_N_INSNS (2), /* storef. */
1817 COSTS_N_INSNS (3), /* stored. */
1818 COSTS_N_INSNS (1), /* store_unaligned. */
1819 COSTS_N_INSNS (1), /* loadv. */
1820 COSTS_N_INSNS (1) /* storev. */
1821 },
1822 {
1823 /* FP SFmode */
1824 {
1825 COSTS_N_INSNS (7), /* div. */
1826 COSTS_N_INSNS (2), /* mult. */
1827 COSTS_N_INSNS (5), /* mult_addsub. */
1828 COSTS_N_INSNS (3), /* fma. */
1829 COSTS_N_INSNS (1), /* addsub. */
1830 0, /* fpconst. */
1831 0, /* neg. */
1832 0, /* compare. */
1833 0, /* widen. */
1834 0, /* narrow. */
1835 0, /* toint. */
1836 0, /* fromint. */
1837 0 /* roundint. */
1838 },
1839 /* FP DFmode */
1840 {
1841 COSTS_N_INSNS (15), /* div. */
1842 COSTS_N_INSNS (5), /* mult. */
1843 COSTS_N_INSNS (7), /* mult_addsub. */
1844 COSTS_N_INSNS (7), /* fma. */
1845 COSTS_N_INSNS (3), /* addsub. */
1846 0, /* fpconst. */
1847 0, /* neg. */
1848 0, /* compare. */
1849 0, /* widen. */
1850 0, /* narrow. */
1851 0, /* toint. */
1852 0, /* fromint. */
1853 0 /* roundint. */
1854 }
1855 },
1856 /* Vector */
1857 {
1858 COSTS_N_INSNS (1), /* alu. */
1859 COSTS_N_INSNS (4), /* mult. */
1860 COSTS_N_INSNS (1), /* movi. */
1861 COSTS_N_INSNS (2), /* dup. */
1862 COSTS_N_INSNS (2) /* extract. */
1863 }
1864 };
1865
1866 const struct addr_mode_cost_table generic_addr_mode_costs =
1867 {
1868 /* int. */
1869 {
1870 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1871 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1872 COSTS_N_INSNS (0) /* AMO_WB. */
1873 },
1874 /* float. */
1875 {
1876 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1877 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1878 COSTS_N_INSNS (0) /* AMO_WB. */
1879 },
1880 /* vector. */
1881 {
1882 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1883 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1884 COSTS_N_INSNS (0) /* AMO_WB. */
1885 }
1886 };
1887
1888 const struct tune_params arm_slowmul_tune =
1889 {
1890 &generic_extra_costs, /* Insn extra costs. */
1891 &generic_addr_mode_costs, /* Addressing mode costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 3, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_TRUE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_STRINGOPS_FALSE,
1906 tune_params::FUSE_NOTHING,
1907 tune_params::SCHED_AUTOPREF_OFF
1908 };
1909
1910 const struct tune_params arm_fastmul_tune =
1911 {
1912 &generic_extra_costs, /* Insn extra costs. */
1913 &generic_addr_mode_costs, /* Addressing mode costs. */
1914 NULL, /* Sched adj cost. */
1915 arm_default_branch_cost,
1916 &arm_default_vec_cost,
1917 1, /* Constant limit. */
1918 5, /* Max cond insns. */
1919 8, /* Memset max inline. */
1920 1, /* Issue rate. */
1921 ARM_PREFETCH_NOT_BENEFICIAL,
1922 tune_params::PREF_CONST_POOL_TRUE,
1923 tune_params::PREF_LDRD_FALSE,
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1926 tune_params::DISPARAGE_FLAGS_NEITHER,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 /* StrongARM has early execution of branches, so a sequence that is worth
1933 skipping is shorter. Set max_insns_skipped to a lower value. */
1934
1935 const struct tune_params arm_strongarm_tune =
1936 {
1937 &generic_extra_costs, /* Insn extra costs. */
1938 &generic_addr_mode_costs, /* Addressing mode costs. */
1939 NULL, /* Sched adj cost. */
1940 arm_default_branch_cost,
1941 &arm_default_vec_cost,
1942 1, /* Constant limit. */
1943 3, /* Max cond insns. */
1944 8, /* Memset max inline. */
1945 1, /* Issue rate. */
1946 ARM_PREFETCH_NOT_BENEFICIAL,
1947 tune_params::PREF_CONST_POOL_TRUE,
1948 tune_params::PREF_LDRD_FALSE,
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1950 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1951 tune_params::DISPARAGE_FLAGS_NEITHER,
1952 tune_params::PREF_NEON_STRINGOPS_FALSE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1955 };
1956
1957 const struct tune_params arm_xscale_tune =
1958 {
1959 &generic_extra_costs, /* Insn extra costs. */
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 xscale_sched_adjust_cost,
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 2, /* Constant limit. */
1965 3, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 1, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_TRUE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_STRINGOPS_FALSE,
1975 tune_params::FUSE_NOTHING,
1976 tune_params::SCHED_AUTOPREF_OFF
1977 };
1978
1979 const struct tune_params arm_9e_tune =
1980 {
1981 &generic_extra_costs, /* Insn extra costs. */
1982 &generic_addr_mode_costs, /* Addressing mode costs. */
1983 NULL, /* Sched adj cost. */
1984 arm_default_branch_cost,
1985 &arm_default_vec_cost,
1986 1, /* Constant limit. */
1987 5, /* Max cond insns. */
1988 8, /* Memset max inline. */
1989 1, /* Issue rate. */
1990 ARM_PREFETCH_NOT_BENEFICIAL,
1991 tune_params::PREF_CONST_POOL_TRUE,
1992 tune_params::PREF_LDRD_FALSE,
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1995 tune_params::DISPARAGE_FLAGS_NEITHER,
1996 tune_params::PREF_NEON_STRINGOPS_FALSE,
1997 tune_params::FUSE_NOTHING,
1998 tune_params::SCHED_AUTOPREF_OFF
1999 };
2000
2001 const struct tune_params arm_marvell_pj4_tune =
2002 {
2003 &generic_extra_costs, /* Insn extra costs. */
2004 &generic_addr_mode_costs, /* Addressing mode costs. */
2005 NULL, /* Sched adj cost. */
2006 arm_default_branch_cost,
2007 &arm_default_vec_cost,
2008 1, /* Constant limit. */
2009 5, /* Max cond insns. */
2010 8, /* Memset max inline. */
2011 2, /* Issue rate. */
2012 ARM_PREFETCH_NOT_BENEFICIAL,
2013 tune_params::PREF_CONST_POOL_TRUE,
2014 tune_params::PREF_LDRD_FALSE,
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2017 tune_params::DISPARAGE_FLAGS_NEITHER,
2018 tune_params::PREF_NEON_STRINGOPS_FALSE,
2019 tune_params::FUSE_NOTHING,
2020 tune_params::SCHED_AUTOPREF_OFF
2021 };
2022
2023 const struct tune_params arm_v6t2_tune =
2024 {
2025 &generic_extra_costs, /* Insn extra costs. */
2026 &generic_addr_mode_costs, /* Addressing mode costs. */
2027 NULL, /* Sched adj cost. */
2028 arm_default_branch_cost,
2029 &arm_default_vec_cost,
2030 1, /* Constant limit. */
2031 5, /* Max cond insns. */
2032 8, /* Memset max inline. */
2033 1, /* Issue rate. */
2034 ARM_PREFETCH_NOT_BENEFICIAL,
2035 tune_params::PREF_CONST_POOL_FALSE,
2036 tune_params::PREF_LDRD_FALSE,
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2039 tune_params::DISPARAGE_FLAGS_NEITHER,
2040 tune_params::PREF_NEON_STRINGOPS_FALSE,
2041 tune_params::FUSE_NOTHING,
2042 tune_params::SCHED_AUTOPREF_OFF
2043 };
2044
2045
2046 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2047 const struct tune_params arm_cortex_tune =
2048 {
2049 &generic_extra_costs,
2050 &generic_addr_mode_costs, /* Addressing mode costs. */
2051 NULL, /* Sched adj cost. */
2052 arm_default_branch_cost,
2053 &arm_default_vec_cost,
2054 1, /* Constant limit. */
2055 5, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 2, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL,
2059 tune_params::PREF_CONST_POOL_FALSE,
2060 tune_params::PREF_LDRD_FALSE,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_NEITHER,
2064 tune_params::PREF_NEON_STRINGOPS_FALSE,
2065 tune_params::FUSE_NOTHING,
2066 tune_params::SCHED_AUTOPREF_OFF
2067 };
2068
2069 const struct tune_params arm_cortex_a8_tune =
2070 {
2071 &cortexa8_extra_costs,
2072 &generic_addr_mode_costs, /* Addressing mode costs. */
2073 NULL, /* Sched adj cost. */
2074 arm_default_branch_cost,
2075 &arm_default_vec_cost,
2076 1, /* Constant limit. */
2077 5, /* Max cond insns. */
2078 8, /* Memset max inline. */
2079 2, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 tune_params::PREF_CONST_POOL_FALSE,
2082 tune_params::PREF_LDRD_FALSE,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_NEITHER,
2086 tune_params::PREF_NEON_STRINGOPS_TRUE,
2087 tune_params::FUSE_NOTHING,
2088 tune_params::SCHED_AUTOPREF_OFF
2089 };
2090
2091 const struct tune_params arm_cortex_a7_tune =
2092 {
2093 &cortexa7_extra_costs,
2094 &generic_addr_mode_costs, /* Addressing mode costs. */
2095 NULL, /* Sched adj cost. */
2096 arm_default_branch_cost,
2097 &arm_default_vec_cost,
2098 1, /* Constant limit. */
2099 5, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 2, /* Issue rate. */
2102 ARM_PREFETCH_NOT_BENEFICIAL,
2103 tune_params::PREF_CONST_POOL_FALSE,
2104 tune_params::PREF_LDRD_FALSE,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_NEITHER,
2108 tune_params::PREF_NEON_STRINGOPS_TRUE,
2109 tune_params::FUSE_NOTHING,
2110 tune_params::SCHED_AUTOPREF_OFF
2111 };
2112
2113 const struct tune_params arm_cortex_a15_tune =
2114 {
2115 &cortexa15_extra_costs,
2116 &generic_addr_mode_costs, /* Addressing mode costs. */
2117 NULL, /* Sched adj cost. */
2118 arm_default_branch_cost,
2119 &arm_default_vec_cost,
2120 1, /* Constant limit. */
2121 2, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 3, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL,
2125 tune_params::PREF_CONST_POOL_FALSE,
2126 tune_params::PREF_LDRD_TRUE,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_ALL,
2130 tune_params::PREF_NEON_STRINGOPS_TRUE,
2131 tune_params::FUSE_NOTHING,
2132 tune_params::SCHED_AUTOPREF_FULL
2133 };
2134
2135 const struct tune_params arm_cortex_a35_tune =
2136 {
2137 &cortexa53_extra_costs,
2138 &generic_addr_mode_costs, /* Addressing mode costs. */
2139 NULL, /* Sched adj cost. */
2140 arm_default_branch_cost,
2141 &arm_default_vec_cost,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 1, /* Issue rate. */
2146 ARM_PREFETCH_NOT_BENEFICIAL,
2147 tune_params::PREF_CONST_POOL_FALSE,
2148 tune_params::PREF_LDRD_FALSE,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER,
2152 tune_params::PREF_NEON_STRINGOPS_TRUE,
2153 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2154 tune_params::SCHED_AUTOPREF_OFF
2155 };
2156
2157 const struct tune_params arm_cortex_a53_tune =
2158 {
2159 &cortexa53_extra_costs,
2160 &generic_addr_mode_costs, /* Addressing mode costs. */
2161 NULL, /* Sched adj cost. */
2162 arm_default_branch_cost,
2163 &arm_default_vec_cost,
2164 1, /* Constant limit. */
2165 5, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL,
2169 tune_params::PREF_CONST_POOL_FALSE,
2170 tune_params::PREF_LDRD_FALSE,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_NEITHER,
2174 tune_params::PREF_NEON_STRINGOPS_TRUE,
2175 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2176 tune_params::SCHED_AUTOPREF_OFF
2177 };
2178
2179 const struct tune_params arm_cortex_a57_tune =
2180 {
2181 &cortexa57_extra_costs,
2182 &generic_addr_mode_costs, /* Addressing mode costs. */
2183 NULL, /* Sched adj cost. */
2184 arm_default_branch_cost,
2185 &arm_default_vec_cost,
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 3, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL,
2191 tune_params::PREF_CONST_POOL_FALSE,
2192 tune_params::PREF_LDRD_TRUE,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL,
2196 tune_params::PREF_NEON_STRINGOPS_TRUE,
2197 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2198 tune_params::SCHED_AUTOPREF_FULL
2199 };
2200
2201 const struct tune_params arm_exynosm1_tune =
2202 {
2203 &exynosm1_extra_costs,
2204 &generic_addr_mode_costs, /* Addressing mode costs. */
2205 NULL, /* Sched adj cost. */
2206 arm_default_branch_cost,
2207 &arm_default_vec_cost,
2208 1, /* Constant limit. */
2209 2, /* Max cond insns. */
2210 8, /* Memset max inline. */
2211 3, /* Issue rate. */
2212 ARM_PREFETCH_NOT_BENEFICIAL,
2213 tune_params::PREF_CONST_POOL_FALSE,
2214 tune_params::PREF_LDRD_TRUE,
2215 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2217 tune_params::DISPARAGE_FLAGS_ALL,
2218 tune_params::PREF_NEON_STRINGOPS_TRUE,
2219 tune_params::FUSE_NOTHING,
2220 tune_params::SCHED_AUTOPREF_OFF
2221 };
2222
2223 const struct tune_params arm_xgene1_tune =
2224 {
2225 &xgene1_extra_costs,
2226 &generic_addr_mode_costs, /* Addressing mode costs. */
2227 NULL, /* Sched adj cost. */
2228 arm_default_branch_cost,
2229 &arm_default_vec_cost,
2230 1, /* Constant limit. */
2231 2, /* Max cond insns. */
2232 32, /* Memset max inline. */
2233 4, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL,
2235 tune_params::PREF_CONST_POOL_FALSE,
2236 tune_params::PREF_LDRD_TRUE,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_ALL,
2240 tune_params::PREF_NEON_STRINGOPS_FALSE,
2241 tune_params::FUSE_NOTHING,
2242 tune_params::SCHED_AUTOPREF_OFF
2243 };
2244
2245 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2246 less appealing. Set max_insns_skipped to a low value. */
2247
2248 const struct tune_params arm_cortex_a5_tune =
2249 {
2250 &cortexa5_extra_costs,
2251 &generic_addr_mode_costs, /* Addressing mode costs. */
2252 NULL, /* Sched adj cost. */
2253 arm_cortex_a5_branch_cost,
2254 &arm_default_vec_cost,
2255 1, /* Constant limit. */
2256 1, /* Max cond insns. */
2257 8, /* Memset max inline. */
2258 2, /* Issue rate. */
2259 ARM_PREFETCH_NOT_BENEFICIAL,
2260 tune_params::PREF_CONST_POOL_FALSE,
2261 tune_params::PREF_LDRD_FALSE,
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2263 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2264 tune_params::DISPARAGE_FLAGS_NEITHER,
2265 tune_params::PREF_NEON_STRINGOPS_TRUE,
2266 tune_params::FUSE_NOTHING,
2267 tune_params::SCHED_AUTOPREF_OFF
2268 };
2269
2270 const struct tune_params arm_cortex_a9_tune =
2271 {
2272 &cortexa9_extra_costs,
2273 &generic_addr_mode_costs, /* Addressing mode costs. */
2274 cortex_a9_sched_adjust_cost,
2275 arm_default_branch_cost,
2276 &arm_default_vec_cost,
2277 1, /* Constant limit. */
2278 5, /* Max cond insns. */
2279 8, /* Memset max inline. */
2280 2, /* Issue rate. */
2281 ARM_PREFETCH_BENEFICIAL(4,32,32),
2282 tune_params::PREF_CONST_POOL_FALSE,
2283 tune_params::PREF_LDRD_FALSE,
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2286 tune_params::DISPARAGE_FLAGS_NEITHER,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2290 };
2291
2292 const struct tune_params arm_cortex_a12_tune =
2293 {
2294 &cortexa12_extra_costs,
2295 &generic_addr_mode_costs, /* Addressing mode costs. */
2296 NULL, /* Sched adj cost. */
2297 arm_default_branch_cost,
2298 &arm_default_vec_cost, /* Vectorizer costs. */
2299 1, /* Constant limit. */
2300 2, /* Max cond insns. */
2301 8, /* Memset max inline. */
2302 2, /* Issue rate. */
2303 ARM_PREFETCH_NOT_BENEFICIAL,
2304 tune_params::PREF_CONST_POOL_FALSE,
2305 tune_params::PREF_LDRD_TRUE,
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2307 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2308 tune_params::DISPARAGE_FLAGS_ALL,
2309 tune_params::PREF_NEON_STRINGOPS_TRUE,
2310 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 const struct tune_params arm_cortex_a73_tune =
2315 {
2316 &cortexa57_extra_costs,
2317 &generic_addr_mode_costs, /* Addressing mode costs. */
2318 NULL, /* Sched adj cost. */
2319 arm_default_branch_cost,
2320 &arm_default_vec_cost, /* Vectorizer costs. */
2321 1, /* Constant limit. */
2322 2, /* Max cond insns. */
2323 8, /* Memset max inline. */
2324 2, /* Issue rate. */
2325 ARM_PREFETCH_NOT_BENEFICIAL,
2326 tune_params::PREF_CONST_POOL_FALSE,
2327 tune_params::PREF_LDRD_TRUE,
2328 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2329 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2330 tune_params::DISPARAGE_FLAGS_ALL,
2331 tune_params::PREF_NEON_STRINGOPS_TRUE,
2332 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2333 tune_params::SCHED_AUTOPREF_FULL
2334 };
2335
2336 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW and MOVT each take a
2337    single cycle, so a MOVW/MOVT pair costs two cycles.  An LDR from the
2338    constant pool also takes two cycles to execute, but mildly increases
2339    pipelining opportunity (consecutive loads/stores can be pipelined together,
2340    saving one cycle), and may also improve icache utilisation.  Hence we
2341    prefer the constant pool for such processors.  */
2342
2343 const struct tune_params arm_v7m_tune =
2344 {
2345 &v7m_extra_costs,
2346 &generic_addr_mode_costs, /* Addressing mode costs. */
2347 NULL, /* Sched adj cost. */
2348 arm_cortex_m_branch_cost,
2349 &arm_default_vec_cost,
2350 1, /* Constant limit. */
2351 2, /* Max cond insns. */
2352 8, /* Memset max inline. */
2353 1, /* Issue rate. */
2354 ARM_PREFETCH_NOT_BENEFICIAL,
2355 tune_params::PREF_CONST_POOL_TRUE,
2356 tune_params::PREF_LDRD_FALSE,
2357 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2358 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2359 tune_params::DISPARAGE_FLAGS_NEITHER,
2360 tune_params::PREF_NEON_STRINGOPS_FALSE,
2361 tune_params::FUSE_NOTHING,
2362 tune_params::SCHED_AUTOPREF_OFF
2363 };
2364
2365 /* Cortex-M7 tuning. */
2366
2367 const struct tune_params arm_cortex_m7_tune =
2368 {
2369 &v7m_extra_costs,
2370 &generic_addr_mode_costs, /* Addressing mode costs. */
2371 NULL, /* Sched adj cost. */
2372 arm_cortex_m7_branch_cost,
2373 &arm_default_vec_cost,
2374 0, /* Constant limit. */
2375 1, /* Max cond insns. */
2376 8, /* Memset max inline. */
2377 2, /* Issue rate. */
2378 ARM_PREFETCH_NOT_BENEFICIAL,
2379 tune_params::PREF_CONST_POOL_TRUE,
2380 tune_params::PREF_LDRD_FALSE,
2381 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2382 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2383 tune_params::DISPARAGE_FLAGS_NEITHER,
2384 tune_params::PREF_NEON_STRINGOPS_FALSE,
2385 tune_params::FUSE_NOTHING,
2386 tune_params::SCHED_AUTOPREF_OFF
2387 };
2388
2389 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2390 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2391 cortex-m23. */
2392 const struct tune_params arm_v6m_tune =
2393 {
2394 &generic_extra_costs, /* Insn extra costs. */
2395 &generic_addr_mode_costs, /* Addressing mode costs. */
2396 NULL, /* Sched adj cost. */
2397 arm_default_branch_cost,
2398 &arm_default_vec_cost, /* Vectorizer costs. */
2399 1, /* Constant limit. */
2400 5, /* Max cond insns. */
2401 8, /* Memset max inline. */
2402 1, /* Issue rate. */
2403 ARM_PREFETCH_NOT_BENEFICIAL,
2404 tune_params::PREF_CONST_POOL_FALSE,
2405 tune_params::PREF_LDRD_FALSE,
2406 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2407 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2408 tune_params::DISPARAGE_FLAGS_NEITHER,
2409 tune_params::PREF_NEON_STRINGOPS_FALSE,
2410 tune_params::FUSE_NOTHING,
2411 tune_params::SCHED_AUTOPREF_OFF
2412 };
2413
2414 const struct tune_params arm_fa726te_tune =
2415 {
2416 &generic_extra_costs, /* Insn extra costs. */
2417 &generic_addr_mode_costs, /* Addressing mode costs. */
2418 fa726te_sched_adjust_cost,
2419 arm_default_branch_cost,
2420 &arm_default_vec_cost,
2421 1, /* Constant limit. */
2422 5, /* Max cond insns. */
2423 8, /* Memset max inline. */
2424 2, /* Issue rate. */
2425 ARM_PREFETCH_NOT_BENEFICIAL,
2426 tune_params::PREF_CONST_POOL_TRUE,
2427 tune_params::PREF_LDRD_FALSE,
2428 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2429 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2430 tune_params::DISPARAGE_FLAGS_NEITHER,
2431 tune_params::PREF_NEON_STRINGOPS_FALSE,
2432 tune_params::FUSE_NOTHING,
2433 tune_params::SCHED_AUTOPREF_OFF
2434 };
2435
2436 /* Auto-generated CPU, FPU and architecture tables. */
2437 #include "arm-cpu-data.h"
2438
2439 /* The name of the preprocessor macro to define for this architecture. PROFILE
2440    is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2441 is thus chosen to be big enough to hold the longest architecture name. */
2442
2443 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
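/* For example, with -march=armv8-a the placeholder above becomes "8A" and
   the macro __ARM_ARCH_8A__ ends up being defined.  */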
2444
2445 /* Supported TLS relocations. */
2446
2447 enum tls_reloc {
2448 TLS_GD32,
2449 TLS_GD32_FDPIC,
2450 TLS_LDM32,
2451 TLS_LDM32_FDPIC,
2452 TLS_LDO32,
2453 TLS_IE32,
2454 TLS_IE32_FDPIC,
2455 TLS_LE32,
2456 TLS_DESCSEQ /* GNU scheme */
2457 };
2458
2459 /* The maximum number of insns to be used when loading a constant. */
2460 inline static int
2461 arm_constant_limit (bool size_p)
2462 {
2463 return size_p ? 1 : current_tune->constant_limit;
2464 }
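/* For example, when optimizing for size (SIZE_P true) at most one insn is
   used per constant, while e.g. arm_slowmul_tune above allows up to 3.  */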
2465
2466 /* Emit an insn that's a simple single-set. Both the operands must be known
2467 to be valid. */
2468 inline static rtx_insn *
2469 emit_set_insn (rtx x, rtx y)
2470 {
2471 return emit_insn (gen_rtx_SET (x, y));
2472 }
2473
2474 /* Return the number of bits set in VALUE. */
2475 static unsigned
2476 bit_count (unsigned long value)
2477 {
2478 unsigned long count = 0;
2479
2480 while (value)
2481 {
2482 count++;
2483 value &= value - 1; /* Clear the least-significant set bit. */
2484 }
2485
2486 return count;
2487 }
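/* For example, bit_count (0x29) returns 3: the "value &= value - 1" step
   clears exactly one set bit per iteration.  */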
2488
2489 /* Return the number of bits set in BMAP. */
2490 static unsigned
2491 bitmap_popcount (const sbitmap bmap)
2492 {
2493 unsigned int count = 0;
2494 unsigned int n = 0;
2495 sbitmap_iterator sbi;
2496
2497 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2498 count++;
2499 return count;
2500 }
2501
2502 typedef struct
2503 {
2504 machine_mode mode;
2505 const char *name;
2506 } arm_fixed_mode_set;
2507
2508 /* A small helper for setting fixed-point libfuncs.  */
2509
2510 static void
2511 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2512 const char *funcname, const char *modename,
2513 int num_suffix)
2514 {
2515 char buffer[50];
2516
2517 if (num_suffix == 0)
2518 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2519 else
2520 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2521
2522 set_optab_libfunc (optable, mode, buffer);
2523 }
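/* For example, arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add",
   "qq", 3) registers the libcall name "__gnu_addqq3".  */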
2524
2525 static void
2526 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2527 machine_mode from, const char *funcname,
2528 const char *toname, const char *fromname)
2529 {
2530 char buffer[50];
2531 const char *maybe_suffix_2 = "";
2532
2533 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2534 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2535 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2536 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2537 maybe_suffix_2 = "2";
2538
2539 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2540 maybe_suffix_2);
2541
2542 set_conv_libfunc (optable, to, from, buffer);
2543 }
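/* For example, registering the fract optab from SQmode to DQmode (both
   signed fract modes) produces the name "__gnu_fractsqdq2", while SQmode to
   SImode produces "__gnu_fractsqsi": SImode is not a fixed-point mode, so no
   "2" suffix is appended.  */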
2544
2545 static GTY(()) rtx speculation_barrier_libfunc;
2546
2547 /* Record that we have no arithmetic or comparison libfuncs for
2548 machine mode MODE. */
2549
2550 static void
2551 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2552 {
2553 /* Arithmetic. */
2554 set_optab_libfunc (add_optab, mode, NULL);
2555 set_optab_libfunc (sdiv_optab, mode, NULL);
2556 set_optab_libfunc (smul_optab, mode, NULL);
2557 set_optab_libfunc (neg_optab, mode, NULL);
2558 set_optab_libfunc (sub_optab, mode, NULL);
2559
2560 /* Comparisons. */
2561 set_optab_libfunc (eq_optab, mode, NULL);
2562 set_optab_libfunc (ne_optab, mode, NULL);
2563 set_optab_libfunc (lt_optab, mode, NULL);
2564 set_optab_libfunc (le_optab, mode, NULL);
2565 set_optab_libfunc (ge_optab, mode, NULL);
2566 set_optab_libfunc (gt_optab, mode, NULL);
2567 set_optab_libfunc (unord_optab, mode, NULL);
2568 }
2569
2570 /* Set up library functions unique to ARM. */
2571 static void
2572 arm_init_libfuncs (void)
2573 {
2574 machine_mode mode_iter;
2575
2576 /* For Linux, we have access to kernel support for atomic operations. */
2577 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2578 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2579
2580 /* There are no special library functions unless we are using the
2581 ARM BPABI. */
2582 if (!TARGET_BPABI)
2583 return;
2584
2585 /* The functions below are described in Section 4 of the "Run-Time
2586 ABI for the ARM architecture", Version 1.0. */
2587
2588 /* Double-precision floating-point arithmetic. Table 2. */
2589 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2590 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2591 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2592 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2593 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2594
2595 /* Double-precision comparisons. Table 3. */
2596 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2597 set_optab_libfunc (ne_optab, DFmode, NULL);
2598 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2599 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2600 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2601 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2602 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2603
2604 /* Single-precision floating-point arithmetic. Table 4. */
2605 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2606 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2607 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2608 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2609 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2610
2611 /* Single-precision comparisons. Table 5. */
2612 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2613 set_optab_libfunc (ne_optab, SFmode, NULL);
2614 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2615 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2616 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2617 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2618 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2619
2620 /* Floating-point to integer conversions. Table 6. */
2621 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2622 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2623 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2624 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2625 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2626 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2627 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2628 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2629
2630 /* Conversions between floating types. Table 7. */
2631 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2632 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2633
2634 /* Integer to floating-point conversions. Table 8. */
2635 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2636 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2637 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2638 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2639 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2640 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2641 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2642 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2643
2644 /* Long long. Table 9. */
2645 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2646 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2647 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2648 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2649 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2650 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2651 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2652 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2653
2654 /* Integer (32/32->32) division. \S 4.3.1. */
2655 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2656 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2657
2658 /* The divmod functions are designed so that they can be used for
2659 plain division, even though they return both the quotient and the
2660 remainder. The quotient is returned in the usual location (i.e.,
2661 r0 for SImode, {r0, r1} for DImode), just as would be expected
2662 for an ordinary division routine. Because the AAPCS calling
2663 conventions specify that all of { r0, r1, r2, r3 } are
2664      call-clobbered registers, there is no need to tell the compiler
2665 explicitly that those registers are clobbered by these
2666 routines. */
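  /* For example, a DImode division "a / b" can thus be expanded as a call
     to __aeabi_ldivmod, taking the quotient from {r0, r1} and ignoring the
     remainder (which the Run-Time ABI places in {r2, r3}).  */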
2667 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2668 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2669
2670 /* For SImode division the ABI provides div-without-mod routines,
2671 which are faster. */
2672 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2673 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2674
2675 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2676 divmod libcalls instead. */
2677 set_optab_libfunc (smod_optab, DImode, NULL);
2678 set_optab_libfunc (umod_optab, DImode, NULL);
2679 set_optab_libfunc (smod_optab, SImode, NULL);
2680 set_optab_libfunc (umod_optab, SImode, NULL);
2681
2682 /* Half-precision float operations. The compiler handles all operations
2683      with NULL libfuncs by converting to SFmode.  */
2684 switch (arm_fp16_format)
2685 {
2686 case ARM_FP16_FORMAT_IEEE:
2687 case ARM_FP16_FORMAT_ALTERNATIVE:
2688
2689 /* Conversions. */
2690 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2691 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2692 ? "__gnu_f2h_ieee"
2693 : "__gnu_f2h_alternative"));
2694 set_conv_libfunc (sext_optab, SFmode, HFmode,
2695 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2696 ? "__gnu_h2f_ieee"
2697 : "__gnu_h2f_alternative"));
2698
2699 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2700 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2701 ? "__gnu_d2h_ieee"
2702 : "__gnu_d2h_alternative"));
2703
2704 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2705 break;
2706
2707 default:
2708 break;
2709 }
2710
2711 /* For all possible libcalls in BFmode, record NULL. */
2712 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2713 {
2714 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2715 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2716 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2717 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2718 }
2719 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2720
2721 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2722 {
2723 const arm_fixed_mode_set fixed_arith_modes[] =
2724 {
2725 { E_QQmode, "qq" },
2726 { E_UQQmode, "uqq" },
2727 { E_HQmode, "hq" },
2728 { E_UHQmode, "uhq" },
2729 { E_SQmode, "sq" },
2730 { E_USQmode, "usq" },
2731 { E_DQmode, "dq" },
2732 { E_UDQmode, "udq" },
2733 { E_TQmode, "tq" },
2734 { E_UTQmode, "utq" },
2735 { E_HAmode, "ha" },
2736 { E_UHAmode, "uha" },
2737 { E_SAmode, "sa" },
2738 { E_USAmode, "usa" },
2739 { E_DAmode, "da" },
2740 { E_UDAmode, "uda" },
2741 { E_TAmode, "ta" },
2742 { E_UTAmode, "uta" }
2743 };
2744 const arm_fixed_mode_set fixed_conv_modes[] =
2745 {
2746 { E_QQmode, "qq" },
2747 { E_UQQmode, "uqq" },
2748 { E_HQmode, "hq" },
2749 { E_UHQmode, "uhq" },
2750 { E_SQmode, "sq" },
2751 { E_USQmode, "usq" },
2752 { E_DQmode, "dq" },
2753 { E_UDQmode, "udq" },
2754 { E_TQmode, "tq" },
2755 { E_UTQmode, "utq" },
2756 { E_HAmode, "ha" },
2757 { E_UHAmode, "uha" },
2758 { E_SAmode, "sa" },
2759 { E_USAmode, "usa" },
2760 { E_DAmode, "da" },
2761 { E_UDAmode, "uda" },
2762 { E_TAmode, "ta" },
2763 { E_UTAmode, "uta" },
2764 { E_QImode, "qi" },
2765 { E_HImode, "hi" },
2766 { E_SImode, "si" },
2767 { E_DImode, "di" },
2768 { E_TImode, "ti" },
2769 { E_SFmode, "sf" },
2770 { E_DFmode, "df" }
2771 };
2772 unsigned int i, j;
2773
2774 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2775 {
2776 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2777 "add", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2779 "ssadd", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2781 "usadd", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2783 "sub", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2785 "sssub", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2787 "ussub", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2789 "mul", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2791 "ssmul", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2793 "usmul", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2795 "div", fixed_arith_modes[i].name, 3);
2796 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2797 "udiv", fixed_arith_modes[i].name, 3);
2798 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2799 "ssdiv", fixed_arith_modes[i].name, 3);
2800 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2801 "usdiv", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2803 "neg", fixed_arith_modes[i].name, 2);
2804 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2805 "ssneg", fixed_arith_modes[i].name, 2);
2806 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2807 "usneg", fixed_arith_modes[i].name, 2);
2808 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2809 "ashl", fixed_arith_modes[i].name, 3);
2810 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2811 "ashr", fixed_arith_modes[i].name, 3);
2812 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2813 "lshr", fixed_arith_modes[i].name, 3);
2814 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2815 "ssashl", fixed_arith_modes[i].name, 3);
2816 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2817 "usashl", fixed_arith_modes[i].name, 3);
2818 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2819 "cmp", fixed_arith_modes[i].name, 2);
2820 }
2821
2822 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2823 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2824 {
2825 if (i == j
2826 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2827 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2828 continue;
2829
2830 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2831 fixed_conv_modes[j].mode, "fract",
2832 fixed_conv_modes[i].name,
2833 fixed_conv_modes[j].name);
2834 arm_set_fixed_conv_libfunc (satfract_optab,
2835 fixed_conv_modes[i].mode,
2836 fixed_conv_modes[j].mode, "satfract",
2837 fixed_conv_modes[i].name,
2838 fixed_conv_modes[j].name);
2839 arm_set_fixed_conv_libfunc (fractuns_optab,
2840 fixed_conv_modes[i].mode,
2841 fixed_conv_modes[j].mode, "fractuns",
2842 fixed_conv_modes[i].name,
2843 fixed_conv_modes[j].name);
2844 arm_set_fixed_conv_libfunc (satfractuns_optab,
2845 fixed_conv_modes[i].mode,
2846 fixed_conv_modes[j].mode, "satfractuns",
2847 fixed_conv_modes[i].name,
2848 fixed_conv_modes[j].name);
2849 }
2850 }
2851
2852 if (TARGET_AAPCS_BASED)
2853 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2854
2855 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2856 }
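/* As an illustration of the AEABI tables above: on a BPABI target without
   double-precision hardware FP, a plain C division such as

       double quotient (double x, double y) { return x / y; }

   ends up as a call to __aeabi_ddiv, registered via sdiv_optab.  */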
2857
2858 /* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
2859 static bool
2860 arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
2861 {
2862 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
2863 tree fndecl = gimple_call_fndecl (stmt);
2864 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
2865 unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
2866 gimple *new_stmt = NULL;
2867 switch (code & ARM_BUILTIN_CLASS)
2868 {
2869 case ARM_BUILTIN_GENERAL:
2870 break;
2871 case ARM_BUILTIN_MVE:
2872 new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
2873 }
2874 if (!new_stmt)
2875 return false;
2876
2877 gsi_replace (gsi, new_stmt, true);
2878 return true;
2879 }
2880
2881 /* On AAPCS systems, this is the "struct __va_list". */
2882 static GTY(()) tree va_list_type;
2883
2884 /* Return the type to use as __builtin_va_list. */
2885 static tree
2886 arm_build_builtin_va_list (void)
2887 {
2888 tree va_list_name;
2889 tree ap_field;
2890
2891 if (!TARGET_AAPCS_BASED)
2892 return std_build_builtin_va_list ();
2893
2894 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2895 defined as:
2896
2897 struct __va_list
2898 {
2899 void *__ap;
2900 };
2901
2902 The C Library ABI further reinforces this definition in \S
2903 4.1.
2904
2905 We must follow this definition exactly. The structure tag
2906 name is visible in C++ mangled names, and thus forms a part
2907 of the ABI. The field name may be used by people who
2908 #include <stdarg.h>. */
2909 /* Create the type. */
2910 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2911 /* Give it the required name. */
2912 va_list_name = build_decl (BUILTINS_LOCATION,
2913 TYPE_DECL,
2914 get_identifier ("__va_list"),
2915 va_list_type);
2916 DECL_ARTIFICIAL (va_list_name) = 1;
2917 TYPE_NAME (va_list_type) = va_list_name;
2918 TYPE_STUB_DECL (va_list_type) = va_list_name;
2919 /* Create the __ap field. */
2920 ap_field = build_decl (BUILTINS_LOCATION,
2921 FIELD_DECL,
2922 get_identifier ("__ap"),
2923 ptr_type_node);
2924 DECL_ARTIFICIAL (ap_field) = 1;
2925 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2926 TYPE_FIELDS (va_list_type) = ap_field;
2927 /* Compute its layout. */
2928 layout_type (va_list_type);
2929
2930 return va_list_type;
2931 }
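/* Note that the resulting record contains a single pointer field, so on
   AAPCS targets sizeof (va_list) == sizeof (void *).  */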
2932
2933 /* Return an expression of type "void *" pointing to the next
2934 available argument in a variable-argument list. VALIST is the
2935 user-level va_list object, of type __builtin_va_list. */
2936 static tree
2937 arm_extract_valist_ptr (tree valist)
2938 {
2939 if (TREE_TYPE (valist) == error_mark_node)
2940 return error_mark_node;
2941
2942 /* On an AAPCS target, the pointer is stored within "struct
2943      __va_list".  */
2944 if (TARGET_AAPCS_BASED)
2945 {
2946 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2947 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2948 valist, ap_field, NULL_TREE);
2949 }
2950
2951 return valist;
2952 }
2953
2954 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2955 static void
2956 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2957 {
2958 valist = arm_extract_valist_ptr (valist);
2959 std_expand_builtin_va_start (valist, nextarg);
2960 }
2961
2962 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2963 static tree
2964 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2965 gimple_seq *post_p)
2966 {
2967 valist = arm_extract_valist_ptr (valist);
2968 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2969 }
2970
2971 /* Check any incompatible options that the user has specified. */
2972 static void
2973 arm_option_check_internal (struct gcc_options *opts)
2974 {
2975 int flags = opts->x_target_flags;
2976
2977 /* iWMMXt and NEON are incompatible. */
2978 if (TARGET_IWMMXT
2979 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2980 error ("iWMMXt and NEON are incompatible");
2981
2982 /* Make sure that the processor choice does not conflict with any of the
2983 other command line choices. */
2984 if (TARGET_ARM_P (flags)
2985 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2986 error ("target CPU does not support ARM mode");
2987
2988 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2989 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2990 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2991
2992 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2993 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2994
2995 /* If this target is normally configured to use APCS frames, warn if they
2996 are turned off and debugging is turned on. */
2997 if (TARGET_ARM_P (flags)
2998 && write_symbols != NO_DEBUG
2999 && !TARGET_APCS_FRAME
3000 && (TARGET_DEFAULT & MASK_APCS_FRAME))
3001 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
3002 "debugging");
3003
3004 /* iWMMXt unsupported under Thumb mode. */
3005 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
3006 error ("iWMMXt unsupported under Thumb mode");
3007
3008 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
3009 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
3010
3011 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
3012 {
3013 error ("RTP PIC is incompatible with Thumb");
3014 flag_pic = 0;
3015 }
3016
3017 if (target_pure_code || target_slow_flash_data)
3018 {
3019 const char *flag = (target_pure_code ? "-mpure-code" :
3020 "-mslow-flash-data");
3021 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
3022
3023 /* We only support -mslow-flash-data on M-profile targets with
3024 MOVT. */
3025 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
3026 error ("%s only supports non-pic code on M-profile targets with the "
3027 "MOVT instruction", flag);
3028
3029 /* We only support -mpure-code on M-profile targets. */
3030 if (target_pure_code && common_unsupported_modes)
3031 error ("%s only supports non-pic code on M-profile targets", flag);
3032
3033 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3034 -mword-relocations forbids relocation of MOVT/MOVW. */
3035 if (target_word_relocations)
3036 error ("%s incompatible with %<-mword-relocations%>", flag);
3037 }
3038 }
3039
3040 /* Recompute the global settings depending on target attribute options. */
3041
3042 static void
3043 arm_option_params_internal (void)
3044 {
3045 /* If we are not using the default (ARM mode) section anchor offset
3046 ranges, then set the correct ranges now. */
3047 if (TARGET_THUMB1)
3048 {
3049 /* Thumb-1 LDR instructions cannot have negative offsets.
3050 Permissible positive offset ranges are 5-bit (for byte loads),
3051 6-bit (for halfword loads), or 7-bit (for word loads).
3052 Empirical results suggest a 7-bit anchor range gives the best
3053 overall code size. */
3054 targetm.min_anchor_offset = 0;
3055 targetm.max_anchor_offset = 127;
3056 }
3057 else if (TARGET_THUMB2)
3058 {
3059 /* The minimum is set such that the total size of the block
3060 for a particular anchor is 248 + 1 + 4095 bytes, which is
3061 divisible by eight, ensuring natural spacing of anchors. */
3062 targetm.min_anchor_offset = -248;
3063 targetm.max_anchor_offset = 4095;
3064 }
3065 else
3066 {
3067 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3068 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3069 }
3070
3071   /* With -Os, allow conditionalising up to 4 insns, whatever the tuning.  */
3072 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3073
3074 /* For THUMB2, we limit the conditional sequence to one IT block. */
3075 if (TARGET_THUMB2)
3076 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3077
3078 if (TARGET_THUMB1)
3079 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3080 else
3081 targetm.md_asm_adjust = arm_md_asm_adjust;
3082 }
3083
3084 /* True if -mflip-thumb should next add an attribute for the default
3085 mode, false if it should next add an attribute for the opposite mode. */
3086 static GTY(()) bool thumb_flipper;
3087
3088 /* Options after initial target override. */
3089 static GTY(()) tree init_optimize;
3090
3091 static void
3092 arm_override_options_after_change_1 (struct gcc_options *opts,
3093 struct gcc_options *opts_set)
3094 {
3095 /* -falign-functions without argument: supply one. */
3096 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3097 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3098 && opts->x_optimize_size ? "2" : "4";
3099 }
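/* For example, "-Os -mthumb -falign-functions" (with no explicit alignment
   value) ends up as -falign-functions=2; otherwise "4" is supplied.  */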
3100
3101 /* Implement targetm.override_options_after_change. */
3102
3103 static void
3104 arm_override_options_after_change (void)
3105 {
3106 arm_override_options_after_change_1 (&global_options, &global_options_set);
3107 }
3108
3109 /* Implement TARGET_OPTION_RESTORE. */
3110 static void
3111 arm_option_restore (struct gcc_options */* opts */,
3112 struct gcc_options */* opts_set */,
3113 struct cl_target_option *ptr)
3114 {
3115 arm_configure_build_target (&arm_active_target, ptr, false);
3116 arm_option_reconfigure_globals ();
3117 }
3118
3119 /* Reset options between modes that the user has specified. */
3120 static void
3121 arm_option_override_internal (struct gcc_options *opts,
3122 struct gcc_options *opts_set)
3123 {
3124 arm_override_options_after_change_1 (opts, opts_set);
3125
3126 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3127 {
3128 /* The default is to enable interworking, so this warning message would
3129 be confusing to users who have just compiled with
3130 	 e.g. -march=armv4.  */
3131 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3132 opts->x_target_flags &= ~MASK_INTERWORK;
3133 }
3134
3135 if (TARGET_THUMB_P (opts->x_target_flags)
3136 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3137 {
3138 warning (0, "target CPU does not support THUMB instructions");
3139 opts->x_target_flags &= ~MASK_THUMB;
3140 }
3141
3142 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3143 {
3144 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3145 opts->x_target_flags &= ~MASK_APCS_FRAME;
3146 }
3147
3148 /* Callee super interworking implies thumb interworking. Adding
3149 this to the flags here simplifies the logic elsewhere. */
3150 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3151 opts->x_target_flags |= MASK_INTERWORK;
3152
3153   /* We need to remember the initial values so that combinations of options
3154      like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3155 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3156
3157 if (! opts_set->x_arm_restrict_it)
3158 opts->x_arm_restrict_it = arm_arch8;
3159
3160 /* ARM execution state and M profile don't have [restrict] IT. */
3161 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3162 opts->x_arm_restrict_it = 0;
3163
3164 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3165 if (!opts_set->x_arm_restrict_it
3166 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3167 opts->x_arm_restrict_it = 0;
3168
3169 /* Enable -munaligned-access by default for
3170 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3171 i.e. Thumb2 and ARM state only.
3172 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3173      - ARMv8 architecture-based processors.
3174
3175 Disable -munaligned-access by default for
3176 - all pre-ARMv6 architecture-based processors
3177 - ARMv6-M architecture-based processors
3178 - ARMv8-M Baseline processors. */
3179
3180 if (! opts_set->x_unaligned_access)
3181 {
3182 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3183 && arm_arch6 && (arm_arch_notm || arm_arch7));
3184 }
3185 else if (opts->x_unaligned_access == 1
3186 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3187 {
3188 warning (0, "target CPU does not support unaligned accesses");
3189 opts->x_unaligned_access = 0;
3190 }
3191
3192   /* Don't warn about disabling this: -fschedule-insns is on by default at -O2.  */
3193 if (TARGET_THUMB1_P (opts->x_target_flags))
3194 opts->x_flag_schedule_insns = 0;
3195 else
3196 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3197
3198 /* Disable shrink-wrap when optimizing function for size, since it tends to
3199 generate additional returns. */
3200 if (optimize_function_for_size_p (cfun)
3201 && TARGET_THUMB2_P (opts->x_target_flags))
3202 opts->x_flag_shrink_wrap = false;
3203 else
3204 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3205
3206 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3207 - epilogue_insns - does not accurately model the corresponding insns
3208 emitted in the asm file. In particular, see the comment in thumb_exit
3209 'Find out how many of the (return) argument registers we can corrupt'.
3210 As a consequence, the epilogue may clobber registers without fipa-ra
3211 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3212 TODO: Accurately model clobbers for epilogue_insns and reenable
3213 fipa-ra. */
3214 if (TARGET_THUMB1_P (opts->x_target_flags))
3215 opts->x_flag_ipa_ra = 0;
3216 else
3217 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3218
3219 /* Thumb2 inline assembly code should always use unified syntax.
3220 This will apply to ARM and Thumb1 eventually. */
3221 if (TARGET_THUMB2_P (opts->x_target_flags))
3222 opts->x_inline_asm_unified = true;
3223
3224 if (arm_stack_protector_guard == SSP_GLOBAL
3225 && opts->x_arm_stack_protector_guard_offset_str)
3226 {
3227 error ("incompatible options %<-mstack-protector-guard=global%> and "
3228 "%<-mstack-protector-guard-offset=%s%>",
3229 arm_stack_protector_guard_offset_str);
3230 }
3231
3232 if (opts->x_arm_stack_protector_guard_offset_str)
3233 {
3234 char *end;
3235 const char *str = arm_stack_protector_guard_offset_str;
3236 errno = 0;
3237 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3238 if (!*str || *end || errno)
3239 error ("%qs is not a valid offset in %qs", str,
3240 "-mstack-protector-guard-offset=");
3241 arm_stack_protector_guard_offset = offs;
3242 }
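/* Illustrative sketch of the option combinations handled above (the
   offset value is an arbitrary example, not a recommendation):

     -mstack-protector-guard=tls -mstack-protector-guard-offset=0x28
       -> the canary is loaded from the TLS register at offset 0x28.
     -mstack-protector-guard=global -mstack-protector-guard-offset=8
       -> rejected: an offset only makes sense with the TLS guard.  */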
3243
3244 if (arm_current_function_pac_enabled_p ())
3245 {
3246 if (!arm_arch8m_main)
3247 error ("This architecture does not support branch protection "
3248 "instructions");
3249 if (TARGET_TPCS_FRAME)
3250 sorry ("return address signing is not supported with %<-mtpcs-frame%>");
3251 }
3252
3253 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3254 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3255 #endif
3256 }
3257
3258 static sbitmap isa_all_fpubits_internal;
3259 static sbitmap isa_all_fpbits;
3260 static sbitmap isa_quirkbits;
3261
3262 /* Configure a build target TARGET from the user-specified options OPTS and
3263 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3264 architecture have been specified, but the two are not identical. */
3265 void
3266 arm_configure_build_target (struct arm_build_target *target,
3267 struct cl_target_option *opts,
3268 bool warn_compatible)
3269 {
3270 const cpu_option *arm_selected_tune = NULL;
3271 const arch_option *arm_selected_arch = NULL;
3272 const cpu_option *arm_selected_cpu = NULL;
3273 const arm_fpu_desc *arm_selected_fpu = NULL;
3274 const char *tune_opts = NULL;
3275 const char *arch_opts = NULL;
3276 const char *cpu_opts = NULL;
3277
3278 bitmap_clear (target->isa);
3279 target->core_name = NULL;
3280 target->arch_name = NULL;
3281
3282 if (opts->x_arm_arch_string)
3283 {
3284 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3285 "-march",
3286 opts->x_arm_arch_string);
3287 arch_opts = strchr (opts->x_arm_arch_string, '+');
3288 }
3289
3290 if (opts->x_arm_cpu_string)
3291 {
3292 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3293 opts->x_arm_cpu_string);
3294 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3295 arm_selected_tune = arm_selected_cpu;
3296 /* If taking the tuning from -mcpu, we don't need to rescan the
3297 options for tuning. */
3298 }
3299
3300 if (opts->x_arm_tune_string)
3301 {
3302 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3303 opts->x_arm_tune_string);
3304 tune_opts = strchr (opts->x_arm_tune_string, '+');
3305 }
3306
3307 if (opts->x_arm_branch_protection_string)
3308 {
3309 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string,
3310 "-mbranch-protection=");
3311
3312 if (aarch_ra_sign_key != AARCH_KEY_A)
3313 {
3314 warning (0, "invalid key type for %<-mbranch-protection=%>");
3315 aarch_ra_sign_key = AARCH_KEY_A;
3316 }
3317 }
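/* Hedged examples of how the validation above plays out (spellings
   follow the -mbranch-protection= documentation):

     -mbranch-protection=pac-ret	-> accepted, A-key signing.
     -mbranch-protection=pac-ret+leaf	-> accepted, leaf functions are
					   signed as well.
     -mbranch-protection=pac-ret+b-key	-> the key is forced back to the
					   A key with a warning, since
					   only the A key is supported
					   here.  */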
3318
3319 if (arm_selected_arch)
3320 {
3321 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3322 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3323 arch_opts);
3324
3325 if (arm_selected_cpu)
3326 {
3327 auto_sbitmap cpu_isa (isa_num_bits);
3328 auto_sbitmap isa_delta (isa_num_bits);
3329
3330 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3331 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3332 cpu_opts);
3333 bitmap_xor (isa_delta, cpu_isa, target->isa);
3334 /* Ignore any bits that are quirk bits. */
3335 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3336 /* If the user (or the default configuration) has specified a
3337 specific FPU, then ignore any bits that depend on the FPU
3338 configuration. Do similarly if using the soft-float
3339 ABI. */
3340 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3341 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3342 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3343
3344 if (!bitmap_empty_p (isa_delta))
3345 {
3346 if (warn_compatible)
3347 warning (0, "switch %<-mcpu=%s%> conflicts "
3348 "with switch %<-march=%s%>",
3349 opts->x_arm_cpu_string,
3350 opts->x_arm_arch_string);
3351
3352 /* -march wins for code generation.
3353 -mcpu wins for default tuning. */
3354 if (!arm_selected_tune)
3355 arm_selected_tune = arm_selected_cpu;
3356
3357 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3358 target->arch_name = arm_selected_arch->common.name;
3359 }
3360 else
3361 {
3362 /* Architecture and CPU are essentially the same.
3363 Prefer the CPU setting. */
3364 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3365 target->core_name = arm_selected_cpu->common.name;
3366 /* Copy the CPU's capabilities, so that we inherit the
3367 appropriate extensions and quirks. */
3368 bitmap_copy (target->isa, cpu_isa);
3369 }
3370 }
3371 else
3372 {
3373 /* Pick a CPU based on the architecture. */
3374 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3375 target->arch_name = arm_selected_arch->common.name;
3376 /* Note: target->core_name is left unset in this path. */
3377 }
3378 }
3379 else if (arm_selected_cpu)
3380 {
3381 target->core_name = arm_selected_cpu->common.name;
3382 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3383 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3384 cpu_opts);
3385 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3386 }
3387 /* If the user did not specify a processor or architecture, choose
3388 one for them. */
3389 else
3390 {
3391 const cpu_option *sel;
3392 auto_sbitmap sought_isa (isa_num_bits);
3393 bitmap_clear (sought_isa);
3394 auto_sbitmap default_isa (isa_num_bits);
3395
3396 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3397 TARGET_CPU_DEFAULT);
3398 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3399 gcc_assert (arm_selected_cpu->common.name);
3400
3401 /* RWE: All of the selection logic below (to the end of this
3402 'if' clause) looks somewhat suspect. It appears to be mostly
3403 there to support forcing thumb support when the default CPU
3404 does not have thumb (somewhat dubious in terms of what the
3405 user might be expecting). I think it should be removed once
3406 support for the pre-thumb era cores is removed. */
3407 sel = arm_selected_cpu;
3408 arm_initialize_isa (default_isa, sel->common.isa_bits);
3409 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3410 cpu_opts);
3411
3412 /* Now check to see if the user has specified any command line
3413 switches that require certain abilities from the cpu. */
3414
3415 if (TARGET_INTERWORK || TARGET_THUMB)
3416 bitmap_set_bit (sought_isa, isa_bit_thumb);
3417
3418 /* If there are such requirements and the default CPU does not
3419 satisfy them, we need to run over the complete list of
3420 cores looking for one that is satisfactory. */
3421 if (!bitmap_empty_p (sought_isa)
3422 && !bitmap_subset_p (sought_isa, default_isa))
3423 {
3424 auto_sbitmap candidate_isa (isa_num_bits);
3425 /* We're only interested in a CPU with at least the
3426 capabilities of the default CPU and the required
3427 additional features. */
3428 bitmap_ior (default_isa, default_isa, sought_isa);
3429
3430 /* Try to locate a CPU type that supports all of the abilities
3431 of the default CPU, plus the extra abilities requested by
3432 the user. */
3433 for (sel = all_cores; sel->common.name != NULL; sel++)
3434 {
3435 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3436 /* An exact match? */
3437 if (bitmap_equal_p (default_isa, candidate_isa))
3438 break;
3439 }
3440
3441 if (sel->common.name == NULL)
3442 {
3443 unsigned current_bit_count = isa_num_bits;
3444 const cpu_option *best_fit = NULL;
3445
3446 /* Ideally we would like to issue an error message here
3447 saying that it was not possible to find a CPU compatible
3448 with the default CPU, but which also supports the command
3449 line options specified by the programmer, and so they
3450 ought to use the -mcpu=<name> command line option to
3451 override the default CPU type.
3452
3453 If we cannot find a CPU that has exactly the
3454 characteristics of the default CPU and the given
3455 command line options we scan the array again looking
3456 for a best match. The best match must have at least
3457 the capabilities of the perfect match. */
3458 for (sel = all_cores; sel->common.name != NULL; sel++)
3459 {
3460 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3461
3462 if (bitmap_subset_p (default_isa, candidate_isa))
3463 {
3464 unsigned count;
3465
3466 bitmap_and_compl (candidate_isa, candidate_isa,
3467 default_isa);
3468 count = bitmap_popcount (candidate_isa);
3469
3470 if (count < current_bit_count)
3471 {
3472 best_fit = sel;
3473 current_bit_count = count;
3474 }
3475 }
3476
3477 gcc_assert (best_fit);
3478 sel = best_fit;
3479 }
3480 }
3481 arm_selected_cpu = sel;
3482 }
3483
3484 /* Now we know the CPU, we can finally initialize the target
3485 structure. */
3486 target->core_name = arm_selected_cpu->common.name;
3487 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3488 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3489 cpu_opts);
3490 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3491 }
3492
3493 gcc_assert (arm_selected_cpu);
3494 gcc_assert (arm_selected_arch);
3495
3496 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3497 {
3498 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3499 auto_sbitmap fpu_bits (isa_num_bits);
3500
3501 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3502 /* This should clear out ALL bits relating to the FPU/simd
3503 extensions, to avoid potentially invalid combinations later on
3504 that we can't match. At present we only clear out those bits
3505 that can be set by -mfpu. This should be fixed in GCC-12. */
3506 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3507 bitmap_ior (target->isa, target->isa, fpu_bits);
3508 }
3509
3510 /* If we have the soft-float ABI, clear any feature bits relating to use of
3511 floating-point operations. They'll just confuse things later on. */
3512 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3513 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3514
3515 /* There may be implied bits which we still need to enable. These are
3516 non-named features which are needed to complete other sets of features,
3517 but cannot be enabled from arm-cpus.in due to being shared between
3518 multiple fgroups. Each entry in all_implied_fbits is of the form
3519 ante -> cons, meaning that if the feature "ante" is enabled, we should
3520 implicitly enable "cons". */
3521 const struct fbit_implication *impl = all_implied_fbits;
3522 while (impl->ante)
3523 {
3524 if (bitmap_bit_p (target->isa, impl->ante))
3525 bitmap_set_bit (target->isa, impl->cons);
3526 impl++;
3527 }
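/* Worked example of the implication walk above (the feature pairing is
   purely illustrative): if all_implied_fbits contained the entry
   { isa_bit_mve_float -> isa_bit_fp16 }, then any target whose ISA set
   already has mve_float would silently gain fp16 as well, without the
   user having to spell it on the command line.  */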
3528
3529 if (!arm_selected_tune)
3530 arm_selected_tune = arm_selected_cpu;
3531 else /* Validate the features passed to -mtune. */
3532 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3533
3534 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3535
3536 /* Finish initializing the target structure. */
3537 if (!target->arch_name)
3538 target->arch_name = arm_selected_arch->common.name;
3539 target->arch_pp_name = arm_selected_arch->arch;
3540 target->base_arch = arm_selected_arch->base_arch;
3541 target->profile = arm_selected_arch->profile;
3542
3543 target->tune_flags = tune_data->tune_flags;
3544 target->tune = tune_data->tune;
3545 target->tune_core = tune_data->scheduler;
3546 }
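/* Hedged sketch of the conflict handling above:

     -mcpu=cortex-m4 -march=armv7-a
       -> the ISA deltas do not cancel, so with WARN_COMPATIBLE set we
	  emit "switch -mcpu=cortex-m4 conflicts with -march=armv7-a";
	  -march then wins for code generation while -mcpu still
	  provides the default tuning.  */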
3547
3548 /* Fix up any incompatible options that the user has specified. */
3549 static void
3550 arm_option_override (void)
3551 {
3552 static const enum isa_feature fpu_bitlist_internal[]
3553 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3554 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3555 static const enum isa_feature fp_bitlist[]
3556 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3557 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3558 cl_target_option opts;
3559
3560 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3561 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3562
3563 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3564 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3565 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3566 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3567
3568 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3569
3570 if (!OPTION_SET_P (arm_fpu_index))
3571 {
3572 bool ok;
3573 int fpu_index;
3574
3575 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3576 CL_TARGET);
3577 gcc_assert (ok);
3578 arm_fpu_index = (enum fpu_type) fpu_index;
3579 }
3580
3581 cl_target_option_save (&opts, &global_options, &global_options_set);
3582 arm_configure_build_target (&arm_active_target, &opts, true);
3583
3584 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3585 SUBTARGET_OVERRIDE_OPTIONS;
3586 #endif
3587
3588 /* Initialize boolean versions of the architectural flags, for use
3589 in the arm.md file and for enabling feature flags. */
3590 arm_option_reconfigure_globals ();
3591
3592 arm_tune = arm_active_target.tune_core;
3593 tune_flags = arm_active_target.tune_flags;
3594 current_tune = arm_active_target.tune;
3595
3596 /* TBD: DWARF info for the APCS frame is not handled yet. */
3597 if (TARGET_APCS_FRAME)
3598 flag_shrink_wrap = false;
3599
3600 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3601 {
3602 warning (0, "%<-mapcs-stack-check%> incompatible with "
3603 "%<-mno-apcs-frame%>");
3604 target_flags |= MASK_APCS_FRAME;
3605 }
3606
3607 if (TARGET_POKE_FUNCTION_NAME)
3608 target_flags |= MASK_APCS_FRAME;
3609
3610 if (TARGET_APCS_REENT && flag_pic)
3611 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3612
3613 if (TARGET_APCS_REENT)
3614 warning (0, "APCS reentrant code not supported. Ignored");
3615
3616 /* Set up some tuning parameters. */
3617 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3618 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3619 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3620 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3621 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3622 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3623
3624 /* For arm2/3 there is no need to do any scheduling if we are doing
3625 software floating-point. */
3626 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3627 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3628
3629 /* Override the default structure alignment for AAPCS ABI. */
3630 if (!OPTION_SET_P (arm_structure_size_boundary))
3631 {
3632 if (TARGET_AAPCS_BASED)
3633 arm_structure_size_boundary = 8;
3634 }
3635 else
3636 {
3637 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3638
3639 if (arm_structure_size_boundary != 8
3640 && arm_structure_size_boundary != 32
3641 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3642 {
3643 if (ARM_DOUBLEWORD_ALIGN)
3644 warning (0,
3645 "structure size boundary can only be set to 8, 32 or 64");
3646 else
3647 warning (0, "structure size boundary can only be set to 8 or 32");
3648 arm_structure_size_boundary
3649 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3650 }
3651 }
3652
3653 if (TARGET_VXWORKS_RTP)
3654 {
3655 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3656 arm_pic_data_is_text_relative = 0;
3657 }
3658 else if (flag_pic
3659 && !arm_pic_data_is_text_relative
3660 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3661 /* When text & data segments don't have a fixed displacement, the
3662 intended use is with a single, read-only PIC base register.
3663 Unless the user explicitly requested not to do that, set
3664 it. */
3665 target_flags |= MASK_SINGLE_PIC_BASE;
3666
3667 /* If stack checking is disabled, we can use r10 as the PIC register,
3668 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3669 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3670 {
3671 if (TARGET_VXWORKS_RTP)
3672 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3673 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3674 }
3675
3676 if (flag_pic && TARGET_VXWORKS_RTP)
3677 arm_pic_register = 9;
3678
3679 /* If in FDPIC mode then force arm_pic_register to be r9. */
3680 if (TARGET_FDPIC)
3681 {
3682 arm_pic_register = FDPIC_REGNUM;
3683 if (TARGET_THUMB1)
3684 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3685 }
3686
3687 if (arm_pic_register_string != NULL)
3688 {
3689 int pic_register = decode_reg_name (arm_pic_register_string);
3690
3691 if (!flag_pic)
3692 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3693
3694 /* Prevent the user from choosing an obviously stupid PIC register. */
3695 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3696 || pic_register == HARD_FRAME_POINTER_REGNUM
3697 || pic_register == STACK_POINTER_REGNUM
3698 || pic_register >= PC_REGNUM
3699 || (TARGET_VXWORKS_RTP
3700 && (unsigned int) pic_register != arm_pic_register))
3701 error ("unable to use %qs for PIC register", arm_pic_register_string);
3702 else
3703 arm_pic_register = pic_register;
3704 }
3705
3706 if (flag_pic)
3707 target_word_relocations = 1;
3708
3709 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3710 if (fix_cm3_ldrd == 2)
3711 {
3712 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3713 fix_cm3_ldrd = 1;
3714 else
3715 fix_cm3_ldrd = 0;
3716 }
3717
3718 /* Enable fix_vlldm by default if required. */
3719 if (fix_vlldm == 2)
3720 {
3721 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3722 fix_vlldm = 1;
3723 else
3724 fix_vlldm = 0;
3725 }
3726
3727 /* Enable fix_aes by default if required. */
3728 if (fix_aes_erratum_1742098 == 2)
3729 {
3730 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3731 fix_aes_erratum_1742098 = 1;
3732 else
3733 fix_aes_erratum_1742098 = 0;
3734 }
3735
3736 /* Hot/Cold partitioning is not currently supported, since we can't
3737 handle literal pool placement in that case. */
3738 if (flag_reorder_blocks_and_partition)
3739 {
3740 inform (input_location,
3741 "%<-freorder-blocks-and-partition%> not supported "
3742 "on this architecture");
3743 flag_reorder_blocks_and_partition = 0;
3744 flag_reorder_blocks = 1;
3745 }
3746
3747 if (flag_pic)
3748 /* Hoisting PIC address calculations more aggressively provides a small,
3749 but measurable, size reduction for PIC code. Therefore, we decrease
3750 the bar for unrestricted expression hoisting to the cost of PIC address
3751 calculation, which is 2 instructions. */
3752 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3753 param_gcse_unrestricted_cost, 2);
3754
3755 /* ARM EABI defaults to strict volatile bitfields. */
3756 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3757 && abi_version_at_least(2))
3758 flag_strict_volatile_bitfields = 1;
3759
3760 /* Enable software prefetching at -O3 for CPUs that have prefetch and for
3761 which we have deemed it beneficial (signified by setting
3762 prefetch.num_slots to 1 or more). */
3763 if (flag_prefetch_loop_arrays < 0
3764 && HAVE_prefetch
3765 && optimize >= 3
3766 && current_tune->prefetch.num_slots > 0)
3767 flag_prefetch_loop_arrays = 1;
3768
3769 /* Set up parameters to be used in prefetching algorithm. Do not
3770 override the defaults unless we are tuning for a core we have
3771 researched values for. */
3772 if (current_tune->prefetch.num_slots > 0)
3773 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3774 param_simultaneous_prefetches,
3775 current_tune->prefetch.num_slots);
3776 if (current_tune->prefetch.l1_cache_line_size >= 0)
3777 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3778 param_l1_cache_line_size,
3779 current_tune->prefetch.l1_cache_line_size);
3780 if (current_tune->prefetch.l1_cache_line_size >= 0)
3781 {
3782 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3783 param_destruct_interfere_size,
3784 current_tune->prefetch.l1_cache_line_size);
3785 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3786 param_construct_interfere_size,
3787 current_tune->prefetch.l1_cache_line_size);
3788 }
3789 else
3790 {
3791 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3792 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3793 constructive? */
3794 /* More recent Cortex chips have a 64-byte cache line, but are marked
3795 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3796 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3797 param_destruct_interfere_size, 64);
3798 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3799 param_construct_interfere_size, 64);
3800 }
3801
3802 if (current_tune->prefetch.l1_cache_size >= 0)
3803 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3804 param_l1_cache_size,
3805 current_tune->prefetch.l1_cache_size);
3806
3807 /* Look through ready list and all of queue for instructions
3808 relevant for L2 auto-prefetcher. */
3809 int sched_autopref_queue_depth;
3810
3811 switch (current_tune->sched_autopref)
3812 {
3813 case tune_params::SCHED_AUTOPREF_OFF:
3814 sched_autopref_queue_depth = -1;
3815 break;
3816
3817 case tune_params::SCHED_AUTOPREF_RANK:
3818 sched_autopref_queue_depth = 0;
3819 break;
3820
3821 case tune_params::SCHED_AUTOPREF_FULL:
3822 sched_autopref_queue_depth = max_insn_queue_index + 1;
3823 break;
3824
3825 default:
3826 gcc_unreachable ();
3827 }
3828
3829 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3830 param_sched_autopref_queue_depth,
3831 sched_autopref_queue_depth);
3832
3833 /* Currently, for slow flash data, we just disable literal pools. We also
3834 disable them for pure-code. */
3835 if (target_slow_flash_data || target_pure_code)
3836 arm_disable_literal_pool = true;
3837
3838 /* Disable scheduling fusion by default if the target is not an armv7
3839 processor or does not prefer ldrd/strd. */
3840 if (flag_schedule_fusion == 2
3841 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3842 flag_schedule_fusion = 0;
3843
3844 /* Need to remember initial options before they are overridden. */
3845 init_optimize = build_optimization_node (&global_options,
3846 &global_options_set);
3847
3848 arm_options_perform_arch_sanity_checks ();
3849 arm_option_override_internal (&global_options, &global_options_set);
3850 arm_option_check_internal (&global_options);
3851 arm_option_params_internal ();
3852
3853 /* Create the default target_options structure. */
3854 target_option_default_node = target_option_current_node
3855 = build_target_option_node (&global_options, &global_options_set);
3856
3857 /* Register global variables with the garbage collector. */
3858 arm_add_gc_roots ();
3859
3860 /* Init initial mode for testing. */
3861 thumb_flipper = TARGET_THUMB;
3862 }
3863
3864
3865 /* Reconfigure global status flags from the active_target.isa. */
3866 void
3867 arm_option_reconfigure_globals (void)
3868 {
3869 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3870 arm_base_arch = arm_active_target.base_arch;
3871
3872 /* Initialize boolean versions of the architectural flags, for use
3873 in the arm.md file. */
3874 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3875 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3876 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3877 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3878 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3879 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3880 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3881 arm_arch6m = arm_arch6 && !arm_arch_notm;
3882 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3883 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3884 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3885 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3886 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3887 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3888 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3889 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3890 isa_bit_armv8_1m_main);
3891 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3892 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3893 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3894 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3895 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3896 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3897 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3898 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3899 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3900 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3901 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3902 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3903 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3904
3905 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3906 if (arm_fp16_inst)
3907 {
3908 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3909 error ("selected fp16 options are incompatible");
3910 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3911 }
3912
3913 arm_arch_cde = 0;
3914 arm_arch_cde_coproc = 0;
3915 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3916 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3917 isa_bit_cdecp6, isa_bit_cdecp7};
3918 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3919 {
3920 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3921 if (cde_bit)
3922 {
3923 arm_arch_cde |= cde_bit;
3924 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3925 }
3926 }
3927
3928 /* And finally, set up some quirks. */
3929 arm_arch_no_volatile_ce
3930 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3931 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3932 isa_bit_quirk_armv6kz);
3933
3934 /* Use the cp15 method if it is available. */
3935 if (target_thread_pointer == TP_AUTO)
3936 {
3937 if (arm_arch6k && !TARGET_THUMB1)
3938 target_thread_pointer = TP_TPIDRURO;
3939 else
3940 target_thread_pointer = TP_SOFT;
3941 }
3942
3943 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3944 error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3945 }
3946
3947 /* Perform some validation between the desired architecture and the rest of the
3948 options. */
3949 void
3950 arm_options_perform_arch_sanity_checks (void)
3951 {
3952 /* V5T code we generate is completely interworking capable, so we turn off
3953 TARGET_INTERWORK here to avoid many tests later on. */
3954
3955 /* XXX However, we must pass the right pre-processor defines to CPP
3956 or GLD can get confused. This is a hack. */
3957 if (TARGET_INTERWORK)
3958 arm_cpp_interwork = 1;
3959
3960 if (arm_arch5t)
3961 target_flags &= ~MASK_INTERWORK;
3962
3963 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3964 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3965
3966 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3967 error ("iwmmxt abi requires an iwmmxt capable cpu");
3968
3969 /* BPABI targets use linker tricks to allow interworking on cores
3970 without thumb support. */
3971 if (TARGET_INTERWORK
3972 && !TARGET_BPABI
3973 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3974 {
3975 warning (0, "target CPU does not support interworking" );
3976 target_flags &= ~MASK_INTERWORK;
3977 }
3978
3979 /* If soft-float is specified then don't use FPU. */
3980 if (TARGET_SOFT_FLOAT)
3981 arm_fpu_attr = FPU_NONE;
3982 else
3983 arm_fpu_attr = FPU_VFP;
3984
3985 if (TARGET_AAPCS_BASED)
3986 {
3987 if (TARGET_CALLER_INTERWORKING)
3988 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3989 else
3990 if (TARGET_CALLEE_INTERWORKING)
3991 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3992 }
3993
3994 /* __fp16 support currently assumes the core has ldrh. */
3995 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3996 sorry ("%<__fp16%> and no ldrh");
3997
3998 if (use_cmse && !arm_arch_cmse)
3999 error ("target CPU does not support ARMv8-M Security Extensions");
4000
4001 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
4002 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
4003 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
4004 error ("ARMv8-M Security Extensions incompatible with selected FPU");
4005
4006
4007 if (TARGET_AAPCS_BASED)
4008 {
4009 if (arm_abi == ARM_ABI_IWMMXT)
4010 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
4011 else if (TARGET_HARD_FLOAT_ABI)
4012 {
4013 arm_pcs_default = ARM_PCS_AAPCS_VFP;
4014 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
4015 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
4016 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
4017 }
4018 else
4019 arm_pcs_default = ARM_PCS_AAPCS;
4020 }
4021 else
4022 {
4023 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
4024 sorry ("%<-mfloat-abi=hard%> and VFP");
4025
4026 if (arm_abi == ARM_ABI_APCS)
4027 arm_pcs_default = ARM_PCS_APCS;
4028 else
4029 arm_pcs_default = ARM_PCS_ATPCS;
4030 }
4031 }
4032
4033 /* Test whether a local function descriptor is canonical, i.e.,
4034 whether we can use GOTOFFFUNCDESC to compute the address of the
4035 function. */
4036 static bool
4037 arm_fdpic_local_funcdesc_p (rtx fnx)
4038 {
4039 tree fn;
4040 enum symbol_visibility vis;
4041 bool ret;
4042
4043 if (!TARGET_FDPIC)
4044 return true;
4045
4046 if (! SYMBOL_REF_LOCAL_P (fnx))
4047 return false;
4048
4049 fn = SYMBOL_REF_DECL (fnx);
4050
4051 if (! fn)
4052 return false;
4053
4054 vis = DECL_VISIBILITY (fn);
4055
4056 if (vis == VISIBILITY_PROTECTED)
4057 /* Private function descriptors for protected functions are not
4058 canonical. Temporarily change the visibility to global so that
4059 we can ensure uniqueness of funcdesc pointers. */
4060 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4061
4062 ret = default_binds_local_p_1 (fn, flag_pic);
4063
4064 DECL_VISIBILITY (fn) = vis;
4065
4066 return ret;
4067 }
4068
4069 static void
4070 arm_add_gc_roots (void)
4071 {
4072 gcc_obstack_init(&minipool_obstack);
4073 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4074 }
4075 \f
4076 /* A table of known ARM exception types.
4077 For use with the interrupt function attribute. */
4078
4079 typedef struct
4080 {
4081 const char *const arg;
4082 const unsigned long return_value;
4083 }
4084 isr_attribute_arg;
4085
4086 static const isr_attribute_arg isr_attribute_args [] =
4087 {
4088 { "IRQ", ARM_FT_ISR },
4089 { "irq", ARM_FT_ISR },
4090 { "FIQ", ARM_FT_FIQ },
4091 { "fiq", ARM_FT_FIQ },
4092 { "ABORT", ARM_FT_ISR },
4093 { "abort", ARM_FT_ISR },
4094 { "UNDEF", ARM_FT_EXCEPTION },
4095 { "undef", ARM_FT_EXCEPTION },
4096 { "SWI", ARM_FT_EXCEPTION },
4097 { "swi", ARM_FT_EXCEPTION },
4098 { NULL, ARM_FT_NORMAL }
4099 };
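/* A short usage sketch for the table above (function names are made
   up): the strings are matched against the argument of the isr or
   interrupt attribute, e.g.

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fault_handler (void) __attribute__ ((isr ("UNDEF")));

   An unrecognised string falls through to ARM_FT_UNKNOWN in
   arm_isr_value below.  */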
4100
4101 /* Returns the (interrupt) function type of the current
4102 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4103
4104 static unsigned long
4105 arm_isr_value (tree argument)
4106 {
4107 const isr_attribute_arg * ptr;
4108 const char * arg;
4109
4110 if (!arm_arch_notm)
4111 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4112
4113 /* No argument - default to IRQ. */
4114 if (argument == NULL_TREE)
4115 return ARM_FT_ISR;
4116
4117 /* Get the value of the argument. */
4118 if (TREE_VALUE (argument) == NULL_TREE
4119 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4120 return ARM_FT_UNKNOWN;
4121
4122 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4123
4124 /* Check it against the list of known arguments. */
4125 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4126 if (streq (arg, ptr->arg))
4127 return ptr->return_value;
4128
4129 /* An unrecognized interrupt type. */
4130 return ARM_FT_UNKNOWN;
4131 }
4132
4133 /* Computes the type of the current function. */
4134
4135 static unsigned long
4136 arm_compute_func_type (void)
4137 {
4138 unsigned long type = ARM_FT_UNKNOWN;
4139 tree a;
4140 tree attr;
4141
4142 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4143
4144 /* Decide if the current function is volatile. Such functions
4145 never return, and many memory cycles can be saved by not storing
4146 register values that will never be needed again. This optimization
4147 was added to speed up context switching in a kernel application. */
4148 if (optimize > 0
4149 && (TREE_NOTHROW (current_function_decl)
4150 || !(flag_unwind_tables
4151 || (flag_exceptions
4152 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4153 && TREE_THIS_VOLATILE (current_function_decl))
4154 type |= ARM_FT_VOLATILE;
4155
4156 if (cfun->static_chain_decl != NULL)
4157 type |= ARM_FT_NESTED;
4158
4159 attr = DECL_ATTRIBUTES (current_function_decl);
4160
4161 a = lookup_attribute ("naked", attr);
4162 if (a != NULL_TREE)
4163 type |= ARM_FT_NAKED;
4164
4165 a = lookup_attribute ("isr", attr);
4166 if (a == NULL_TREE)
4167 a = lookup_attribute ("interrupt", attr);
4168
4169 if (a == NULL_TREE)
4170 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4171 else
4172 type |= arm_isr_value (TREE_VALUE (a));
4173
4174 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4175 type |= ARM_FT_CMSE_ENTRY;
4176
4177 return type;
4178 }
4179
4180 /* Returns the type of the current function. */
4181
4182 unsigned long
4183 arm_current_func_type (void)
4184 {
4185 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4186 cfun->machine->func_type = arm_compute_func_type ();
4187
4188 return cfun->machine->func_type;
4189 }
4190
4191 bool
4192 arm_allocate_stack_slots_for_args (void)
4193 {
4194 /* Naked functions should not allocate stack slots for arguments. */
4195 return !IS_NAKED (arm_current_func_type ());
4196 }
4197
4198 static bool
4199 arm_warn_func_return (tree decl)
4200 {
4201 /* Naked functions are implemented entirely in assembly, including the
4202 return sequence, so suppress warnings about this. */
4203 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4204 }
4205
4206 \f
4207 /* Output assembler code for a block containing the constant parts
4208 of a trampoline, leaving space for the variable parts.
4209
4210 On the ARM, (if r8 is the static chain regnum, and remembering that
4211 referencing pc adds an offset of 8) the trampoline looks like:
4212 ldr r8, [pc, #0]
4213 ldr pc, [pc]
4214 .word static chain value
4215 .word function's address
4216 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4217
4218 In FDPIC mode, the trampoline looks like:
4219 .word trampoline address
4220 .word trampoline GOT address
4221 ldr r12, [pc, #8] ; #4 for Arm mode
4222 ldr r9, [pc, #8] ; #4 for Arm mode
4223 ldr pc, [pc, #8] ; #4 for Arm mode
4224 .word static chain value
4225 .word GOT address
4226 .word function's address
4227 */
4228
4229 static void
4230 arm_asm_trampoline_template (FILE *f)
4231 {
4232 fprintf (f, "\t.syntax unified\n");
4233
4234 if (TARGET_FDPIC)
4235 {
4236 /* The first two words are a function descriptor pointing to the
4237 trampoline code just below. */
4238 if (TARGET_ARM)
4239 fprintf (f, "\t.arm\n");
4240 else if (TARGET_THUMB2)
4241 fprintf (f, "\t.thumb\n");
4242 else
4243 /* Only ARM and Thumb-2 are supported. */
4244 gcc_unreachable ();
4245
4246 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4247 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4248 /* Trampoline code which sets the static chain register but also
4249 PIC register before jumping into real code. */
4250 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4251 STATIC_CHAIN_REGNUM, PC_REGNUM,
4252 TARGET_THUMB2 ? 8 : 4);
4253 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4254 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4255 TARGET_THUMB2 ? 8 : 4);
4256 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4257 PC_REGNUM, PC_REGNUM,
4258 TARGET_THUMB2 ? 8 : 4);
4259 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4260 }
4261 else if (TARGET_ARM)
4262 {
4263 fprintf (f, "\t.arm\n");
4264 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4265 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4266 }
4267 else if (TARGET_THUMB2)
4268 {
4269 fprintf (f, "\t.thumb\n");
4270 /* The Thumb-2 trampoline is similar to the arm implementation.
4271 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4272 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4273 STATIC_CHAIN_REGNUM, PC_REGNUM);
4274 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4275 }
4276 else
4277 {
4278 ASM_OUTPUT_ALIGN (f, 2);
4279 fprintf (f, "\t.code\t16\n");
4280 fprintf (f, ".Ltrampoline_start:\n");
4281 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4282 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4283 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4284 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4285 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4286 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4287 }
4288 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4289 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4290 }
4291
4292 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4293
4294 static void
4295 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4296 {
4297 rtx fnaddr, mem, a_tramp;
4298
4299 emit_block_move (m_tramp, assemble_trampoline_template (),
4300 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4301
4302 if (TARGET_FDPIC)
4303 {
4304 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4305 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4306 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4307 /* The function start address is at offset 8, but in Thumb mode
4308 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4309 below. */
4310 rtx trampoline_code_start
4311 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4312
4313 /* Write initial funcdesc which points to the trampoline. */
4314 mem = adjust_address (m_tramp, SImode, 0);
4315 emit_move_insn (mem, trampoline_code_start);
4316 mem = adjust_address (m_tramp, SImode, 4);
4317 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4318 /* Setup static chain. */
4319 mem = adjust_address (m_tramp, SImode, 20);
4320 emit_move_insn (mem, chain_value);
4321 /* GOT + real function entry point. */
4322 mem = adjust_address (m_tramp, SImode, 24);
4323 emit_move_insn (mem, gotaddr);
4324 mem = adjust_address (m_tramp, SImode, 28);
4325 emit_move_insn (mem, fnaddr);
4326 }
4327 else
4328 {
4329 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4330 emit_move_insn (mem, chain_value);
4331
4332 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4333 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4334 emit_move_insn (mem, fnaddr);
4335 }
4336
4337 a_tramp = XEXP (m_tramp, 0);
4338 maybe_emit_call_builtin___clear_cache (a_tramp,
4339 plus_constant (ptr_mode,
4340 a_tramp,
4341 TRAMPOLINE_SIZE));
4342 }
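/* Hedged sketch of the non-FDPIC ARM trampoline after initialization,
   matching the offsets used above (with r8 standing in for the static
   chain register, as in the comment before the template):

     0:  ldr  r8, [pc, #0]	; loads the word at offset 8
     4:  ldr  pc, [pc]		; loads the word at offset 12
     8:  .word <chain_value>	; written at offset 8 above
     12: .word <fnaddr>		; written at offset 12 above

   The pc-relative loads line up because reading pc in ARM state yields
   the instruction address plus 8.  */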
4343
4344 /* Thumb trampolines should be entered in thumb mode, so set
4345 the bottom bit of the address. */
4346
4347 static rtx
4348 arm_trampoline_adjust_address (rtx addr)
4349 {
4350 /* For FDPIC don't fix trampoline address since it's a function
4351 descriptor and not a function address. */
4352 if (TARGET_THUMB && !TARGET_FDPIC)
4353 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4354 NULL, 0, OPTAB_LIB_WIDEN);
4355 return addr;
4356 }
4357 \f
4358 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4359 includes call-clobbered registers too. If this is a leaf function
4360 we can just examine the registers used by the RTL, but otherwise we
4361 have to assume that whatever function is called might clobber
4362 anything, and so we have to save all the call-clobbered registers
4363 as well. */
4364 static inline bool reg_needs_saving_p (unsigned reg)
4365 {
4366 unsigned long func_type = arm_current_func_type ();
4367
4368 if (IS_INTERRUPT (func_type))
4369 if (df_regs_ever_live_p (reg)
4370 /* Save call-clobbered core registers. */
4371 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4372 return true;
4373 else
4374 return false;
4375 else
4376 if (!df_regs_ever_live_p (reg)
4377 || call_used_or_fixed_reg_p (reg))
4378 return false;
4379 else
4380 return true;
4381 }
4382
4383 /* Return 1 if it is possible to return using a single instruction.
4384 If SIBLING is non-null, this is a test for a return before a sibling
4385 call. SIBLING is the call insn, so we can examine its register usage. */
4386
4387 int
4388 use_return_insn (int iscond, rtx sibling)
4389 {
4390 int regno;
4391 unsigned int func_type;
4392 unsigned long saved_int_regs;
4393 unsigned HOST_WIDE_INT stack_adjust;
4394 arm_stack_offsets *offsets;
4395
4396 /* Never use a return instruction before reload has run. */
4397 if (!reload_completed)
4398 return 0;
4399
4400 /* Never use a return instruction when return address signing
4401 mechanism is enabled as it requires more than one
4402 instruction. */
4403 if (arm_current_function_pac_enabled_p ())
4404 return 0;
4405
4406 func_type = arm_current_func_type ();
4407
4408 /* Naked, volatile and stack alignment functions need special
4409 consideration. */
4410 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4411 return 0;
4412
4413 /* So do interrupt functions that use the frame pointer and Thumb
4414 interrupt functions. */
4415 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4416 return 0;
4417
4418 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4419 && !optimize_function_for_size_p (cfun))
4420 return 0;
4421
4422 offsets = arm_get_frame_offsets ();
4423 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4424
4425 /* As do variadic functions. */
4426 if (crtl->args.pretend_args_size
4427 || cfun->machine->uses_anonymous_args
4428 /* Or if the function calls __builtin_eh_return () */
4429 || crtl->calls_eh_return
4430 /* Or if the function calls alloca */
4431 || cfun->calls_alloca
4432 /* Or if there is a stack adjustment. However, if the stack pointer
4433 is saved on the stack, we can use a pre-incrementing stack load. */
4434 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4435 && stack_adjust == 4))
4436 /* Or if the static chain register was saved above the frame, under the
4437 assumption that the stack pointer isn't saved on the stack. */
4438 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4439 && arm_compute_static_chain_stack_bytes() != 0))
4440 return 0;
4441
4442 saved_int_regs = offsets->saved_regs_mask;
4443
4444 /* Unfortunately, the insn
4445
4446 ldmib sp, {..., sp, ...}
4447
4448 triggers a bug on most SA-110 based devices, such that the stack
4449 pointer won't be correctly restored if the instruction takes a
4450 page fault. We work around this problem by popping r3 along with
4451 the other registers, since that is never slower than executing
4452 another instruction.
4453
4454 We test for !arm_arch5t here, because code for any architecture
4455 less than this could potentially be run on one of the buggy
4456 chips. */
4457 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4458 {
4459 /* Validate that r3 is a call-clobbered register (always true in
4460 the default abi) ... */
4461 if (!call_used_or_fixed_reg_p (3))
4462 return 0;
4463
4464 /* ... that it isn't being used for a return value ... */
4465 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4466 return 0;
4467
4468 /* ... or for a tail-call argument ... */
4469 if (sibling)
4470 {
4471 gcc_assert (CALL_P (sibling));
4472
4473 if (find_regno_fusage (sibling, USE, 3))
4474 return 0;
4475 }
4476
4477 /* ... and that there are no call-saved registers in r0-r2
4478 (always true in the default ABI). */
4479 if (saved_int_regs & 0x7)
4480 return 0;
4481 }
4482
4483 /* Can't be done if interworking with Thumb, and any registers have been
4484 stacked. */
4485 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4486 return 0;
4487
4488 /* On StrongARM, conditional returns are expensive if they aren't
4489 taken and multiple registers have been stacked. */
4490 if (iscond && arm_tune_strongarm)
4491 {
4492 /* Conditional return when just the LR is stored is a simple
4493 conditional-load instruction, that's not expensive. */
4494 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4495 return 0;
4496
4497 if (flag_pic
4498 && arm_pic_register != INVALID_REGNUM
4499 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4500 return 0;
4501 }
4502
4503 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4504 several instructions if anything needs to be popped. Armv8.1-M Mainline
4505 also needs several instructions to save and restore FP context. */
4506 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4507 return 0;
4508
4509 /* If there are saved registers but the LR isn't saved, then we need
4510 two instructions for the return. */
4511 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4512 return 0;
4513
4514 /* Can't be done if any of the VFP regs are pushed,
4515 since this also requires an insn. */
4516 if (TARGET_VFP_BASE)
4517 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4518 if (reg_needs_saving_p (regno))
4519 return 0;
4520
4521 if (TARGET_REALLY_IWMMXT)
4522 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4523 if (reg_needs_saving_p (regno))
4524 return 0;
4525
4526 return 1;
4527 }
4528
4529 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4530 shrink-wrapping if possible. This is the case if we need to emit a
4531 prologue, which we can test by looking at the offsets. */
4532 bool
4533 use_simple_return_p (void)
4534 {
4535 arm_stack_offsets *offsets;
4536
4537 /* Note this function can be called before or after reload. */
4538 if (!reload_completed)
4539 arm_compute_frame_layout ();
4540
4541 offsets = arm_get_frame_offsets ();
4542 return offsets->outgoing_args != 0;
4543 }
4544
4545 /* Return TRUE if int I is a valid immediate ARM constant. */
4546
4547 int
4548 const_ok_for_arm (HOST_WIDE_INT i)
4549 {
4550 int lowbit;
4551
4552 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4553 be all zero, or all one. */
4554 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4555 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4556 != ((~(unsigned HOST_WIDE_INT) 0)
4557 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4558 return FALSE;
4559
4560 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4561
4562 /* Fast return for 0 and small values. We must do this for zero, since
4563 the code below can't handle that one case. */
4564 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4565 return TRUE;
4566
4567 /* Get the number of trailing zeros. */
4568 lowbit = ffs((int) i) - 1;
4569
4570 /* Only even shifts are allowed in ARM mode so round down to the
4571 nearest even number. */
4572 if (TARGET_ARM)
4573 lowbit &= ~1;
4574
4575 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4576 return TRUE;
4577
4578 if (TARGET_ARM)
4579 {
4580 /* Allow rotated constants in ARM mode. */
4581 if (lowbit <= 4
4582 && ((i & ~0xc000003f) == 0
4583 || (i & ~0xf000000f) == 0
4584 || (i & ~0xfc000003) == 0))
4585 return TRUE;
4586 }
4587 else if (TARGET_THUMB2)
4588 {
4589 HOST_WIDE_INT v;
4590
4591 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4592 v = i & 0xff;
4593 v |= v << 16;
4594 if (i == v || i == (v | (v << 8)))
4595 return TRUE;
4596
4597 /* Allow repeated pattern 0xXY00XY00. */
4598 v = i & 0xff00;
4599 v |= v << 16;
4600 if (i == v)
4601 return TRUE;
4602 }
4603 else if (TARGET_HAVE_MOVT)
4604 {
4605 /* Thumb-1 Targets with MOVT. */
4606 if (i > 0xffff)
4607 return FALSE;
4608 else
4609 return TRUE;
4610 }
4611
4612 return FALSE;
4613 }
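/* Worked examples for const_ok_for_arm (checked by hand, so treat this
   as a sketch rather than a reference table):

     0x000000ff -> valid everywhere: a plain 8-bit immediate.
     0x00000104 -> valid: 0x41 shifted left by 2, i.e. an 8-bit value at
		   an even rotation.
     0x00ff00ff -> invalid in ARM mode (two separate byte fields), but
		   valid in Thumb-2 as the replicated pattern 0x00XY00XY.
     0x01010101 -> valid in Thumb-2 only, as the 0xXYXYXYXY pattern.
     0x00001234 -> valid for Thumb-1 targets with MOVT (<= 0xffff), but
		   not representable as a single ARM-mode immediate.  */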
4614
4615 /* Return true if I is a valid constant for the operation CODE. */
4616 int
4617 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4618 {
4619 if (const_ok_for_arm (i))
4620 return 1;
4621
4622 switch (code)
4623 {
4624 case SET:
4625 /* See if we can use movw. */
4626 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4627 return 1;
4628 else
4629 /* Otherwise, try mvn. */
4630 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4631
4632 case PLUS:
4633 /* See if we can use addw or subw. */
4634 if (TARGET_THUMB2
4635 && ((i & 0xfffff000) == 0
4636 || ((-i) & 0xfffff000) == 0))
4637 return 1;
4638 /* Fall through. */
4639 case COMPARE:
4640 case EQ:
4641 case NE:
4642 case GT:
4643 case LE:
4644 case LT:
4645 case GE:
4646 case GEU:
4647 case LTU:
4648 case GTU:
4649 case LEU:
4650 case UNORDERED:
4651 case ORDERED:
4652 case UNEQ:
4653 case UNGE:
4654 case UNLT:
4655 case UNGT:
4656 case UNLE:
4657 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4658
4659 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4660 case XOR:
4661 return 0;
4662
4663 case IOR:
4664 if (TARGET_THUMB2)
4665 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4666 return 0;
4667
4668 case AND:
4669 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4670
4671 default:
4672 gcc_unreachable ();
4673 }
4674 }
4675
4676 /* Return true if I is a valid DImode constant for the operation CODE. */
4677 int
4678 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4679 {
4680 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4681 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4682 rtx hi = GEN_INT (hi_val);
4683 rtx lo = GEN_INT (lo_val);
4684
4685 if (TARGET_THUMB1)
4686 return 0;
4687
4688 switch (code)
4689 {
4690 case AND:
4691 case IOR:
4692 case XOR:
4693 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4694 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4695 case PLUS:
4696 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4697
4698 default:
4699 return 0;
4700 }
4701 }
4702
4703 /* Emit a sequence of insns to handle a large constant.
4704 CODE is the code of the operation required, it can be any of SET, PLUS,
4705 IOR, AND, XOR, MINUS;
4706 MODE is the mode in which the operation is being performed;
4707 VAL is the integer to operate on;
4708 SOURCE is the other operand (a register, or a null-pointer for SET);
4709 SUBTARGETS means it is safe to create scratch registers if that will
4710 either produce a simpler sequence, or we will want to cse the values.
4711 Return value is the number of insns emitted. */
4712
4713 /* ??? Tweak this for thumb2. */
4714 int
4715 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4716 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4717 {
4718 rtx cond;
4719
4720 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4721 cond = COND_EXEC_TEST (PATTERN (insn));
4722 else
4723 cond = NULL_RTX;
4724
4725 if (subtargets || code == SET
4726 || (REG_P (target) && REG_P (source)
4727 && REGNO (target) != REGNO (source)))
4728 {
4729 /* After arm_reorg has been called, we can't fix up expensive
4730 constants by pushing them into memory so we must synthesize
4731 them in-line, regardless of the cost. This is only likely to
4732 be more costly on chips that have load delay slots and we are
4733 compiling without running the scheduler (so no splitting
4734 occurred before the final instruction emission).
4735
4736 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4737 */
4738 if (!cfun->machine->after_arm_reorg
4739 && !cond
4740 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4741 1, 0)
4742 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4743 + (code != SET))))
4744 {
4745 if (code == SET)
4746 {
4747 /* Currently SET is the only monadic value for CODE; all
4748 the rest are dyadic. */
4749 if (TARGET_USE_MOVT)
4750 arm_emit_movpair (target, GEN_INT (val));
4751 else
4752 emit_set_insn (target, GEN_INT (val));
4753
4754 return 1;
4755 }
4756 else
4757 {
4758 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4759
4760 if (TARGET_USE_MOVT)
4761 arm_emit_movpair (temp, GEN_INT (val));
4762 else
4763 emit_set_insn (temp, GEN_INT (val));
4764
4765 /* For MINUS, the value is subtracted from, since we never
4766 have subtraction of a constant. */
4767 if (code == MINUS)
4768 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4769 else
4770 emit_set_insn (target,
4771 gen_rtx_fmt_ee (code, mode, source, temp));
4772 return 2;
4773 }
4774 }
4775 }
4776
4777 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4778 1);
4779 }
4780
4781 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4782 ARM/THUMB2 immediates, and add up to VAL.
4783 The function return value gives the number of insns required. */
4784 static int
4785 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4786 struct four_ints *return_sequence)
4787 {
4788 int best_consecutive_zeros = 0;
4789 int i;
4790 int best_start = 0;
4791 int insns1, insns2;
4792 struct four_ints tmp_sequence;
4793
4794 /* If we aren't targeting ARM, the best place to start is always at
4795 the bottom, otherwise look more closely. */
4796 if (TARGET_ARM)
4797 {
4798 for (i = 0; i < 32; i += 2)
4799 {
4800 int consecutive_zeros = 0;
4801
4802 if (!(val & (3 << i)))
4803 {
4804 while ((i < 32) && !(val & (3 << i)))
4805 {
4806 consecutive_zeros += 2;
4807 i += 2;
4808 }
4809 if (consecutive_zeros > best_consecutive_zeros)
4810 {
4811 best_consecutive_zeros = consecutive_zeros;
4812 best_start = i - consecutive_zeros;
4813 }
4814 i -= 2;
4815 }
4816 }
4817 }
4818
4819 /* So long as it won't require any more insns to do so, it's
4820 desirable to emit a small constant (in bits 0...9) in the last
4821 insn. This way there is more chance that it can be combined with
4822 a later addressing insn to form a pre-indexed load or store
4823 operation. Consider:
4824
4825 *((volatile int *)0xe0000100) = 1;
4826 *((volatile int *)0xe0000110) = 2;
4827
4828 We want this to wind up as:
4829
4830 mov rA, #0xe0000000
4831 mov rB, #1
4832 str rB, [rA, #0x100]
4833 mov rB, #2
4834 str rB, [rA, #0x110]
4835
4836 rather than having to synthesize both large constants from scratch.
4837
4838 Therefore, we calculate how many insns would be required to emit
4839 the constant starting from `best_start', and also starting from
4840 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4841 yield a shorter sequence, we may as well use zero. */
4842 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4843 if (best_start != 0
4844 && ((HOST_WIDE_INT_1U << best_start) < val))
4845 {
4846 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4847 if (insns2 <= insns1)
4848 {
4849 *return_sequence = tmp_sequence;
4850 insns1 = insns2;
4851 }
4852 }
4853
4854 return insns1;
4855 }
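/* Worked example (illustrative) for the comment above: with
   VAL == 0xe0000100 the zero-start attempt finds the two-immediate split

	{ 0xe0000000, 0x00000100 }

   and, being no longer than the best_start attempt, is the sequence that
   gets returned; the small 0x100 part therefore ends up in the last insn,
   which is exactly what lets it fold into the str offset shown above.  */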
4856
4857 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4858 static int
4859 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4860 struct four_ints *return_sequence, int i)
4861 {
4862 int remainder = val & 0xffffffff;
4863 int insns = 0;
4864
4865 /* Try and find a way of doing the job in either two or three
4866 instructions.
4867
4868 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4869 location. We start at position I. This may be the MSB, or
4870 optimal_immediate_sequence may have positioned it at the largest block
4871 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4872 wrapping around to the top of the word when we drop off the bottom.
4873 In the worst case this code should produce no more than four insns.
4874
4875 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4876 constants, shifted to any arbitrary location. We should always start
4877 at the MSB. */
4878 do
4879 {
4880 int end;
4881 unsigned int b1, b2, b3, b4;
4882 unsigned HOST_WIDE_INT result;
4883 int loc;
4884
4885 gcc_assert (insns < 4);
4886
4887 if (i <= 0)
4888 i += 32;
4889
4890 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4891 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4892 {
4893 loc = i;
4894 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4895 /* We can use addw/subw for the last 12 bits. */
4896 result = remainder;
4897 else
4898 {
4899 /* Use an 8-bit shifted/rotated immediate. */
4900 end = i - 8;
4901 if (end < 0)
4902 end += 32;
4903 result = remainder & ((0x0ff << end)
4904 | ((i < end) ? (0xff >> (32 - end))
4905 : 0));
4906 i -= 8;
4907 }
4908 }
4909 else
4910 {
4911 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4912 arbitrary shifts. */
4913 i -= TARGET_ARM ? 2 : 1;
4914 continue;
4915 }
4916
4917 /* Next, see if we can do a better job with a thumb2 replicated
4918 constant.
4919
4920 We do it this way around to catch the cases like 0x01F001E0 where
4921 two 8-bit immediates would work, but a replicated constant would
4922 make it worse.
4923
4924 TODO: 16-bit constants that don't clear all the bits, but still win.
4925 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4926 if (TARGET_THUMB2)
4927 {
4928 b1 = (remainder & 0xff000000) >> 24;
4929 b2 = (remainder & 0x00ff0000) >> 16;
4930 b3 = (remainder & 0x0000ff00) >> 8;
4931 b4 = remainder & 0xff;
4932
4933 if (loc > 24)
4934 {
4935 /* The 8-bit immediate already found clears b1 (and maybe b2),
4936 but must leave b3 and b4 alone. */
4937
4938 /* First try to find a 32-bit replicated constant that clears
4939 almost everything. We can assume that we can't do it in one,
4940 or else we wouldn't be here. */
4941 unsigned int tmp = b1 & b2 & b3 & b4;
4942 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4943 + (tmp << 24);
4944 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4945 + (tmp == b3) + (tmp == b4);
4946 if (tmp
4947 && (matching_bytes >= 3
4948 || (matching_bytes == 2
4949 && const_ok_for_op (remainder & ~tmp2, code))))
4950 {
4951 /* At least 3 of the bytes match, and the fourth has at
4952 least as many bits set, or two of the bytes match
4953 and it will only require one more insn to finish. */
4954 result = tmp2;
4955 i = tmp != b1 ? 32
4956 : tmp != b2 ? 24
4957 : tmp != b3 ? 16
4958 : 8;
4959 }
4960
4961 /* Second, try to find a 16-bit replicated constant that can
4962 leave three of the bytes clear. If b2 or b4 is already
4963 zero, then we can. If the 8-bit from above would not
4964 clear b2 anyway, then we still win. */
4965 else if (b1 == b3 && (!b2 || !b4
4966 || (remainder & 0x00ff0000 & ~result)))
4967 {
4968 result = remainder & 0xff00ff00;
4969 i = 24;
4970 }
4971 }
4972 else if (loc > 16)
4973 {
4974 /* The 8-bit immediate already found clears b2 (and maybe b3)
4975 and we don't get here unless b1 is already clear, but it will
4976 leave b4 unchanged. */
4977
4978 /* If we can clear b2 and b4 at once, then we win, since the
4979 8-bits couldn't possibly reach that far. */
4980 if (b2 == b4)
4981 {
4982 result = remainder & 0x00ff00ff;
4983 i = 16;
4984 }
4985 }
4986 }
4987
4988 return_sequence->i[insns++] = result;
4989 remainder &= ~result;
4990
4991 if (code == SET || code == MINUS)
4992 code = PLUS;
4993 }
4994 while (remainder);
4995
4996 return insns;
4997 }
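/* Background sketch (illustration only; the compiler uses const_ok_for_arm
   and friends for this): an ARM-mode data-processing immediate is an 8-bit
   value rotated right by an even amount, which is why the loop above steps
   in units of 2 bits and grabs 8 bits at a time.  A minimal stand-alone
   check of that encoding rule, under that assumption, would be:

	static bool
	is_arm_rotated_immediate (unsigned int x)
	{
	  for (int r = 0; r < 32; r += 2)
	    {
	      /* Rotate X left by R; if the result fits in 8 bits then X
		 is an 8-bit value rotated right by R.  */
	      unsigned int v = r ? ((x << r) | (x >> (32 - r))) : x;
	      if ((v & ~0xffu) == 0)
		return true;
	    }
	  return false;
	}

   For example 0x000000ff, 0x0000ff00 and 0xff000000 all fit in one insn,
   while 0x00ff00ff and 0x0001fe00 do not (the latter would need an odd
   rotation), so they must be split into several immediates as above.  */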
4998
4999 /* Emit an instruction with the indicated PATTERN. If COND is
5000 non-NULL, conditionalize the execution of the instruction on COND
5001 being true. */
5002
5003 static void
5004 emit_constant_insn (rtx cond, rtx pattern)
5005 {
5006 if (cond)
5007 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
5008 emit_insn (pattern);
5009 }
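/* For example (illustrative), if COND is a comparison such as
   (eq (reg CC) (const_int 0)), wrapping the SET in a COND_EXEC makes the
   final output a predicated instruction -- "moveq r0, #10" rather than
   "mov r0, #10" -- so every insn emitted by arm_gen_constant below
   inherits the caller's condition.  */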
5010
5011 /* As above, but extra parameter GENERATE which, if clear, suppresses
5012 RTL generation. */
5013
5014 static int
5015 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
5016 unsigned HOST_WIDE_INT val, rtx target, rtx source,
5017 int subtargets, int generate)
5018 {
5019 int can_invert = 0;
5020 int can_negate = 0;
5021 int final_invert = 0;
5022 int i;
5023 int set_sign_bit_copies = 0;
5024 int clear_sign_bit_copies = 0;
5025 int clear_zero_bit_copies = 0;
5026 int set_zero_bit_copies = 0;
5027 int insns = 0, neg_insns, inv_insns;
5028 unsigned HOST_WIDE_INT temp1, temp2;
5029 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
5030 struct four_ints *immediates;
5031 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5032
5033 /* Find out which operations are safe for a given CODE. Also do a quick
5034 check for degenerate cases; these can occur when DImode operations
5035 are split. */
5036 switch (code)
5037 {
5038 case SET:
5039 can_invert = 1;
5040 break;
5041
5042 case PLUS:
5043 can_negate = 1;
5044 break;
5045
5046 case IOR:
5047 if (remainder == 0xffffffff)
5048 {
5049 if (generate)
5050 emit_constant_insn (cond,
5051 gen_rtx_SET (target,
5052 GEN_INT (ARM_SIGN_EXTEND (val))));
5053 return 1;
5054 }
5055
5056 if (remainder == 0)
5057 {
5058 if (reload_completed && rtx_equal_p (target, source))
5059 return 0;
5060
5061 if (generate)
5062 emit_constant_insn (cond, gen_rtx_SET (target, source));
5063 return 1;
5064 }
5065 break;
5066
5067 case AND:
5068 if (remainder == 0)
5069 {
5070 if (generate)
5071 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5072 return 1;
5073 }
5074 if (remainder == 0xffffffff)
5075 {
5076 if (reload_completed && rtx_equal_p (target, source))
5077 return 0;
5078 if (generate)
5079 emit_constant_insn (cond, gen_rtx_SET (target, source));
5080 return 1;
5081 }
5082 can_invert = 1;
5083 break;
5084
5085 case XOR:
5086 if (remainder == 0)
5087 {
5088 if (reload_completed && rtx_equal_p (target, source))
5089 return 0;
5090 if (generate)
5091 emit_constant_insn (cond, gen_rtx_SET (target, source));
5092 return 1;
5093 }
5094
5095 if (remainder == 0xffffffff)
5096 {
5097 if (generate)
5098 emit_constant_insn (cond,
5099 gen_rtx_SET (target,
5100 gen_rtx_NOT (mode, source)));
5101 return 1;
5102 }
5103 final_invert = 1;
5104 break;
5105
5106 case MINUS:
5107 /* We treat MINUS as (val - source), since (source - val) is always
5108 passed as (source + (-val)). */
5109 if (remainder == 0)
5110 {
5111 if (generate)
5112 emit_constant_insn (cond,
5113 gen_rtx_SET (target,
5114 gen_rtx_NEG (mode, source)));
5115 return 1;
5116 }
5117 if (const_ok_for_arm (val))
5118 {
5119 if (generate)
5120 emit_constant_insn (cond,
5121 gen_rtx_SET (target,
5122 gen_rtx_MINUS (mode, GEN_INT (val),
5123 source)));
5124 return 1;
5125 }
5126
5127 break;
5128
5129 default:
5130 gcc_unreachable ();
5131 }
5132
5133 /* If we can do it in one insn get out quickly. */
5134 if (const_ok_for_op (val, code))
5135 {
5136 if (generate)
5137 emit_constant_insn (cond,
5138 gen_rtx_SET (target,
5139 (source
5140 ? gen_rtx_fmt_ee (code, mode, source,
5141 GEN_INT (val))
5142 : GEN_INT (val))));
5143 return 1;
5144 }
5145
5146 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5147 insn. */
5148 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5149 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5150 {
5151 if (generate)
5152 {
5153 if (mode == SImode && i == 16)
5154 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5155 smaller insn. */
5156 emit_constant_insn (cond,
5157 gen_zero_extendhisi2
5158 (target, gen_lowpart (HImode, source)));
5159 else
5160 /* Extz only supports SImode, but we can coerce the operands
5161 into that mode. */
5162 emit_constant_insn (cond,
5163 gen_extzv_t2 (gen_lowpart (SImode, target),
5164 gen_lowpart (SImode, source),
5165 GEN_INT (i), const0_rtx));
5166 }
5167
5168 return 1;
5169 }
5170
5171 /* Calculate a few attributes that may be useful for specific
5172 optimizations. */
5173 /* Count number of leading zeros. */
5174 for (i = 31; i >= 0; i--)
5175 {
5176 if ((remainder & (1 << i)) == 0)
5177 clear_sign_bit_copies++;
5178 else
5179 break;
5180 }
5181
5182 /* Count number of leading 1's. */
5183 for (i = 31; i >= 0; i--)
5184 {
5185 if ((remainder & (1 << i)) != 0)
5186 set_sign_bit_copies++;
5187 else
5188 break;
5189 }
5190
5191 /* Count number of trailing zero's. */
5192 for (i = 0; i <= 31; i++)
5193 {
5194 if ((remainder & (1 << i)) == 0)
5195 clear_zero_bit_copies++;
5196 else
5197 break;
5198 }
5199
5200 /* Count number of trailing 1's. */
5201 for (i = 0; i <= 31; i++)
5202 {
5203 if ((remainder & (1 << i)) != 0)
5204 set_zero_bit_copies++;
5205 else
5206 break;
5207 }
5208
5209 switch (code)
5210 {
5211 case SET:
5212 /* See if we can do this by sign_extending a constant that is known
5213 to be negative. This is a good way of doing it, since the shift
5214 may well merge into a subsequent insn. */
5215 if (set_sign_bit_copies > 1)
5216 {
5217 if (const_ok_for_arm
5218 (temp1 = ARM_SIGN_EXTEND (remainder
5219 << (set_sign_bit_copies - 1))))
5220 {
5221 if (generate)
5222 {
5223 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5224 emit_constant_insn (cond,
5225 gen_rtx_SET (new_src, GEN_INT (temp1)));
5226 emit_constant_insn (cond,
5227 gen_ashrsi3 (target, new_src,
5228 GEN_INT (set_sign_bit_copies - 1)));
5229 }
5230 return 2;
5231 }
5232 /* For an inverted constant, we will need to set the low bits,
5233 these will be shifted out of harm's way. */
5234 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5235 if (const_ok_for_arm (~temp1))
5236 {
5237 if (generate)
5238 {
5239 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5240 emit_constant_insn (cond,
5241 gen_rtx_SET (new_src, GEN_INT (temp1)));
5242 emit_constant_insn (cond,
5243 gen_ashrsi3 (target, new_src,
5244 GEN_INT (set_sign_bit_copies - 1)));
5245 }
5246 return 2;
5247 }
5248 }
5249
5250 /* See if we can calculate the value as the difference between two
5251 valid immediates. */
5252 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5253 {
5254 int topshift = clear_sign_bit_copies & ~1;
5255
5256 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5257 & (0xff000000 >> topshift));
5258
5259 /* If temp1 is zero, then that means the 9 most significant
5260 bits of remainder were 1 and we've caused it to overflow.
5261 When topshift is 0 we don't need to do anything since we
5262 can borrow from 'bit 32'. */
5263 if (temp1 == 0 && topshift != 0)
5264 temp1 = 0x80000000 >> (topshift - 1);
5265
5266 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5267
5268 if (const_ok_for_arm (temp2))
5269 {
5270 if (generate)
5271 {
5272 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5273 emit_constant_insn (cond,
5274 gen_rtx_SET (new_src, GEN_INT (temp1)));
5275 emit_constant_insn (cond,
5276 gen_addsi3 (target, new_src,
5277 GEN_INT (-temp2)));
5278 }
5279
5280 return 2;
5281 }
5282 }
5283
5284 /* See if we can generate this by setting the bottom (or the top)
5285 16 bits, and then shifting these into the other half of the
5286 word. We only look for the simplest cases, to do more would cost
5287 too much. Be careful, however, not to generate this when the
5288 alternative would take fewer insns. */
5289 if (val & 0xffff0000)
5290 {
5291 temp1 = remainder & 0xffff0000;
5292 temp2 = remainder & 0x0000ffff;
5293
5294 /* Overlaps outside this range are best done using other methods. */
5295 for (i = 9; i < 24; i++)
5296 {
5297 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5298 && !const_ok_for_arm (temp2))
5299 {
5300 rtx new_src = (subtargets
5301 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5302 : target);
5303 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5304 source, subtargets, generate);
5305 source = new_src;
5306 if (generate)
5307 emit_constant_insn
5308 (cond,
5309 gen_rtx_SET
5310 (target,
5311 gen_rtx_IOR (mode,
5312 gen_rtx_ASHIFT (mode, source,
5313 GEN_INT (i)),
5314 source)));
5315 return insns + 1;
5316 }
5317 }
5318
5319 /* Don't duplicate cases already considered. */
5320 for (i = 17; i < 24; i++)
5321 {
5322 if (((temp1 | (temp1 >> i)) == remainder)
5323 && !const_ok_for_arm (temp1))
5324 {
5325 rtx new_src = (subtargets
5326 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5327 : target);
5328 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5329 source, subtargets, generate);
5330 source = new_src;
5331 if (generate)
5332 emit_constant_insn
5333 (cond,
5334 gen_rtx_SET (target,
5335 gen_rtx_IOR
5336 (mode,
5337 gen_rtx_LSHIFTRT (mode, source,
5338 GEN_INT (i)),
5339 source)));
5340 return insns + 1;
5341 }
5342 }
5343 }
5344 break;
5345
5346 case IOR:
5347 case XOR:
5348 /* If we have IOR or XOR, and the constant can be loaded in a
5349 single instruction, and we can find a temporary to put it in,
5350 then this can be done in two instructions instead of 3-4. */
5351 if (subtargets
5352 /* TARGET can't be NULL if SUBTARGETS is 0. */
5353 || (reload_completed && !reg_mentioned_p (target, source)))
5354 {
5355 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5356 {
5357 if (generate)
5358 {
5359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5360
5361 emit_constant_insn (cond,
5362 gen_rtx_SET (sub, GEN_INT (val)));
5363 emit_constant_insn (cond,
5364 gen_rtx_SET (target,
5365 gen_rtx_fmt_ee (code, mode,
5366 source, sub)));
5367 }
5368 return 2;
5369 }
5370 }
5371
5372 if (code == XOR)
5373 break;
5374
5375 /* Convert
5376 x = y | constant (where the constant has set_sign_bit_copies leading 1s
5377 and the remaining bits 0, e.g. 0xfff00000) to
5378 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
5379
5380 This can be done in 2 instructions by using shifts with mov or mvn.
5381 e.g. for
5382 x = x | 0xfff00000;
5383 we generate:
5384 mvn r0, r0, asl #12
5385 mvn r0, r0, lsr #12 */
5386 if (set_sign_bit_copies > 8
5387 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5388 {
5389 if (generate)
5390 {
5391 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5392 rtx shift = GEN_INT (set_sign_bit_copies);
5393
5394 emit_constant_insn
5395 (cond,
5396 gen_rtx_SET (sub,
5397 gen_rtx_NOT (mode,
5398 gen_rtx_ASHIFT (mode,
5399 source,
5400 shift))));
5401 emit_constant_insn
5402 (cond,
5403 gen_rtx_SET (target,
5404 gen_rtx_NOT (mode,
5405 gen_rtx_LSHIFTRT (mode, sub,
5406 shift))));
5407 }
5408 return 2;
5409 }
5410
5411 /* Convert
5412 x = y | constant (which has set_zero_bit_copies trailing ones)
5413 to
5414 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5415
5416 For example, r0 = r0 | 0xfff generates
5417 mvn r0, r0, lsr #12
5418 mvn r0, r0, asl #12
5419
5420 */
5421 if (set_zero_bit_copies > 8
5422 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5423 {
5424 if (generate)
5425 {
5426 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5427 rtx shift = GEN_INT (set_zero_bit_copies);
5428
5429 emit_constant_insn
5430 (cond,
5431 gen_rtx_SET (sub,
5432 gen_rtx_NOT (mode,
5433 gen_rtx_LSHIFTRT (mode,
5434 source,
5435 shift))));
5436 emit_constant_insn
5437 (cond,
5438 gen_rtx_SET (target,
5439 gen_rtx_NOT (mode,
5440 gen_rtx_ASHIFT (mode, sub,
5441 shift))));
5442 }
5443 return 2;
5444 }
5445
5446 /* This will never be reached for Thumb2 because orn is a valid
5447 instruction. This is for Thumb1 and the ARM 32 bit cases.
5448
5449 x = y | constant (such that ~constant is a valid constant)
5450 Transform this to
5451 x = ~(~y & ~constant).
5452 */
5453 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5454 {
5455 if (generate)
5456 {
5457 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5458 emit_constant_insn (cond,
5459 gen_rtx_SET (sub,
5460 gen_rtx_NOT (mode, source)));
5461 source = sub;
5462 if (subtargets)
5463 sub = gen_reg_rtx (mode);
5464 emit_constant_insn (cond,
5465 gen_rtx_SET (sub,
5466 gen_rtx_AND (mode, source,
5467 GEN_INT (temp1))));
5468 emit_constant_insn (cond,
5469 gen_rtx_SET (target,
5470 gen_rtx_NOT (mode, sub)));
5471 }
5472 return 3;
5473 }
5474 break;
5475
5476 case AND:
5477 /* See if two shifts will do 2 or more insn's worth of work. */
5478 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5479 {
5480 HOST_WIDE_INT shift_mask = ((0xffffffff
5481 << (32 - clear_sign_bit_copies))
5482 & 0xffffffff);
5483
5484 if ((remainder | shift_mask) != 0xffffffff)
5485 {
5486 HOST_WIDE_INT new_val
5487 = ARM_SIGN_EXTEND (remainder | shift_mask);
5488
5489 if (generate)
5490 {
5491 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5492 insns = arm_gen_constant (AND, SImode, cond, new_val,
5493 new_src, source, subtargets, 1);
5494 source = new_src;
5495 }
5496 else
5497 {
5498 rtx targ = subtargets ? NULL_RTX : target;
5499 insns = arm_gen_constant (AND, mode, cond, new_val,
5500 targ, source, subtargets, 0);
5501 }
5502 }
5503
5504 if (generate)
5505 {
5506 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5507 rtx shift = GEN_INT (clear_sign_bit_copies);
5508
5509 emit_insn (gen_ashlsi3 (new_src, source, shift));
5510 emit_insn (gen_lshrsi3 (target, new_src, shift));
5511 }
5512
5513 return insns + 2;
5514 }
5515
5516 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5517 {
5518 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5519
5520 if ((remainder | shift_mask) != 0xffffffff)
5521 {
5522 HOST_WIDE_INT new_val
5523 = ARM_SIGN_EXTEND (remainder | shift_mask);
5524 if (generate)
5525 {
5526 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5527
5528 insns = arm_gen_constant (AND, mode, cond, new_val,
5529 new_src, source, subtargets, 1);
5530 source = new_src;
5531 }
5532 else
5533 {
5534 rtx targ = subtargets ? NULL_RTX : target;
5535
5536 insns = arm_gen_constant (AND, mode, cond, new_val,
5537 targ, source, subtargets, 0);
5538 }
5539 }
5540
5541 if (generate)
5542 {
5543 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5544 rtx shift = GEN_INT (clear_zero_bit_copies);
5545
5546 emit_insn (gen_lshrsi3 (new_src, source, shift));
5547 emit_insn (gen_ashlsi3 (target, new_src, shift));
5548 }
5549
5550 return insns + 2;
5551 }
5552
5553 break;
5554
5555 default:
5556 break;
5557 }
5558
5559 /* Calculate what the instruction sequences would be if we generated it
5560 normally, negated, or inverted. */
5561 if (code == AND)
5562 /* AND cannot be split into multiple insns, so invert and use BIC. */
5563 insns = 99;
5564 else
5565 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5566
5567 if (can_negate)
5568 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5569 &neg_immediates);
5570 else
5571 neg_insns = 99;
5572
5573 if (can_invert || final_invert)
5574 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5575 &inv_immediates);
5576 else
5577 inv_insns = 99;
5578
5579 immediates = &pos_immediates;
5580
5581 /* Is the negated immediate sequence more efficient? */
5582 if (neg_insns < insns && neg_insns <= inv_insns)
5583 {
5584 insns = neg_insns;
5585 immediates = &neg_immediates;
5586 }
5587 else
5588 can_negate = 0;
5589
5590 /* Is the inverted immediate sequence more efficient?
5591 We must allow for an extra NOT instruction for XOR operations, although
5592 there is some chance that the final 'mvn' will get optimized later. */
5593 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5594 {
5595 insns = inv_insns;
5596 immediates = &inv_immediates;
5597 }
5598 else
5599 {
5600 can_invert = 0;
5601 final_invert = 0;
5602 }
5603
5604 /* Now output the chosen sequence as instructions. */
5605 if (generate)
5606 {
5607 for (i = 0; i < insns; i++)
5608 {
5609 rtx new_src, temp1_rtx;
5610
5611 temp1 = immediates->i[i];
5612
5613 if (code == SET || code == MINUS)
5614 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5615 else if ((final_invert || i < (insns - 1)) && subtargets)
5616 new_src = gen_reg_rtx (mode);
5617 else
5618 new_src = target;
5619
5620 if (can_invert)
5621 temp1 = ~temp1;
5622 else if (can_negate)
5623 temp1 = -temp1;
5624
5625 temp1 = trunc_int_for_mode (temp1, mode);
5626 temp1_rtx = GEN_INT (temp1);
5627
5628 if (code == SET)
5629 ;
5630 else if (code == MINUS)
5631 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5632 else
5633 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5634
5635 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5636 source = new_src;
5637
5638 if (code == SET)
5639 {
5640 can_negate = can_invert;
5641 can_invert = 0;
5642 code = PLUS;
5643 }
5644 else if (code == MINUS)
5645 code = PLUS;
5646 }
5647 }
5648
5649 if (final_invert)
5650 {
5651 if (generate)
5652 emit_constant_insn (cond, gen_rtx_SET (target,
5653 gen_rtx_NOT (mode, source)));
5654 insns++;
5655 }
5656
5657 return insns;
5658 }
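/* Illustrative note on the AND handling above: when the inverted constant
   is a single valid immediate -- e.g. a mask of 0xffffff00, whose inverse
   is 0xff -- one instruction suffices and the ARM "and" pattern prints it
   as "bic rd, rn, #255".  When it is not, the forced inverted sequence
   produces a chain of BICs, e.g. (sketch) for a mask of 0x00ffff00:

	bic	rd, rn, #0xff000000
	bic	rd, rd, #0x000000ff

   which is why the positive form is priced at 99 "insns" above.  */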
5659
5660 /* Return TRUE if op is a constant where both the low and top words are
5661 suitable for RSB/RSC instructions. This is never true for Thumb, since
5662 we do not have RSC in that case. */
5663 static bool
5664 arm_const_double_prefer_rsbs_rsc (rtx op)
5665 {
5666 /* Thumb lacks RSC, so we never prefer that sequence. */
5667 if (TARGET_THUMB || !CONST_INT_P (op))
5668 return false;
5669 HOST_WIDE_INT hi, lo;
5670 lo = UINTVAL (op) & 0xffffffffULL;
5671 hi = UINTVAL (op) >> 32;
5672 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5673 }
5674
5675 /* Canonicalize a comparison so that we are more likely to recognize it.
5676 This can be done for a few constant compares, where we can make the
5677 immediate value easier to load. */
5678
5679 static void
5680 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5681 bool op0_preserve_value)
5682 {
5683 machine_mode mode;
5684 unsigned HOST_WIDE_INT i, maxval;
5685
5686 mode = GET_MODE (*op0);
5687 if (mode == VOIDmode)
5688 mode = GET_MODE (*op1);
5689
5690 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5691
5692 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5693 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5694 either reversed or (for constant OP1) adjusted to GE/LT.
5695 Similarly for GTU/LEU in Thumb mode. */
5696 if (mode == DImode)
5697 {
5698
5699 if (*code == GT || *code == LE
5700 || *code == GTU || *code == LEU)
5701 {
5702 /* Missing comparison. First try to use an available
5703 comparison. */
5704 if (CONST_INT_P (*op1))
5705 {
5706 i = INTVAL (*op1);
5707 switch (*code)
5708 {
5709 case GT:
5710 case LE:
5711 if (i != maxval)
5712 {
5713 /* Try to convert to GE/LT, unless that would be more
5714 expensive. */
5715 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5716 && arm_const_double_prefer_rsbs_rsc (*op1))
5717 return;
5718 *op1 = GEN_INT (i + 1);
5719 *code = *code == GT ? GE : LT;
5720 }
5721 else
5722 {
5723 /* GT maxval is always false, LE maxval is always true.
5724 We can't fold that away here as we must make a
5725 comparison, but we can fold them to comparisons
5726 with the same result that can be handled:
5727 op0 GT maxval -> op0 LT minval
5728 op0 LE maxval -> op0 GE minval
5729 where minval = (-maxval - 1). */
5730 *op1 = GEN_INT (-maxval - 1);
5731 *code = *code == GT ? LT : GE;
5732 }
5733 return;
5734
5735 case GTU:
5736 case LEU:
5737 if (i != ~((unsigned HOST_WIDE_INT) 0))
5738 {
5739 /* Try to convert to GEU/LTU, unless that would
5740 be more expensive. */
5741 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5742 && arm_const_double_prefer_rsbs_rsc (*op1))
5743 return;
5744 *op1 = GEN_INT (i + 1);
5745 *code = *code == GTU ? GEU : LTU;
5746 }
5747 else
5748 {
5749 /* GTU ~0 is always false, LEU ~0 is always true.
5750 We can't fold that away here as we must make a
5751 comparison, but we can fold them to comparisons
5752 with the same result that can be handled:
5753 op0 GTU ~0 -> op0 LTU 0
5754 op0 LEU ~0 -> op0 GEU 0. */
5755 *op1 = const0_rtx;
5756 *code = *code == GTU ? LTU : GEU;
5757 }
5758 return;
5759
5760 default:
5761 gcc_unreachable ();
5762 }
5763 }
5764
5765 if (!op0_preserve_value)
5766 {
5767 std::swap (*op0, *op1);
5768 *code = (int)swap_condition ((enum rtx_code)*code);
5769 }
5770 }
5771 return;
5772 }
5773
5774 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5775 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5776 to facilitate possible combining with a cmp into 'ands'. */
5777 if (mode == SImode
5778 && GET_CODE (*op0) == ZERO_EXTEND
5779 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5780 && GET_MODE (XEXP (*op0, 0)) == QImode
5781 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5782 && subreg_lowpart_p (XEXP (*op0, 0))
5783 && *op1 == const0_rtx)
5784 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5785 GEN_INT (255));
5786
5787 /* Comparisons smaller than DImode. Only adjust comparisons against
5788 an out-of-range constant. */
5789 if (!CONST_INT_P (*op1)
5790 || const_ok_for_arm (INTVAL (*op1))
5791 || const_ok_for_arm (- INTVAL (*op1)))
5792 return;
5793
5794 i = INTVAL (*op1);
5795
5796 switch (*code)
5797 {
5798 case EQ:
5799 case NE:
5800 return;
5801
5802 case GT:
5803 case LE:
5804 if (i != maxval
5805 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5806 {
5807 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5808 *code = *code == GT ? GE : LT;
5809 return;
5810 }
5811 break;
5812
5813 case GE:
5814 case LT:
5815 if (i != ~maxval
5816 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5817 {
5818 *op1 = GEN_INT (i - 1);
5819 *code = *code == GE ? GT : LE;
5820 return;
5821 }
5822 break;
5823
5824 case GTU:
5825 case LEU:
5826 if (i != ~((unsigned HOST_WIDE_INT) 0)
5827 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5828 {
5829 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5830 *code = *code == GTU ? GEU : LTU;
5831 return;
5832 }
5833 break;
5834
5835 case GEU:
5836 case LTU:
5837 if (i != 0
5838 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5839 {
5840 *op1 = GEN_INT (i - 1);
5841 *code = *code == GEU ? GTU : LEU;
5842 return;
5843 }
5844 break;
5845
5846 default:
5847 gcc_unreachable ();
5848 }
5849 }
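/* Worked example (illustrative): for a signed SImode comparison
   "x > 0x00ffffff", neither 0x00ffffff nor its negation is a valid ARM
   immediate, but 0x01000000 is, so the GT/LE case above rewrites the test
   as "x >= 0x01000000" and the comparison can then be emitted as

	cmp	rX, #16777216	@ 0x01000000

   with a GE condition, which is equivalent for all 32-bit integer x.  */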
5850
5851
5852 /* Define how to find the value returned by a function. */
5853
5854 static rtx
5855 arm_function_value(const_tree type, const_tree func,
5856 bool outgoing ATTRIBUTE_UNUSED)
5857 {
5858 machine_mode mode;
5859 int unsignedp ATTRIBUTE_UNUSED;
5860 rtx r ATTRIBUTE_UNUSED;
5861
5862 mode = TYPE_MODE (type);
5863
5864 if (TARGET_AAPCS_BASED)
5865 return aapcs_allocate_return_reg (mode, type, func);
5866
5867 /* Promote integer types. */
5868 if (INTEGRAL_TYPE_P (type))
5869 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5870
5871 /* Promote small structs returned in a register to full-word size
5872 for big-endian AAPCS. */
5873 if (arm_return_in_msb (type))
5874 {
5875 HOST_WIDE_INT size = int_size_in_bytes (type);
5876 if (size % UNITS_PER_WORD != 0)
5877 {
5878 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5879 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5880 }
5881 }
5882
5883 return arm_libcall_value_1 (mode);
5884 }
5885
5886 /* libcall hashtable helpers. */
5887
5888 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5889 {
5890 static inline hashval_t hash (const rtx_def *);
5891 static inline bool equal (const rtx_def *, const rtx_def *);
5892 static inline void remove (rtx_def *);
5893 };
5894
5895 inline bool
5896 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5897 {
5898 return rtx_equal_p (p1, p2);
5899 }
5900
5901 inline hashval_t
5902 libcall_hasher::hash (const rtx_def *p1)
5903 {
5904 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5905 }
5906
5907 typedef hash_table<libcall_hasher> libcall_table_type;
5908
5909 static void
5910 add_libcall (libcall_table_type *htab, rtx libcall)
5911 {
5912 *htab->find_slot (libcall, INSERT) = libcall;
5913 }
5914
5915 static bool
5916 arm_libcall_uses_aapcs_base (const_rtx libcall)
5917 {
5918 static bool init_done = false;
5919 static libcall_table_type *libcall_htab = NULL;
5920
5921 if (!init_done)
5922 {
5923 init_done = true;
5924
5925 libcall_htab = new libcall_table_type (31);
5926 add_libcall (libcall_htab,
5927 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5928 add_libcall (libcall_htab,
5929 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5930 add_libcall (libcall_htab,
5931 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5932 add_libcall (libcall_htab,
5933 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5934
5935 add_libcall (libcall_htab,
5936 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5937 add_libcall (libcall_htab,
5938 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5939 add_libcall (libcall_htab,
5940 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5941 add_libcall (libcall_htab,
5942 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5943
5944 add_libcall (libcall_htab,
5945 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5946 add_libcall (libcall_htab,
5947 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5948 add_libcall (libcall_htab,
5949 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5950 add_libcall (libcall_htab,
5951 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5952 add_libcall (libcall_htab,
5953 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5954 add_libcall (libcall_htab,
5955 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5956 add_libcall (libcall_htab,
5957 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5958 add_libcall (libcall_htab,
5959 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5960 add_libcall (libcall_htab,
5961 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5962 add_libcall (libcall_htab,
5963 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5964
5965 /* Values from double-precision helper functions are returned in core
5966 registers if the selected core only supports single-precision
5967 arithmetic, even if we are using the hard-float ABI. The same is
5968 true for single-precision helpers except in case of MVE, because in
5969 MVE we will be using the hard-float ABI on a CPU which doesn't support
5970 single-precision operations in hardware. In MVE the following check
5971 enables use of emulation for the single-precision arithmetic
5972 operations. */
5973 if (TARGET_HAVE_MVE)
5974 {
5975 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5976 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5977 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5978 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5979 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5980 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5981 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5982 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5983 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5984 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5985 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5986 }
5987 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5988 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5989 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5990 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5991 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5992 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5993 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5994 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5995 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5996 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5997 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5998 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5999 SFmode));
6000 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
6001 DFmode));
6002 add_libcall (libcall_htab,
6003 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
6004 }
6005
6006 return libcall && libcall_htab->find (libcall) != NULL;
6007 }
6008
6009 static rtx
6010 arm_libcall_value_1 (machine_mode mode)
6011 {
6012 if (TARGET_AAPCS_BASED)
6013 return aapcs_libcall_value (mode);
6014 else if (TARGET_IWMMXT_ABI
6015 && arm_vector_mode_supported_p (mode))
6016 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
6017 else
6018 return gen_rtx_REG (mode, ARG_REGISTER (1));
6019 }
6020
6021 /* Define how to find the value returned by a library function
6022 assuming the value has mode MODE. */
6023
6024 static rtx
6025 arm_libcall_value (machine_mode mode, const_rtx libcall)
6026 {
6027 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
6028 && GET_MODE_CLASS (mode) == MODE_FLOAT)
6029 {
6030 /* The following libcalls return their result in integer registers,
6031 even though they return a floating point value. */
6032 if (arm_libcall_uses_aapcs_base (libcall))
6033 return gen_rtx_REG (mode, ARG_REGISTER(1));
6034
6035 }
6036
6037 return arm_libcall_value_1 (mode);
6038 }
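/* For instance (illustrative), the RTABI floating-point helpers recorded
   in arm_libcall_uses_aapcs_base -- e.g. the SFmode to DFmode extension,
   __aeabi_f2d on EABI targets -- return their result in r0/r1 rather than
   in d0 even when the program otherwise uses the VFP variant of the AAPCS;
   that is the case handled by the ARG_REGISTER path above.  */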
6039
6040 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6041
6042 static bool
6043 arm_function_value_regno_p (const unsigned int regno)
6044 {
6045 if (regno == ARG_REGISTER (1)
6046 || (TARGET_32BIT
6047 && TARGET_AAPCS_BASED
6048 && TARGET_HARD_FLOAT
6049 && regno == FIRST_VFP_REGNUM)
6050 || (TARGET_IWMMXT_ABI
6051 && regno == FIRST_IWMMXT_REGNUM))
6052 return true;
6053
6054 return false;
6055 }
6056
6057 /* Determine the amount of memory needed to store the possible return
6058 registers of an untyped call. */
6059 int
6060 arm_apply_result_size (void)
6061 {
6062 int size = 16;
6063
6064 if (TARGET_32BIT)
6065 {
6066 if (TARGET_HARD_FLOAT_ABI)
6067 size += 32;
6068 if (TARGET_IWMMXT_ABI)
6069 size += 8;
6070 }
6071
6072 return size;
6073 }
6074
6075 /* Decide whether TYPE should be returned in memory (true)
6076 or in a register (false). FNTYPE is the type of the function making
6077 the call. */
6078 static bool
6079 arm_return_in_memory (const_tree type, const_tree fntype)
6080 {
6081 HOST_WIDE_INT size;
6082
6083 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6084
6085 if (TARGET_AAPCS_BASED)
6086 {
6087 /* Simple, non-aggregate types (i.e. not including vectors and
6088 complex) are always returned in a register (or registers).
6089 We don't care about which register here, so we can short-cut
6090 some of the detail. */
6091 if (!AGGREGATE_TYPE_P (type)
6092 && TREE_CODE (type) != VECTOR_TYPE
6093 && TREE_CODE (type) != COMPLEX_TYPE)
6094 return false;
6095
6096 /* Any return value that is no larger than one word can be
6097 returned in r0. */
6098 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6099 return false;
6100
6101 /* Check any available co-processors to see if they accept the
6102 type as a register candidate (VFP, for example, can return
6103 some aggregates in consecutive registers). These aren't
6104 available if the call is variadic. */
6105 if (aapcs_select_return_coproc (type, fntype) >= 0)
6106 return false;
6107
6108 /* Vector values should be returned using ARM registers, not
6109 memory (unless they're over 16 bytes, which will break since
6110 we only have four call-clobbered registers to play with). */
6111 if (TREE_CODE (type) == VECTOR_TYPE)
6112 return (size < 0 || size > (4 * UNITS_PER_WORD));
6113
6114 /* The rest go in memory. */
6115 return true;
6116 }
6117
6118 if (TREE_CODE (type) == VECTOR_TYPE)
6119 return (size < 0 || size > (4 * UNITS_PER_WORD));
6120
6121 if (!AGGREGATE_TYPE_P (type) &&
6122 (TREE_CODE (type) != VECTOR_TYPE))
6123 /* All simple types are returned in registers. */
6124 return false;
6125
6126 if (arm_abi != ARM_ABI_APCS)
6127 {
6128 /* ATPCS and later return aggregate types in memory only if they are
6129 larger than a word (or are variable size). */
6130 return (size < 0 || size > UNITS_PER_WORD);
6131 }
6132
6133 /* For the arm-wince targets we choose to be compatible with Microsoft's
6134 ARM and Thumb compilers, which always return aggregates in memory. */
6135 #ifndef ARM_WINCE
6136 /* All structures/unions bigger than one word are returned in memory.
6137 Also catch the case where int_size_in_bytes returns -1. In this case
6138 the aggregate is either huge or of variable size, and in either case
6139 we will want to return it via memory and not in a register. */
6140 if (size < 0 || size > UNITS_PER_WORD)
6141 return true;
6142
6143 if (TREE_CODE (type) == RECORD_TYPE)
6144 {
6145 tree field;
6146
6147 /* For a struct the APCS says that we only return in a register
6148 if the type is 'integer like' and every addressable element
6149 has an offset of zero. For practical purposes this means
6150 that the structure can have at most one non bit-field element
6151 and that this element must be the first one in the structure. */
6152
6153 /* Find the first field, ignoring non FIELD_DECL things which will
6154 have been created by C++. */
6155 /* NOTE: This code is deprecated and has not been updated to handle
6156 DECL_FIELD_ABI_IGNORED. */
6157 for (field = TYPE_FIELDS (type);
6158 field && TREE_CODE (field) != FIELD_DECL;
6159 field = DECL_CHAIN (field))
6160 continue;
6161
6162 if (field == NULL)
6163 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6164
6165 /* Check that the first field is valid for returning in a register. */
6166
6167 /* ... Floats are not allowed */
6168 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6169 return true;
6170
6171 /* ... Aggregates that are not themselves valid for returning in
6172 a register are not allowed. */
6173 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6174 return true;
6175
6176 /* Now check the remaining fields, if any. Only bitfields are allowed,
6177 since they are not addressable. */
6178 for (field = DECL_CHAIN (field);
6179 field;
6180 field = DECL_CHAIN (field))
6181 {
6182 if (TREE_CODE (field) != FIELD_DECL)
6183 continue;
6184
6185 if (!DECL_BIT_FIELD_TYPE (field))
6186 return true;
6187 }
6188
6189 return false;
6190 }
6191
6192 if (TREE_CODE (type) == UNION_TYPE)
6193 {
6194 tree field;
6195
6196 /* Unions can be returned in registers if every element is
6197 integral, or can be returned in an integer register. */
6198 for (field = TYPE_FIELDS (type);
6199 field;
6200 field = DECL_CHAIN (field))
6201 {
6202 if (TREE_CODE (field) != FIELD_DECL)
6203 continue;
6204
6205 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6206 return true;
6207
6208 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6209 return true;
6210 }
6211
6212 return false;
6213 }
6214 #endif /* not ARM_WINCE */
6215
6216 /* Return all other types in memory. */
6217 return true;
6218 }
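/* Illustrative examples of the old APCS rules above (only reached when
   arm_abi == ARM_ABI_APCS and not on arm-wince), assuming 4-byte words:

	struct a { int x; };		   returned in r0
	struct b { float f; };		   first field is a float, so memory
	struct c { short lo; short hi; };  two addressable fields, so memory

   AAPCS-based targets never get here; they use the separate logic at the
   top of this function.  */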
6219
6220 const struct pcs_attribute_arg
6221 {
6222 const char *arg;
6223 enum arm_pcs value;
6224 } pcs_attribute_args[] =
6225 {
6226 {"aapcs", ARM_PCS_AAPCS},
6227 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6228 #if 0
6229 /* We could recognize these, but changes would be needed elsewhere
6230 * to implement them. */
6231 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6232 {"atpcs", ARM_PCS_ATPCS},
6233 {"apcs", ARM_PCS_APCS},
6234 #endif
6235 {NULL, ARM_PCS_UNKNOWN}
6236 };
6237
6238 static enum arm_pcs
6239 arm_pcs_from_attribute (tree attr)
6240 {
6241 const struct pcs_attribute_arg *ptr;
6242 const char *arg;
6243
6244 /* Get the value of the argument. */
6245 if (TREE_VALUE (attr) == NULL_TREE
6246 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6247 return ARM_PCS_UNKNOWN;
6248
6249 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6250
6251 /* Check it against the list of known arguments. */
6252 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6253 if (streq (arg, ptr->arg))
6254 return ptr->value;
6255
6256 /* An unrecognized PCS name. */
6257 return ARM_PCS_UNKNOWN;
6258 }
6259
6260 /* Get the PCS variant to use for this call. TYPE is the function's type
6261 specification, DECL is the specific declaration. DECL may be null if
6262 the call could be indirect or if this is a library call. */
6263 static enum arm_pcs
6264 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6265 {
6266 bool user_convention = false;
6267 enum arm_pcs user_pcs = arm_pcs_default;
6268 tree attr;
6269
6270 gcc_assert (type);
6271
6272 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6273 if (attr)
6274 {
6275 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6276 user_convention = true;
6277 }
6278
6279 if (TARGET_AAPCS_BASED)
6280 {
6281 /* Detect varargs functions. These always use the base rules
6282 (no argument is ever a candidate for a co-processor
6283 register). */
6284 bool base_rules = stdarg_p (type);
6285
6286 if (user_convention)
6287 {
6288 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6289 sorry ("non-AAPCS derived PCS variant");
6290 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6291 error ("variadic functions must use the base AAPCS variant");
6292 }
6293
6294 if (base_rules)
6295 return ARM_PCS_AAPCS;
6296 else if (user_convention)
6297 return user_pcs;
6298 #if 0
6299 /* Unfortunately, this is not safe and can lead to wrong code
6300 being generated (PR96882). Not all calls into the back-end
6301 pass the DECL, so it is unsafe to make any PCS-changing
6302 decisions based on it. In particular the RETURN_IN_MEMORY
6303 hook is only ever passed a TYPE. This needs revisiting to
6304 see if there are any partial improvements that can be
6305 re-enabled. */
6306 else if (decl && flag_unit_at_a_time)
6307 {
6308 /* Local functions never leak outside this compilation unit,
6309 so we are free to use whatever conventions are
6310 appropriate. */
6311 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6312 cgraph_node *local_info_node
6313 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6314 if (local_info_node && local_info_node->local)
6315 return ARM_PCS_AAPCS_LOCAL;
6316 }
6317 #endif
6318 }
6319 else if (user_convention && user_pcs != arm_pcs_default)
6320 sorry ("PCS variant");
6321
6322 /* For everything else we use the target's default. */
6323 return arm_pcs_default;
6324 }
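/* Illustrative use of the "pcs" attribute handled above, on an AAPCS-based
   hard-float configuration where arm_pcs_default is ARM_PCS_AAPCS_VFP:

	double f (double) __attribute__ ((pcs ("aapcs")));

   forces calls to f to use the base variant (arguments and result in core
   registers), while a variadic function always uses the base variant and
   an attribute requesting anything else is rejected above.  */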
6325
6326
6327 static void
6328 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6329 const_tree fntype ATTRIBUTE_UNUSED,
6330 rtx libcall ATTRIBUTE_UNUSED,
6331 const_tree fndecl ATTRIBUTE_UNUSED)
6332 {
6333 /* Record the unallocated VFP registers. */
6334 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6335 pcum->aapcs_vfp_reg_alloc = 0;
6336 }
6337
6338 /* Bitmasks that indicate whether earlier versions of GCC would have
6339 taken a different path through the ABI logic. This should result in
6340 a -Wpsabi warning if the earlier path led to a different ABI decision.
6341
6342 WARN_PSABI_EMPTY_CXX17_BASE
6343 Indicates that the type includes an artificial empty C++17 base field
6344 that, prior to GCC 10.1, would prevent the type from being treated as
6345 a HFA or HVA. See PR94711 for details.
6346
6347 WARN_PSABI_NO_UNIQUE_ADDRESS
6348 Indicates that the type includes an empty [[no_unique_address]] field
6349 that, prior to GCC 10.1, would prevent the type from being treated as
6350 a HFA or HVA. */
6351 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6352 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6353 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6354
6355 /* Walk down the type tree of TYPE counting consecutive base elements.
6356 If *MODEP is VOIDmode, then set it to the first valid floating point
6357 type. If a non-floating point type is found, or if a floating point
6358 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6359 otherwise return the count in the sub-tree.
6360
6361 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6362 function has changed its behavior relative to earlier versions of GCC.
6363 Normally the argument should be nonnull and point to a zero-initialized
6364 variable. The function then records whether the ABI decision might
6365 be affected by a known fix to the ABI logic, setting the associated
6366 WARN_PSABI_* bits if so.
6367
6368 When the argument is instead a null pointer, the function tries to
6369 simulate the behavior of GCC before all such ABI fixes were made.
6370 This is useful to check whether the function returns something
6371 different after the ABI fixes. */
6372 static int
6373 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6374 unsigned int *warn_psabi_flags)
6375 {
6376 machine_mode mode;
6377 HOST_WIDE_INT size;
6378
6379 switch (TREE_CODE (type))
6380 {
6381 case REAL_TYPE:
6382 mode = TYPE_MODE (type);
6383 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6384 return -1;
6385
6386 if (*modep == VOIDmode)
6387 *modep = mode;
6388
6389 if (*modep == mode)
6390 return 1;
6391
6392 break;
6393
6394 case COMPLEX_TYPE:
6395 mode = TYPE_MODE (TREE_TYPE (type));
6396 if (mode != DFmode && mode != SFmode)
6397 return -1;
6398
6399 if (*modep == VOIDmode)
6400 *modep = mode;
6401
6402 if (*modep == mode)
6403 return 2;
6404
6405 break;
6406
6407 case VECTOR_TYPE:
6408 /* Use V2SImode and V4SImode as representatives of all 64-bit
6409 and 128-bit vector types, whether or not those modes are
6410 supported with the present options. */
6411 size = int_size_in_bytes (type);
6412 switch (size)
6413 {
6414 case 8:
6415 mode = V2SImode;
6416 break;
6417 case 16:
6418 mode = V4SImode;
6419 break;
6420 default:
6421 return -1;
6422 }
6423
6424 if (*modep == VOIDmode)
6425 *modep = mode;
6426
6427 /* Vector modes are considered to be opaque: two vectors are
6428 equivalent for the purposes of being homogeneous aggregates
6429 if they are the same size. */
6430 if (*modep == mode)
6431 return 1;
6432
6433 break;
6434
6435 case ARRAY_TYPE:
6436 {
6437 int count;
6438 tree index = TYPE_DOMAIN (type);
6439
6440 /* Can't handle incomplete types nor sizes that are not
6441 fixed. */
6442 if (!COMPLETE_TYPE_P (type)
6443 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6444 return -1;
6445
6446 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6447 warn_psabi_flags);
6448 if (count == -1
6449 || !index
6450 || !TYPE_MAX_VALUE (index)
6451 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6452 || !TYPE_MIN_VALUE (index)
6453 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6454 || count < 0)
6455 return -1;
6456
6457 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6458 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6459
6460 /* There must be no padding. */
6461 if (wi::to_wide (TYPE_SIZE (type))
6462 != count * GET_MODE_BITSIZE (*modep))
6463 return -1;
6464
6465 return count;
6466 }
6467
6468 case RECORD_TYPE:
6469 {
6470 int count = 0;
6471 int sub_count;
6472 tree field;
6473
6474 /* Can't handle incomplete types nor sizes that are not
6475 fixed. */
6476 if (!COMPLETE_TYPE_P (type)
6477 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6478 return -1;
6479
6480 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6481 {
6482 if (TREE_CODE (field) != FIELD_DECL)
6483 continue;
6484
6485 if (DECL_FIELD_ABI_IGNORED (field))
6486 {
6487 /* See whether this is something that earlier versions of
6488 GCC failed to ignore. */
6489 unsigned int flag;
6490 if (lookup_attribute ("no_unique_address",
6491 DECL_ATTRIBUTES (field)))
6492 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6493 else if (cxx17_empty_base_field_p (field))
6494 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6495 else
6496 /* No compatibility problem. */
6497 continue;
6498
6499 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6500 if (warn_psabi_flags)
6501 {
6502 *warn_psabi_flags |= flag;
6503 continue;
6504 }
6505 }
6506 /* A zero-width bitfield may affect layout in some
6507 circumstances, but adds no members. The determination
6508 of whether or not a type is an HFA is performed after
6509 layout is complete, so if the type still looks like an
6510 HFA afterwards, it is still classed as one. This is
6511 potentially an ABI break for the hard-float ABI. */
6512 else if (DECL_BIT_FIELD (field)
6513 && integer_zerop (DECL_SIZE (field)))
6514 {
6515 /* Prior to GCC-12 these fields were stripped early,
6516 hiding them from the back-end entirely and
6517 resulting in the correct behaviour for argument
6518 passing. Simulate that old behaviour without
6519 generating a warning. */
6520 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6521 continue;
6522 if (warn_psabi_flags)
6523 {
6524 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6525 continue;
6526 }
6527 }
6528
6529 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6530 warn_psabi_flags);
6531 if (sub_count < 0)
6532 return -1;
6533 count += sub_count;
6534 }
6535
6536 /* There must be no padding. */
6537 if (wi::to_wide (TYPE_SIZE (type))
6538 != count * GET_MODE_BITSIZE (*modep))
6539 return -1;
6540
6541 return count;
6542 }
6543
6544 case UNION_TYPE:
6545 case QUAL_UNION_TYPE:
6546 {
6547 /* These aren't very interesting except in a degenerate case. */
6548 int count = 0;
6549 int sub_count;
6550 tree field;
6551
6552 /* Can't handle incomplete types nor sizes that are not
6553 fixed. */
6554 if (!COMPLETE_TYPE_P (type)
6555 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6556 return -1;
6557
6558 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6559 {
6560 if (TREE_CODE (field) != FIELD_DECL)
6561 continue;
6562
6563 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6564 warn_psabi_flags);
6565 if (sub_count < 0)
6566 return -1;
6567 count = count > sub_count ? count : sub_count;
6568 }
6569
6570 /* There must be no padding. */
6571 if (wi::to_wide (TYPE_SIZE (type))
6572 != count * GET_MODE_BITSIZE (*modep))
6573 return -1;
6574
6575 return count;
6576 }
6577
6578 default:
6579 break;
6580 }
6581
6582 return -1;
6583 }
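/* Illustrative results of the walk above (with no psabi quirks involved):

	struct { float x, y, z; }	 -> 3, *MODEP == SFmode	 (HFA)
	struct { double d[2]; }		 -> 2, *MODEP == DFmode	 (HFA)
	_Complex double			 -> 2, *MODEP == DFmode
	struct { float f; double d; }	 -> -1	(mixed element modes)
	struct { float f; int i; }	 -> -1	(non-floating member)

   A count between 1 and 4 is what later makes the type a VFP candidate in
   aapcs_vfp_is_call_or_return_candidate.  */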
6584
6585 /* Return true if PCS_VARIANT should use VFP registers. */
6586 static bool
6587 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6588 {
6589 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6590 {
6591 static bool seen_thumb1_vfp = false;
6592
6593 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6594 {
6595 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6596 /* sorry() is not immediately fatal, so only display this once. */
6597 seen_thumb1_vfp = true;
6598 }
6599
6600 return true;
6601 }
6602
6603 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6604 return false;
6605
6606 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6607 (TARGET_VFP_DOUBLE || !is_double));
6608 }
6609
6610 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6611 suitable for passing or returning in VFP registers for the PCS
6612 variant selected. If it is, then *BASE_MODE is updated to contain
6613 a machine mode describing each element of the argument's type and
6614 *COUNT to hold the number of such elements. */
6615 static bool
6616 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6617 machine_mode mode, const_tree type,
6618 machine_mode *base_mode, int *count)
6619 {
6620 machine_mode new_mode = VOIDmode;
6621
6622 /* If we have the type information, prefer that to working things
6623 out from the mode. */
6624 if (type)
6625 {
6626 unsigned int warn_psabi_flags = 0;
6627 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6628 &warn_psabi_flags);
6629 if (ag_count > 0 && ag_count <= 4)
6630 {
6631 static unsigned last_reported_type_uid;
6632 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6633 int alt;
6634 if (warn_psabi
6635 && warn_psabi_flags
6636 && uid != last_reported_type_uid
6637 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6638 != ag_count))
6639 {
6640 const char *url10
6641 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6642 const char *url12
6643 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6644 gcc_assert (alt == -1);
6645 last_reported_type_uid = uid;
6646 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6647 qualification. */
6648 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6649 inform (input_location, "parameter passing for argument of "
6650 "type %qT with %<[[no_unique_address]]%> members "
6651 "changed %{in GCC 10.1%}",
6652 TYPE_MAIN_VARIANT (type), url10);
6653 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6654 inform (input_location, "parameter passing for argument of "
6655 "type %qT when C++17 is enabled changed to match "
6656 "C++14 %{in GCC 10.1%}",
6657 TYPE_MAIN_VARIANT (type), url10);
6658 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6659 inform (input_location, "parameter passing for argument of "
6660 "type %qT changed %{in GCC 12.1%}",
6661 TYPE_MAIN_VARIANT (type), url12);
6662 }
6663 *count = ag_count;
6664 }
6665 else
6666 return false;
6667 }
6668 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6669 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6670 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6671 {
6672 *count = 1;
6673 new_mode = mode;
6674 }
6675 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6676 {
6677 *count = 2;
6678 new_mode = (mode == DCmode ? DFmode : SFmode);
6679 }
6680 else
6681 return false;
6682
6683
6684 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6685 return false;
6686
6687 *base_mode = new_mode;
6688
6689 if (TARGET_GENERAL_REGS_ONLY)
6690 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6691 type);
6692
6693 return true;
6694 }
6695
6696 static bool
6697 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6698 machine_mode mode, const_tree type)
6699 {
6700 int count ATTRIBUTE_UNUSED;
6701 machine_mode ag_mode ATTRIBUTE_UNUSED;
6702
6703 if (!use_vfp_abi (pcs_variant, false))
6704 return false;
6705 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6706 &ag_mode, &count);
6707 }
6708
6709 static bool
6710 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6711 const_tree type)
6712 {
6713 if (!use_vfp_abi (pcum->pcs_variant, false))
6714 return false;
6715
6716 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6717 &pcum->aapcs_vfp_rmode,
6718 &pcum->aapcs_vfp_rcount);
6719 }
6720
6721 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6722 for the behaviour of this function. */
6723
6724 static bool
6725 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6726 const_tree type ATTRIBUTE_UNUSED)
6727 {
6728 int rmode_size
6729 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6730 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6731 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6732 int regno;
6733
6734 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6735 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6736 {
6737 pcum->aapcs_vfp_reg_alloc = mask << regno;
6738 if (mode == BLKmode
6739 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6740 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6741 {
6742 int i;
6743 int rcount = pcum->aapcs_vfp_rcount;
6744 int rshift = shift;
6745 machine_mode rmode = pcum->aapcs_vfp_rmode;
6746 rtx par;
6747 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6748 {
6749 /* Avoid using unsupported vector modes. */
6750 if (rmode == V2SImode)
6751 rmode = DImode;
6752 else if (rmode == V4SImode)
6753 {
6754 rmode = DImode;
6755 rcount *= 2;
6756 rshift /= 2;
6757 }
6758 }
6759 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6760 for (i = 0; i < rcount; i++)
6761 {
6762 rtx tmp = gen_rtx_REG (rmode,
6763 FIRST_VFP_REGNUM + regno + i * rshift);
6764 tmp = gen_rtx_EXPR_LIST
6765 (VOIDmode, tmp,
6766 GEN_INT (i * GET_MODE_SIZE (rmode)));
6767 XVECEXP (par, 0, i) = tmp;
6768 }
6769
6770 pcum->aapcs_reg = par;
6771 }
6772 else
6773 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6774 return true;
6775 }
6776 return false;
6777 }
6778
6779 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6780 comment there for the behaviour of this function. */
6781
6782 static rtx
6783 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6784 machine_mode mode,
6785 const_tree type ATTRIBUTE_UNUSED)
6786 {
6787 if (!use_vfp_abi (pcs_variant, false))
6788 return NULL;
6789
6790 if (mode == BLKmode
6791 || (GET_MODE_CLASS (mode) == MODE_INT
6792 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6793 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6794 {
6795 int count;
6796 machine_mode ag_mode;
6797 int i;
6798 rtx par;
6799 int shift;
6800
6801 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6802 &ag_mode, &count);
6803
6804 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6805 {
6806 if (ag_mode == V2SImode)
6807 ag_mode = DImode;
6808 else if (ag_mode == V4SImode)
6809 {
6810 ag_mode = DImode;
6811 count *= 2;
6812 }
6813 }
6814 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6815 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6816 for (i = 0; i < count; i++)
6817 {
6818 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6819 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6820 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6821 XVECEXP (par, 0, i) = tmp;
6822 }
6823
6824 return par;
6825 }
6826
6827 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6828 }
6829
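/* Implement the advance field in aapcs_cp_arg_layout for VFP: mark the
   registers allocated to the current argument as used and clear the
   allocation ready for the next argument.  */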
6830 static void
6831 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6832 machine_mode mode ATTRIBUTE_UNUSED,
6833 const_tree type ATTRIBUTE_UNUSED)
6834 {
6835 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6836 pcum->aapcs_vfp_reg_alloc = 0;
6837 return;
6838 }
6839
6840 #define AAPCS_CP(X) \
6841 { \
6842 aapcs_ ## X ## _cum_init, \
6843 aapcs_ ## X ## _is_call_candidate, \
6844 aapcs_ ## X ## _allocate, \
6845 aapcs_ ## X ## _is_return_candidate, \
6846 aapcs_ ## X ## _allocate_return_reg, \
6847 aapcs_ ## X ## _advance \
6848 }
6849
6850 /* Table of co-processors that can be used to pass arguments in
6851 registers.  Ideally no argument should be a candidate for more than
6852 one co-processor table entry, but the table is processed in order
6853 and stops after the first match. If that entry then fails to put
6854 the argument into a co-processor register, the argument will go on
6855 the stack. */
6856 static struct
6857 {
6858 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6859 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6860
6861 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6862 BLKmode) is a candidate for this co-processor's registers; this
6863 function should ignore any position-dependent state in
6864 CUMULATIVE_ARGS and only use call-type dependent information. */
6865 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6866
6867 /* Return true if the argument does get a co-processor register; it
6868 should set aapcs_reg to an RTX for the allocated register, in the
6869 form required as a return value from FUNCTION_ARG.  */
6870 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6871
6872 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6873 be returned in this co-processor's registers. */
6874 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6875
6876 /* Allocate and return an RTX element to hold the return value of a call.  This
6877 routine must not fail and will only be called if is_return_candidate
6878 returned true with the same parameters. */
6879 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6880
6881 /* Finish processing this argument and prepare to start processing
6882 the next one. */
6883 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6884 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6885 {
6886 AAPCS_CP(vfp)
6887 };
6888
6889 #undef AAPCS_CP
6890
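/* Return the index of the co-processor slot that can pass an argument of
   mode MODE and type TYPE for the call described by PCUM, or -1 if no
   co-processor is a candidate.  */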
6891 static int
6892 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6893 const_tree type)
6894 {
6895 int i;
6896
6897 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6898 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6899 return i;
6900
6901 return -1;
6902 }
6903
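/* Return the index of the co-processor slot that can return a value of
   type TYPE from a function of type FNTYPE, or -1 if no co-processor is
   a candidate.  */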
6904 static int
6905 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6906 {
6907 /* We aren't passed a decl, so we can't check that a call is local.
6908 However, it isn't clear that that would be a win anyway, since it
6909 might limit some tail-calling opportunities. */
6910 enum arm_pcs pcs_variant;
6911
6912 if (fntype)
6913 {
6914 const_tree fndecl = NULL_TREE;
6915
6916 if (TREE_CODE (fntype) == FUNCTION_DECL)
6917 {
6918 fndecl = fntype;
6919 fntype = TREE_TYPE (fntype);
6920 }
6921
6922 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6923 }
6924 else
6925 pcs_variant = arm_pcs_default;
6926
6927 if (pcs_variant != ARM_PCS_AAPCS)
6928 {
6929 int i;
6930
6931 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6932 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6933 TYPE_MODE (type),
6934 type))
6935 return i;
6936 }
6937 return -1;
6938 }
6939
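/* Return an RTX describing where a value of mode MODE and type TYPE is
   returned from a call to a function of type FNTYPE under the AAPCS-based
   calling conventions.  */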
6940 static rtx
6941 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6942 const_tree fntype)
6943 {
6944 /* We aren't passed a decl, so we can't check that a call is local.
6945 However, it isn't clear that that would be a win anyway, since it
6946 might limit some tail-calling opportunities. */
6947 enum arm_pcs pcs_variant;
6948 int unsignedp ATTRIBUTE_UNUSED;
6949
6950 if (fntype)
6951 {
6952 const_tree fndecl = NULL_TREE;
6953
6954 if (TREE_CODE (fntype) == FUNCTION_DECL)
6955 {
6956 fndecl = fntype;
6957 fntype = TREE_TYPE (fntype);
6958 }
6959
6960 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6961 }
6962 else
6963 pcs_variant = arm_pcs_default;
6964
6965 /* Promote integer types. */
6966 if (type && INTEGRAL_TYPE_P (type))
6967 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6968
6969 if (pcs_variant != ARM_PCS_AAPCS)
6970 {
6971 int i;
6972
6973 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6974 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6975 type))
6976 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6977 mode, type);
6978 }
6979
6980 /* Promote small structs returned in a register to full-word size
6981 for big-endian AAPCS. */
6982 if (type && arm_return_in_msb (type))
6983 {
6984 HOST_WIDE_INT size = int_size_in_bytes (type);
6985 if (size % UNITS_PER_WORD != 0)
6986 {
6987 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6988 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6989 }
6990 }
6991
6992 return gen_rtx_REG (mode, R0_REGNUM);
6993 }
6994
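/* Return an RTX describing where a libcall result of mode MODE is
   returned under the AAPCS.  */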
6995 static rtx
6996 aapcs_libcall_value (machine_mode mode)
6997 {
6998 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6999 && GET_MODE_SIZE (mode) <= 4)
7000 mode = SImode;
7001
7002 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
7003 }
7004
7005 /* Lay out a function argument using the AAPCS rules. The rule
7006 numbers referred to here are those in the AAPCS. */
7007 static void
7008 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
7009 const_tree type, bool named)
7010 {
7011 int nregs, nregs2;
7012 int ncrn;
7013
7014 /* We only need to do this once per argument. */
7015 if (pcum->aapcs_arg_processed)
7016 return;
7017
7018 pcum->aapcs_arg_processed = true;
7019
7020 /* Special case: if named is false then we are handling an incoming
7021 anonymous argument which is on the stack. */
7022 if (!named)
7023 return;
7024
7025 /* Is this a potential co-processor register candidate? */
7026 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7027 {
7028 int slot = aapcs_select_call_coproc (pcum, mode, type);
7029 pcum->aapcs_cprc_slot = slot;
7030
7031 /* We don't have to apply any of the rules from part B of the
7032 preparation phase; these are handled elsewhere in the
7033 compiler. */
7034
7035 if (slot >= 0)
7036 {
7037 /* A Co-processor register candidate goes either in its own
7038 class of registers or on the stack. */
7039 if (!pcum->aapcs_cprc_failed[slot])
7040 {
7041 /* C1.cp - Try to allocate the argument to co-processor
7042 registers. */
7043 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7044 return;
7045
7046 /* C2.cp - Put the argument on the stack and note that we
7047 can't assign any more candidates in this slot. We also
7048 need to note that we have allocated stack space, so that
7049 we won't later try to split a non-cprc candidate between
7050 core registers and the stack. */
7051 pcum->aapcs_cprc_failed[slot] = true;
7052 pcum->can_split = false;
7053 }
7054
7055 /* We didn't get a register, so this argument goes on the
7056 stack. */
7057 gcc_assert (pcum->can_split == false);
7058 return;
7059 }
7060 }
7061
7062 /* C3 - For double-word aligned arguments, round the NCRN up to the
7063 next even number. */
7064 ncrn = pcum->aapcs_ncrn;
7065 if (ncrn & 1)
7066 {
7067 int res = arm_needs_doubleword_align (mode, type);
7068 /* Only warn during RTL expansion of call stmts, otherwise we would
7069 warn e.g. during gimplification even on functions that will be
7070 always inlined, and we'd warn multiple times. Don't warn when
7071 called in expand_function_start either, as we warn instead in
7072 arm_function_arg_boundary in that case. */
7073 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7074 inform (input_location, "parameter passing for argument of type "
7075 "%qT changed in GCC 7.1", type);
7076 else if (res > 0)
7077 ncrn++;
7078 }
7079
7080 nregs = ARM_NUM_REGS2 (mode, type);
7081
7082 /* Sigh, this test should really assert that nregs > 0, but a GCC
7083 extension allows empty structs and then gives them empty size; it
7084 then allows such a structure to be passed by value. For some of
7085 the code below we have to pretend that such an argument has
7086 non-zero size so that we 'locate' it correctly either in
7087 registers or on the stack. */
7088 gcc_assert (nregs >= 0);
7089
7090 nregs2 = nregs ? nregs : 1;
7091
7092 /* C4 - Argument fits entirely in core registers. */
7093 if (ncrn + nregs2 <= NUM_ARG_REGS)
7094 {
7095 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7096 pcum->aapcs_next_ncrn = ncrn + nregs;
7097 return;
7098 }
7099
7100 /* C5 - Some core registers left and there are no arguments already
7101 on the stack: split this argument between the remaining core
7102 registers and the stack. */
7103 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7104 {
7105 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7106 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7107 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7108 return;
7109 }
7110
7111 /* C6 - NCRN is set to 4. */
7112 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7113
7114 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
7115 return;
7116 }
7117
7118 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7119 for a call to a function whose data type is FNTYPE.
7120 For a library call, FNTYPE is NULL. */
7121 void
7122 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7123 rtx libname,
7124 tree fndecl ATTRIBUTE_UNUSED)
7125 {
7126 /* Long call handling. */
7127 if (fntype)
7128 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7129 else
7130 pcum->pcs_variant = arm_pcs_default;
7131
7132 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7133 {
7134 if (arm_libcall_uses_aapcs_base (libname))
7135 pcum->pcs_variant = ARM_PCS_AAPCS;
7136
7137 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7138 pcum->aapcs_reg = NULL_RTX;
7139 pcum->aapcs_partial = 0;
7140 pcum->aapcs_arg_processed = false;
7141 pcum->aapcs_cprc_slot = -1;
7142 pcum->can_split = true;
7143
7144 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7145 {
7146 int i;
7147
7148 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7149 {
7150 pcum->aapcs_cprc_failed[i] = false;
7151 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7152 }
7153 }
7154 return;
7155 }
7156
7157 /* Legacy ABIs */
7158
7159 /* On the ARM, the offset starts at 0. */
7160 pcum->nregs = 0;
7161 pcum->iwmmxt_nregs = 0;
7162 pcum->can_split = true;
7163
7164 /* Varargs vectors are treated the same as long long.
7165 named_count avoids having to change the way arm handles 'named' */
7166 pcum->named_count = 0;
7167 pcum->nargs = 0;
7168
7169 if (TARGET_REALLY_IWMMXT && fntype)
7170 {
7171 tree fn_arg;
7172
7173 for (fn_arg = TYPE_ARG_TYPES (fntype);
7174 fn_arg;
7175 fn_arg = TREE_CHAIN (fn_arg))
7176 pcum->named_count += 1;
7177
7178 if (! pcum->named_count)
7179 pcum->named_count = INT_MAX;
7180 }
7181 }
7182
7183 /* Return 2 if double word alignment is required for argument passing,
7184 but wasn't required before the fix for PR88469.
7185 Return 1 if double word alignment is required for argument passing.
7186 Return -1 if double word alignment used to be required for argument
7187 passing before PR77728 ABI fix, but is not required anymore.
7188 Return 0 if double word alignment is not required and wasn't required
7189 before either. */
7190 static int
7191 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7192 {
7193 if (!type)
7194 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7195
7196 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7197 if (!AGGREGATE_TYPE_P (type))
7198 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7199
7200 /* Array types: Use member alignment of element type. */
7201 if (TREE_CODE (type) == ARRAY_TYPE)
7202 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7203
7204 int ret = 0;
7205 int ret2 = 0;
7206 /* Record/aggregate types: Use greatest member alignment of any member.
7207
7208 Note that we explicitly consider zero-sized fields here, even though
7209 they don't map to AAPCS machine types. For example, in:
7210
7211 struct __attribute__((aligned(8))) empty {};
7212
7213 struct s {
7214 [[no_unique_address]] empty e;
7215 int x;
7216 };
7217
7218 "s" contains only one Fundamental Data Type (the int field)
7219 but gains 8-byte alignment and size thanks to "e". */
7220 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7221 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7222 {
7223 if (TREE_CODE (field) == FIELD_DECL)
7224 return 1;
7225 else
7226 /* Before PR77728 fix, we were incorrectly considering also
7227 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7228 Make sure we can warn about that with -Wpsabi. */
7229 ret = -1;
7230 }
7231 else if (TREE_CODE (field) == FIELD_DECL
7232 && DECL_BIT_FIELD_TYPE (field)
7233 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7234 ret2 = 1;
7235
7236 if (ret2)
7237 return 2;
7238
7239 return ret;
7240 }
7241
7242
7243 /* Determine where to put an argument to a function.
7244 Value is zero to push the argument on the stack,
7245 or a hard register in which to store the argument.
7246
7247 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7248 the preceding args and about the function being called.
7249 ARG is a description of the argument.
7250
7251 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7252 other arguments are passed on the stack. If (NAMED == 0) (which happens
7253 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7254 defined), say it is passed in the stack (function_prologue will
7255 indeed make it pass in the stack if necessary). */
7256
7257 static rtx
7258 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7259 {
7260 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7261 int nregs;
7262
7263 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7264 a call insn (op3 of a call_value insn). */
7265 if (arg.end_marker_p ())
7266 return const0_rtx;
7267
7268 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7269 {
7270 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7271 return pcum->aapcs_reg;
7272 }
7273
7274 /* Varargs vectors are treated the same as long long.
7275 named_count avoids having to change the way arm handles 'named' */
7276 if (TARGET_IWMMXT_ABI
7277 && arm_vector_mode_supported_p (arg.mode)
7278 && pcum->named_count > pcum->nargs + 1)
7279 {
7280 if (pcum->iwmmxt_nregs <= 9)
7281 return gen_rtx_REG (arg.mode,
7282 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7283 else
7284 {
7285 pcum->can_split = false;
7286 return NULL_RTX;
7287 }
7288 }
7289
7290 /* Put doubleword aligned quantities in even register pairs. */
7291 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7292 {
7293 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7294 if (res < 0 && warn_psabi)
7295 inform (input_location, "parameter passing for argument of type "
7296 "%qT changed in GCC 7.1", arg.type);
7297 else if (res > 0)
7298 {
7299 pcum->nregs++;
7300 if (res > 1 && warn_psabi)
7301 inform (input_location, "parameter passing for argument of type "
7302 "%qT changed in GCC 9.1", arg.type);
7303 }
7304 }
7305
7306 /* Only allow splitting an arg between regs and memory if all preceding
7307 args were allocated to regs. For args passed by reference we only count
7308 the reference pointer. */
7309 if (pcum->can_split)
7310 nregs = 1;
7311 else
7312 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7313
7314 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7315 return NULL_RTX;
7316
7317 return gen_rtx_REG (arg.mode, pcum->nregs);
7318 }
7319
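/* Return the alignment, in bits, required when passing an argument of
   mode MODE and type TYPE.  */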
7320 static unsigned int
7321 arm_function_arg_boundary (machine_mode mode, const_tree type)
7322 {
7323 if (!ARM_DOUBLEWORD_ALIGN)
7324 return PARM_BOUNDARY;
7325
7326 int res = arm_needs_doubleword_align (mode, type);
7327 if (res < 0 && warn_psabi)
7328 inform (input_location, "parameter passing for argument of type %qT "
7329 "changed in GCC 7.1", type);
7330 if (res > 1 && warn_psabi)
7331 inform (input_location, "parameter passing for argument of type "
7332 "%qT changed in GCC 9.1", type);
7333
7334 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7335 }
7336
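/* Return the number of bytes of argument ARG that are passed in core
   registers when the argument is split between registers and the stack,
   or zero if no split is needed.  */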
7337 static int
7338 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7339 {
7340 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7341 int nregs = pcum->nregs;
7342
7343 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7344 {
7345 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7346 return pcum->aapcs_partial;
7347 }
7348
7349 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7350 return 0;
7351
7352 if (NUM_ARG_REGS > nregs
7353 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7354 && pcum->can_split)
7355 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7356
7357 return 0;
7358 }
7359
7360 /* Update the data in PCUM to advance over argument ARG. */
7361
7362 static void
7363 arm_function_arg_advance (cumulative_args_t pcum_v,
7364 const function_arg_info &arg)
7365 {
7366 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7367
7368 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7369 {
7370 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7371
7372 if (pcum->aapcs_cprc_slot >= 0)
7373 {
7374 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7375 arg.type);
7376 pcum->aapcs_cprc_slot = -1;
7377 }
7378
7379 /* Generic stuff. */
7380 pcum->aapcs_arg_processed = false;
7381 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7382 pcum->aapcs_reg = NULL_RTX;
7383 pcum->aapcs_partial = 0;
7384 }
7385 else
7386 {
7387 pcum->nargs += 1;
7388 if (arm_vector_mode_supported_p (arg.mode)
7389 && pcum->named_count > pcum->nargs
7390 && TARGET_IWMMXT_ABI)
7391 pcum->iwmmxt_nregs += 1;
7392 else
7393 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7394 }
7395 }
7396
7397 /* Variable sized types are passed by reference. This is a GCC
7398 extension to the ARM ABI. */
7399
7400 static bool
7401 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7402 {
7403 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7404 }
7405 \f
7406 /* Encode the current state of the #pragma [no_]long_calls. */
7407 typedef enum
7408 {
7409 OFF, /* No #pragma [no_]long_calls is in effect. */
7410 LONG, /* #pragma long_calls is in effect. */
7411 SHORT /* #pragma no_long_calls is in effect. */
7412 } arm_pragma_enum;
7413
7414 static arm_pragma_enum arm_pragma_long_calls = OFF;
7415
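/* Pragma handlers: record which of the #pragma [no_]long_calls directives
   is currently in effect.  */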
7416 void
7417 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7418 {
7419 arm_pragma_long_calls = LONG;
7420 }
7421
7422 void
7423 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7424 {
7425 arm_pragma_long_calls = SHORT;
7426 }
7427
7428 void
7429 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7430 {
7431 arm_pragma_long_calls = OFF;
7432 }
7433 \f
7434 /* Handle an attribute requiring a FUNCTION_DECL;
7435 arguments as in struct attribute_spec.handler. */
7436 static tree
7437 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7438 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7439 {
7440 if (TREE_CODE (*node) != FUNCTION_DECL)
7441 {
7442 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7443 name);
7444 *no_add_attrs = true;
7445 }
7446
7447 return NULL_TREE;
7448 }
7449
7450 /* Handle an "interrupt" or "isr" attribute;
7451 arguments as in struct attribute_spec.handler. */
7452 static tree
7453 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7454 bool *no_add_attrs)
7455 {
7456 if (DECL_P (*node))
7457 {
7458 if (TREE_CODE (*node) != FUNCTION_DECL)
7459 {
7460 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7461 name);
7462 *no_add_attrs = true;
7463 }
7464 else if (TARGET_VFP_BASE)
7465 {
7466 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7467 name);
7468 }
7469 /* FIXME: the argument if any is checked for type attributes;
7470 should it be checked for decl ones? */
7471 }
7472 else
7473 {
7474 if (FUNC_OR_METHOD_TYPE_P (*node))
7475 {
7476 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7477 {
7478 warning (OPT_Wattributes, "%qE attribute ignored",
7479 name);
7480 *no_add_attrs = true;
7481 }
7482 }
7483 else if (TREE_CODE (*node) == POINTER_TYPE
7484 && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node))
7485 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7486 {
7487 *node = build_variant_type_copy (*node);
7488 TREE_TYPE (*node) = build_type_attribute_variant
7489 (TREE_TYPE (*node),
7490 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7491 *no_add_attrs = true;
7492 }
7493 else
7494 {
7495 /* Possibly pass this attribute on from the type to a decl. */
7496 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7497 | (int) ATTR_FLAG_FUNCTION_NEXT
7498 | (int) ATTR_FLAG_ARRAY_NEXT))
7499 {
7500 *no_add_attrs = true;
7501 return tree_cons (name, args, NULL_TREE);
7502 }
7503 else
7504 {
7505 warning (OPT_Wattributes, "%qE attribute ignored",
7506 name);
7507 }
7508 }
7509 }
7510
7511 return NULL_TREE;
7512 }
7513
7514 /* Handle a "pcs" attribute; arguments as in struct
7515 attribute_spec.handler. */
7516 static tree
7517 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7518 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7519 {
7520 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7521 {
7522 warning (OPT_Wattributes, "%qE attribute ignored", name);
7523 *no_add_attrs = true;
7524 }
7525 return NULL_TREE;
7526 }
7527
7528 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7529 /* Handle the "notshared" attribute. This attribute is another way of
7530 requesting hidden visibility. ARM's compiler supports
7531 "__declspec(notshared)"; we support the same thing via an
7532 attribute. */
7533
7534 static tree
7535 arm_handle_notshared_attribute (tree *node,
7536 tree name ATTRIBUTE_UNUSED,
7537 tree args ATTRIBUTE_UNUSED,
7538 int flags ATTRIBUTE_UNUSED,
7539 bool *no_add_attrs)
7540 {
7541 tree decl = TYPE_NAME (*node);
7542
7543 if (decl)
7544 {
7545 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7546 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7547 *no_add_attrs = false;
7548 }
7549 return NULL_TREE;
7550 }
7551 #endif
7552
7553 /* This function returns true if a function with declaration FNDECL and type
7554 FNTYPE uses the stack to pass arguments or return variables and false
7555 otherwise. This is used for functions with the attributes
7556 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7557 diagnostic messages if the stack is used. NAME is the name of the attribute
7558 used. */
7559
7560 static bool
7561 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7562 {
7563 function_args_iterator args_iter;
7564 CUMULATIVE_ARGS args_so_far_v;
7565 cumulative_args_t args_so_far;
7566 bool first_param = true;
7567 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7568
7569 /* Error out if any argument is passed on the stack. */
7570 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7571 args_so_far = pack_cumulative_args (&args_so_far_v);
7572 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7573 {
7574 rtx arg_rtx;
7575
7576 prev_arg_type = arg_type;
7577 if (VOID_TYPE_P (arg_type))
7578 continue;
7579
7580 function_arg_info arg (arg_type, /*named=*/true);
7581 if (!first_param)
7582 /* ??? We should advance after processing the argument and pass
7583 the argument we're advancing past. */
7584 arm_function_arg_advance (args_so_far, arg);
7585 arg_rtx = arm_function_arg (args_so_far, arg);
7586 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7587 {
7588 error ("%qE attribute not available to functions with arguments "
7589 "passed on the stack", name);
7590 return true;
7591 }
7592 first_param = false;
7593 }
7594
7595 /* Error out for variadic functions since we cannot control how many
7596 arguments will be passed and thus stack could be used. stdarg_p () is not
7597 used for the checking to avoid browsing arguments twice. */
7598 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7599 {
7600 error ("%qE attribute not available to functions with variable number "
7601 "of arguments", name);
7602 return true;
7603 }
7604
7605 /* Error out if return value is passed on the stack. */
7606 ret_type = TREE_TYPE (fntype);
7607 if (arm_return_in_memory (ret_type, fntype))
7608 {
7609 error ("%qE attribute not available to functions that return value on "
7610 "the stack", name);
7611 return true;
7612 }
7613 return false;
7614 }
7615
7616 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7617 function will check whether the attribute is allowed here and will add the
7618 attribute to the function declaration tree or otherwise issue a warning. */
7619
7620 static tree
7621 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7622 tree /* args */,
7623 int /* flags */,
7624 bool *no_add_attrs)
7625 {
7626 tree fndecl;
7627
7628 if (!use_cmse)
7629 {
7630 *no_add_attrs = true;
7631 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7632 "option", name);
7633 return NULL_TREE;
7634 }
7635
7636 /* Ignore attribute for function types. */
7637 if (TREE_CODE (*node) != FUNCTION_DECL)
7638 {
7639 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7640 name);
7641 *no_add_attrs = true;
7642 return NULL_TREE;
7643 }
7644
7645 fndecl = *node;
7646
7647 /* Warn for static linkage functions. */
7648 if (!TREE_PUBLIC (fndecl))
7649 {
7650 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7651 "with static linkage", name);
7652 *no_add_attrs = true;
7653 return NULL_TREE;
7654 }
7655
7656 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7657 TREE_TYPE (fndecl));
7658 return NULL_TREE;
7659 }
7660
7661
7662 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7663 function will check whether the attribute is allowed here and will add the
7664 attribute to the function type tree or otherwise issue a diagnostic. The
7665 reason we check this at declaration time is to only allow the use of the
7666 attribute with declarations of function pointers and not function
7667 declarations. This function checks NODE is of the expected type and issues
7668 diagnostics otherwise using NAME. If it is not of the expected type
7669 *NO_ADD_ATTRS will be set to true. */
7670
7671 static tree
7672 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7673 tree /* args */,
7674 int /* flags */,
7675 bool *no_add_attrs)
7676 {
7677 tree decl = NULL_TREE;
7678 tree fntype, type;
7679
7680 if (!use_cmse)
7681 {
7682 *no_add_attrs = true;
7683 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7684 "option", name);
7685 return NULL_TREE;
7686 }
7687
7688 if (DECL_P (*node))
7689 {
7690 fntype = TREE_TYPE (*node);
7691
7692 if (VAR_P (*node) || TREE_CODE (*node) == TYPE_DECL)
7693 decl = *node;
7694 }
7695 else
7696 fntype = *node;
7697
7698 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7699 fntype = TREE_TYPE (fntype);
7700
7701 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7702 {
7703 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7704 "function pointer", name);
7705 *no_add_attrs = true;
7706 return NULL_TREE;
7707 }
7708
7709 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7710
7711 if (*no_add_attrs)
7712 return NULL_TREE;
7713
7714 /* Prevent trees being shared among function types with and without
7715 cmse_nonsecure_call attribute. */
7716 if (decl)
7717 {
7718 type = build_distinct_type_copy (TREE_TYPE (decl));
7719 TREE_TYPE (decl) = type;
7720 }
7721 else
7722 {
7723 type = build_distinct_type_copy (*node);
7724 *node = type;
7725 }
7726
7727 fntype = type;
7728
7729 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7730 {
7731 type = fntype;
7732 fntype = TREE_TYPE (fntype);
7733 fntype = build_distinct_type_copy (fntype);
7734 TREE_TYPE (type) = fntype;
7735 }
7736
7737 /* Construct a type attribute and add it to the function type. */
7738 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7739 TYPE_ATTRIBUTES (fntype));
7740 TYPE_ATTRIBUTES (fntype) = attrs;
7741 return NULL_TREE;
7742 }
7743
7744 /* Return 0 if the attributes for two types are incompatible, 1 if they
7745 are compatible, and 2 if they are nearly compatible (which causes a
7746 warning to be generated). */
7747 static int
7748 arm_comp_type_attributes (const_tree type1, const_tree type2)
7749 {
7750 int l1, l2, s1, s2;
7751
7752 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7753 TYPE_ATTRIBUTES (type1));
7754 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7755 TYPE_ATTRIBUTES (type2));
7756 if (bool (attrs1) != bool (attrs2))
7757 return 0;
7758 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7759 return 0;
7760
7761 /* Check for mismatch of non-default calling convention. */
7762 if (TREE_CODE (type1) != FUNCTION_TYPE)
7763 return 1;
7764
7765 /* Check for mismatched call attributes. */
7766 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7767 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7768 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7769 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7770
7771 /* Only bother to check if an attribute is defined. */
7772 if (l1 | l2 | s1 | s2)
7773 {
7774 /* If one type has an attribute, the other must have the same attribute. */
7775 if ((l1 != l2) || (s1 != s2))
7776 return 0;
7777
7778 /* Disallow mixed attributes. */
7779 if ((l1 & s2) || (l2 & s1))
7780 return 0;
7781 }
7782
7783 /* Check for mismatched ISR attribute. */
7784 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7785 if (! l1)
7786 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7787 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7788 if (! l2)
7789 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7790 if (l1 != l2)
7791 return 0;
7792
7793 l1 = lookup_attribute ("cmse_nonsecure_call",
7794 TYPE_ATTRIBUTES (type1)) != NULL;
7795 l2 = lookup_attribute ("cmse_nonsecure_call",
7796 TYPE_ATTRIBUTES (type2)) != NULL;
7797
7798 if (l1 != l2)
7799 return 0;
7800
7801 return 1;
7802 }
7803
7804 /* Assigns default attributes to newly defined type. This is used to
7805 set short_call/long_call attributes for function types of
7806 functions defined inside corresponding #pragma scopes. */
7807 static void
7808 arm_set_default_type_attributes (tree type)
7809 {
7810 /* Add __attribute__ ((long_call)) to all functions, when
7811 inside #pragma long_calls or __attribute__ ((short_call)),
7812 when inside #pragma no_long_calls. */
7813 if (FUNC_OR_METHOD_TYPE_P (type))
7814 {
7815 tree type_attr_list, attr_name;
7816 type_attr_list = TYPE_ATTRIBUTES (type);
7817
7818 if (arm_pragma_long_calls == LONG)
7819 attr_name = get_identifier ("long_call");
7820 else if (arm_pragma_long_calls == SHORT)
7821 attr_name = get_identifier ("short_call");
7822 else
7823 return;
7824
7825 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7826 TYPE_ATTRIBUTES (type) = type_attr_list;
7827 }
7828 }
7829 \f
7830 /* Return true if DECL is known to be linked into section SECTION. */
7831
7832 static bool
7833 arm_function_in_section_p (tree decl, section *section)
7834 {
7835 /* We can only be certain about the prevailing symbol definition. */
7836 if (!decl_binds_to_current_def_p (decl))
7837 return false;
7838
7839 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7840 if (!DECL_SECTION_NAME (decl))
7841 {
7842 /* Make sure that we will not create a unique section for DECL. */
7843 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7844 return false;
7845 }
7846
7847 return function_section (decl) == section;
7848 }
7849
7850 /* Return nonzero if a 32-bit "long_call" should be generated for
7851 a call from the current function to DECL. We generate a long_call
7852 if the function:
7853
7854 a.  has an __attribute__ ((long_call))
7855 or b. is within the scope of a #pragma long_calls
7856 or c. the -mlong-calls command line switch has been specified
7857
7858 However we do not generate a long call if the function:
7859
7860 d. has an __attribute__ ((short_call))
7861 or e. is inside the scope of a #pragma no_long_calls
7862 or f. is defined in the same section as the current function. */
7863
7864 bool
7865 arm_is_long_call_p (tree decl)
7866 {
7867 tree attrs;
7868
7869 if (!decl)
7870 return TARGET_LONG_CALLS;
7871
7872 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7873 if (lookup_attribute ("short_call", attrs))
7874 return false;
7875
7876 /* For "f", be conservative, and only cater for cases in which the
7877 whole of the current function is placed in the same section. */
7878 if (!flag_reorder_blocks_and_partition
7879 && TREE_CODE (decl) == FUNCTION_DECL
7880 && arm_function_in_section_p (decl, current_function_section ()))
7881 return false;
7882
7883 if (lookup_attribute ("long_call", attrs))
7884 return true;
7885
7886 return TARGET_LONG_CALLS;
7887 }
7888
7889 /* Return nonzero if it is ok to make a tail-call to DECL. */
7890 static bool
7891 arm_function_ok_for_sibcall (tree decl, tree exp)
7892 {
7893 unsigned long func_type;
7894
7895 if (cfun->machine->sibcall_blocked)
7896 return false;
7897
7898 if (TARGET_FDPIC)
7899 {
7900 /* In FDPIC, never tailcall something for which we have no decl:
7901 the target function could be in a different module, requiring
7902 a different FDPIC register value. */
7903 if (decl == NULL)
7904 return false;
7905 }
7906
7907 /* Never tailcall something if we are generating code for Thumb-1. */
7908 if (TARGET_THUMB1)
7909 return false;
7910
7911 /* The PIC register is live on entry to VxWorks PLT entries, so we
7912 must make the call before restoring the PIC register. */
7913 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7914 return false;
7915
7916 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7917 may be used both as target of the call and base register for restoring
7918 the VFP registers */
7919 if (TARGET_APCS_FRAME && TARGET_ARM
7920 && TARGET_HARD_FLOAT
7921 && decl && arm_is_long_call_p (decl))
7922 return false;
7923
7924 /* If we are interworking and the function is not declared static
7925 then we can't tail-call it unless we know that it exists in this
7926 compilation unit (since it might be a Thumb routine). */
7927 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7928 && !TREE_ASM_WRITTEN (decl))
7929 return false;
7930
7931 func_type = arm_current_func_type ();
7932 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7933 if (IS_INTERRUPT (func_type))
7934 return false;
7935
7936 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7937 generated for entry functions themselves. */
7938 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7939 return false;
7940
7941 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7942 this would complicate matters for later code generation. */
7943 if (TREE_CODE (exp) == CALL_EXPR)
7944 {
7945 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7946 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7947 return false;
7948 }
7949
7950 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7951 {
7952 /* Check that the return value locations are the same. For
7953 example that we aren't returning a value from the sibling in
7954 a VFP register but then need to transfer it to a core
7955 register. */
7956 rtx a, b;
7957 tree decl_or_type = decl;
7958
7959 /* If it is an indirect function pointer, get the function type. */
7960 if (!decl)
7961 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7962
7963 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7964 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7965 cfun->decl, false);
7966 if (!rtx_equal_p (a, b))
7967 return false;
7968 }
7969
7970 /* Never tailcall if function may be called with a misaligned SP. */
7971 if (IS_STACKALIGN (func_type))
7972 return false;
7973
7974 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7975 references should become a NOP. Don't convert such calls into
7976 sibling calls. */
7977 if (TARGET_AAPCS_BASED
7978 && arm_abi == ARM_ABI_AAPCS
7979 && decl
7980 && DECL_WEAK (decl))
7981 return false;
7982
7983 /* We cannot do a tailcall for an indirect call by descriptor if all the
7984 argument registers are used because the only register left to load the
7985 address is IP and it will already contain the static chain. */
7986 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7987 {
7988 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7989 CUMULATIVE_ARGS cum;
7990 cumulative_args_t cum_v;
7991
7992 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7993 cum_v = pack_cumulative_args (&cum);
7994
7995 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7996 {
7997 tree type = TREE_VALUE (t);
7998 if (!VOID_TYPE_P (type))
7999 {
8000 function_arg_info arg (type, /*named=*/true);
8001 arm_function_arg_advance (cum_v, arg);
8002 }
8003 }
8004
8005 function_arg_info arg (integer_type_node, /*named=*/true);
8006 if (!arm_function_arg (cum_v, arg))
8007 return false;
8008 }
8009
8010 /* Everything else is ok. */
8011 return true;
8012 }
8013
8014 \f
8015 /* Addressing mode support functions. */
8016
8017 /* Return nonzero if X is a legitimate immediate operand when compiling
8018 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
8019 int
8020 legitimate_pic_operand_p (rtx x)
8021 {
8022 if (SYMBOL_REF_P (x)
8023 || (GET_CODE (x) == CONST
8024 && GET_CODE (XEXP (x, 0)) == PLUS
8025 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
8026 return 0;
8027
8028 return 1;
8029 }
8030
8031 /* Record that the current function needs a PIC register. If PIC_REG is null,
8032 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8033 both cases cfun->machine->pic_reg is initialized if we have not already done
8034 so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
8035 the PIC register is reloaded at the current position in the instruction stream
8036 regardless of whether it was loaded before.  Otherwise, it is only loaded
8037 if not already done so (crtl->uses_pic_offset_table is null). Note that
8038 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8039 is only supported iff COMPUTE_NOW is false. */
8040
8041 static void
8042 require_pic_register (rtx pic_reg, bool compute_now)
8043 {
8044 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8045
8046 /* A lot of the logic here is made obscure by the fact that this
8047 routine gets called as part of the rtx cost estimation process.
8048 We don't want those calls to affect any assumptions about the real
8049 function; and further, we can't call entry_of_function() until we
8050 start the real expansion process. */
8051 if (!crtl->uses_pic_offset_table || compute_now)
8052 {
8053 gcc_assert (can_create_pseudo_p ()
8054 || (pic_reg != NULL_RTX
8055 && REG_P (pic_reg)
8056 && GET_MODE (pic_reg) == Pmode));
8057 if (arm_pic_register != INVALID_REGNUM
8058 && !compute_now
8059 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8060 {
8061 if (!cfun->machine->pic_reg)
8062 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8063
8064 /* Play games to avoid marking the function as needing pic
8065 if we are being called as part of the cost-estimation
8066 process. */
8067 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8068 crtl->uses_pic_offset_table = 1;
8069 }
8070 else
8071 {
8072 rtx_insn *seq, *insn;
8073
8074 if (pic_reg == NULL_RTX)
8075 pic_reg = gen_reg_rtx (Pmode);
8076 if (!cfun->machine->pic_reg)
8077 cfun->machine->pic_reg = pic_reg;
8078
8079 /* Play games to avoid marking the function as needing pic
8080 if we are being called as part of the cost-estimation
8081 process. */
8082 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8083 {
8084 crtl->uses_pic_offset_table = 1;
8085 start_sequence ();
8086
8087 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8088 && arm_pic_register > LAST_LO_REGNUM
8089 && !compute_now)
8090 emit_move_insn (cfun->machine->pic_reg,
8091 gen_rtx_REG (Pmode, arm_pic_register));
8092 else
8093 arm_load_pic_register (0UL, pic_reg);
8094
8095 seq = get_insns ();
8096 end_sequence ();
8097
8098 for (insn = seq; insn; insn = NEXT_INSN (insn))
8099 if (INSN_P (insn))
8100 INSN_LOCATION (insn) = prologue_location;
8101
8102 /* We can be called during expansion of PHI nodes, where
8103 we can't yet emit instructions directly in the final
8104 insn stream. Queue the insns on the entry edge, they will
8105 be committed after everything else is expanded. */
8106 if (currently_expanding_to_rtl)
8107 insert_insn_on_edge (seq,
8108 single_succ_edge
8109 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8110 else
8111 emit_insn (seq);
8112 }
8113 }
8114 }
8115 }
8116
8117 /* Generate insns to calculate the address of ORIG in pic mode. */
8118 static rtx_insn *
8119 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8120 {
8121 rtx pat;
8122 rtx mem;
8123
8124 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8125
8126 /* Make the MEM as close to a constant as possible. */
8127 mem = SET_SRC (pat);
8128 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8129 MEM_READONLY_P (mem) = 1;
8130 MEM_NOTRAP_P (mem) = 1;
8131
8132 return emit_insn (pat);
8133 }
8134
8135 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8136 created to hold the result of the load. If not NULL, PIC_REG indicates
8137 which register to use as PIC register, otherwise it is decided by register
8138 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8139 location in the instruction stream, regardless of whether it was loaded
8140 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8141 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8142
8143 Returns the register REG into which the PIC load is performed. */
8144
8145 rtx
8146 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8147 bool compute_now)
8148 {
8149 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8150
8151 if (SYMBOL_REF_P (orig)
8152 || LABEL_REF_P (orig))
8153 {
8154 if (reg == 0)
8155 {
8156 gcc_assert (can_create_pseudo_p ());
8157 reg = gen_reg_rtx (Pmode);
8158 }
8159
8160 /* VxWorks does not impose a fixed gap between segments; the run-time
8161 gap can be different from the object-file gap. We therefore can't
8162 use GOTOFF unless we are absolutely sure that the symbol is in the
8163 same segment as the GOT. Unfortunately, the flexibility of linker
8164 scripts means that we can't be sure of that in general, so assume
8165 that GOTOFF is never valid on VxWorks. */
8166 /* References to weak symbols cannot be resolved locally: they
8167 may be overridden by a non-weak definition at link time. */
8168 rtx_insn *insn;
8169 if ((LABEL_REF_P (orig)
8170 || (SYMBOL_REF_P (orig)
8171 && SYMBOL_REF_LOCAL_P (orig)
8172 && (SYMBOL_REF_DECL (orig)
8173 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8174 && (!SYMBOL_REF_FUNCTION_P (orig)
8175 || arm_fdpic_local_funcdesc_p (orig))))
8176 && NEED_GOT_RELOC
8177 && arm_pic_data_is_text_relative)
8178 insn = arm_pic_static_addr (orig, reg);
8179 else
8180 {
8181 /* If this function doesn't have a pic register, create one now. */
8182 require_pic_register (pic_reg, compute_now);
8183
8184 if (pic_reg == NULL_RTX)
8185 pic_reg = cfun->machine->pic_reg;
8186
8187 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8188 }
8189
8190 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8191 by loop. */
8192 set_unique_reg_note (insn, REG_EQUAL, orig);
8193
8194 return reg;
8195 }
8196 else if (GET_CODE (orig) == CONST)
8197 {
8198 rtx base, offset;
8199
8200 if (GET_CODE (XEXP (orig, 0)) == PLUS
8201 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8202 return orig;
8203
8204 /* Handle the case where we have: const (UNSPEC_TLS). */
8205 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8206 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8207 return orig;
8208
8209 /* Handle the case where we have:
8210 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8211 CONST_INT. */
8212 if (GET_CODE (XEXP (orig, 0)) == PLUS
8213 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8214 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8215 {
8216 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8217 return orig;
8218 }
8219
8220 if (reg == 0)
8221 {
8222 gcc_assert (can_create_pseudo_p ());
8223 reg = gen_reg_rtx (Pmode);
8224 }
8225
8226 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8227
8228 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8229 pic_reg, compute_now);
8230 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8231 base == reg ? 0 : reg, pic_reg,
8232 compute_now);
8233
8234 if (CONST_INT_P (offset))
8235 {
8236 /* The base register doesn't really matter, we only want to
8237 test the index for the appropriate mode. */
8238 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8239 {
8240 gcc_assert (can_create_pseudo_p ());
8241 offset = force_reg (Pmode, offset);
8242 }
8243
8244 if (CONST_INT_P (offset))
8245 return plus_constant (Pmode, base, INTVAL (offset));
8246 }
8247
8248 if (GET_MODE_SIZE (mode) > 4
8249 && (GET_MODE_CLASS (mode) == MODE_INT
8250 || TARGET_SOFT_FLOAT))
8251 {
8252 emit_insn (gen_addsi3 (reg, base, offset));
8253 return reg;
8254 }
8255
8256 return gen_rtx_PLUS (Pmode, base, offset);
8257 }
8258
8259 return orig;
8260 }
8261
8262
8263 /* Generate insns that produce the address of the stack canary.  */
8264 rtx
8265 arm_stack_protect_tls_canary_mem (bool reload)
8266 {
8267 rtx tp = gen_reg_rtx (SImode);
8268 if (reload)
8269 emit_insn (gen_reload_tp_hard (tp));
8270 else
8271 emit_insn (gen_load_tp_hard (tp));
8272
8273 rtx reg = gen_reg_rtx (SImode);
8274 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8275 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8276 return gen_rtx_MEM (SImode, reg);
8277 }
8278
8279
8280 /* Return whether a register is callee saved.  This is necessary because, when
8281 optimizing for size on Thumb-1 targets, high registers are marked as caller
8282 saved even though they are callee saved, in order to avoid using them.  */
8283 #define callee_saved_reg_p(reg) \
8284 (!call_used_or_fixed_reg_p (reg) \
8285 || (TARGET_THUMB1 && optimize_size \
8286 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8287
8288 /* Return a mask for the call-clobbered low registers that are unused
8289 at the end of the prologue. */
8290 static unsigned long
8291 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8292 {
8293 unsigned long mask = 0;
8294 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8295
8296 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8297 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8298 mask |= 1 << (reg - FIRST_LO_REGNUM);
8299 return mask;
8300 }
8301
8302 /* Similarly for the start of the epilogue. */
8303 static unsigned long
8304 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8305 {
8306 unsigned long mask = 0;
8307 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8308
8309 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8310 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8311 mask |= 1 << (reg - FIRST_LO_REGNUM);
8312 return mask;
8313 }
8314
8315 /* Find a spare register to use during the prolog of a function. */
8316
8317 static int
8318 thumb_find_work_register (unsigned long pushed_regs_mask)
8319 {
8320 int reg;
8321
8322 unsigned long unused_regs
8323 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8324
8325 /* Check the argument registers first as these are call-used. The
8326 register allocation order means that sometimes r3 might be used
8327 but earlier argument registers might not, so check them all. */
8328 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8329 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8330 return reg;
8331
8332 /* Otherwise look for a call-saved register that is going to be pushed. */
8333 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8334 if (pushed_regs_mask & (1 << reg))
8335 return reg;
8336
8337 if (TARGET_THUMB2)
8338 {
8339 /* Thumb-2 can use high regs. */
8340 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8341 if (pushed_regs_mask & (1 << reg))
8342 return reg;
8343 }
8344 /* Something went wrong - thumb_compute_save_reg_mask()
8345 should have arranged for a suitable register to be pushed. */
8346 gcc_unreachable ();
8347 }
8348
8349 static GTY(()) int pic_labelno;
8350
8351 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8352 low register. */
8353
8354 void
8355 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8356 {
8357 rtx l1, labelno, pic_tmp, pic_rtx;
8358
8359 if (crtl->uses_pic_offset_table == 0
8360 || TARGET_SINGLE_PIC_BASE
8361 || TARGET_FDPIC)
8362 return;
8363
8364 gcc_assert (flag_pic);
8365
8366 if (pic_reg == NULL_RTX)
8367 pic_reg = cfun->machine->pic_reg;
8368 if (TARGET_VXWORKS_RTP)
8369 {
8370 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8371 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8372 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8373
8374 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8375
8376 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8377 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8378 }
8379 else
8380 {
8381 /* We use an UNSPEC rather than a LABEL_REF because this label
8382 never appears in the code stream. */
8383
8384 labelno = GEN_INT (pic_labelno++);
8385 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8386 l1 = gen_rtx_CONST (VOIDmode, l1);
8387
8388 /* On the ARM the PC register contains 'dot + 8' at the time of the
8389 addition, on the Thumb it is 'dot + 4'. */
8390 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8391 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8392 UNSPEC_GOTSYM_OFF);
8393 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8394
8395 if (TARGET_32BIT)
8396 {
8397 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8398 }
8399 else /* TARGET_THUMB1 */
8400 {
8401 if (arm_pic_register != INVALID_REGNUM
8402 && REGNO (pic_reg) > LAST_LO_REGNUM)
8403 {
8404 /* We will have pushed the pic register, so we should always be
8405 able to find a work register. */
8406 pic_tmp = gen_rtx_REG (SImode,
8407 thumb_find_work_register (saved_regs));
8408 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8409 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8410 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8411 }
8412 else if (arm_pic_register != INVALID_REGNUM
8413 && arm_pic_register > LAST_LO_REGNUM
8414 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8415 {
8416 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8417 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8418 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8419 }
8420 else
8421 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8422 }
8423 }
8424
8425 /* Need to emit this whether or not we obey regdecls,
8426 since setjmp/longjmp can cause life info to screw up. */
8427 emit_use (pic_reg);
8428 }
8429
8430 /* Try to determine whether an object, referenced via ORIG, will be
8431 placed in the text or data segment. This is used in FDPIC mode, to
8432 decide which relocations to use when accessing ORIG. *IS_READONLY
8433 is set to true if ORIG is a read-only location, false otherwise.
8434 Return true if we could determine the location of ORIG, false
8435 otherwise. *IS_READONLY is valid only when we return true. */
8436 static bool
8437 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8438 {
8439 *is_readonly = false;
8440
8441 if (LABEL_REF_P (orig))
8442 {
8443 *is_readonly = true;
8444 return true;
8445 }
8446
8447 if (SYMBOL_REF_P (orig))
8448 {
8449 if (CONSTANT_POOL_ADDRESS_P (orig))
8450 {
8451 *is_readonly = true;
8452 return true;
8453 }
8454 if (SYMBOL_REF_LOCAL_P (orig)
8455 && !SYMBOL_REF_EXTERNAL_P (orig)
8456 && SYMBOL_REF_DECL (orig)
8457 && (!DECL_P (SYMBOL_REF_DECL (orig))
8458 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8459 {
8460 tree decl = SYMBOL_REF_DECL (orig);
8461 tree init = VAR_P (decl)
8462 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8463 ? decl : 0;
8464 int reloc = 0;
8465 bool named_section, readonly;
8466
8467 if (init && init != error_mark_node)
8468 reloc = compute_reloc_for_constant (init);
8469
8470 named_section = VAR_P (decl)
8471 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8472 readonly = decl_readonly_section (decl, reloc);
8473
8474 /* We don't know where the link script will put a named
8475 section, so return false in such a case. */
8476 if (named_section)
8477 return false;
8478
8479 *is_readonly = readonly;
8480 return true;
8481 }
8482
8483 /* We don't know. */
8484 return false;
8485 }
8486
8487 gcc_unreachable ();
8488 }
8489
8490 /* Generate code to load the address of a static var when flag_pic is set. */
8491 static rtx_insn *
8492 arm_pic_static_addr (rtx orig, rtx reg)
8493 {
8494 rtx l1, labelno, offset_rtx;
8495 rtx_insn *insn;
8496
8497 gcc_assert (flag_pic);
8498
8499 bool is_readonly = false;
8500 bool info_known = false;
8501
8502 if (TARGET_FDPIC
8503 && SYMBOL_REF_P (orig)
8504 && !SYMBOL_REF_FUNCTION_P (orig))
8505 info_known = arm_is_segment_info_known (orig, &is_readonly);
8506
8507 if (TARGET_FDPIC
8508 && SYMBOL_REF_P (orig)
8509 && !SYMBOL_REF_FUNCTION_P (orig)
8510 && !info_known)
8511 {
8512 /* We don't know where orig is stored, so we have to be
8513 pessimistic and use a GOT relocation. */
8514 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8515
8516 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8517 }
8518 else if (TARGET_FDPIC
8519 && SYMBOL_REF_P (orig)
8520 && (SYMBOL_REF_FUNCTION_P (orig)
8521 || !is_readonly))
8522 {
8523 /* We use the GOTOFF relocation. */
8524 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8525
8526 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8527 emit_insn (gen_movsi (reg, l1));
8528 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8529 }
8530 else
8531 {
8532 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8533 PC-relative access. */
8534 /* We use an UNSPEC rather than a LABEL_REF because this label
8535 never appears in the code stream. */
8536 labelno = GEN_INT (pic_labelno++);
8537 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8538 l1 = gen_rtx_CONST (VOIDmode, l1);
8539
8540 /* On the ARM the PC register contains 'dot + 8' at the time of the
8541 addition; on Thumb it is 'dot + 4'. */
8542 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8543 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8544 UNSPEC_SYMBOL_OFFSET);
8545 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8546
8547 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8548 labelno));
8549 }
8550
8551 return insn;
8552 }
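/* For reference, a hedged sketch of what the non-FDPIC branch above amounts
   to at the assembly level (illustrative only, not the exact RTL or output
   template, which may also use movw/movt on newer cores):

       ldr     rN, .LCoff          @ rN = sym - (.LPICn + 8)
   .LPICn:
       add     rN, pc              @ rN = &sym
       ...
   .LCoff:
       .word   sym - (.LPICn + 8)

   with the constant 8 replaced by 4 in Thumb state, matching the
   'dot + 8' / 'dot + 4' adjustment noted in the code.  */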
8553
8554 /* Return nonzero if X is valid as an ARM state addressing register. */
8555 static int
8556 arm_address_register_rtx_p (rtx x, int strict_p)
8557 {
8558 int regno;
8559
8560 if (!REG_P (x))
8561 return 0;
8562
8563 regno = REGNO (x);
8564
8565 if (strict_p)
8566 return ARM_REGNO_OK_FOR_BASE_P (regno);
8567
8568 return (regno <= LAST_ARM_REGNUM
8569 || regno >= FIRST_PSEUDO_REGISTER
8570 || regno == FRAME_POINTER_REGNUM
8571 || regno == ARG_POINTER_REGNUM);
8572 }
8573
8574 /* Return TRUE if this rtx is the difference of a symbol and a label,
8575 and will reduce to a PC-relative relocation in the object file.
8576 Expressions like this can be left alone when generating PIC, rather
8577 than forced through the GOT. */
8578 static int
8579 pcrel_constant_p (rtx x)
8580 {
8581 if (GET_CODE (x) == MINUS)
8582 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8583
8584 return FALSE;
8585 }
8586
8587 /* Return true if X will surely end up in an index register after next
8588 splitting pass. */
8589 static bool
8590 will_be_in_index_register (const_rtx x)
8591 {
8592 /* arm.md: calculate_pic_address will split this into a register. */
8593 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8594 }
8595
8596 /* Return nonzero if X is a valid ARM state address operand. */
8597 int
8598 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8599 int strict_p)
8600 {
8601 bool use_ldrd;
8602 enum rtx_code code = GET_CODE (x);
8603
8604 if (arm_address_register_rtx_p (x, strict_p))
8605 return 1;
8606
8607 use_ldrd = (TARGET_LDRD
8608 && (mode == DImode || mode == DFmode));
8609
8610 if (code == POST_INC || code == PRE_DEC
8611 || ((code == PRE_INC || code == POST_DEC)
8612 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8613 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8614
8615 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8616 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8617 && GET_CODE (XEXP (x, 1)) == PLUS
8618 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8619 {
8620 rtx addend = XEXP (XEXP (x, 1), 1);
8621
8622 /* Don't allow ldrd post increment by register because it's hard
8623 to fix up invalid register choices. */
8624 if (use_ldrd
8625 && GET_CODE (x) == POST_MODIFY
8626 && REG_P (addend))
8627 return 0;
8628
8629 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8630 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8631 }
8632
8633 /* After reload constants split into minipools will have addresses
8634 from a LABEL_REF. */
8635 else if (reload_completed
8636 && (code == LABEL_REF
8637 || (code == CONST
8638 && GET_CODE (XEXP (x, 0)) == PLUS
8639 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8640 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8641 return 1;
8642
8643 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8644 return 0;
8645
8646 else if (code == PLUS)
8647 {
8648 rtx xop0 = XEXP (x, 0);
8649 rtx xop1 = XEXP (x, 1);
8650
8651 return ((arm_address_register_rtx_p (xop0, strict_p)
8652 && ((CONST_INT_P (xop1)
8653 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8654 || (!strict_p && will_be_in_index_register (xop1))))
8655 || (arm_address_register_rtx_p (xop1, strict_p)
8656 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8657 }
8658
8659 #if 0
8660 /* Reload currently can't handle MINUS, so disable this for now */
8661 else if (GET_CODE (x) == MINUS)
8662 {
8663 rtx xop0 = XEXP (x, 0);
8664 rtx xop1 = XEXP (x, 1);
8665
8666 return (arm_address_register_rtx_p (xop0, strict_p)
8667 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8668 }
8669 #endif
8670
8671 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8672 && code == SYMBOL_REF
8673 && CONSTANT_POOL_ADDRESS_P (x)
8674 && ! (flag_pic
8675 && symbol_mentioned_p (get_pool_constant (x))
8676 && ! pcrel_constant_p (get_pool_constant (x))))
8677 return 1;
8678
8679 return 0;
8680 }
8681
8682 /* Return true if we can avoid creating a constant pool entry for x. */
8683 static bool
8684 can_avoid_literal_pool_for_label_p (rtx x)
8685 {
8686 /* Normally we can assign constant values to target registers without
8687 the help of the constant pool. But there are cases where we have to use
8688 the constant pool, for example:
8689 1) assigning a label to a register;
8690 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8691
8692 A constant pool access of the form:
8693 (set (reg r0) (mem (symbol_ref (".LC0"))))
8694 will cause the use of a literal pool (later, in function arm_reorg).
8695 So here we mark such a form as invalid, and the compiler will then
8696 adjust it into:
8697 (set (reg r0) (symbol_ref (".LC0")))
8698 (set (reg r0) (mem (reg r0))).
8699 No extra register is required, and (mem (reg r0)) won't cause the use
8700 of literal pools. */
8701 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8702 && CONSTANT_POOL_ADDRESS_P (x))
8703 return 1;
8704 return 0;
8705 }
8706
8707
8708 /* Return nonzero if X is a valid Thumb-2 address operand. */
8709 static int
8710 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8711 {
8712 bool use_ldrd;
8713 enum rtx_code code = GET_CODE (x);
8714
8715 /* If we are dealing with an MVE predicate mode, then treat it as HImode, as
8716 we can store and load it like any other 16-bit value. */
8717 if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
8718 mode = HImode;
8719
8720 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8721 return mve_vector_mem_operand (mode, x, strict_p);
8722
8723 if (arm_address_register_rtx_p (x, strict_p))
8724 return 1;
8725
8726 use_ldrd = (TARGET_LDRD
8727 && (mode == DImode || mode == DFmode));
8728
8729 if (code == POST_INC || code == PRE_DEC
8730 || ((code == PRE_INC || code == POST_DEC)
8731 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8732 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8733
8734 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8735 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8736 && GET_CODE (XEXP (x, 1)) == PLUS
8737 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8738 {
8739 /* Thumb-2 only has autoincrement by constant. */
8740 rtx addend = XEXP (XEXP (x, 1), 1);
8741 HOST_WIDE_INT offset;
8742
8743 if (!CONST_INT_P (addend))
8744 return 0;
8745
8746 offset = INTVAL (addend);
8747 if (GET_MODE_SIZE (mode) <= 4)
8748 return (offset > -256 && offset < 256);
8749
8750 return (use_ldrd && offset > -1024 && offset < 1024
8751 && (offset & 3) == 0);
8752 }
8753
8754 /* After reload constants split into minipools will have addresses
8755 from a LABEL_REF. */
8756 else if (reload_completed
8757 && (code == LABEL_REF
8758 || (code == CONST
8759 && GET_CODE (XEXP (x, 0)) == PLUS
8760 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8761 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8762 return 1;
8763
8764 else if (mode == TImode
8765 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8766 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8767 return 0;
8768
8769 else if (code == PLUS)
8770 {
8771 rtx xop0 = XEXP (x, 0);
8772 rtx xop1 = XEXP (x, 1);
8773
8774 return ((arm_address_register_rtx_p (xop0, strict_p)
8775 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8776 || (!strict_p && will_be_in_index_register (xop1))))
8777 || (arm_address_register_rtx_p (xop1, strict_p)
8778 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8779 }
8780
8781 else if (can_avoid_literal_pool_for_label_p (x))
8782 return 0;
8783
8784 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8785 && code == SYMBOL_REF
8786 && CONSTANT_POOL_ADDRESS_P (x)
8787 && ! (flag_pic
8788 && symbol_mentioned_p (get_pool_constant (x))
8789 && ! pcrel_constant_p (get_pool_constant (x))))
8790 return 1;
8791
8792 return 0;
8793 }
8794
8795 /* Return nonzero if INDEX is valid for an address index operand in
8796 ARM state. */
8797 static int
8798 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8799 int strict_p)
8800 {
8801 HOST_WIDE_INT range;
8802 enum rtx_code code = GET_CODE (index);
8803
8804 /* Standard coprocessor addressing modes. */
8805 if (TARGET_HARD_FLOAT
8806 && (mode == SFmode || mode == DFmode))
8807 return (code == CONST_INT && INTVAL (index) < 1024
8808 && INTVAL (index) > -1024
8809 && (INTVAL (index) & 3) == 0);
8810
8811 /* For quad modes, we restrict the constant offset to be slightly less
8812 than what the instruction format permits. We do this because for
8813 quad mode moves, we will actually decompose them into two separate
8814 double-mode reads or writes. INDEX must therefore be a valid
8815 (double-mode) offset and so should INDEX+8. */
8816 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8817 return (code == CONST_INT
8818 && INTVAL (index) < 1016
8819 && INTVAL (index) > -1024
8820 && (INTVAL (index) & 3) == 0);
8821
8822 /* We have no such constraint on double mode offsets, so we permit the
8823 full range of the instruction format. */
8824 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8825 return (code == CONST_INT
8826 && INTVAL (index) < 1024
8827 && INTVAL (index) > -1024
8828 && (INTVAL (index) & 3) == 0);
8829
8830 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8831 return (code == CONST_INT
8832 && INTVAL (index) < 1024
8833 && INTVAL (index) > -1024
8834 && (INTVAL (index) & 3) == 0);
8835
8836 if (arm_address_register_rtx_p (index, strict_p)
8837 && (GET_MODE_SIZE (mode) <= 4))
8838 return 1;
8839
8840 if (mode == DImode || mode == DFmode)
8841 {
8842 if (code == CONST_INT)
8843 {
8844 HOST_WIDE_INT val = INTVAL (index);
8845
8846 /* Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8847 If vldr is selected it uses arm_coproc_mem_operand. */
8848 if (TARGET_LDRD)
8849 return val > -256 && val < 256;
8850 else
8851 return val > -4096 && val < 4092;
8852 }
8853
8854 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8855 }
8856
8857 if (GET_MODE_SIZE (mode) <= 4
8858 && ! (arm_arch4
8859 && (mode == HImode
8860 || mode == HFmode
8861 || (mode == QImode && outer == SIGN_EXTEND))))
8862 {
8863 if (code == MULT)
8864 {
8865 rtx xiop0 = XEXP (index, 0);
8866 rtx xiop1 = XEXP (index, 1);
8867
8868 return ((arm_address_register_rtx_p (xiop0, strict_p)
8869 && power_of_two_operand (xiop1, SImode))
8870 || (arm_address_register_rtx_p (xiop1, strict_p)
8871 && power_of_two_operand (xiop0, SImode)));
8872 }
8873 else if (code == LSHIFTRT || code == ASHIFTRT
8874 || code == ASHIFT || code == ROTATERT)
8875 {
8876 rtx op = XEXP (index, 1);
8877
8878 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8879 && CONST_INT_P (op)
8880 && INTVAL (op) > 0
8881 && INTVAL (op) <= 31);
8882 }
8883 }
8884
8885 /* For ARM v4 we may be doing a sign-extend operation during the
8886 load. */
8887 if (arm_arch4)
8888 {
8889 if (mode == HImode
8890 || mode == HFmode
8891 || (outer == SIGN_EXTEND && mode == QImode))
8892 range = 256;
8893 else
8894 range = 4096;
8895 }
8896 else
8897 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8898
8899 return (code == CONST_INT
8900 && INTVAL (index) < range
8901 && INTVAL (index) > -range);
8902 }
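/* In summary (a hedged restatement derived from the checks above, not quoted
   from the architecture manual): an ARM-state index is either a core
   register, a register scaled by a power of two (MULT) or shifted by 1..31,
   or an immediate offset of up to +/-4095, narrowed to +/-255 for the ARMv4
   halfword/signed-byte forms and to the ranges handled explicitly above for
   DImode/DFmode and the coprocessor modes.  */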
8903
8904 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8905 index operand. i.e. 1, 2, 4 or 8. */
8906 static bool
8907 thumb2_index_mul_operand (rtx op)
8908 {
8909 HOST_WIDE_INT val;
8910
8911 if (!CONST_INT_P (op))
8912 return false;
8913
8914 val = INTVAL (op);
8915 return (val == 1 || val == 2 || val == 4 || val == 8);
8916 }
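/* As far as this check is concerned, the accepted factors 1, 2, 4 and 8
   correspond to the LSL #0..#3 shifts available in the Thumb-2
   register-offset addressing mode [Rn, Rm, LSL #n].  */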
8917
8918 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8919 static int
8920 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8921 {
8922 enum rtx_code code = GET_CODE (index);
8923
8924 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8925 /* Standard coprocessor addressing modes. */
8926 if (TARGET_VFP_BASE
8927 && (mode == SFmode || mode == DFmode))
8928 return (code == CONST_INT && INTVAL (index) < 1024
8929 /* Thumb-2 only allows an index range greater than -256 for its core
8930 register load/stores. Since we allow SF/DF in core registers, we have
8931 to use the intersection between -256~4096 (core) and -1024~1024
8932 (coprocessor). */
8933 && INTVAL (index) > -256
8934 && (INTVAL (index) & 3) == 0);
8935
8936 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8937 {
8938 /* For DImode assume values will usually live in core regs
8939 and only allow LDRD addressing modes. */
8940 if (!TARGET_LDRD || mode != DImode)
8941 return (code == CONST_INT
8942 && INTVAL (index) < 1024
8943 && INTVAL (index) > -1024
8944 && (INTVAL (index) & 3) == 0);
8945 }
8946
8947 /* For quad modes, we restrict the constant offset to be slightly less
8948 than what the instruction format permits. We do this because for
8949 quad mode moves, we will actually decompose them into two separate
8950 double-mode reads or writes. INDEX must therefore be a valid
8951 (double-mode) offset and so should INDEX+8. */
8952 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8953 return (code == CONST_INT
8954 && INTVAL (index) < 1016
8955 && INTVAL (index) > -1024
8956 && (INTVAL (index) & 3) == 0);
8957
8958 /* We have no such constraint on double mode offsets, so we permit the
8959 full range of the instruction format. */
8960 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8961 return (code == CONST_INT
8962 && INTVAL (index) < 1024
8963 && INTVAL (index) > -1024
8964 && (INTVAL (index) & 3) == 0);
8965
8966 if (arm_address_register_rtx_p (index, strict_p)
8967 && (GET_MODE_SIZE (mode) <= 4))
8968 return 1;
8969
8970 if (mode == DImode || mode == DFmode)
8971 {
8972 if (code == CONST_INT)
8973 {
8974 HOST_WIDE_INT val = INTVAL (index);
8975 /* Thumb-2 ldrd only has reg+const addressing modes.
8976 Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8977 If vldr is selected it uses arm_coproc_mem_operand. */
8978 if (TARGET_LDRD)
8979 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8980 else
8981 return IN_RANGE (val, -255, 4095 - 4);
8982 }
8983 else
8984 return 0;
8985 }
8986
8987 if (code == MULT)
8988 {
8989 rtx xiop0 = XEXP (index, 0);
8990 rtx xiop1 = XEXP (index, 1);
8991
8992 return ((arm_address_register_rtx_p (xiop0, strict_p)
8993 && thumb2_index_mul_operand (xiop1))
8994 || (arm_address_register_rtx_p (xiop1, strict_p)
8995 && thumb2_index_mul_operand (xiop0)));
8996 }
8997 else if (code == ASHIFT)
8998 {
8999 rtx op = XEXP (index, 1);
9000
9001 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
9002 && CONST_INT_P (op)
9003 && INTVAL (op) > 0
9004 && INTVAL (op) <= 3);
9005 }
9006
9007 return (code == CONST_INT
9008 && INTVAL (index) < 4096
9009 && INTVAL (index) > -256);
9010 }
9011
9012 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
9013 static int
9014 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
9015 {
9016 int regno;
9017
9018 if (!REG_P (x))
9019 return 0;
9020
9021 regno = REGNO (x);
9022
9023 if (strict_p)
9024 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
9025
9026 return (regno <= LAST_LO_REGNUM
9027 || regno > LAST_VIRTUAL_REGISTER
9028 || regno == FRAME_POINTER_REGNUM
9029 || (GET_MODE_SIZE (mode) >= 4
9030 && (regno == STACK_POINTER_REGNUM
9031 || regno >= FIRST_PSEUDO_REGISTER
9032 || x == hard_frame_pointer_rtx
9033 || x == arg_pointer_rtx)));
9034 }
9035
9036 /* Return nonzero if x is a legitimate index register. This is the case
9037 for any base register that can access a QImode object. */
9038 inline static int
9039 thumb1_index_register_rtx_p (rtx x, int strict_p)
9040 {
9041 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9042 }
9043
9044 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9045
9046 The AP may be eliminated to either the SP or the FP, so we use the
9047 least common denominator, e.g. SImode, and offsets from 0 to 64.
9048
9049 ??? Verify whether the above is the right approach.
9050
9051 ??? Also, the FP may be eliminated to the SP, so perhaps that
9052 needs special handling also.
9053
9054 ??? Look at how the mips16 port solves this problem. It probably uses
9055 better ways to solve some of these problems.
9056
9057 Although it is not incorrect, we don't accept QImode and HImode
9058 addresses based on the frame pointer or arg pointer until the
9059 reload pass starts. This is so that eliminating such addresses
9060 into stack based ones won't produce impossible code. */
9061 int
9062 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9063 {
9064 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9065 return 0;
9066
9067 /* ??? Not clear if this is right. Experiment. */
9068 if (GET_MODE_SIZE (mode) < 4
9069 && !(reload_in_progress || reload_completed)
9070 && (reg_mentioned_p (frame_pointer_rtx, x)
9071 || reg_mentioned_p (arg_pointer_rtx, x)
9072 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9073 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9074 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9075 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9076 return 0;
9077
9078 /* Accept any base register. SP only in SImode or larger. */
9079 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9080 return 1;
9081
9082 /* This is PC relative data before arm_reorg runs. */
9083 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9084 && SYMBOL_REF_P (x)
9085 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9086 && !arm_disable_literal_pool)
9087 return 1;
9088
9089 /* This is PC relative data after arm_reorg runs. */
9090 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9091 && reload_completed
9092 && (LABEL_REF_P (x)
9093 || (GET_CODE (x) == CONST
9094 && GET_CODE (XEXP (x, 0)) == PLUS
9095 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9096 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9097 return 1;
9098
9099 /* Post-inc indexing only supported for SImode and larger. */
9100 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9101 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9102 return 1;
9103
9104 else if (GET_CODE (x) == PLUS)
9105 {
9106 /* REG+REG address can be any two index registers. */
9107 /* We disallow FRAME+REG addressing since we know that FRAME
9108 will be replaced with STACK, and SP relative addressing only
9109 permits SP+OFFSET. */
9110 if (GET_MODE_SIZE (mode) <= 4
9111 && XEXP (x, 0) != frame_pointer_rtx
9112 && XEXP (x, 1) != frame_pointer_rtx
9113 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9114 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9115 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9116 return 1;
9117
9118 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
9119 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9120 || XEXP (x, 0) == arg_pointer_rtx)
9121 && CONST_INT_P (XEXP (x, 1))
9122 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9123 return 1;
9124
9125 /* REG+const has a 10-bit offset for SP, but only SImode and
9126 larger are supported. */
9127 /* ??? Should probably check for DI/DFmode overflow here
9128 just like GO_IF_LEGITIMATE_OFFSET does. */
9129 else if (REG_P (XEXP (x, 0))
9130 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9131 && GET_MODE_SIZE (mode) >= 4
9132 && CONST_INT_P (XEXP (x, 1))
9133 && INTVAL (XEXP (x, 1)) >= 0
9134 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9135 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9136 return 1;
9137
9138 else if (REG_P (XEXP (x, 0))
9139 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9140 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9141 || VIRTUAL_REGISTER_P (XEXP (x, 0)))
9142 && GET_MODE_SIZE (mode) >= 4
9143 && CONST_INT_P (XEXP (x, 1))
9144 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9145 return 1;
9146 }
9147
9148 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9149 && GET_MODE_SIZE (mode) == 4
9150 && SYMBOL_REF_P (x)
9151 && CONSTANT_POOL_ADDRESS_P (x)
9152 && !arm_disable_literal_pool
9153 && ! (flag_pic
9154 && symbol_mentioned_p (get_pool_constant (x))
9155 && ! pcrel_constant_p (get_pool_constant (x))))
9156 return 1;
9157
9158 return 0;
9159 }
9160
9161 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9162 instruction of mode MODE. */
9163 int
9164 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9165 {
9166 switch (GET_MODE_SIZE (mode))
9167 {
9168 case 1:
9169 return val >= 0 && val < 32;
9170
9171 case 2:
9172 return val >= 0 && val < 64 && (val & 1) == 0;
9173
9174 default:
9175 return (val >= 0
9176 && (val + GET_MODE_SIZE (mode)) <= 128
9177 && (val & 3) == 0);
9178 }
9179 }
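/* A quick worked example of the ranges accepted above (derived from the
   code itself): QImode offsets are 0..31, HImode offsets are the even
   values 0..62, and SImode (and larger) offsets are the multiples of four
   from 0 up to 128 - GET_MODE_SIZE (mode), e.g. 0..124 for SImode.  These
   match the scaled 5-bit immediates of the Thumb-1 load/store encodings.  */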
9180
9181 bool
9182 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, code_helper)
9183 {
9184 if (TARGET_ARM)
9185 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9186 else if (TARGET_THUMB2)
9187 return thumb2_legitimate_address_p (mode, x, strict_p);
9188 else /* if (TARGET_THUMB1) */
9189 return thumb1_legitimate_address_p (mode, x, strict_p);
9190 }
9191
9192 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9193
9194 Given an rtx X being reloaded into a reg required to be
9195 in class CLASS, return the class of reg to actually use.
9196 In general this is just CLASS, but for the Thumb core registers and
9197 immediate constants we prefer a LO_REGS class or a subset. */
9198
9199 static reg_class_t
9200 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9201 {
9202 if (TARGET_32BIT)
9203 return rclass;
9204 else
9205 {
9206 if (rclass == GENERAL_REGS)
9207 return LO_REGS;
9208 else
9209 return rclass;
9210 }
9211 }
9212
9213 /* Build the SYMBOL_REF for __tls_get_addr. */
9214
9215 static GTY(()) rtx tls_get_addr_libfunc;
9216
9217 static rtx
9218 get_tls_get_addr (void)
9219 {
9220 if (!tls_get_addr_libfunc)
9221 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9222 return tls_get_addr_libfunc;
9223 }
9224
9225 rtx
9226 arm_load_tp (rtx target)
9227 {
9228 if (!target)
9229 target = gen_reg_rtx (SImode);
9230
9231 if (TARGET_HARD_TP)
9232 {
9233 /* Can return in any reg. */
9234 emit_insn (gen_load_tp_hard (target));
9235 }
9236 else
9237 {
9238 /* Always returned in r0. Immediately copy the result into a pseudo,
9239 otherwise other uses of r0 (e.g. setting up function arguments) may
9240 clobber the value. */
9241
9242 rtx tmp;
9243
9244 if (TARGET_FDPIC)
9245 {
9246 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9247 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9248
9249 emit_insn (gen_load_tp_soft_fdpic ());
9250
9251 /* Restore r9. */
9252 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9253 }
9254 else
9255 emit_insn (gen_load_tp_soft ());
9256
9257 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9258 emit_move_insn (target, tmp);
9259 }
9260 return target;
9261 }
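/* For reference (a hedged note, not a definitive statement of the patterns'
   output): on hardware-TP targets gen_load_tp_hard typically expands to a
   CP15 read of TPIDRURO such as

       mrc     p15, 0, rX, c13, c0, 3

   while the soft variants call the __aeabi_read_tp helper, which returns the
   thread pointer in r0 -- hence the copy from r0 above.  */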
9262
9263 static rtx
9264 load_tls_operand (rtx x, rtx reg)
9265 {
9266 rtx tmp;
9267
9268 if (reg == NULL_RTX)
9269 reg = gen_reg_rtx (SImode);
9270
9271 tmp = gen_rtx_CONST (SImode, x);
9272
9273 emit_move_insn (reg, tmp);
9274
9275 return reg;
9276 }
9277
9278 static rtx_insn *
9279 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9280 {
9281 rtx label, labelno = NULL_RTX, sum;
9282
9283 gcc_assert (reloc != TLS_DESCSEQ);
9284 start_sequence ();
9285
9286 if (TARGET_FDPIC)
9287 {
9288 sum = gen_rtx_UNSPEC (Pmode,
9289 gen_rtvec (2, x, GEN_INT (reloc)),
9290 UNSPEC_TLS);
9291 }
9292 else
9293 {
9294 labelno = GEN_INT (pic_labelno++);
9295 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9296 label = gen_rtx_CONST (VOIDmode, label);
9297
9298 sum = gen_rtx_UNSPEC (Pmode,
9299 gen_rtvec (4, x, GEN_INT (reloc), label,
9300 GEN_INT (TARGET_ARM ? 8 : 4)),
9301 UNSPEC_TLS);
9302 }
9303 reg = load_tls_operand (sum, reg);
9304
9305 if (TARGET_FDPIC)
9306 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9307 else if (TARGET_ARM)
9308 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9309 else
9310 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9311
9312 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9313 LCT_PURE, /* LCT_CONST? */
9314 Pmode, reg, Pmode);
9315
9316 rtx_insn *insns = get_insns ();
9317 end_sequence ();
9318
9319 return insns;
9320 }
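/* Roughly speaking (a hedged summary of the sequence built above): the
   non-FDPIC path loads a pc-relative TLS offset for X, typically from the
   literal pool, adds the PC at a local label (dot + 8 in ARM state,
   dot + 4 in Thumb), and then calls __tls_get_addr with that address as the
   single argument, leaving the call's value in *VALUEP.  */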
9321
9322 static rtx
9323 arm_tls_descseq_addr (rtx x, rtx reg)
9324 {
9325 rtx labelno = GEN_INT (pic_labelno++);
9326 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9327 rtx sum = gen_rtx_UNSPEC (Pmode,
9328 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9329 gen_rtx_CONST (VOIDmode, label),
9330 GEN_INT (!TARGET_ARM)),
9331 UNSPEC_TLS);
9332 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9333
9334 emit_insn (gen_tlscall (x, labelno));
9335 if (!reg)
9336 reg = gen_reg_rtx (SImode);
9337 else
9338 gcc_assert (REGNO (reg) != R0_REGNUM);
9339
9340 emit_move_insn (reg, reg0);
9341
9342 return reg;
9343 }
9344
9345
9346 rtx
9347 legitimize_tls_address (rtx x, rtx reg)
9348 {
9349 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9350 rtx_insn *insns;
9351 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9352
9353 switch (model)
9354 {
9355 case TLS_MODEL_GLOBAL_DYNAMIC:
9356 if (TARGET_GNU2_TLS)
9357 {
9358 gcc_assert (!TARGET_FDPIC);
9359
9360 reg = arm_tls_descseq_addr (x, reg);
9361
9362 tp = arm_load_tp (NULL_RTX);
9363
9364 dest = gen_rtx_PLUS (Pmode, tp, reg);
9365 }
9366 else
9367 {
9368 /* Original scheme */
9369 if (TARGET_FDPIC)
9370 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9371 else
9372 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9373 dest = gen_reg_rtx (Pmode);
9374 emit_libcall_block (insns, dest, ret, x);
9375 }
9376 return dest;
9377
9378 case TLS_MODEL_LOCAL_DYNAMIC:
9379 if (TARGET_GNU2_TLS)
9380 {
9381 gcc_assert (!TARGET_FDPIC);
9382
9383 reg = arm_tls_descseq_addr (x, reg);
9384
9385 tp = arm_load_tp (NULL_RTX);
9386
9387 dest = gen_rtx_PLUS (Pmode, tp, reg);
9388 }
9389 else
9390 {
9391 if (TARGET_FDPIC)
9392 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9393 else
9394 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9395
9396 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9397 share the LDM result with other LD model accesses. */
9398 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9399 UNSPEC_TLS);
9400 dest = gen_reg_rtx (Pmode);
9401 emit_libcall_block (insns, dest, ret, eqv);
9402
9403 /* Load the addend. */
9404 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9405 GEN_INT (TLS_LDO32)),
9406 UNSPEC_TLS);
9407 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9408 dest = gen_rtx_PLUS (Pmode, dest, addend);
9409 }
9410 return dest;
9411
9412 case TLS_MODEL_INITIAL_EXEC:
9413 if (TARGET_FDPIC)
9414 {
9415 sum = gen_rtx_UNSPEC (Pmode,
9416 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9417 UNSPEC_TLS);
9418 reg = load_tls_operand (sum, reg);
9419 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9420 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9421 }
9422 else
9423 {
9424 labelno = GEN_INT (pic_labelno++);
9425 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9426 label = gen_rtx_CONST (VOIDmode, label);
9427 sum = gen_rtx_UNSPEC (Pmode,
9428 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9429 GEN_INT (TARGET_ARM ? 8 : 4)),
9430 UNSPEC_TLS);
9431 reg = load_tls_operand (sum, reg);
9432
9433 if (TARGET_ARM)
9434 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9435 else if (TARGET_THUMB2)
9436 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9437 else
9438 {
9439 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9440 emit_move_insn (reg, gen_const_mem (SImode, reg));
9441 }
9442 }
9443
9444 tp = arm_load_tp (NULL_RTX);
9445
9446 return gen_rtx_PLUS (Pmode, tp, reg);
9447
9448 case TLS_MODEL_LOCAL_EXEC:
9449 tp = arm_load_tp (NULL_RTX);
9450
9451 reg = gen_rtx_UNSPEC (Pmode,
9452 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9453 UNSPEC_TLS);
9454 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9455
9456 return gen_rtx_PLUS (Pmode, tp, reg);
9457
9458 default:
9459 abort ();
9460 }
9461 }
9462
9463 /* Try machine-dependent ways of modifying an illegitimate address
9464 to be legitimate. If we find one, return the new, valid address. */
9465 rtx
9466 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9467 {
9468 if (arm_tls_referenced_p (x))
9469 {
9470 rtx addend = NULL;
9471
9472 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9473 {
9474 addend = XEXP (XEXP (x, 0), 1);
9475 x = XEXP (XEXP (x, 0), 0);
9476 }
9477
9478 if (!SYMBOL_REF_P (x))
9479 return x;
9480
9481 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9482
9483 x = legitimize_tls_address (x, NULL_RTX);
9484
9485 if (addend)
9486 {
9487 x = gen_rtx_PLUS (SImode, x, addend);
9488 orig_x = x;
9489 }
9490 else
9491 return x;
9492 }
9493
9494 if (TARGET_THUMB1)
9495 return thumb_legitimize_address (x, orig_x, mode);
9496
9497 if (GET_CODE (x) == PLUS)
9498 {
9499 rtx xop0 = XEXP (x, 0);
9500 rtx xop1 = XEXP (x, 1);
9501
9502 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9503 xop0 = force_reg (SImode, xop0);
9504
9505 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9506 && !symbol_mentioned_p (xop1))
9507 xop1 = force_reg (SImode, xop1);
9508
9509 if (ARM_BASE_REGISTER_RTX_P (xop0)
9510 && CONST_INT_P (xop1))
9511 {
9512 HOST_WIDE_INT n, low_n;
9513 rtx base_reg, val;
9514 n = INTVAL (xop1);
9515
9516 /* VFP addressing modes actually allow greater offsets, but for
9517 now we just stick with the lowest common denominator. */
9518 if (mode == DImode || mode == DFmode)
9519 {
9520 low_n = n & 0x0f;
9521 n &= ~0x0f;
9522 if (low_n > 4)
9523 {
9524 n += 16;
9525 low_n -= 16;
9526 }
9527 }
9528 else
9529 {
9530 low_n = ((mode) == TImode ? 0
9531 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9532 n -= low_n;
9533 }
9534
9535 base_reg = gen_reg_rtx (SImode);
9536 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9537 emit_move_insn (base_reg, val);
9538 x = plus_constant (Pmode, base_reg, low_n);
9539 }
9540 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9541 x = gen_rtx_PLUS (SImode, xop0, xop1);
9542 }
9543
9544 /* XXX We don't allow MINUS any more -- see comment in
9545 arm_legitimate_address_outer_p (). */
9546 else if (GET_CODE (x) == MINUS)
9547 {
9548 rtx xop0 = XEXP (x, 0);
9549 rtx xop1 = XEXP (x, 1);
9550
9551 if (CONSTANT_P (xop0))
9552 xop0 = force_reg (SImode, xop0);
9553
9554 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9555 xop1 = force_reg (SImode, xop1);
9556
9557 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9558 x = gen_rtx_MINUS (SImode, xop0, xop1);
9559 }
9560
9561 /* Make sure to take full advantage of the pre-indexed addressing mode
9562 with absolute addresses, which often allows the base register to
9563 be factored out across multiple adjacent memory references, and might
9564 even allow the minipool to be avoided entirely. */
9565 else if (CONST_INT_P (x) && optimize > 0)
9566 {
9567 unsigned int bits;
9568 HOST_WIDE_INT mask, base, index;
9569 rtx base_reg;
9570
9571 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9572 only use an 8-bit index. So let's use a 12-bit index for
9573 SImode only and hope that arm_gen_constant will enable LDRB
9574 to use more bits. */
9575 bits = (mode == SImode) ? 12 : 8;
9576 mask = (1 << bits) - 1;
9577 base = INTVAL (x) & ~mask;
9578 index = INTVAL (x) & mask;
9579 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9580 {
9581 /* It'll most probably be more efficient to generate the
9582 base with more bits set and use a negative index instead.
9583 Don't do this for Thumb as negative offsets are much more
9584 limited. */
9585 base |= mask;
9586 index -= mask;
9587 }
9588 base_reg = force_reg (SImode, GEN_INT (base));
9589 x = plus_constant (Pmode, base_reg, index);
9590 }
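/* Worked example for the constant-address split above (illustrative, with a
   made-up address): loading an SImode value from absolute address 0xFFFFF008
   gives base 0xFFFFF000 and index 8; since that base has many bits set, it
   is flipped to 0xFFFFFFFF (a single mvn #0) with index 8 - 0xFFF = -4087,
   which still fits the +/-4095 ldr offset range.  */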
9591
9592 if (flag_pic)
9593 {
9594 /* We need to find and carefully transform any SYMBOL and LABEL
9595 references; so go back to the original address expression. */
9596 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9597 false /*compute_now*/);
9598
9599 if (new_x != orig_x)
9600 x = new_x;
9601 }
9602
9603 return x;
9604 }
9605
9606
9607 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9608 to be legitimate. If we find one, return the new, valid address. */
9609 rtx
9610 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9611 {
9612 if (GET_CODE (x) == PLUS
9613 && CONST_INT_P (XEXP (x, 1))
9614 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9615 || INTVAL (XEXP (x, 1)) < 0))
9616 {
9617 rtx xop0 = XEXP (x, 0);
9618 rtx xop1 = XEXP (x, 1);
9619 HOST_WIDE_INT offset = INTVAL (xop1);
9620
9621 /* Try to fold the offset into a biasing of the base register and
9622 then offsetting that. Don't do this when optimizing for space
9623 since it can cause too many CSEs. */
9624 if (optimize_size && offset >= 0
9625 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9626 {
9627 HOST_WIDE_INT delta;
9628
9629 if (offset >= 256)
9630 delta = offset - (256 - GET_MODE_SIZE (mode));
9631 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9632 delta = 31 * GET_MODE_SIZE (mode);
9633 else
9634 delta = offset & (~31 * GET_MODE_SIZE (mode));
9635
9636 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9637 NULL_RTX);
9638 x = plus_constant (Pmode, xop0, delta);
9639 }
9640 else if (offset < 0 && offset > -256)
9641 /* Small negative offsets are best done with a subtract before the
9642 dereference, since forcing these into a register normally takes two
9643 instructions. */
9644 x = force_operand (x, NULL_RTX);
9645 else
9646 {
9647 /* For the remaining cases, force the constant into a register. */
9648 xop1 = force_reg (SImode, xop1);
9649 x = gen_rtx_PLUS (SImode, xop0, xop1);
9650 }
9651 }
9652 else if (GET_CODE (x) == PLUS
9653 && s_register_operand (XEXP (x, 1), SImode)
9654 && !s_register_operand (XEXP (x, 0), SImode))
9655 {
9656 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9657
9658 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9659 }
9660
9661 if (flag_pic)
9662 {
9663 /* We need to find and carefully transform any SYMBOL and LABEL
9664 references; so go back to the original address expression. */
9665 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9666 false /*compute_now*/);
9667
9668 if (new_x != orig_x)
9669 x = new_x;
9670 }
9671
9672 return x;
9673 }
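/* Worked example for the offset-folding branch above (hedged; the sizes and
   offsets are chosen purely for illustration): an SImode access at
   base + 260 is out of the direct 0..124 range, so under that path the code
   rebiases the base by 260 - delta = 252 and keeps delta = 8 as the
   remaining in-range offset, i.e. (base + 252) + 8.  */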
9674
9675 /* Return TRUE if X contains any TLS symbol references. */
9676
9677 bool
9678 arm_tls_referenced_p (rtx x)
9679 {
9680 if (! TARGET_HAVE_TLS)
9681 return false;
9682
9683 subrtx_iterator::array_type array;
9684 FOR_EACH_SUBRTX (iter, array, x, ALL)
9685 {
9686 const_rtx x = *iter;
9687 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9688 {
9689 /* ARM currently does not provide relocations to encode TLS variables
9690 into AArch32 instructions, only data, so there is currently no way
9691 to implement these if a literal pool is disabled. */
9692 if (arm_disable_literal_pool)
9693 sorry ("accessing thread-local storage is not currently supported "
9694 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9695
9696 return true;
9697 }
9698
9699 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9700 TLS offsets, not real symbol references. */
9701 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9702 iter.skip_subrtxes ();
9703 }
9704 return false;
9705 }
9706
9707 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9708
9709 On the ARM, allow any integer (invalid ones are removed later by insn
9710 patterns), nice doubles and symbol_refs which refer to the function's
9711 constant pool XXX.
9712
9713 When generating PIC, allow anything. */
9714
9715 static bool
9716 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9717 {
9718 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9719 return false;
9720
9721 return flag_pic || !label_mentioned_p (x);
9722 }
9723
9724 static bool
9725 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9726 {
9727 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9728 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9729 for ARMv8-M Baseline or later the result is valid. */
9730 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9731 x = XEXP (x, 0);
9732
9733 return (CONST_INT_P (x)
9734 || CONST_DOUBLE_P (x)
9735 || CONSTANT_ADDRESS_P (x)
9736 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9737 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9738 we build the symbol address with upper/lower
9739 relocations. */
9740 || (TARGET_THUMB1
9741 && !label_mentioned_p (x)
9742 && arm_valid_symbolic_address_p (x)
9743 && arm_disable_literal_pool)
9744 || flag_pic);
9745 }
9746
9747 static bool
9748 arm_legitimate_constant_p (machine_mode mode, rtx x)
9749 {
9750 return (!arm_cannot_force_const_mem (mode, x)
9751 && (TARGET_32BIT
9752 ? arm_legitimate_constant_p_1 (mode, x)
9753 : thumb_legitimate_constant_p (mode, x)));
9754 }
9755
9756 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9757
9758 static bool
9759 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9760 {
9761 rtx base, offset;
9762 split_const (x, &base, &offset);
9763
9764 if (SYMBOL_REF_P (base))
9765 {
9766 /* Function symbols cannot have an offset due to the Thumb bit. */
9767 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9768 && INTVAL (offset) != 0)
9769 return true;
9770
9771 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9772 && !offset_within_block_p (base, INTVAL (offset)))
9773 return true;
9774 }
9775 return arm_tls_referenced_p (x);
9776 }
9777 \f
9778 #define REG_OR_SUBREG_REG(X) \
9779 (REG_P (X) \
9780 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9781
9782 #define REG_OR_SUBREG_RTX(X) \
9783 (REG_P (X) ? (X) : SUBREG_REG (X))
9784
9785 static inline int
9786 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9787 {
9788 machine_mode mode = GET_MODE (x);
9789 int total, words;
9790
9791 switch (code)
9792 {
9793 case ASHIFT:
9794 case ASHIFTRT:
9795 case LSHIFTRT:
9796 case ROTATERT:
9797 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9798
9799 case PLUS:
9800 case MINUS:
9801 case COMPARE:
9802 case NEG:
9803 case NOT:
9804 return COSTS_N_INSNS (1);
9805
9806 case MULT:
9807 if (arm_arch6m && arm_m_profile_small_mul)
9808 return COSTS_N_INSNS (32);
9809
9810 if (CONST_INT_P (XEXP (x, 1)))
9811 {
9812 int cycles = 0;
9813 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9814
9815 while (i)
9816 {
9817 i >>= 2;
9818 cycles++;
9819 }
9820 return COSTS_N_INSNS (2) + cycles;
9821 }
9822 return COSTS_N_INSNS (1) + 16;
9823
9824 case SET:
9825 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9826 the mode. */
9827 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9828 return (COSTS_N_INSNS (words)
9829 + 4 * ((MEM_P (SET_SRC (x)))
9830 + MEM_P (SET_DEST (x))));
9831
9832 case CONST_INT:
9833 if (outer == SET)
9834 {
9835 if (UINTVAL (x) < 256
9836 /* 16-bit constant. */
9837 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9838 return 0;
9839 if (thumb_shiftable_const (INTVAL (x)))
9840 return COSTS_N_INSNS (2);
9841 return arm_disable_literal_pool
9842 ? COSTS_N_INSNS (8)
9843 : COSTS_N_INSNS (3);
9844 }
9845 else if ((outer == PLUS || outer == COMPARE)
9846 && INTVAL (x) < 256 && INTVAL (x) > -256)
9847 return 0;
9848 else if ((outer == IOR || outer == XOR || outer == AND)
9849 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9850 return COSTS_N_INSNS (1);
9851 else if (outer == AND)
9852 {
9853 int i;
9854 /* This duplicates the tests in the andsi3 expander. */
9855 for (i = 9; i <= 31; i++)
9856 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9857 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9858 return COSTS_N_INSNS (2);
9859 }
9860 else if (outer == ASHIFT || outer == ASHIFTRT
9861 || outer == LSHIFTRT)
9862 return 0;
9863 return COSTS_N_INSNS (2);
9864
9865 case CONST:
9866 case CONST_DOUBLE:
9867 case LABEL_REF:
9868 case SYMBOL_REF:
9869 return COSTS_N_INSNS (3);
9870
9871 case UDIV:
9872 case UMOD:
9873 case DIV:
9874 case MOD:
9875 return 100;
9876
9877 case TRUNCATE:
9878 return 99;
9879
9880 case AND:
9881 case XOR:
9882 case IOR:
9883 /* XXX guess. */
9884 return 8;
9885
9886 case MEM:
9887 /* XXX another guess. */
9888 /* Memory costs quite a lot for the first word, but subsequent words
9889 load at the equivalent of a single insn each. */
9890 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9891 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9892 ? 4 : 0));
9893
9894 case IF_THEN_ELSE:
9895 /* XXX a guess. */
9896 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9897 return 14;
9898 return 2;
9899
9900 case SIGN_EXTEND:
9901 case ZERO_EXTEND:
9902 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9903 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9904
9905 if (mode == SImode)
9906 return total;
9907
9908 if (arm_arch6)
9909 return total + COSTS_N_INSNS (1);
9910
9911 /* Assume a two-shift sequence. Increase the cost slightly so
9912 we prefer actual shifts over an extend operation. */
9913 return total + 1 + COSTS_N_INSNS (2);
9914
9915 default:
9916 return 99;
9917 }
9918 }
9919
9920 /* Estimates the size cost of thumb1 instructions.
9921 For now most of the code is copied from thumb1_rtx_costs. We need more
9922 fine-grained tuning when we have more related test cases. */
9923 static inline int
9924 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9925 {
9926 machine_mode mode = GET_MODE (x);
9927 int words, cost;
9928
9929 switch (code)
9930 {
9931 case ASHIFT:
9932 case ASHIFTRT:
9933 case LSHIFTRT:
9934 case ROTATERT:
9935 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9936
9937 case PLUS:
9938 case MINUS:
9939 /* Thumb-1 needs two instructions to implement shiftadd/shiftsub0/shiftsub1
9940 patterns defined by RTL expansion, especially for the expansion of
9941 multiplication. */
9942 if ((GET_CODE (XEXP (x, 0)) == MULT
9943 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9944 || (GET_CODE (XEXP (x, 1)) == MULT
9945 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9946 return COSTS_N_INSNS (2);
9947 /* Fall through. */
9948 case COMPARE:
9949 case NEG:
9950 case NOT:
9951 return COSTS_N_INSNS (1);
9952
9953 case MULT:
9954 if (CONST_INT_P (XEXP (x, 1)))
9955 {
9956 /* The Thumb-1 mul instruction can't operate on a constant. We must load
9957 it into a register first. */
9958 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9959 /* For targets which have a very small and high-latency multiply
9960 unit, we prefer to synthesize the mult with up to 5 instructions,
9961 giving a good balance between size and performance. */
9962 if (arm_arch6m && arm_m_profile_small_mul)
9963 return COSTS_N_INSNS (5);
9964 else
9965 return COSTS_N_INSNS (1) + const_size;
9966 }
9967 return COSTS_N_INSNS (1);
9968
9969 case SET:
9970 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9971 the mode. */
9972 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9973 cost = COSTS_N_INSNS (words);
9974 if (satisfies_constraint_J (SET_SRC (x))
9975 || satisfies_constraint_K (SET_SRC (x))
9976 /* Too big an immediate for a 2-byte mov, so MOVT is used. */
9977 || (CONST_INT_P (SET_SRC (x))
9978 && UINTVAL (SET_SRC (x)) >= 256
9979 && TARGET_HAVE_MOVT
9980 && satisfies_constraint_j (SET_SRC (x)))
9981 /* thumb1_movdi_insn. */
9982 || ((words > 1) && MEM_P (SET_SRC (x))))
9983 cost += COSTS_N_INSNS (1);
9984 return cost;
9985
9986 case CONST_INT:
9987 if (outer == SET)
9988 {
9989 if (UINTVAL (x) < 256)
9990 return COSTS_N_INSNS (1);
9991 /* movw is 4 bytes long. */
9992 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9993 return COSTS_N_INSNS (2);
9994 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9995 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9996 return COSTS_N_INSNS (2);
9997 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9998 if (thumb_shiftable_const (INTVAL (x)))
9999 return COSTS_N_INSNS (2);
10000 return arm_disable_literal_pool
10001 ? COSTS_N_INSNS (8)
10002 : COSTS_N_INSNS (3);
10003 }
10004 else if ((outer == PLUS || outer == COMPARE)
10005 && INTVAL (x) < 256 && INTVAL (x) > -256)
10006 return 0;
10007 else if ((outer == IOR || outer == XOR || outer == AND)
10008 && INTVAL (x) < 256 && INTVAL (x) >= -256)
10009 return COSTS_N_INSNS (1);
10010 else if (outer == AND)
10011 {
10012 int i;
10013 /* This duplicates the tests in the andsi3 expander. */
10014 for (i = 9; i <= 31; i++)
10015 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
10016 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
10017 return COSTS_N_INSNS (2);
10018 }
10019 else if (outer == ASHIFT || outer == ASHIFTRT
10020 || outer == LSHIFTRT)
10021 return 0;
10022 return COSTS_N_INSNS (2);
10023
10024 case CONST:
10025 case CONST_DOUBLE:
10026 case LABEL_REF:
10027 case SYMBOL_REF:
10028 return COSTS_N_INSNS (3);
10029
10030 case UDIV:
10031 case UMOD:
10032 case DIV:
10033 case MOD:
10034 return 100;
10035
10036 case TRUNCATE:
10037 return 99;
10038
10039 case AND:
10040 case XOR:
10041 case IOR:
10042 return COSTS_N_INSNS (1);
10043
10044 case MEM:
10045 return (COSTS_N_INSNS (1)
10046 + COSTS_N_INSNS (1)
10047 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10048 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10049 ? COSTS_N_INSNS (1) : 0));
10050
10051 case IF_THEN_ELSE:
10052 /* XXX a guess. */
10053 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10054 return 14;
10055 return 2;
10056
10057 case ZERO_EXTEND:
10058 /* XXX still guessing. */
10059 switch (GET_MODE (XEXP (x, 0)))
10060 {
10061 case E_QImode:
10062 return (1 + (mode == DImode ? 4 : 0)
10063 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10064
10065 case E_HImode:
10066 return (4 + (mode == DImode ? 4 : 0)
10067 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10068
10069 case E_SImode:
10070 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10071
10072 default:
10073 return 99;
10074 }
10075
10076 default:
10077 return 99;
10078 }
10079 }
10080
10081 /* Helper function for arm_rtx_costs. If one operand of OP, a
10082 PLUS, adds the carry flag, then return the other operand. If
10083 neither is a carry, return OP unchanged. */
10084 static rtx
10085 strip_carry_operation (rtx op)
10086 {
10087 gcc_assert (GET_CODE (op) == PLUS);
10088 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10089 return XEXP (op, 1);
10090 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10091 return XEXP (op, 0);
10092 return op;
10093 }
10094
10095 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10096 operand, then return the operand that is being shifted. If the shift
10097 is not by a constant, then set SHIFT_REG to point to the operand.
10098 Return NULL if OP is not a shifter operand. */
10099 static rtx
10100 shifter_op_p (rtx op, rtx *shift_reg)
10101 {
10102 enum rtx_code code = GET_CODE (op);
10103
10104 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10105 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10106 return XEXP (op, 0);
10107 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10108 return XEXP (op, 0);
10109 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10110 || code == ASHIFTRT)
10111 {
10112 if (!CONST_INT_P (XEXP (op, 1)))
10113 *shift_reg = XEXP (op, 1);
10114 return XEXP (op, 0);
10115 }
10116
10117 return NULL;
10118 }
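/* For example, (mult (reg Rm) (const_int 4)) is accepted here and treated as
   Rm shifted left by two, since combine and friends sometimes represent such
   a shift as a MULT by a power of two.  */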
10119
10120 static bool
10121 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10122 {
10123 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10124 rtx_code code = GET_CODE (x);
10125 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10126
10127 switch (XINT (x, 1))
10128 {
10129 case UNSPEC_UNALIGNED_LOAD:
10130 /* We can only do unaligned loads into the integer unit, and we can't
10131 use LDM or LDRD. */
10132 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10133 if (speed_p)
10134 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10135 + extra_cost->ldst.load_unaligned);
10136
10137 #ifdef NOT_YET
10138 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10139 ADDR_SPACE_GENERIC, speed_p);
10140 #endif
10141 return true;
10142
10143 case UNSPEC_UNALIGNED_STORE:
10144 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10145 if (speed_p)
10146 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10147 + extra_cost->ldst.store_unaligned);
10148
10149 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10150 #ifdef NOT_YET
10151 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10152 ADDR_SPACE_GENERIC, speed_p);
10153 #endif
10154 return true;
10155
10156 case UNSPEC_VRINTZ:
10157 case UNSPEC_VRINTP:
10158 case UNSPEC_VRINTM:
10159 case UNSPEC_VRINTR:
10160 case UNSPEC_VRINTX:
10161 case UNSPEC_VRINTA:
10162 if (speed_p)
10163 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10164
10165 return true;
10166 default:
10167 *cost = COSTS_N_INSNS (2);
10168 break;
10169 }
10170 return true;
10171 }
10172
10173 /* Cost of a libcall. We assume one insn per argument, an amount for the
10174 call (one insn for -Os) and then one for processing the result. */
10175 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
10176
10177 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10178 do \
10179 { \
10180 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10181 if (shift_op != NULL \
10182 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10183 { \
10184 if (shift_reg) \
10185 { \
10186 if (speed_p) \
10187 *cost += extra_cost->alu.arith_shift_reg; \
10188 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10189 ASHIFT, 1, speed_p); \
10190 } \
10191 else if (speed_p) \
10192 *cost += extra_cost->alu.arith_shift; \
10193 \
10194 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10195 ASHIFT, 0, speed_p) \
10196 + rtx_cost (XEXP (x, 1 - IDX), \
10197 GET_MODE (shift_op), \
10198 OP, 1, speed_p)); \
10199 return true; \
10200 } \
10201 } \
10202 while (0)
10203
10204 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10205 considering the costs of the addressing mode and memory access
10206 separately. */
10207 static bool
10208 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10209 int *cost, bool speed_p)
10210 {
10211 machine_mode mode = GET_MODE (x);
10212
10213 *cost = COSTS_N_INSNS (1);
10214
10215 if (flag_pic
10216 && GET_CODE (XEXP (x, 0)) == PLUS
10217 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10218 /* This will be split into two instructions. Add the cost of the
10219 additional instruction here. The cost of the memory access is computed
10220 below. See arm.md:calculate_pic_address. */
10221 *cost += COSTS_N_INSNS (1);
10222
10223 /* Calculate cost of the addressing mode. */
10224 if (speed_p)
10225 {
10226 arm_addr_mode_op op_type;
10227 switch (GET_CODE (XEXP (x, 0)))
10228 {
10229 default:
10230 case REG:
10231 op_type = AMO_DEFAULT;
10232 break;
10233 case MINUS:
10234 /* MINUS does not appear in RTL, but the architecture supports it,
10235 so handle this case defensively. */
10236 /* fall through */
10237 case PLUS:
10238 op_type = AMO_NO_WB;
10239 break;
10240 case PRE_INC:
10241 case PRE_DEC:
10242 case POST_INC:
10243 case POST_DEC:
10244 case PRE_MODIFY:
10245 case POST_MODIFY:
10246 op_type = AMO_WB;
10247 break;
10248 }
10249
10250 if (VECTOR_MODE_P (mode))
10251 *cost += current_tune->addr_mode_costs->vector[op_type];
10252 else if (FLOAT_MODE_P (mode))
10253 *cost += current_tune->addr_mode_costs->fp[op_type];
10254 else
10255 *cost += current_tune->addr_mode_costs->integer[op_type];
10256 }
10257
10258 /* Calculate cost of memory access. */
10259 if (speed_p)
10260 {
10261 if (FLOAT_MODE_P (mode))
10262 {
10263 if (GET_MODE_SIZE (mode) == 8)
10264 *cost += extra_cost->ldst.loadd;
10265 else
10266 *cost += extra_cost->ldst.loadf;
10267 }
10268 else if (VECTOR_MODE_P (mode))
10269 *cost += extra_cost->ldst.loadv;
10270 else
10271 {
10272 /* Integer modes */
10273 if (GET_MODE_SIZE (mode) == 8)
10274 *cost += extra_cost->ldst.ldrd;
10275 else
10276 *cost += extra_cost->ldst.load;
10277 }
10278 }
10279
10280 return true;
10281 }
10282
10283 /* Helper for arm_bfi_p. */
10284 static bool
10285 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10286 {
10287 unsigned HOST_WIDE_INT const1;
10288 unsigned HOST_WIDE_INT const2 = 0;
10289
10290 if (!CONST_INT_P (XEXP (op0, 1)))
10291 return false;
10292
10293 const1 = UINTVAL (XEXP (op0, 1));
10294 if (!CONST_INT_P (XEXP (op1, 1))
10295 || ~UINTVAL (XEXP (op1, 1)) != const1)
10296 return false;
10297
10298 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10299 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10300 {
10301 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10302 *sub0 = XEXP (XEXP (op0, 0), 0);
10303 }
10304 else
10305 *sub0 = XEXP (op0, 0);
10306
10307 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10308 return false;
10309
10310 *sub1 = XEXP (op1, 0);
10311 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10312 }
10313
10314 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10315 format looks something like:
10316
10317 (IOR (AND (reg1) (~const1))
10318 (AND (ASHIFT (reg2) (const2))
10319 (const1)))
10320
10321 where const1 is a consecutive sequence of 1-bits with the
10322 least-significant non-zero bit starting at bit position const2. If
10323 const2 is zero, then the shift will not appear at all, due to
10324 canonicalization. The two arms of the IOR expression may be
10325 flipped. */
10326 static bool
10327 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10328 {
10329 if (GET_CODE (x) != IOR)
10330 return false;
10331 if (GET_CODE (XEXP (x, 0)) != AND
10332 || GET_CODE (XEXP (x, 1)) != AND)
10333 return false;
10334 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10335 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10336 }
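/* A concrete instance (illustrative only): with const1 == 0xff00 and
   const2 == 8, the expression

     (ior (and (reg r1) (const_int 0xffff00ff))   ;; the SImode ~0xff00
          (and (ashift (reg r2) (const_int 8)) (const_int 0xff00)))

   inserts the low byte of r2 into bits 8..15 of r1, which is the kind of
   operation a single BFI can perform.  */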
10337
10338 /* RTX costs. Make an estimate of the cost of executing the operation
10339 X, which is contained within an operation with code OUTER_CODE.
10340 SPEED_P indicates whether the cost desired is the performance cost,
10341 or the size cost. The estimate is stored in COST and the return
10342 value is TRUE if the cost calculation is final, or FALSE if the
10343 caller should recurse through the operands of X to add additional
10344 costs.
10345
10346 We currently make no attempt to model the size savings of Thumb-2
10347 16-bit instructions. At the normal points in compilation where
10348 this code is called we have no measure of whether the condition
10349 flags are live or not, and thus no realistic way to determine what
10350 the size will eventually be. */
10351 static bool
10352 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10353 const struct cpu_cost_table *extra_cost,
10354 int *cost, bool speed_p)
10355 {
10356 machine_mode mode = GET_MODE (x);
10357
10358 *cost = COSTS_N_INSNS (1);
10359
10360 if (TARGET_THUMB1)
10361 {
10362 if (speed_p)
10363 *cost = thumb1_rtx_costs (x, code, outer_code);
10364 else
10365 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10366 return true;
10367 }
10368
10369 switch (code)
10370 {
10371 case SET:
10372 *cost = 0;
10373 /* SET RTXs don't have a mode so we get it from the destination. */
10374 mode = GET_MODE (SET_DEST (x));
10375
10376 if (REG_P (SET_SRC (x))
10377 && REG_P (SET_DEST (x)))
10378 {
10379 /* Assume that most copies can be done with a single insn,
10380 unless we don't have HW FP, in which case everything
10381 larger than word mode will require two insns. */
10382 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10383 && GET_MODE_SIZE (mode) > 4)
10384 || mode == DImode)
10385 ? 2 : 1);
10386 /* Conditional register moves can be encoded
10387 in 16 bits in Thumb mode. */
10388 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10389 *cost >>= 1;
10390
10391 return true;
10392 }
10393
10394 if (CONST_INT_P (SET_SRC (x)))
10395 {
10396 /* Handle CONST_INT here, since the value doesn't have a mode
10397 and we would otherwise be unable to work out the true cost. */
10398 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10399 0, speed_p);
10400 outer_code = SET;
10401 /* Slightly lower the cost of setting a core reg to a constant.
10402 This helps break up chains and allows for better scheduling. */
10403 if (REG_P (SET_DEST (x))
10404 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10405 *cost -= 1;
10406 x = SET_SRC (x);
10407 /* Immediate moves with an immediate in the range [0, 255] can be
10408 encoded in 16 bits in Thumb mode. */
10409 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10410 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10411 *cost >>= 1;
10412 goto const_int_cost;
10413 }
10414
10415 return false;
10416
10417 case MEM:
10418 return arm_mem_costs (x, extra_cost, cost, speed_p);
10419
10420 case PARALLEL:
10421 {
10422 /* Calculations of LDM costs are complex. We assume an initial cost
10423 (ldm_1st) which will load the number of registers mentioned in
10424 ldm_regs_per_insn_1st registers; then each additional
10425 ldm_regs_per_insn_subsequent registers cost one more insn. The
10426 formula for N regs is thus:
10427
10428 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10429 + ldm_regs_per_insn_subsequent - 1)
10430 / ldm_regs_per_insn_subsequent).
10431
10432 Additional costs may also be added for addressing. A similar
10433 formula is used for STM. */
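        /* A worked example with made-up tuning values (not taken from any
           real cost table): with ldm_regs_per_insn_1st == 3 and
           ldm_regs_per_insn_subsequent == 2, an 8-register LDM costs
           ldm_1st + COSTS_N_INSNS ((MAX (8 - 3, 0) + 2 - 1) / 2)
           = ldm_1st + COSTS_N_INSNS (3).  */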
10434
10435 bool is_ldm = load_multiple_operation (x, SImode);
10436 bool is_stm = store_multiple_operation (x, SImode);
10437
10438 if (is_ldm || is_stm)
10439 {
10440 if (speed_p)
10441 {
10442 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10443 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10444 ? extra_cost->ldst.ldm_regs_per_insn_1st
10445 : extra_cost->ldst.stm_regs_per_insn_1st;
10446 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10447 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10448 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10449
10450 *cost += regs_per_insn_1st
10451 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10452 + regs_per_insn_sub - 1)
10453 / regs_per_insn_sub);
10454 return true;
10455 }
10456
10457 }
10458 return false;
10459 }
10460 case DIV:
10461 case UDIV:
10462 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10463 && (mode == SFmode || !TARGET_VFP_SINGLE))
10464 *cost += COSTS_N_INSNS (speed_p
10465 ? extra_cost->fp[mode != SFmode].div : 0);
10466 else if (mode == SImode && TARGET_IDIV)
10467 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10468 else
10469 *cost = LIBCALL_COST (2);
10470
10471 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10472 are possible, udiv is preferred. */
10473 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10474 return false; /* All arguments must be in registers. */
10475
10476 case MOD:
10477 /* MOD by a power of 2 can be expanded as:
10478 rsbs r1, r0, #0
10479 and r0, r0, #(n - 1)
10480 and r1, r1, #(n - 1)
10481 rsbpl r0, r1, #0. */
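      /* The sequence above yields a remainder with the sign of the dividend;
         the costing below charges the three additional insns plus, when
         optimizing for speed, two logical operations and one arithmetic
         operation.  */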
10482 if (CONST_INT_P (XEXP (x, 1))
10483 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10484 && mode == SImode)
10485 {
10486 *cost += COSTS_N_INSNS (3);
10487
10488 if (speed_p)
10489 *cost += 2 * extra_cost->alu.logical
10490 + extra_cost->alu.arith;
10491 return true;
10492 }
10493
10494 /* Fall-through. */
10495 case UMOD:
10496 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10497 are possible, udiv is preferred. */
10498 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10499 return false; /* All arguments must be in registers. */
10500
10501 case ROTATE:
10502 if (mode == SImode && REG_P (XEXP (x, 1)))
10503 {
10504 *cost += (COSTS_N_INSNS (1)
10505 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10506 if (speed_p)
10507 *cost += extra_cost->alu.shift_reg;
10508 return true;
10509 }
10510 /* Fall through */
10511 case ROTATERT:
10512 case ASHIFT:
10513 case LSHIFTRT:
10514 case ASHIFTRT:
10515 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10516 {
10517 *cost += (COSTS_N_INSNS (2)
10518 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10519 if (speed_p)
10520 *cost += 2 * extra_cost->alu.shift;
10521 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10522 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10523 *cost += 1;
10524 return true;
10525 }
10526 else if (mode == SImode)
10527 {
10528 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10529 /* Slightly disparage register shifts at -Os, but not by much. */
10530 if (!CONST_INT_P (XEXP (x, 1)))
10531 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10532 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10533 return true;
10534 }
10535 else if (GET_MODE_CLASS (mode) == MODE_INT
10536 && GET_MODE_SIZE (mode) < 4)
10537 {
10538 if (code == ASHIFT)
10539 {
10540 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10541 /* Slightly disparage register shifts at -Os, but not by
10542 much. */
10543 if (!CONST_INT_P (XEXP (x, 1)))
10544 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10545 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10546 }
10547 else if (code == LSHIFTRT || code == ASHIFTRT)
10548 {
10549 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10550 {
10551 /* Can use SBFX/UBFX. */
10552 if (speed_p)
10553 *cost += extra_cost->alu.bfx;
10554 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10555 }
10556 else
10557 {
10558 *cost += COSTS_N_INSNS (1);
10559 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10560 if (speed_p)
10561 {
10562 if (CONST_INT_P (XEXP (x, 1)))
10563 *cost += 2 * extra_cost->alu.shift;
10564 else
10565 *cost += (extra_cost->alu.shift
10566 + extra_cost->alu.shift_reg);
10567 }
10568 else
10569 /* Slightly disparage register shifts. */
10570 *cost += !CONST_INT_P (XEXP (x, 1));
10571 }
10572 }
10573 else /* Rotates. */
10574 {
10575 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10576 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10577 if (speed_p)
10578 {
10579 if (CONST_INT_P (XEXP (x, 1)))
10580 *cost += (2 * extra_cost->alu.shift
10581 + extra_cost->alu.log_shift);
10582 else
10583 *cost += (extra_cost->alu.shift
10584 + extra_cost->alu.shift_reg
10585 + extra_cost->alu.log_shift_reg);
10586 }
10587 }
10588 return true;
10589 }
10590
10591 *cost = LIBCALL_COST (2);
10592 return false;
10593
10594 case BSWAP:
10595 if (arm_arch6)
10596 {
10597 if (mode == SImode)
10598 {
10599 if (speed_p)
10600 *cost += extra_cost->alu.rev;
10601
10602 return false;
10603 }
10604 }
10605 else
10606 {
10607 /* No rev instruction available. Look at arm_legacy_rev
10608 and thumb_legacy_rev for the form of RTL used then. */
10609 if (TARGET_THUMB)
10610 {
10611 *cost += COSTS_N_INSNS (9);
10612
10613 if (speed_p)
10614 {
10615 *cost += 6 * extra_cost->alu.shift;
10616 *cost += 3 * extra_cost->alu.logical;
10617 }
10618 }
10619 else
10620 {
10621 *cost += COSTS_N_INSNS (4);
10622
10623 if (speed_p)
10624 {
10625 *cost += 2 * extra_cost->alu.shift;
10626 *cost += extra_cost->alu.arith_shift;
10627 *cost += 2 * extra_cost->alu.logical;
10628 }
10629 }
10630 return true;
10631 }
10632 return false;
10633
10634 case MINUS:
10635 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10636 && (mode == SFmode || !TARGET_VFP_SINGLE))
10637 {
10638 if (GET_CODE (XEXP (x, 0)) == MULT
10639 || GET_CODE (XEXP (x, 1)) == MULT)
10640 {
10641 rtx mul_op0, mul_op1, sub_op;
10642
10643 if (speed_p)
10644 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10645
10646 if (GET_CODE (XEXP (x, 0)) == MULT)
10647 {
10648 mul_op0 = XEXP (XEXP (x, 0), 0);
10649 mul_op1 = XEXP (XEXP (x, 0), 1);
10650 sub_op = XEXP (x, 1);
10651 }
10652 else
10653 {
10654 mul_op0 = XEXP (XEXP (x, 1), 0);
10655 mul_op1 = XEXP (XEXP (x, 1), 1);
10656 sub_op = XEXP (x, 0);
10657 }
10658
10659 /* The first operand of the multiply may be optionally
10660 negated. */
10661 if (GET_CODE (mul_op0) == NEG)
10662 mul_op0 = XEXP (mul_op0, 0);
10663
10664 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10665 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10666 + rtx_cost (sub_op, mode, code, 0, speed_p));
10667
10668 return true;
10669 }
10670
10671 if (speed_p)
10672 *cost += extra_cost->fp[mode != SFmode].addsub;
10673 return false;
10674 }
10675
10676 if (mode == SImode)
10677 {
10678 rtx shift_by_reg = NULL;
10679 rtx shift_op;
10680 rtx non_shift_op;
10681 rtx op0 = XEXP (x, 0);
10682 rtx op1 = XEXP (x, 1);
10683
10684 /* Factor out any borrow operation. There's more than one way
10685 of expressing this; try to recognize them all. */
10686 if (GET_CODE (op0) == MINUS)
10687 {
10688 if (arm_borrow_operation (op1, SImode))
10689 {
10690 op1 = XEXP (op0, 1);
10691 op0 = XEXP (op0, 0);
10692 }
10693 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10694 op0 = XEXP (op0, 0);
10695 }
10696 else if (GET_CODE (op1) == PLUS
10697 && arm_borrow_operation (XEXP (op1, 0), SImode))
10698 op1 = XEXP (op1, 0);
10699 else if (GET_CODE (op0) == NEG
10700 && arm_borrow_operation (op1, SImode))
10701 {
10702 /* Negate with carry-in. For Thumb2 this is done with
10703 SBC R, X, X lsl #1 (i.e. X - 2X - C), as Thumb lacks the
10704 RSC instruction that exists in Arm mode. */
10705 if (speed_p)
10706 *cost += (TARGET_THUMB2
10707 ? extra_cost->alu.arith_shift
10708 : extra_cost->alu.arith);
10709 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10710 return true;
10711 }
10712 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10713 Note we do mean ~borrow here. */
10714 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10715 {
10716 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10717 return true;
10718 }
10719
10720 shift_op = shifter_op_p (op0, &shift_by_reg);
10721 if (shift_op == NULL)
10722 {
10723 shift_op = shifter_op_p (op1, &shift_by_reg);
10724 non_shift_op = op0;
10725 }
10726 else
10727 non_shift_op = op1;
10728
10729 if (shift_op != NULL)
10730 {
10731 if (shift_by_reg != NULL)
10732 {
10733 if (speed_p)
10734 *cost += extra_cost->alu.arith_shift_reg;
10735 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10736 }
10737 else if (speed_p)
10738 *cost += extra_cost->alu.arith_shift;
10739
10740 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10741 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10742 return true;
10743 }
10744
10745 if (arm_arch_thumb2
10746 && GET_CODE (XEXP (x, 1)) == MULT)
10747 {
10748 /* MLS. */
10749 if (speed_p)
10750 *cost += extra_cost->mult[0].add;
10751 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10752 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10753 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10754 return true;
10755 }
10756
10757 if (CONST_INT_P (op0))
10758 {
10759 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10760 INTVAL (op0), NULL_RTX,
10761 NULL_RTX, 1, 0);
10762 *cost = COSTS_N_INSNS (insns);
10763 if (speed_p)
10764 *cost += insns * extra_cost->alu.arith;
10765 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10766 return true;
10767 }
10768 else if (speed_p)
10769 *cost += extra_cost->alu.arith;
10770
10771 /* Don't recurse as we don't want to cost any borrow that
10772 we've stripped. */
10773 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10774 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10775 return true;
10776 }
10777
10778 if (GET_MODE_CLASS (mode) == MODE_INT
10779 && GET_MODE_SIZE (mode) < 4)
10780 {
10781 rtx shift_op, shift_reg;
10782 shift_reg = NULL;
10783
10784 /* We check both sides of the MINUS for shifter operands since,
10785 unlike PLUS, it's not commutative. */
10786
10787 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10788 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10789
10790 /* Slightly disparage, as we might need to widen the result. */
10791 *cost += 1;
10792 if (speed_p)
10793 *cost += extra_cost->alu.arith;
10794
10795 if (CONST_INT_P (XEXP (x, 0)))
10796 {
10797 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10798 return true;
10799 }
10800
10801 return false;
10802 }
10803
10804 if (mode == DImode)
10805 {
10806 *cost += COSTS_N_INSNS (1);
10807
10808 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10809 {
10810 rtx op1 = XEXP (x, 1);
10811
10812 if (speed_p)
10813 *cost += 2 * extra_cost->alu.arith;
10814
10815 if (GET_CODE (op1) == ZERO_EXTEND)
10816 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10817 0, speed_p);
10818 else
10819 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10820 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10821 0, speed_p);
10822 return true;
10823 }
10824 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10825 {
10826 if (speed_p)
10827 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10828 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10829 0, speed_p)
10830 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10831 return true;
10832 }
10833 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10834 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10835 {
10836 if (speed_p)
10837 *cost += (extra_cost->alu.arith
10838 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10839 ? extra_cost->alu.arith
10840 : extra_cost->alu.arith_shift));
10841 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10842 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10843 GET_CODE (XEXP (x, 1)), 0, speed_p));
10844 return true;
10845 }
10846
10847 if (speed_p)
10848 *cost += 2 * extra_cost->alu.arith;
10849 return false;
10850 }
10851
10852 /* Vector mode? */
10853
10854 *cost = LIBCALL_COST (2);
10855 return false;
10856
10857 case PLUS:
10858 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10859 && (mode == SFmode || !TARGET_VFP_SINGLE))
10860 {
10861 if (GET_CODE (XEXP (x, 0)) == MULT)
10862 {
10863 rtx mul_op0, mul_op1, add_op;
10864
10865 if (speed_p)
10866 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10867
10868 mul_op0 = XEXP (XEXP (x, 0), 0);
10869 mul_op1 = XEXP (XEXP (x, 0), 1);
10870 add_op = XEXP (x, 1);
10871
10872 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10873 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10874 + rtx_cost (add_op, mode, code, 0, speed_p));
10875
10876 return true;
10877 }
10878
10879 if (speed_p)
10880 *cost += extra_cost->fp[mode != SFmode].addsub;
10881 return false;
10882 }
10883 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10884 {
10885 *cost = LIBCALL_COST (2);
10886 return false;
10887 }
10888
10889 /* Narrow modes can be synthesized in SImode, but the range
10890 of useful sub-operations is limited. Check for shift operations
10891 on one of the operands. Only left shifts can be used in the
10892 narrow modes. */
10893 if (GET_MODE_CLASS (mode) == MODE_INT
10894 && GET_MODE_SIZE (mode) < 4)
10895 {
10896 rtx shift_op, shift_reg;
10897 shift_reg = NULL;
10898
10899 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10900
10901 if (CONST_INT_P (XEXP (x, 1)))
10902 {
10903 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10904 INTVAL (XEXP (x, 1)), NULL_RTX,
10905 NULL_RTX, 1, 0);
10906 *cost = COSTS_N_INSNS (insns);
10907 if (speed_p)
10908 *cost += insns * extra_cost->alu.arith;
10909 /* Slightly penalize a narrow operation as the result may
10910 need widening. */
10911 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10912 return true;
10913 }
10914
10915 /* Slightly penalize a narrow operation as the result may
10916 need widening. */
10917 *cost += 1;
10918 if (speed_p)
10919 *cost += extra_cost->alu.arith;
10920
10921 return false;
10922 }
10923
10924 if (mode == SImode)
10925 {
10926 rtx shift_op, shift_reg;
10927
10928 if (TARGET_INT_SIMD
10929 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10930 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10931 {
10932 /* UXTA[BH] or SXTA[BH]. */
10933 if (speed_p)
10934 *cost += extra_cost->alu.extend_arith;
10935 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10936 0, speed_p)
10937 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10938 return true;
10939 }
10940
10941 rtx op0 = XEXP (x, 0);
10942 rtx op1 = XEXP (x, 1);
10943
10944 /* Handle a side effect of adding in the carry to an addition. */
10945 if (GET_CODE (op0) == PLUS
10946 && arm_carry_operation (op1, mode))
10947 {
10948 op1 = XEXP (op0, 1);
10949 op0 = XEXP (op0, 0);
10950 }
10951 else if (GET_CODE (op1) == PLUS
10952 && arm_carry_operation (op0, mode))
10953 {
10954 op0 = XEXP (op1, 0);
10955 op1 = XEXP (op1, 1);
10956 }
10957 else if (GET_CODE (op0) == PLUS)
10958 {
10959 op0 = strip_carry_operation (op0);
10960 if (swap_commutative_operands_p (op0, op1))
10961 std::swap (op0, op1);
10962 }
10963
10964 if (arm_carry_operation (op0, mode))
10965 {
10966 /* Adding the carry to a register is a canonicalization of
10967 adding 0 to the register plus the carry. */
10968 if (speed_p)
10969 *cost += extra_cost->alu.arith;
10970 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10971 return true;
10972 }
10973
10974 shift_reg = NULL;
10975 shift_op = shifter_op_p (op0, &shift_reg);
10976 if (shift_op != NULL)
10977 {
10978 if (shift_reg)
10979 {
10980 if (speed_p)
10981 *cost += extra_cost->alu.arith_shift_reg;
10982 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10983 }
10984 else if (speed_p)
10985 *cost += extra_cost->alu.arith_shift;
10986
10987 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10988 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10989 return true;
10990 }
10991
10992 if (GET_CODE (op0) == MULT)
10993 {
10994 rtx mul_op = op0;
10995
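          /* The condition below accepts a 16x16->32 multiply feeding the
             accumulate: each multiply operand must be either a SIGN_EXTEND
             (bottom half-word) or an ASHIFTRT by 16 (top half-word), i.e.
             the SMLA[BT][BT] forms.  */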
10996 if (TARGET_DSP_MULTIPLY
10997 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10998 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10999 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11000 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11001 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
11002 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
11003 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
11004 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
11005 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
11006 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11007 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11008 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
11009 == 16))))))
11010 {
11011 /* SMLA[BT][BT]. */
11012 if (speed_p)
11013 *cost += extra_cost->mult[0].extend_add;
11014 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
11015 SIGN_EXTEND, 0, speed_p)
11016 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
11017 SIGN_EXTEND, 0, speed_p)
11018 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11019 return true;
11020 }
11021
11022 if (speed_p)
11023 *cost += extra_cost->mult[0].add;
11024 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
11025 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
11026 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11027 return true;
11028 }
11029
11030 if (CONST_INT_P (op1))
11031 {
11032 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11033 INTVAL (op1), NULL_RTX,
11034 NULL_RTX, 1, 0);
11035 *cost = COSTS_N_INSNS (insns);
11036 if (speed_p)
11037 *cost += insns * extra_cost->alu.arith;
11038 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11039 return true;
11040 }
11041
11042 if (speed_p)
11043 *cost += extra_cost->alu.arith;
11044
11045 /* Don't recurse here because we want to test the operands
11046 without any carry operation. */
11047 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11048 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11049 return true;
11050 }
11051
11052 if (mode == DImode)
11053 {
11054 if (GET_CODE (XEXP (x, 0)) == MULT
11055 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11056 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11057 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11058 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11059 {
11060 if (speed_p)
11061 *cost += extra_cost->mult[1].extend_add;
11062 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11063 ZERO_EXTEND, 0, speed_p)
11064 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11065 ZERO_EXTEND, 0, speed_p)
11066 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11067 return true;
11068 }
11069
11070 *cost += COSTS_N_INSNS (1);
11071
11072 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11073 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11074 {
11075 if (speed_p)
11076 *cost += (extra_cost->alu.arith
11077 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11078 ? extra_cost->alu.arith
11079 : extra_cost->alu.arith_shift));
11080
11081 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11082 0, speed_p)
11083 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11084 return true;
11085 }
11086
11087 if (speed_p)
11088 *cost += 2 * extra_cost->alu.arith;
11089 return false;
11090 }
11091
11092 /* Vector mode? */
11093 *cost = LIBCALL_COST (2);
11094 return false;
11095 case IOR:
11096 {
11097 rtx sub0, sub1;
11098 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11099 {
11100 if (speed_p)
11101 *cost += extra_cost->alu.rev;
11102
11103 return true;
11104 }
11105 else if (mode == SImode && arm_arch_thumb2
11106 && arm_bfi_p (x, &sub0, &sub1))
11107 {
11108 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11109 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11110 if (speed_p)
11111 *cost += extra_cost->alu.bfi;
11112
11113 return true;
11114 }
11115 }
11116
11117 /* Fall through. */
11118 case AND: case XOR:
11119 if (mode == SImode)
11120 {
11121 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11122 rtx op0 = XEXP (x, 0);
11123 rtx shift_op, shift_reg;
11124
11125 if (subcode == NOT
11126 && (code == AND
11127 || (code == IOR && TARGET_THUMB2)))
11128 op0 = XEXP (op0, 0);
11129
11130 shift_reg = NULL;
11131 shift_op = shifter_op_p (op0, &shift_reg);
11132 if (shift_op != NULL)
11133 {
11134 if (shift_reg)
11135 {
11136 if (speed_p)
11137 *cost += extra_cost->alu.log_shift_reg;
11138 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11139 }
11140 else if (speed_p)
11141 *cost += extra_cost->alu.log_shift;
11142
11143 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11144 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11145 return true;
11146 }
11147
11148 if (CONST_INT_P (XEXP (x, 1)))
11149 {
11150 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11151 INTVAL (XEXP (x, 1)), NULL_RTX,
11152 NULL_RTX, 1, 0);
11153
11154 *cost = COSTS_N_INSNS (insns);
11155 if (speed_p)
11156 *cost += insns * extra_cost->alu.logical;
11157 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11158 return true;
11159 }
11160
11161 if (speed_p)
11162 *cost += extra_cost->alu.logical;
11163 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11164 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11165 return true;
11166 }
11167
11168 if (mode == DImode)
11169 {
11170 rtx op0 = XEXP (x, 0);
11171 enum rtx_code subcode = GET_CODE (op0);
11172
11173 *cost += COSTS_N_INSNS (1);
11174
11175 if (subcode == NOT
11176 && (code == AND
11177 || (code == IOR && TARGET_THUMB2)))
11178 op0 = XEXP (op0, 0);
11179
11180 if (GET_CODE (op0) == ZERO_EXTEND)
11181 {
11182 if (speed_p)
11183 *cost += 2 * extra_cost->alu.logical;
11184
11185 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11186 0, speed_p)
11187 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11188 return true;
11189 }
11190 else if (GET_CODE (op0) == SIGN_EXTEND)
11191 {
11192 if (speed_p)
11193 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11194
11195 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11196 0, speed_p)
11197 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11198 return true;
11199 }
11200
11201 if (speed_p)
11202 *cost += 2 * extra_cost->alu.logical;
11203
11204 return true;
11205 }
11206 /* Vector mode? */
11207
11208 *cost = LIBCALL_COST (2);
11209 return false;
11210
11211 case MULT:
11212 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11213 && (mode == SFmode || !TARGET_VFP_SINGLE))
11214 {
11215 rtx op0 = XEXP (x, 0);
11216
11217 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11218 op0 = XEXP (op0, 0);
11219
11220 if (speed_p)
11221 *cost += extra_cost->fp[mode != SFmode].mult;
11222
11223 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11224 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11225 return true;
11226 }
11227 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11228 {
11229 *cost = LIBCALL_COST (2);
11230 return false;
11231 }
11232
11233 if (mode == SImode)
11234 {
11235 if (TARGET_DSP_MULTIPLY
11236 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11237 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11238 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11239 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11240 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11241 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11242 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11243 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11244 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11245 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11246 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11247 && (INTVAL (XEXP (XEXP (x, 1), 1))
11248 == 16))))))
11249 {
11250 /* SMUL[TB][TB]. */
11251 if (speed_p)
11252 *cost += extra_cost->mult[0].extend;
11253 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11254 SIGN_EXTEND, 0, speed_p);
11255 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11256 SIGN_EXTEND, 1, speed_p);
11257 return true;
11258 }
11259 if (speed_p)
11260 *cost += extra_cost->mult[0].simple;
11261 return false;
11262 }
11263
11264 if (mode == DImode)
11265 {
11266 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11267 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11268 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11269 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11270 {
11271 if (speed_p)
11272 *cost += extra_cost->mult[1].extend;
11273 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11274 ZERO_EXTEND, 0, speed_p)
11275 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11276 ZERO_EXTEND, 0, speed_p));
11277 return true;
11278 }
11279
11280 *cost = LIBCALL_COST (2);
11281 return false;
11282 }
11283
11284 /* Vector mode? */
11285 *cost = LIBCALL_COST (2);
11286 return false;
11287
11288 case NEG:
11289 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11290 && (mode == SFmode || !TARGET_VFP_SINGLE))
11291 {
11292 if (GET_CODE (XEXP (x, 0)) == MULT)
11293 {
11294 /* VNMUL. */
11295 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11296 return true;
11297 }
11298
11299 if (speed_p)
11300 *cost += extra_cost->fp[mode != SFmode].neg;
11301
11302 return false;
11303 }
11304 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11305 {
11306 *cost = LIBCALL_COST (1);
11307 return false;
11308 }
11309
11310 if (mode == SImode)
11311 {
11312 if (GET_CODE (XEXP (x, 0)) == ABS)
11313 {
11314 *cost += COSTS_N_INSNS (1);
11315 /* Assume the non-flag-changing variant. */
11316 if (speed_p)
11317 *cost += (extra_cost->alu.log_shift
11318 + extra_cost->alu.arith_shift);
11319 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11320 return true;
11321 }
11322
11323 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11324 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11325 {
11326 *cost += COSTS_N_INSNS (1);
11327 /* No extra cost for MOV imm and MVN imm. */
11328 /* If the comparison op is using the flags, there's no further
11329 cost, otherwise we need to add the cost of the comparison. */
11330 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11331 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11332 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11333 {
11334 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11335 *cost += (COSTS_N_INSNS (1)
11336 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11337 0, speed_p)
11338 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11339 1, speed_p));
11340 if (speed_p)
11341 *cost += extra_cost->alu.arith;
11342 }
11343 return true;
11344 }
11345
11346 if (speed_p)
11347 *cost += extra_cost->alu.arith;
11348 return false;
11349 }
11350
11351 if (GET_MODE_CLASS (mode) == MODE_INT
11352 && GET_MODE_SIZE (mode) < 4)
11353 {
11354 /* Slightly disparage, as we might need an extend operation. */
11355 *cost += 1;
11356 if (speed_p)
11357 *cost += extra_cost->alu.arith;
11358 return false;
11359 }
11360
11361 if (mode == DImode)
11362 {
11363 *cost += COSTS_N_INSNS (1);
11364 if (speed_p)
11365 *cost += 2 * extra_cost->alu.arith;
11366 return false;
11367 }
11368
11369 /* Vector mode? */
11370 *cost = LIBCALL_COST (1);
11371 return false;
11372
11373 case NOT:
11374 if (mode == SImode)
11375 {
11376 rtx shift_op;
11377 rtx shift_reg = NULL;
11378
11379 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11380
11381 if (shift_op)
11382 {
11383 if (shift_reg != NULL)
11384 {
11385 if (speed_p)
11386 *cost += extra_cost->alu.log_shift_reg;
11387 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11388 }
11389 else if (speed_p)
11390 *cost += extra_cost->alu.log_shift;
11391 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11392 return true;
11393 }
11394
11395 if (speed_p)
11396 *cost += extra_cost->alu.logical;
11397 return false;
11398 }
11399 if (mode == DImode)
11400 {
11401 *cost += COSTS_N_INSNS (1);
11402 return false;
11403 }
11404
11405 /* Vector mode? */
11406
11407 *cost += LIBCALL_COST (1);
11408 return false;
11409
11410 case IF_THEN_ELSE:
11411 {
11412 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11413 {
11414 *cost += COSTS_N_INSNS (3);
11415 return true;
11416 }
11417 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11418 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11419
11420 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11421 /* Assume that if one arm of the if_then_else is a register,
11422 it will be tied with the result, eliminating the
11423 conditional insn. */
11424 if (REG_P (XEXP (x, 1)))
11425 *cost += op2cost;
11426 else if (REG_P (XEXP (x, 2)))
11427 *cost += op1cost;
11428 else
11429 {
11430 if (speed_p)
11431 {
11432 if (extra_cost->alu.non_exec_costs_exec)
11433 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11434 else
11435 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11436 }
11437 else
11438 *cost += op1cost + op2cost;
11439 }
11440 }
11441 return true;
11442
11443 case COMPARE:
11444 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11445 *cost = 0;
11446 else
11447 {
11448 machine_mode op0mode;
11449 /* We'll mostly assume that the cost of a compare is the cost of the
11450 LHS. However, there are some notable exceptions. */
11451
11452 /* Floating point compares are never done as side-effects. */
11453 op0mode = GET_MODE (XEXP (x, 0));
11454 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11455 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11456 {
11457 if (speed_p)
11458 *cost += extra_cost->fp[op0mode != SFmode].compare;
11459
11460 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11461 {
11462 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11463 return true;
11464 }
11465
11466 return false;
11467 }
11468 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11469 {
11470 *cost = LIBCALL_COST (2);
11471 return false;
11472 }
11473
11474 /* DImode compares normally take two insns. */
11475 if (op0mode == DImode)
11476 {
11477 *cost += COSTS_N_INSNS (1);
11478 if (speed_p)
11479 *cost += 2 * extra_cost->alu.arith;
11480 return false;
11481 }
11482
11483 if (op0mode == SImode)
11484 {
11485 rtx shift_op;
11486 rtx shift_reg;
11487
11488 if (XEXP (x, 1) == const0_rtx
11489 && !(REG_P (XEXP (x, 0))
11490 || (GET_CODE (XEXP (x, 0)) == SUBREG
11491 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11492 {
11493 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11494
11495 /* Multiply operations that set the flags are often
11496 significantly more expensive. */
11497 if (speed_p
11498 && GET_CODE (XEXP (x, 0)) == MULT
11499 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11500 *cost += extra_cost->mult[0].flag_setting;
11501
11502 if (speed_p
11503 && GET_CODE (XEXP (x, 0)) == PLUS
11504 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11505 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11506 0), 1), mode))
11507 *cost += extra_cost->mult[0].flag_setting;
11508 return true;
11509 }
11510
11511 shift_reg = NULL;
11512 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11513 if (shift_op != NULL)
11514 {
11515 if (shift_reg != NULL)
11516 {
11517 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11518 1, speed_p);
11519 if (speed_p)
11520 *cost += extra_cost->alu.arith_shift_reg;
11521 }
11522 else if (speed_p)
11523 *cost += extra_cost->alu.arith_shift;
11524 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11525 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11526 return true;
11527 }
11528
11529 if (speed_p)
11530 *cost += extra_cost->alu.arith;
11531 if (CONST_INT_P (XEXP (x, 1))
11532 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11533 {
11534 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11535 return true;
11536 }
11537 return false;
11538 }
11539
11540 /* Vector mode? */
11541
11542 *cost = LIBCALL_COST (2);
11543 return false;
11544 }
11545 return true;
11546
11547 case EQ:
11548 case GE:
11549 case GT:
11550 case LE:
11551 case LT:
11552 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11553 vcle and vclt). */
11554 if (TARGET_NEON
11555 && TARGET_HARD_FLOAT
11556 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11557 && (XEXP (x, 1) == CONST0_RTX (mode)))
11558 {
11559 *cost = 0;
11560 return true;
11561 }
11562
11563 /* Fall through. */
11564 case NE:
11565 case LTU:
11566 case LEU:
11567 case GEU:
11568 case GTU:
11569 case ORDERED:
11570 case UNORDERED:
11571 case UNEQ:
11572 case UNLE:
11573 case UNLT:
11574 case UNGE:
11575 case UNGT:
11576 case LTGT:
11577 if (outer_code == SET)
11578 {
11579 /* Is it a store-flag operation? */
11580 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11581 && XEXP (x, 1) == const0_rtx)
11582 {
11583 /* Thumb also needs an IT insn. */
11584 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11585 return true;
11586 }
11587 if (XEXP (x, 1) == const0_rtx)
11588 {
11589 switch (code)
11590 {
11591 case LT:
11592 /* LSR Rd, Rn, #31. */
11593 if (speed_p)
11594 *cost += extra_cost->alu.shift;
11595 break;
11596
11597 case EQ:
11598 /* RSBS T1, Rn, #0
11599 ADC Rd, Rn, T1. */
11600
11601 case NE:
11602 /* SUBS T1, Rn, #1
11603 SBC Rd, Rn, T1. */
11604 *cost += COSTS_N_INSNS (1);
11605 break;
11606
11607 case LE:
11608 /* RSBS T1, Rn, Rn, LSR #31
11609 ADC Rd, Rn, T1. */
11610 *cost += COSTS_N_INSNS (1);
11611 if (speed_p)
11612 *cost += extra_cost->alu.arith_shift;
11613 break;
11614
11615 case GT:
11616 /* RSB Rd, Rn, Rn, ASR #1
11617 LSR Rd, Rd, #31. */
11618 *cost += COSTS_N_INSNS (1);
11619 if (speed_p)
11620 *cost += (extra_cost->alu.arith_shift
11621 + extra_cost->alu.shift);
11622 break;
11623
11624 case GE:
11625 /* ASR Rd, Rn, #31
11626 ADD Rd, Rn, #1. */
11627 *cost += COSTS_N_INSNS (1);
11628 if (speed_p)
11629 *cost += extra_cost->alu.shift;
11630 break;
11631
11632 default:
11633 /* Remaining cases are either meaningless or would take
11634 three insns anyway. */
11635 *cost = COSTS_N_INSNS (3);
11636 break;
11637 }
11638 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11639 return true;
11640 }
11641 else
11642 {
11643 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11644 if (CONST_INT_P (XEXP (x, 1))
11645 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11646 {
11647 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11648 return true;
11649 }
11650
11651 return false;
11652 }
11653 }
11654 /* Not directly inside a set. If it involves the condition code
11655 register it must be the condition for a branch, cond_exec or
11656 I_T_E operation. Since the comparison is performed elsewhere
11657 this is just the control part which has no additional
11658 cost. */
11659 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11660 && XEXP (x, 1) == const0_rtx)
11661 {
11662 *cost = 0;
11663 return true;
11664 }
11665 return false;
11666
11667 case ABS:
11668 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11669 && (mode == SFmode || !TARGET_VFP_SINGLE))
11670 {
11671 if (speed_p)
11672 *cost += extra_cost->fp[mode != SFmode].neg;
11673
11674 return false;
11675 }
11676 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11677 {
11678 *cost = LIBCALL_COST (1);
11679 return false;
11680 }
11681
11682 if (mode == SImode)
11683 {
11684 if (speed_p)
11685 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11686 return false;
11687 }
11688 /* Vector mode? */
11689 *cost = LIBCALL_COST (1);
11690 return false;
11691
11692 case SIGN_EXTEND:
11693 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11694 && MEM_P (XEXP (x, 0)))
11695 {
11696 if (mode == DImode)
11697 *cost += COSTS_N_INSNS (1);
11698
11699 if (!speed_p)
11700 return true;
11701
11702 if (GET_MODE (XEXP (x, 0)) == SImode)
11703 *cost += extra_cost->ldst.load;
11704 else
11705 *cost += extra_cost->ldst.load_sign_extend;
11706
11707 if (mode == DImode)
11708 *cost += extra_cost->alu.shift;
11709
11710 return true;
11711 }
11712
11713 /* Widening from less than 32 bits requires an extend operation. */
11714 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11715 {
11716 /* We have SXTB/SXTH. */
11717 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11718 if (speed_p)
11719 *cost += extra_cost->alu.extend;
11720 }
11721 else if (GET_MODE (XEXP (x, 0)) != SImode)
11722 {
11723 /* Needs two shifts. */
11724 *cost += COSTS_N_INSNS (1);
11725 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11726 if (speed_p)
11727 *cost += 2 * extra_cost->alu.shift;
11728 }
11729
11730 /* Widening beyond 32 bits requires one more insn. */
11731 if (mode == DImode)
11732 {
11733 *cost += COSTS_N_INSNS (1);
11734 if (speed_p)
11735 *cost += extra_cost->alu.shift;
11736 }
11737
11738 return true;
11739
11740 case ZERO_EXTEND:
11741 if ((arm_arch4
11742 || GET_MODE (XEXP (x, 0)) == SImode
11743 || GET_MODE (XEXP (x, 0)) == QImode)
11744 && MEM_P (XEXP (x, 0)))
11745 {
11746 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11747
11748 if (mode == DImode)
11749 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11750
11751 return true;
11752 }
11753
11754 /* Widening from less than 32 bits requires an extend operation. */
11755 if (GET_MODE (XEXP (x, 0)) == QImode)
11756 {
11757 /* UXTB can be a shorter instruction in Thumb2, but it might
11758 be slower than the AND Rd, Rn, #255 alternative. When
11759 optimizing for speed it should never be slower to use
11760 AND, and we don't really model 16-bit vs 32-bit insns
11761 here. */
11762 if (speed_p)
11763 *cost += extra_cost->alu.logical;
11764 }
11765 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11766 {
11767 /* We have UXTB/UXTH. */
11768 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11769 if (speed_p)
11770 *cost += extra_cost->alu.extend;
11771 }
11772 else if (GET_MODE (XEXP (x, 0)) != SImode)
11773 {
11774 /* Needs two shifts. It's marginally preferable to use
11775 shifts rather than two BIC instructions as the second
11776 shift may merge with a subsequent insn as a shifter
11777 op. */
11778 *cost = COSTS_N_INSNS (2);
11779 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11780 if (speed_p)
11781 *cost += 2 * extra_cost->alu.shift;
11782 }
11783
11784 /* Widening beyond 32 bits requires one more insn. */
11785 if (mode == DImode)
11786 {
11787 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11788 }
11789
11790 return true;
11791
11792 case CONST_INT:
11793 *cost = 0;
11794 /* CONST_INT has no mode, so we cannot tell for sure how many
11795 insns are really going to be needed. The best we can do is
11796 look at the value passed. If it fits in SImode, then assume
11797 that's the mode it will be used for. Otherwise assume it
11798 will be used in DImode. */
11799 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11800 mode = SImode;
11801 else
11802 mode = DImode;
11803
11804 /* Avoid blowing up in arm_gen_constant (). */
11805 if (!(outer_code == PLUS
11806 || outer_code == AND
11807 || outer_code == IOR
11808 || outer_code == XOR
11809 || outer_code == MINUS))
11810 outer_code = SET;
11811
11812 const_int_cost:
11813 if (mode == SImode)
11814 {
11815 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11816 INTVAL (x), NULL, NULL,
11817 0, 0));
11818 /* Extra costs? */
11819 }
11820 else
11821 {
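          /* A DImode (or wider) constant is costed as its two 32-bit halves
             synthesized independently.  Illustration only: 0x300000001 is
             treated as synthesizing 1 for the low word and 3 for the high
             word.  */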
11822 *cost += COSTS_N_INSNS (arm_gen_constant
11823 (outer_code, SImode, NULL,
11824 trunc_int_for_mode (INTVAL (x), SImode),
11825 NULL, NULL, 0, 0)
11826 + arm_gen_constant (outer_code, SImode, NULL,
11827 INTVAL (x) >> 32, NULL,
11828 NULL, 0, 0));
11829 /* Extra costs? */
11830 }
11831
11832 return true;
11833
11834 case CONST:
11835 case LABEL_REF:
11836 case SYMBOL_REF:
11837 if (speed_p)
11838 {
11839 if (arm_arch_thumb2 && !flag_pic)
11840 *cost += COSTS_N_INSNS (1);
11841 else
11842 *cost += extra_cost->ldst.load;
11843 }
11844 else
11845 *cost += COSTS_N_INSNS (1);
11846
11847 if (flag_pic)
11848 {
11849 *cost += COSTS_N_INSNS (1);
11850 if (speed_p)
11851 *cost += extra_cost->alu.arith;
11852 }
11853
11854 return true;
11855
11856 case CONST_FIXED:
11857 *cost = COSTS_N_INSNS (4);
11858 /* Fixme. */
11859 return true;
11860
11861 case CONST_DOUBLE:
11862 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11863 && (mode == SFmode || !TARGET_VFP_SINGLE))
11864 {
11865 if (vfp3_const_double_rtx (x))
11866 {
11867 if (speed_p)
11868 *cost += extra_cost->fp[mode == DFmode].fpconst;
11869 return true;
11870 }
11871
11872 if (speed_p)
11873 {
11874 if (mode == DFmode)
11875 *cost += extra_cost->ldst.loadd;
11876 else
11877 *cost += extra_cost->ldst.loadf;
11878 }
11879 else
11880 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11881
11882 return true;
11883 }
11884 *cost = COSTS_N_INSNS (4);
11885 return true;
11886
11887 case CONST_VECTOR:
11888 /* Fixme. */
11889 if (((TARGET_NEON && TARGET_HARD_FLOAT
11890 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11891 || TARGET_HAVE_MVE)
11892 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11893 *cost = COSTS_N_INSNS (1);
11894 else
11895 *cost = COSTS_N_INSNS (4);
11896 return true;
11897
11898 case HIGH:
11899 case LO_SUM:
11900 /* When optimizing for size, we prefer constant pool entries to
11901 MOVW/MOVT pairs, so bump the cost of these slightly. */
11902 if (!speed_p)
11903 *cost += 1;
11904 return true;
11905
11906 case CLZ:
11907 if (speed_p)
11908 *cost += extra_cost->alu.clz;
11909 return false;
11910
11911 case SMIN:
11912 if (XEXP (x, 1) == const0_rtx)
11913 {
11914 if (speed_p)
11915 *cost += extra_cost->alu.log_shift;
11916 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11917 return true;
11918 }
11919 /* Fall through. */
11920 case SMAX:
11921 case UMIN:
11922 case UMAX:
11923 *cost += COSTS_N_INSNS (1);
11924 return false;
11925
11926 case TRUNCATE:
11927 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11928 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11929 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11930 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11931 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11932 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11933 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11934 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11935 == ZERO_EXTEND))))
11936 {
11937 if (speed_p)
11938 *cost += extra_cost->mult[1].extend;
11939 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11940 ZERO_EXTEND, 0, speed_p)
11941 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11942 ZERO_EXTEND, 0, speed_p));
11943 return true;
11944 }
11945 *cost = LIBCALL_COST (1);
11946 return false;
11947
11948 case UNSPEC_VOLATILE:
11949 case UNSPEC:
11950 return arm_unspec_cost (x, outer_code, speed_p, cost);
11951
11952 case PC:
11953 /* Reading the PC is like reading any other register. Writing it
11954 is more expensive, but we take that into account elsewhere. */
11955 *cost = 0;
11956 return true;
11957
11958 case ZERO_EXTRACT:
11959 /* TODO: Simple zero_extract of bottom bits using AND. */
11960 /* Fall through. */
11961 case SIGN_EXTRACT:
11962 if (arm_arch6
11963 && mode == SImode
11964 && CONST_INT_P (XEXP (x, 1))
11965 && CONST_INT_P (XEXP (x, 2)))
11966 {
11967 if (speed_p)
11968 *cost += extra_cost->alu.bfx;
11969 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11970 return true;
11971 }
11972 /* Without UBFX/SBFX, need to resort to shift operations. */
11973 *cost += COSTS_N_INSNS (1);
11974 if (speed_p)
11975 *cost += 2 * extra_cost->alu.shift;
11976 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11977 return true;
11978
11979 case FLOAT_EXTEND:
11980 if (TARGET_HARD_FLOAT)
11981 {
11982 if (speed_p)
11983 *cost += extra_cost->fp[mode == DFmode].widen;
11984 if (!TARGET_VFP5
11985 && GET_MODE (XEXP (x, 0)) == HFmode)
11986 {
11987 /* Pre v8, widening HF->DF is a two-step process, first
11988 widening to SFmode. */
11989 *cost += COSTS_N_INSNS (1);
11990 if (speed_p)
11991 *cost += extra_cost->fp[0].widen;
11992 }
11993 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11994 return true;
11995 }
11996
11997 *cost = LIBCALL_COST (1);
11998 return false;
11999
12000 case FLOAT_TRUNCATE:
12001 if (TARGET_HARD_FLOAT)
12002 {
12003 if (speed_p)
12004 *cost += extra_cost->fp[mode == DFmode].narrow;
12005 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
12006 return true;
12007 /* Vector modes? */
12008 }
12009 *cost = LIBCALL_COST (1);
12010 return false;
12011
12012 case FMA:
12013 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
12014 {
12015 rtx op0 = XEXP (x, 0);
12016 rtx op1 = XEXP (x, 1);
12017 rtx op2 = XEXP (x, 2);
12018
12019
12020 /* vfms or vfnma. */
12021 if (GET_CODE (op0) == NEG)
12022 op0 = XEXP (op0, 0);
12023
12024 /* vfnms or vfnma. */
12025 if (GET_CODE (op2) == NEG)
12026 op2 = XEXP (op2, 0);
12027
12028 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
12029 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
12030 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
12031
12032 if (speed_p)
12033 *cost += extra_cost->fp[mode == DFmode].fma;
12034
12035 return true;
12036 }
12037
12038 *cost = LIBCALL_COST (3);
12039 return false;
12040
12041 case FIX:
12042 case UNSIGNED_FIX:
12043 if (TARGET_HARD_FLOAT)
12044 {
12045 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12046 a vcvt fixed-point conversion. */
12047 if (code == FIX && mode == SImode
12048 && GET_CODE (XEXP (x, 0)) == FIX
12049 && GET_MODE (XEXP (x, 0)) == SFmode
12050 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12051 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12052 > 0)
12053 {
12054 if (speed_p)
12055 *cost += extra_cost->fp[0].toint;
12056
12057 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12058 code, 0, speed_p);
12059 return true;
12060 }
12061
12062 if (GET_MODE_CLASS (mode) == MODE_INT)
12063 {
12064 mode = GET_MODE (XEXP (x, 0));
12065 if (speed_p)
12066 *cost += extra_cost->fp[mode == DFmode].toint;
12067 /* Strip off the 'cost' of rounding towards zero. */
12068 if (GET_CODE (XEXP (x, 0)) == FIX)
12069 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12070 0, speed_p);
12071 else
12072 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12073 /* ??? Increase the cost to deal with transferring from
12074 FP -> CORE registers? */
12075 return true;
12076 }
12077 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12078 && TARGET_VFP5)
12079 {
12080 if (speed_p)
12081 *cost += extra_cost->fp[mode == DFmode].roundint;
12082 return false;
12083 }
12084 /* Vector costs? */
12085 }
12086 *cost = LIBCALL_COST (1);
12087 return false;
12088
12089 case FLOAT:
12090 case UNSIGNED_FLOAT:
12091 if (TARGET_HARD_FLOAT)
12092 {
12093 /* ??? Increase the cost to deal with transferring from CORE
12094 -> FP registers? */
12095 if (speed_p)
12096 *cost += extra_cost->fp[mode == DFmode].fromint;
12097 return false;
12098 }
12099 *cost = LIBCALL_COST (1);
12100 return false;
12101
12102 case CALL:
12103 return true;
12104
12105 case ASM_OPERANDS:
12106 {
12107 /* Just a guess: the number of instructions in the asm plus one
12108 insn per input. Always a minimum of COSTS_N_INSNS (1),
12109 though (see PR60663). */
12110 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12111 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12112
12113 *cost = COSTS_N_INSNS (asm_length + num_operands);
12114 return true;
12115 }
12116 default:
12117 if (mode != VOIDmode)
12118 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12119 else
12120 *cost = COSTS_N_INSNS (4); /* Who knows? */
12121 return false;
12122 }
12123 }
12124
12125 #undef HANDLE_NARROW_SHIFT_ARITH
12126
12127 /* RTX costs entry point. */
12128
12129 static bool
12130 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12131 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12132 {
12133 bool result;
12134 int code = GET_CODE (x);
12135 gcc_assert (current_tune->insn_extra_cost);
12136
12137 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12138 (enum rtx_code) outer_code,
12139 current_tune->insn_extra_cost,
12140 total, speed);
12141
12142 if (dump_file && arm_verbose_cost)
12143 {
12144 print_rtl_single (dump_file, x);
12145 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12146 *total, result ? "final" : "partial");
12147 }
12148 return result;
12149 }
12150
12151 static int
12152 arm_insn_cost (rtx_insn *insn, bool speed)
12153 {
12154 int cost;
12155
12156 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12157 will likely disappear during register allocation. */
12158 if (!reload_completed
12159 && GET_CODE (PATTERN (insn)) == SET
12160 && REG_P (SET_DEST (PATTERN (insn)))
12161 && REG_P (SET_SRC (PATTERN (insn))))
12162 return 2;
12163 cost = pattern_cost (PATTERN (insn), speed);
12164 /* If the cost is zero, then it's likely a complex insn. We don't want the
12165 cost of these to be less than something we know about. */
12166 return cost ? cost : COSTS_N_INSNS (2);
12167 }
12168
12169 /* All address computations that can be done are free, but rtx cost returns
12170 the same for practically all of them. So we weight the different types
12171 of address here in the order (most preferred first):
12172 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
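/* For instance (illustrative only): [Rn, Rm, lsl #2] is a PLUS with an
   arithmetic operand and scores 3, [Rn, #imm] scores 2, and a bare
   SYMBOL_REF scores 10.  */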
12173 static inline int
12174 arm_arm_address_cost (rtx x)
12175 {
12176 enum rtx_code c = GET_CODE (x);
12177
12178 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12179 return 0;
12180 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12181 return 10;
12182
12183 if (c == PLUS)
12184 {
12185 if (CONST_INT_P (XEXP (x, 1)))
12186 return 2;
12187
12188 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12189 return 3;
12190
12191 return 4;
12192 }
12193
12194 return 6;
12195 }
12196
12197 static inline int
12198 arm_thumb_address_cost (rtx x)
12199 {
12200 enum rtx_code c = GET_CODE (x);
12201
12202 if (c == REG)
12203 return 1;
12204 if (c == PLUS
12205 && REG_P (XEXP (x, 0))
12206 && CONST_INT_P (XEXP (x, 1)))
12207 return 1;
12208
12209 return 2;
12210 }
12211
12212 static int
12213 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12214 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12215 {
12216 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12217 }
12218
12219 /* Adjust cost hook for XScale. */
12220 static bool
12221 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12222 int * cost)
12223 {
12224 /* Some true dependencies can have a higher cost depending
12225 on precisely how certain input operands are used. */
12226 if (dep_type == 0
12227 && recog_memoized (insn) >= 0
12228 && recog_memoized (dep) >= 0)
12229 {
12230 int shift_opnum = get_attr_shift (insn);
12231 enum attr_type attr_type = get_attr_type (dep);
12232
12233 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12234 operand for INSN. If we have a shifted input operand and the
12235 instruction we depend on is another ALU instruction, then we may
12236 have to account for an additional stall. */
12237 if (shift_opnum != 0
12238 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12239 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12240 || attr_type == TYPE_ALUS_SHIFT_IMM
12241 || attr_type == TYPE_LOGIC_SHIFT_IMM
12242 || attr_type == TYPE_LOGICS_SHIFT_IMM
12243 || attr_type == TYPE_ALU_SHIFT_REG
12244 || attr_type == TYPE_ALUS_SHIFT_REG
12245 || attr_type == TYPE_LOGIC_SHIFT_REG
12246 || attr_type == TYPE_LOGICS_SHIFT_REG
12247 || attr_type == TYPE_MOV_SHIFT
12248 || attr_type == TYPE_MVN_SHIFT
12249 || attr_type == TYPE_MOV_SHIFT_REG
12250 || attr_type == TYPE_MVN_SHIFT_REG))
12251 {
12252 rtx shifted_operand;
12253 int opno;
12254
12255 /* Get the shifted operand. */
12256 extract_insn (insn);
12257 shifted_operand = recog_data.operand[shift_opnum];
12258
12259 /* Iterate over all the operands in DEP. If we write an operand
12260 that overlaps with SHIFTED_OPERAND, then we have to increase the
12261 cost of this dependency. */
12262 extract_insn (dep);
12263 preprocess_constraints (dep);
12264 for (opno = 0; opno < recog_data.n_operands; opno++)
12265 {
12266 /* We can ignore strict inputs. */
12267 if (recog_data.operand_type[opno] == OP_IN)
12268 continue;
12269
12270 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12271 shifted_operand))
12272 {
12273 *cost = 2;
12274 return false;
12275 }
12276 }
12277 }
12278 }
12279 return true;
12280 }
12281
12282 /* Adjust cost hook for Cortex A9. */
12283 static bool
12284 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12285 int * cost)
12286 {
12287 switch (dep_type)
12288 {
12289 case REG_DEP_ANTI:
12290 *cost = 0;
12291 return false;
12292
12293 case REG_DEP_TRUE:
12294 case REG_DEP_OUTPUT:
12295 if (recog_memoized (insn) >= 0
12296 && recog_memoized (dep) >= 0)
12297 {
12298 if (GET_CODE (PATTERN (insn)) == SET)
12299 {
12300 if (GET_MODE_CLASS
12301 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12302 || GET_MODE_CLASS
12303 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12304 {
12305 enum attr_type attr_type_insn = get_attr_type (insn);
12306 enum attr_type attr_type_dep = get_attr_type (dep);
12307
12308 /* By default all dependencies of the form
12309 s0 = s0 <op> s1
12310 s0 = s0 <op> s2
12311 have an extra latency of 1 cycle because
12312 of the input and output dependency in this
12313 case. However, this gets modeled as a true
12314 dependency and hence all these checks. */
12315 if (REG_P (SET_DEST (PATTERN (insn)))
12316 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12317 {
12318 /* FMACS is a special case where the dependent
12319 instruction can be issued 3 cycles before
12320 the normal latency in case of an output
12321 dependency. */
12322 if ((attr_type_insn == TYPE_FMACS
12323 || attr_type_insn == TYPE_FMACD)
12324 && (attr_type_dep == TYPE_FMACS
12325 || attr_type_dep == TYPE_FMACD))
12326 {
12327 if (dep_type == REG_DEP_OUTPUT)
12328 *cost = insn_default_latency (dep) - 3;
12329 else
12330 *cost = insn_default_latency (dep);
12331 return false;
12332 }
12333 else
12334 {
12335 if (dep_type == REG_DEP_OUTPUT)
12336 *cost = insn_default_latency (dep) + 1;
12337 else
12338 *cost = insn_default_latency (dep);
12339 }
12340 return false;
12341 }
12342 }
12343 }
12344 }
12345 break;
12346
12347 default:
12348 gcc_unreachable ();
12349 }
12350
12351 return true;
12352 }
12353
12354 /* Adjust cost hook for FA726TE. */
12355 static bool
12356 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12357 int * cost)
12358 {
12359 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
12360 predicated) has a penalty of 3. */
12361 if (dep_type == REG_DEP_TRUE
12362 && recog_memoized (insn) >= 0
12363 && recog_memoized (dep) >= 0
12364 && get_attr_conds (dep) == CONDS_SET)
12365 {
12366 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12367 if (get_attr_conds (insn) == CONDS_USE
12368 && get_attr_type (insn) != TYPE_BRANCH)
12369 {
12370 *cost = 3;
12371 return false;
12372 }
12373
12374 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12375 || get_attr_conds (insn) == CONDS_USE)
12376 {
12377 *cost = 0;
12378 return false;
12379 }
12380 }
12381
12382 return true;
12383 }
12384
12385 /* Implement TARGET_REGISTER_MOVE_COST.
12386
12387 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12388 such a move is typically more expensive than a single memory access. We set
12389 the cost to less than two memory accesses so that floating
12390 point to integer conversion does not go through memory. */
12391
12392 int
12393 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12394 reg_class_t from, reg_class_t to)
12395 {
12396 if (TARGET_32BIT)
12397 {
12398 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12399 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12400 return 15;
12401 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12402 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12403 return 4;
12404 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12405 return 20;
12406 else
12407 return 2;
12408 }
12409 else
12410 {
12411 if (from == HI_REGS || to == HI_REGS)
12412 return 4;
12413 else
12414 return 2;
12415 }
12416 }
12417
12418 /* Implement TARGET_MEMORY_MOVE_COST. */
12419
12420 int
12421 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12422 bool in ATTRIBUTE_UNUSED)
12423 {
12424 if (TARGET_32BIT)
12425 return 10;
12426 else
12427 {
12428 if (GET_MODE_SIZE (mode) < 4)
12429 return 8;
12430 else
12431 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12432 }
12433 }
12434
12435 /* Vectorizer cost model implementation. */
12436
12437 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12438 static int
12439 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12440 tree vectype,
12441 int misalign ATTRIBUTE_UNUSED)
12442 {
12443 unsigned elements;
12444
12445 switch (type_of_cost)
12446 {
12447 case scalar_stmt:
12448 return current_tune->vec_costs->scalar_stmt_cost;
12449
12450 case scalar_load:
12451 return current_tune->vec_costs->scalar_load_cost;
12452
12453 case scalar_store:
12454 return current_tune->vec_costs->scalar_store_cost;
12455
12456 case vector_stmt:
12457 return current_tune->vec_costs->vec_stmt_cost;
12458
12459 case vector_load:
12460 return current_tune->vec_costs->vec_align_load_cost;
12461
12462 case vector_store:
12463 return current_tune->vec_costs->vec_store_cost;
12464
12465 case vec_to_scalar:
12466 return current_tune->vec_costs->vec_to_scalar_cost;
12467
12468 case scalar_to_vec:
12469 return current_tune->vec_costs->scalar_to_vec_cost;
12470
12471 case unaligned_load:
12472 case vector_gather_load:
12473 return current_tune->vec_costs->vec_unalign_load_cost;
12474
12475 case unaligned_store:
12476 case vector_scatter_store:
12477 return current_tune->vec_costs->vec_unalign_store_cost;
12478
12479 case cond_branch_taken:
12480 return current_tune->vec_costs->cond_taken_branch_cost;
12481
12482 case cond_branch_not_taken:
12483 return current_tune->vec_costs->cond_not_taken_branch_cost;
12484
12485 case vec_perm:
12486 case vec_promote_demote:
12487 return current_tune->vec_costs->vec_stmt_cost;
12488
12489 case vec_construct:
12490 elements = TYPE_VECTOR_SUBPARTS (vectype);
12491 return elements / 2 + 1;
12492
12493 default:
12494 gcc_unreachable ();
12495 }
12496 }
12497
12498 /* Return true if and only if this insn can dual-issue only as older. */
12499 static bool
12500 cortexa7_older_only (rtx_insn *insn)
12501 {
12502 if (recog_memoized (insn) < 0)
12503 return false;
12504
12505 switch (get_attr_type (insn))
12506 {
12507 case TYPE_ALU_DSP_REG:
12508 case TYPE_ALU_SREG:
12509 case TYPE_ALUS_SREG:
12510 case TYPE_LOGIC_REG:
12511 case TYPE_LOGICS_REG:
12512 case TYPE_ADC_REG:
12513 case TYPE_ADCS_REG:
12514 case TYPE_ADR:
12515 case TYPE_BFM:
12516 case TYPE_REV:
12517 case TYPE_MVN_REG:
12518 case TYPE_SHIFT_IMM:
12519 case TYPE_SHIFT_REG:
12520 case TYPE_LOAD_BYTE:
12521 case TYPE_LOAD_4:
12522 case TYPE_STORE_4:
12523 case TYPE_FFARITHS:
12524 case TYPE_FADDS:
12525 case TYPE_FFARITHD:
12526 case TYPE_FADDD:
12527 case TYPE_FMOV:
12528 case TYPE_F_CVT:
12529 case TYPE_FCMPS:
12530 case TYPE_FCMPD:
12531 case TYPE_FCONSTS:
12532 case TYPE_FCONSTD:
12533 case TYPE_FMULS:
12534 case TYPE_FMACS:
12535 case TYPE_FMULD:
12536 case TYPE_FMACD:
12537 case TYPE_FDIVS:
12538 case TYPE_FDIVD:
12539 case TYPE_F_MRC:
12540 case TYPE_F_MRRC:
12541 case TYPE_F_FLAG:
12542 case TYPE_F_LOADS:
12543 case TYPE_F_STORES:
12544 return true;
12545 default:
12546 return false;
12547 }
12548 }
12549
12550 /* Return true if and only if this insn can dual-issue as younger. */
12551 static bool
12552 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12553 {
12554 if (recog_memoized (insn) < 0)
12555 {
12556 if (verbose > 5)
12557 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12558 return false;
12559 }
12560
12561 switch (get_attr_type (insn))
12562 {
12563 case TYPE_ALU_IMM:
12564 case TYPE_ALUS_IMM:
12565 case TYPE_LOGIC_IMM:
12566 case TYPE_LOGICS_IMM:
12567 case TYPE_EXTEND:
12568 case TYPE_MVN_IMM:
12569 case TYPE_MOV_IMM:
12570 case TYPE_MOV_REG:
12571 case TYPE_MOV_SHIFT:
12572 case TYPE_MOV_SHIFT_REG:
12573 case TYPE_BRANCH:
12574 case TYPE_CALL:
12575 return true;
12576 default:
12577 return false;
12578 }
12579 }
12580
12581
12582 /* Look for an instruction that can dual issue only as an older
12583 instruction, and move it in front of any instructions that can
12584 dual-issue as younger, while preserving the relative order of all
12585 other instructions in the ready list. This is a heuristic to help
12586 dual-issue in later cycles, by postponing issue of more flexible
12587 instructions. This heuristic may affect dual issue opportunities
12588 in the current cycle. */
12589 static void
12590 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12591 int *n_readyp, int clock)
12592 {
12593 int i;
12594 int first_older_only = -1, first_younger = -1;
12595
12596 if (verbose > 5)
12597 fprintf (file,
12598 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12599 clock,
12600 *n_readyp);
12601
12602 /* Traverse the ready list from the head (the instruction to issue
12603 first), looking for the first instruction that can issue as
12604 younger and the first instruction that can dual-issue only as
12605 older. */
12606 for (i = *n_readyp - 1; i >= 0; i--)
12607 {
12608 rtx_insn *insn = ready[i];
12609 if (cortexa7_older_only (insn))
12610 {
12611 first_older_only = i;
12612 if (verbose > 5)
12613 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12614 break;
12615 }
12616 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12617 first_younger = i;
12618 }
12619
12620 /* Nothing to reorder because either no younger insn was found, or an insn
12621 that can dual-issue only as older appears before any insn that
12622 can dual-issue as younger. */
12623 if (first_younger == -1)
12624 {
12625 if (verbose > 5)
12626 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12627 return;
12628 }
12629
12630 /* Nothing to reorder because no older-only insn in the ready list. */
12631 if (first_older_only == -1)
12632 {
12633 if (verbose > 5)
12634 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12635 return;
12636 }
12637
12638 /* Move first_older_only insn before first_younger. */
12639 if (verbose > 5)
12640 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12641 INSN_UID(ready [first_older_only]),
12642 INSN_UID(ready [first_younger]));
12643 rtx_insn *first_older_only_insn = ready [first_older_only];
12644 for (i = first_older_only; i < first_younger; i++)
12645 {
12646 ready[i] = ready[i+1];
12647 }
12648
12649 ready[i] = first_older_only_insn;
12650 return;
12651 }
12652
12653 /* Implement TARGET_SCHED_REORDER. */
12654 static int
12655 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12656 int clock)
12657 {
12658 switch (arm_tune)
12659 {
12660 case TARGET_CPU_cortexa7:
12661 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12662 break;
12663 default:
12664 /* Do nothing for other cores. */
12665 break;
12666 }
12667
12668 return arm_issue_rate ();
12669 }
12670
12671 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12672 It corrects the value of COST based on the relationship between
12673 INSN and DEP through the dependence LINK. It returns the new
12674 value. There is a per-core adjust_cost hook to adjust scheduler costs
12675 and the per-core hook can choose to completely override the generic
12676 adjust_cost function. Only put bits of code into arm_adjust_cost that
12677 are common across all cores. */
12678 static int
12679 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12680 unsigned int)
12681 {
12682 rtx i_pat, d_pat;
12683
12684 /* When generating Thumb-1 code, we want to place flag-setting operations
12685 close to a conditional branch which depends on them, so that we can
12686 omit the comparison. */
12687 if (TARGET_THUMB1
12688 && dep_type == 0
12689 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12690 && recog_memoized (dep) >= 0
12691 && get_attr_conds (dep) == CONDS_SET)
12692 return 0;
12693
12694 if (current_tune->sched_adjust_cost != NULL)
12695 {
12696 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12697 return cost;
12698 }
12699
12700 /* XXX Is this strictly true? */
12701 if (dep_type == REG_DEP_ANTI
12702 || dep_type == REG_DEP_OUTPUT)
12703 return 0;
12704
12705 /* Call insns don't incur a stall, even if they follow a load. */
12706 if (dep_type == 0
12707 && CALL_P (insn))
12708 return 1;
12709
12710 if ((i_pat = single_set (insn)) != NULL
12711 && MEM_P (SET_SRC (i_pat))
12712 && (d_pat = single_set (dep)) != NULL
12713 && MEM_P (SET_DEST (d_pat)))
12714 {
12715 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12716 /* This is a load after a store; there is no conflict if the load reads
12717 from a cached area. Assume that loads from the stack, and from the
12718 constant pool are cached, and that others will miss. This is a
12719 hack. */
12720
12721 if ((SYMBOL_REF_P (src_mem)
12722 && CONSTANT_POOL_ADDRESS_P (src_mem))
12723 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12724 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12725 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12726 return 1;
12727 }
12728
12729 return cost;
12730 }
12731
12732 int
12733 arm_max_conditional_execute (void)
12734 {
12735 return max_insns_skipped;
12736 }
12737
12738 static int
12739 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12740 {
12741 if (TARGET_32BIT)
12742 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12743 else
12744 return (optimize > 0) ? 2 : 0;
12745 }
12746
12747 static int
12748 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12749 {
12750 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12751 }
12752
12753 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12754 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12755 sequences of non-executed instructions in IT blocks probably take the same
12756 amount of time as executed instructions (and the IT instruction itself takes
12757 space in icache). This function was experimentally determined to give good
12758 results on a popular embedded benchmark. */
12759
12760 static int
12761 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12762 {
12763 return (TARGET_32BIT && speed_p) ? 1
12764 : arm_default_branch_cost (speed_p, predictable_p);
12765 }
12766
12767 static int
12768 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12769 {
12770 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12771 }
12772
12773 static bool fp_consts_inited = false;
12774
12775 static REAL_VALUE_TYPE value_fp0;
12776
12777 static void
12778 init_fp_table (void)
12779 {
12780 REAL_VALUE_TYPE r;
12781
12782 r = REAL_VALUE_ATOF ("0", DFmode);
12783 value_fp0 = r;
12784 fp_consts_inited = true;
12785 }
12786
12787 /* Return TRUE if rtx X is a valid immediate FP constant. */
12788 int
12789 arm_const_double_rtx (rtx x)
12790 {
12791 const REAL_VALUE_TYPE *r;
12792
12793 if (!fp_consts_inited)
12794 init_fp_table ();
12795
12796 r = CONST_DOUBLE_REAL_VALUE (x);
12797 if (REAL_VALUE_MINUS_ZERO (*r))
12798 return 0;
12799
12800 if (real_equal (r, &value_fp0))
12801 return 1;
12802
12803 return 0;
12804 }
12805
12806 /* VFPv3 has a fairly wide range of representable immediates, formed from
12807 "quarter-precision" floating-point values. These can be evaluated using this
12808 formula (with ^ for exponentiation):
12809
12810 -1^s * n * 2^-r
12811
12812 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12813 16 <= n <= 31 and 0 <= r <= 7.
12814
12815 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12816
12817 - A (most-significant) is the sign bit.
12818 - BCD are the exponent (encoded as r XOR 3).
12819 - EFGH are the mantissa (encoded as n - 16).
12820 */
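/* As an illustration only (a hypothetical helper, not part of GCC): given S,
   N and R from the formula above, the encoded byte can be computed as

     static inline int
     encode_quarter_precision (int s, int n, int r)
     {
       return (s << 7) | ((r ^ 3) << 4) | (n - 16);
     }

   For example, 1.0 == 16 * 2^-4 gives s = 0, n = 16, r = 4 and hence 0x70,
   which is what vfp3_const_double_index below returns for 1.0.  */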
12821
12822 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12823 fconst[sd] instruction, or -1 if X isn't suitable. */
12824 static int
12825 vfp3_const_double_index (rtx x)
12826 {
12827 REAL_VALUE_TYPE r, m;
12828 int sign, exponent;
12829 unsigned HOST_WIDE_INT mantissa, mant_hi;
12830 unsigned HOST_WIDE_INT mask;
12831 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12832 bool fail;
12833
12834 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12835 return -1;
12836
12837 r = *CONST_DOUBLE_REAL_VALUE (x);
12838
12839 /* We can't represent these things, so detect them first. */
12840 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12841 return -1;
12842
12843 /* Extract sign, exponent and mantissa. */
12844 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12845 r = real_value_abs (&r);
12846 exponent = REAL_EXP (&r);
12847 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12848 highest (sign) bit, with a fixed binary point at bit point_pos.
12849 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12850 bits for the mantissa, this may fail (low bits would be lost). */
12851 real_ldexp (&m, &r, point_pos - exponent);
12852 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12853 mantissa = w.elt (0);
12854 mant_hi = w.elt (1);
12855
12856 /* If there are bits set in the low part of the mantissa, we can't
12857 represent this value. */
12858 if (mantissa != 0)
12859 return -1;
12860
12861 /* Now make it so that mantissa contains the most-significant bits, and move
12862 the point_pos to indicate that the least-significant bits have been
12863 discarded. */
12864 point_pos -= HOST_BITS_PER_WIDE_INT;
12865 mantissa = mant_hi;
12866
12867 /* We can permit four significant bits of mantissa only, plus a high bit
12868 which is always 1. */
12869 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12870 if ((mantissa & mask) != 0)
12871 return -1;
12872
12873 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12874 mantissa >>= point_pos - 5;
12875
12876 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12877 floating-point immediate zero with Neon using an integer-zero load, but
12878 that case is handled elsewhere.) */
12879 if (mantissa == 0)
12880 return -1;
12881
12882 gcc_assert (mantissa >= 16 && mantissa <= 31);
12883
12884 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12885 normalized significands are in the range [1, 2). (Our mantissa is shifted
12886 left 4 places at this point relative to normalized IEEE754 values). GCC
12887 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12888 REAL_EXP must be altered. */
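/* For example, 1.0 is stored internally as 0.5 * 2^1, so REAL_EXP yields 1
   here and the adjusted exponent below is 5 - 1 == 4, consistent with
   1.0 == 16 * 2^-4 in the quarter-precision formula above.  */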
12889 exponent = 5 - exponent;
12890
12891 if (exponent < 0 || exponent > 7)
12892 return -1;
12893
12894 /* Sign, mantissa and exponent are now in the correct form to plug into the
12895 formula described in the comment above. */
12896 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12897 }
12898
12899 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12900 int
12901 vfp3_const_double_rtx (rtx x)
12902 {
12903 if (!TARGET_VFP3)
12904 return 0;
12905
12906 return vfp3_const_double_index (x) != -1;
12907 }
12908
12909 /* Recognize immediates which can be used in various Neon and MVE instructions.
12910 Legal immediates are described by the following table (for VMVN variants, the
12911 bitwise inverse of the constant shown is recognized. In either case, VMOV
12912 is output and the correct instruction to use for a given constant is chosen
12913 by the assembler). The constant shown is replicated across all elements of
12914 the destination vector.
12915
12916 insn elems variant constant (binary)
12917 ---- ----- ------- -----------------
12918 vmov i32 0 00000000 00000000 00000000 abcdefgh
12919 vmov i32 1 00000000 00000000 abcdefgh 00000000
12920 vmov i32 2 00000000 abcdefgh 00000000 00000000
12921 vmov i32 3 abcdefgh 00000000 00000000 00000000
12922 vmov i16 4 00000000 abcdefgh
12923 vmov i16 5 abcdefgh 00000000
12924 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12925 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12926 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12927 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12928 vmvn i16 10 00000000 abcdefgh
12929 vmvn i16 11 abcdefgh 00000000
12930 vmov i32 12 00000000 00000000 abcdefgh 11111111
12931 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12932 vmov i32 14 00000000 abcdefgh 11111111 11111111
12933 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12934 vmov i8 16 abcdefgh
12935 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12936 eeeeeeee ffffffff gggggggg hhhhhhhh
12937 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12938 vmov f32 19 00000000 00000000 00000000 00000000
12939
12940 For case 18, B = !b. Representable values are exactly those accepted by
12941 vfp3_const_double_index, but are output as floating-point numbers rather
12942 than indices.
12943
12944 For case 19, we will change it to vmov.i32 when assembling.
12945
12946 Variants 0-5 (inclusive) may also be used as immediates for the second
12947 operand of VORR/VBIC instructions.
12948
12949 The INVERSE argument causes the bitwise inverse of the given operand to be
12950 recognized instead (used for recognizing legal immediates for the VAND/VORN
12951 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12952 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12953 output, rather than the real insns vbic/vorr).
12954
12955 INVERSE makes no difference to the recognition of float vectors.
12956
12957 The return value is the variant of immediate as shown in the above table, or
12958 -1 if the given value doesn't match any of the listed patterns.
12959 */
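/* Two illustrative examples (assuming the little-endian layout handled
   below): a V4SImode constant with every element equal to 0x45 matches
   variant 0, returning *MODCONST == 0x45 and *ELEMENTWIDTH == 32, suitable
   for "vmov.i32 qd, #0x45"; a V16QImode constant with every byte equal to
   0xab matches variant 16 with *ELEMENTWIDTH == 8.  */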
12960 static int
12961 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12962 rtx *modconst, int *elementwidth)
12963 {
12964 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12965 matches = 1; \
12966 for (i = 0; i < idx; i += (STRIDE)) \
12967 if (!(TEST)) \
12968 matches = 0; \
12969 if (matches) \
12970 { \
12971 immtype = (CLASS); \
12972 elsize = (ELSIZE); \
12973 break; \
12974 }
12975
12976 unsigned int i, elsize = 0, idx = 0, n_elts;
12977 unsigned int innersize;
12978 unsigned char bytes[16] = {};
12979 int immtype = -1, matches;
12980 unsigned int invmask = inverse ? 0xff : 0;
12981 bool vector = GET_CODE (op) == CONST_VECTOR;
12982
12983 if (vector)
12984 n_elts = CONST_VECTOR_NUNITS (op);
12985 else
12986 {
12987 n_elts = 1;
12988 gcc_assert (mode != VOIDmode);
12989 }
12990
12991 innersize = GET_MODE_UNIT_SIZE (mode);
12992
12993 /* Only support 128-bit vectors for MVE. */
12994 if (TARGET_HAVE_MVE
12995 && (!vector
12996 || VALID_MVE_PRED_MODE (mode)
12997 || n_elts * innersize != 16))
12998 return -1;
12999
13000 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
13001 return -1;
13002
13003 /* Vectors of float constants. */
13004 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13005 {
13006 rtx el0 = CONST_VECTOR_ELT (op, 0);
13007
13008 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
13009 return -1;
13010
13011 /* FP16 vectors cannot be represented. */
13012 if (GET_MODE_INNER (mode) == HFmode)
13013 return -1;
13014
13015 /* All elements in the vector must be the same. Note that 0.0 and -0.0
13016 are distinct in this context. */
13017 if (!const_vec_duplicate_p (op))
13018 return -1;
13019
13020 if (modconst)
13021 *modconst = CONST_VECTOR_ELT (op, 0);
13022
13023 if (elementwidth)
13024 *elementwidth = 0;
13025
13026 if (el0 == CONST0_RTX (GET_MODE (el0)))
13027 return 19;
13028 else
13029 return 18;
13030 }
13031
13032 /* The tricks done in the code below apply for little-endian vector layout.
13033 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13034 FIXME: Implement logic for big-endian vectors. */
13035 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13036 return -1;
13037
13038 /* Splat vector constant out into a byte vector. */
13039 for (i = 0; i < n_elts; i++)
13040 {
13041 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13042 unsigned HOST_WIDE_INT elpart;
13043
13044 gcc_assert (CONST_INT_P (el));
13045 elpart = INTVAL (el);
13046
13047 for (unsigned int byte = 0; byte < innersize; byte++)
13048 {
13049 bytes[idx++] = (elpart & 0xff) ^ invmask;
13050 elpart >>= BITS_PER_UNIT;
13051 }
13052 }
13053
13054 /* Sanity check. */
13055 gcc_assert (idx == GET_MODE_SIZE (mode));
13056
13057 do
13058 {
13059 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13060 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13061
13062 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13063 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13064
13065 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13066 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13067
13068 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13069 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13070
13071 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13072
13073 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13074
13075 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13076 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13077
13078 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13079 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13080
13081 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13082 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13083
13084 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13085 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13086
13087 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13088
13089 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13090
13091 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13092 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13093
13094 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13095 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13096
13097 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13098 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13099
13100 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13101 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13102
13103 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13104
13105 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13106 && bytes[i] == bytes[(i + 8) % idx]);
13107 }
13108 while (0);
13109
13110 if (immtype == -1)
13111 return -1;
13112
13113 if (elementwidth)
13114 *elementwidth = elsize;
13115
13116 if (modconst)
13117 {
13118 unsigned HOST_WIDE_INT imm = 0;
13119
13120 /* Un-invert bytes of recognized vector, if necessary. */
13121 if (invmask != 0)
13122 for (i = 0; i < idx; i++)
13123 bytes[i] ^= invmask;
13124
13125 if (immtype == 17)
13126 {
13127 /* FIXME: Broken on 32-bit H_W_I hosts. */
13128 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13129
13130 for (i = 0; i < 8; i++)
13131 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13132 << (i * BITS_PER_UNIT);
13133
13134 *modconst = GEN_INT (imm);
13135 }
13136 else
13137 {
13138 unsigned HOST_WIDE_INT imm = 0;
13139
13140 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13141 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13142
13143 *modconst = GEN_INT (imm);
13144 }
13145 }
13146
13147 return immtype;
13148 #undef CHECK
13149 }
13150
13151 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13152 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13153 (or zero for float elements), and a modified constant (whatever should be
13154 output for a VMOV) in *MODCONST. This function was renamed from
13155 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13156 it is used by both Neon and MVE. */
13157 int
13158 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13159 rtx *modconst, int *elementwidth)
13160 {
13161 rtx tmpconst;
13162 int tmpwidth;
13163 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13164
13165 if (retval == -1)
13166 return 0;
13167
13168 if (modconst)
13169 *modconst = tmpconst;
13170
13171 if (elementwidth)
13172 *elementwidth = tmpwidth;
13173
13174 return 1;
13175 }
13176
13177 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13178 the immediate is valid, write a constant suitable for using as an operand
13179 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13180 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13181
13182 int
13183 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13184 rtx *modconst, int *elementwidth)
13185 {
13186 rtx tmpconst;
13187 int tmpwidth;
13188 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13189
13190 if (retval < 0 || retval > 5)
13191 return 0;
13192
13193 if (modconst)
13194 *modconst = tmpconst;
13195
13196 if (elementwidth)
13197 *elementwidth = tmpwidth;
13198
13199 return 1;
13200 }
13201
13202 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13203 the immediate is valid, write a constant suitable for using as an operand
13204 to VSHR/VSHL to *MODCONST and the corresponding element width to
13205 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
13206 which have different limitations. */
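/* For example, with V8HImode (16-bit elements) a VSHL immediate must lie in
   the range 0-15, while a VSHR immediate must lie in the range 1-16.  */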
13207
13208 int
13209 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13210 rtx *modconst, int *elementwidth,
13211 bool isleftshift)
13212 {
13213 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13214 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13215 unsigned HOST_WIDE_INT last_elt = 0;
13216 unsigned HOST_WIDE_INT maxshift;
13217
13218 /* Split vector constant out into a byte vector. */
13219 for (i = 0; i < n_elts; i++)
13220 {
13221 rtx el = CONST_VECTOR_ELT (op, i);
13222 unsigned HOST_WIDE_INT elpart;
13223
13224 if (CONST_INT_P (el))
13225 elpart = INTVAL (el);
13226 else if (CONST_DOUBLE_P (el))
13227 return 0;
13228 else
13229 gcc_unreachable ();
13230
13231 if (i != 0 && elpart != last_elt)
13232 return 0;
13233
13234 last_elt = elpart;
13235 }
13236
13237 /* Shift less than element size. */
13238 maxshift = innersize * 8;
13239
13240 if (isleftshift)
13241 {
13242 /* Left shift immediate value can be from 0 to <size>-1. */
13243 if (last_elt >= maxshift)
13244 return 0;
13245 }
13246 else
13247 {
13248 /* Right shift immediate value can be from 1 to <size>. */
13249 if (last_elt == 0 || last_elt > maxshift)
13250 return 0;
13251 }
13252
13253 if (elementwidth)
13254 *elementwidth = innersize * 8;
13255
13256 if (modconst)
13257 *modconst = CONST_VECTOR_ELT (op, 0);
13258
13259 return 1;
13260 }
13261
13262 /* Return a string suitable for output of Neon immediate logic operation
13263 MNEM. */
13264
13265 char *
13266 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13267 int inverse, int quad)
13268 {
13269 int width, is_valid;
13270 static char templ[40];
13271
13272 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13273
13274 gcc_assert (is_valid != 0);
13275
13276 if (quad)
13277 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13278 else
13279 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13280
13281 return templ;
13282 }
13283
13284 /* Return a string suitable for output of Neon immediate shift operation
13285 (VSHR or VSHL) MNEM. */
13286
13287 char *
13288 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13289 machine_mode mode, int quad,
13290 bool isleftshift)
13291 {
13292 int width, is_valid;
13293 static char templ[40];
13294
13295 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13296 gcc_assert (is_valid != 0);
13297
13298 if (quad)
13299 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13300 else
13301 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13302
13303 return templ;
13304 }
13305
13306 /* Output a sequence of pairwise operations to implement a reduction.
13307 NOTE: We do "too much work" here, because pairwise operations work on two
13308 registers-worth of operands in one go. Unfortunately we can't exploit those
13309 extra calculations to do the full operation in fewer steps, I don't think.
13310 Although all vector elements of the result but the first are ignored, we
13311 actually calculate the same result in each of the elements. An alternative
13312 such as initially loading a vector with zero to use as each of the second
13313 operands would use up an additional register and take an extra instruction,
13314 for no particular gain. */
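/* As a sketch, a V4HImode reduction (PARTS == 4 below) emits two pairwise
   operations:

     i == 2:  tmp = REDUC (op1, op1)
     i == 1:  op0 = REDUC (tmp, tmp)

   after which every element of OP0 holds the reduced value, although only
   the first one is used.  */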
13315
13316 void
13317 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13318 rtx (*reduc) (rtx, rtx, rtx))
13319 {
13320 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13321 rtx tmpsum = op1;
13322
13323 for (i = parts / 2; i >= 1; i /= 2)
13324 {
13325 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13326 emit_insn (reduc (dest, tmpsum, tmpsum));
13327 tmpsum = dest;
13328 }
13329 }
13330
13331 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13332 loaded into a register using VDUP.
13333
13334 If this is the case, and GENERATE is set, we also generate
13335 instructions to do this and return an RTX to assign to the register. */
13336
13337 static rtx
13338 neon_vdup_constant (rtx vals, bool generate)
13339 {
13340 machine_mode mode = GET_MODE (vals);
13341 machine_mode inner_mode = GET_MODE_INNER (mode);
13342 rtx x;
13343
13344 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13345 return NULL_RTX;
13346
13347 if (!const_vec_duplicate_p (vals, &x))
13348 /* The elements are not all the same. We could handle repeating
13349 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13350 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13351 vdup.i16). */
13352 return NULL_RTX;
13353
13354 if (!generate)
13355 return x;
13356
13357 /* We can load this constant by using VDUP and a constant in a
13358 single ARM register. This will be cheaper than a vector
13359 load. */
13360
13361 x = copy_to_mode_reg (inner_mode, x);
13362 return gen_vec_duplicate (mode, x);
13363 }
13364
13365 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13366 rtx
13367 mve_bool_vec_to_const (rtx const_vec)
13368 {
13369 machine_mode mode = GET_MODE (const_vec);
13370
13371 if (!VECTOR_MODE_P (mode))
13372 return const_vec;
13373
13374 unsigned n_elts = GET_MODE_NUNITS (mode);
13375 unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
13376 unsigned shift_c = 16 / n_elts;
13377 unsigned i;
13378 int hi_val = 0;
13379
13380 for (i = 0; i < n_elts; i++)
13381 {
13382 rtx el = CONST_VECTOR_ELT (const_vec, i);
13383 unsigned HOST_WIDE_INT elpart;
13384
13385 gcc_assert (CONST_INT_P (el));
13386 elpart = INTVAL (el) & ((1U << el_prec) - 1);
13387
13388 unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
13389
13390 hi_val |= elpart << (index * shift_c);
13391 }
13392 /* We are using a mov immediate to encode this constant, which writes 32 bits,
13393 so we need to make sure the top 16 bits are all 0; otherwise we can't
13394 guarantee we can actually write this immediate. */
13395 return gen_int_mode (hi_val, SImode);
13396 }
13397
13398 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13399 constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13400 into a register.
13401
13402 If this is the case, and GENERATE is set, we also generate code to do
13403 this and return an RTX to copy into the register. */
13404
13405 rtx
13406 neon_make_constant (rtx vals, bool generate)
13407 {
13408 machine_mode mode = GET_MODE (vals);
13409 rtx target;
13410 rtx const_vec = NULL_RTX;
13411 int n_elts = GET_MODE_NUNITS (mode);
13412 int n_const = 0;
13413 int i;
13414
13415 if (GET_CODE (vals) == CONST_VECTOR)
13416 const_vec = vals;
13417 else if (GET_CODE (vals) == PARALLEL)
13418 {
13419 /* A CONST_VECTOR must contain only CONST_INTs and
13420 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13421 Only store valid constants in a CONST_VECTOR. */
13422 for (i = 0; i < n_elts; ++i)
13423 {
13424 rtx x = XVECEXP (vals, 0, i);
13425 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13426 n_const++;
13427 }
13428 if (n_const == n_elts)
13429 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13430 }
13431 else
13432 gcc_unreachable ();
13433
13434 if (const_vec != NULL
13435 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13436 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13437 return const_vec;
13438 else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode))
13439 return mve_bool_vec_to_const (const_vec);
13440 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13441 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13442 pipeline cycle; creating the constant takes one or two ARM
13443 pipeline cycles. */
13444 return target;
13445 else if (const_vec != NULL_RTX)
13446 /* Load from constant pool. On Cortex-A8 this takes two cycles
13447 (for either double or quad vectors). We cannot take advantage
13448 of single-cycle VLD1 because we need a PC-relative addressing
13449 mode. */
13450 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13451 else
13452 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13453 We cannot construct an initializer. */
13454 return NULL_RTX;
13455 }
13456
13457 /* Initialize vector TARGET to VALS. */
13458
13459 void
13460 neon_expand_vector_init (rtx target, rtx vals)
13461 {
13462 machine_mode mode = GET_MODE (target);
13463 machine_mode inner_mode = GET_MODE_INNER (mode);
13464 int n_elts = GET_MODE_NUNITS (mode);
13465 int n_var = 0, one_var = -1;
13466 bool all_same = true;
13467 rtx x, mem;
13468 int i;
13469
13470 for (i = 0; i < n_elts; ++i)
13471 {
13472 x = XVECEXP (vals, 0, i);
13473 if (!CONSTANT_P (x))
13474 ++n_var, one_var = i;
13475
13476 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13477 all_same = false;
13478 }
13479
13480 if (n_var == 0)
13481 {
13482 rtx constant = neon_make_constant (vals);
13483 if (constant != NULL_RTX)
13484 {
13485 emit_move_insn (target, constant);
13486 return;
13487 }
13488 }
13489
13490 /* Splat a single non-constant element if we can. */
13491 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13492 {
13493 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13494 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13495 return;
13496 }
13497
13498 /* One field is non-constant. Load constant then overwrite varying
13499 field. This is more efficient than using the stack. */
13500 if (n_var == 1)
13501 {
13502 rtx copy = copy_rtx (vals);
13503 rtx merge_mask = GEN_INT (1 << one_var);
13504
13505 /* Load constant part of vector, substitute neighboring value for
13506 varying element. */
13507 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13508 neon_expand_vector_init (target, copy);
13509
13510 /* Insert variable. */
13511 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13512 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13513 return;
13514 }
13515
13516 /* Construct the vector in memory one field at a time
13517 and load the whole vector. */
13518 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13519 for (i = 0; i < n_elts; i++)
13520 emit_move_insn (adjust_address_nv (mem, inner_mode,
13521 i * GET_MODE_SIZE (inner_mode)),
13522 XVECEXP (vals, 0, i));
13523 emit_move_insn (target, mem);
13524 }
13525
13526 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13527 an error, using DESC in the message, if it doesn't. EXP indicates the
13528 source location, which includes the inlining history for intrinsics. */
13529
13530 static void
13531 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13532 const_tree exp, const char *desc)
13533 {
13534 HOST_WIDE_INT lane;
13535
13536 gcc_assert (CONST_INT_P (operand));
13537
13538 lane = INTVAL (operand);
13539
13540 if (lane < low || lane >= high)
13541 {
13542 if (exp)
13543 error_at (EXPR_LOCATION (exp),
13544 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13545 else
13546 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13547 }
13548 }
13549
13550 /* Bounds-check lanes. */
13551
13552 void
13553 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13554 const_tree exp)
13555 {
13556 bounds_check (operand, low, high, exp, "lane");
13557 }
13558
13559 /* Bounds-check constants. */
13560
13561 void
13562 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13563 {
13564 bounds_check (operand, low, high, NULL_TREE, "constant");
13565 }
13566
13567 HOST_WIDE_INT
13568 neon_element_bits (machine_mode mode)
13569 {
13570 return GET_MODE_UNIT_BITSIZE (mode);
13571 }
13572
13573 \f
13574 /* Predicates for `match_operand' and `match_operator'. */
13575
13576 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13577 WB level is 2 if full writeback address modes are allowed, 1
13578 if limited writeback address modes (POST_INC and PRE_DEC) are
13579 allowed and 0 if no writeback at all is supported. */
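/* As a rough summary of the checks below: (mem (reg)) and
   (mem (plus (reg) (const_int))) are accepted at any WB level;
   (mem (post_inc (reg))) and (mem (pre_dec (reg))) need WB level 1 or 2;
   (mem (pre_inc (reg))), (mem (post_dec (reg))) and the
   {pre,post}_modify forms need WB level 2.  */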
13580
13581 int
13582 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13583 {
13584 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13585 rtx ind;
13586
13587 /* Reject eliminable registers. */
13588 if (! (reload_in_progress || reload_completed || lra_in_progress)
13589 && ( reg_mentioned_p (frame_pointer_rtx, op)
13590 || reg_mentioned_p (arg_pointer_rtx, op)
13591 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13592 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13593 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13594 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13595 return FALSE;
13596
13597 /* Constants are converted into offsets from labels. */
13598 if (!MEM_P (op))
13599 return FALSE;
13600
13601 ind = XEXP (op, 0);
13602
13603 if (reload_completed
13604 && (LABEL_REF_P (ind)
13605 || (GET_CODE (ind) == CONST
13606 && GET_CODE (XEXP (ind, 0)) == PLUS
13607 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13608 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13609 return TRUE;
13610
13611 /* Match: (mem (reg)). */
13612 if (REG_P (ind))
13613 return arm_address_register_rtx_p (ind, 0);
13614
13615 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13616 acceptable in any case (subject to verification by
13617 arm_address_register_rtx_p) and need at least restricted
13618 writeback. We need full writeback to accept PRE_INC and
13619 POST_DEC. */
13620 if (wb_level > 0
13621 && (GET_CODE (ind) == POST_INC
13622 || GET_CODE (ind) == PRE_DEC
13623 || (wb_level > 1
13624 && (GET_CODE (ind) == PRE_INC
13625 || GET_CODE (ind) == POST_DEC))))
13626 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13627
13628 if (wb_level > 1
13629 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13630 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13631 && GET_CODE (XEXP (ind, 1)) == PLUS
13632 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13633 ind = XEXP (ind, 1);
13634
13635 /* Match:
13636 (plus (reg)
13637 (const))
13638
13639 The encoded immediate for 16-bit modes is multiplied by 2,
13640 while the encoded immediate for 32-bit and 64-bit modes is
13641 multiplied by 4. */
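/* For example, with FACTOR == 4 (32-bit and 64-bit modes) the accepted
   offsets are the multiples of 4 in the range [-1020, 1020]; with
   FACTOR == 2 (16-bit modes) they are the even offsets in [-510, 510].  */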
13642 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13643 if (GET_CODE (ind) == PLUS
13644 && REG_P (XEXP (ind, 0))
13645 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13646 && CONST_INT_P (XEXP (ind, 1))
13647 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13648 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13649 return TRUE;
13650
13651 return FALSE;
13652 }
13653
13654 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13655 WB is true if full writeback address modes are allowed and is false
13656 if limited writeback address modes (POST_INC and PRE_DEC) are
13657 allowed. */
13658
13659 int arm_coproc_mem_operand (rtx op, bool wb)
13660 {
13661 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13662 }
13663
13664 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13665 context in which no writeback address modes are allowed. */
13666
13667 int
13668 arm_coproc_mem_operand_no_writeback (rtx op)
13669 {
13670 return arm_coproc_mem_operand_wb (op, 0);
13671 }
13672
13673 /* In non-STRICT mode, return the register number unchanged. In STRICT mode,
13674 return the hard regno a pseudo has been renumbered to, if any; otherwise
13675 return the original pseudo number. */
13676 static int
13677 arm_effective_regno (rtx op, bool strict)
13678 {
13679 gcc_assert (REG_P (op));
13680 if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
13681 || !reg_renumber || reg_renumber[REGNO (op)] < 0)
13682 return REGNO (op);
13683 return reg_renumber[REGNO (op)];
13684 }
13685
13686 /* This function returns TRUE on matching mode and op.
13687 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13688 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13689 int
13690 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13691 {
13692 enum rtx_code code;
13693 int val, reg_no;
13694
13695 /* Match: (mem (reg)). */
13696 if (REG_P (op))
13697 {
13698 reg_no = arm_effective_regno (op, strict);
13699 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13700 ? reg_no <= LAST_LO_REGNUM
13701 : reg_no < LAST_ARM_REGNUM)
13702 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13703 }
13704 code = GET_CODE (op);
13705
13706 if ((code == POST_INC
13707 || code == PRE_DEC
13708 || code == PRE_INC
13709 || code == POST_DEC)
13710 && REG_P (XEXP (op, 0)))
13711 {
13712 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13713 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13714 ? reg_no <= LAST_LO_REGNUM
13715 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13716 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13717 }
13718 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13719 && GET_CODE (XEXP (op, 1)) == PLUS
13720 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13721 && REG_P (XEXP (op, 0))
13722 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13723 /* Make sure to only accept PLUS after reload_completed, otherwise
13724 this will interfere with auto_inc's pattern detection. */
13725 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13726 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13727 {
13728 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13729 if (code == PLUS)
13730 val = INTVAL (XEXP (op, 1));
13731 else
13732 val = INTVAL (XEXP(XEXP (op, 1), 1));
13733
13734 switch (mode)
13735 {
13736 case E_V16QImode:
13737 case E_V8QImode:
13738 case E_V4QImode:
13739 if (abs (val) > 127)
13740 return FALSE;
13741 break;
13742 case E_V8HImode:
13743 case E_V8HFmode:
13744 case E_V4HImode:
13745 case E_V4HFmode:
13746 if (val % 2 != 0 || abs (val) > 254)
13747 return FALSE;
13748 break;
13749 case E_V4SImode:
13750 case E_V4SFmode:
13751 if (val % 4 != 0 || abs (val) > 508)
13752 return FALSE;
13753 break;
13754 default:
13755 return FALSE;
13756 }
13757 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13758 || (MVE_STN_LDW_MODE (mode)
13759 ? reg_no <= LAST_LO_REGNUM
13760 : (reg_no < LAST_ARM_REGNUM
13761 && (code == PLUS || reg_no != SP_REGNUM))));
13762 }
13763 return FALSE;
13764 }
13765
13766 /* Return TRUE if OP is a memory operand which we can load or store a vector
13767 to/from. TYPE is one of the following values:
13768 0 - Vector load/store (vldr)
13769 1 - Core registers (ldm)
13770 2 - Element/structure loads (vld1)
13771 */
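/* A rough summary of the forms accepted below: (mem (reg)) is valid for all
   three TYPEs; POST_INC is valid for TYPEs 0 and 2 and PRE_DEC for TYPE 0
   only; post-increment by a register (POST_MODIFY) is valid only for TYPE 2;
   and a constant offset from a register is valid only for TYPE 0, within
   the VLDR offset range checked below.  */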
13772 int
13773 neon_vector_mem_operand (rtx op, int type, bool strict)
13774 {
13775 rtx ind;
13776
13777 /* Reject eliminable registers. */
13778 if (strict && ! (reload_in_progress || reload_completed)
13779 && (reg_mentioned_p (frame_pointer_rtx, op)
13780 || reg_mentioned_p (arg_pointer_rtx, op)
13781 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13782 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13783 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13784 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13785 return FALSE;
13786
13787 /* Constants are converted into offsets from labels. */
13788 if (!MEM_P (op))
13789 return FALSE;
13790
13791 ind = XEXP (op, 0);
13792
13793 if (reload_completed
13794 && (LABEL_REF_P (ind)
13795 || (GET_CODE (ind) == CONST
13796 && GET_CODE (XEXP (ind, 0)) == PLUS
13797 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13798 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13799 return TRUE;
13800
13801 /* Match: (mem (reg)). */
13802 if (REG_P (ind))
13803 return arm_address_register_rtx_p (ind, 0);
13804
13805 /* Allow post-increment with Neon registers. */
13806 if ((type != 1 && GET_CODE (ind) == POST_INC)
13807 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13808 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13809
13810 /* Allow post-increment by register for VLDn. */
13811 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13812 && GET_CODE (XEXP (ind, 1)) == PLUS
13813 && REG_P (XEXP (XEXP (ind, 1), 1))
13814 && REG_P (XEXP (ind, 0))
13815 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13816 return true;
13817
13818 /* Match:
13819 (plus (reg)
13820 (const)). */
13821 if (type == 0
13822 && GET_CODE (ind) == PLUS
13823 && REG_P (XEXP (ind, 0))
13824 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13825 && CONST_INT_P (XEXP (ind, 1))
13826 && INTVAL (XEXP (ind, 1)) > -1024
13827 /* For quad modes, we restrict the constant offset to be slightly less
13828 than what the instruction format permits. We have no such constraint
13829 on double mode offsets. (This must match arm_legitimate_index_p.) */
13830 && (INTVAL (XEXP (ind, 1))
13831 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13832 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13833 return TRUE;
13834
13835 return FALSE;
13836 }
13837
13838 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13839 type. */
13840 int
13841 mve_struct_mem_operand (rtx op)
13842 {
13843 rtx ind = XEXP (op, 0);
13844
13845 /* Match: (mem (reg)). */
13846 if (REG_P (ind))
13847 return arm_address_register_rtx_p (ind, 0);
13848
13849 /* Allow only post-increment by the mode size. */
13850 if (GET_CODE (ind) == POST_INC)
13851 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13852
13853 return FALSE;
13854 }
13855
13856 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13857 type. */
13858 int
13859 neon_struct_mem_operand (rtx op)
13860 {
13861 rtx ind;
13862
13863 /* Reject eliminable registers. */
13864 if (! (reload_in_progress || reload_completed)
13865 && ( reg_mentioned_p (frame_pointer_rtx, op)
13866 || reg_mentioned_p (arg_pointer_rtx, op)
13867 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13868 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13869 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13870 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13871 return FALSE;
13872
13873 /* Constants are converted into offsets from labels. */
13874 if (!MEM_P (op))
13875 return FALSE;
13876
13877 ind = XEXP (op, 0);
13878
13879 if (reload_completed
13880 && (LABEL_REF_P (ind)
13881 || (GET_CODE (ind) == CONST
13882 && GET_CODE (XEXP (ind, 0)) == PLUS
13883 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13884 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13885 return TRUE;
13886
13887 /* Match: (mem (reg)). */
13888 if (REG_P (ind))
13889 return arm_address_register_rtx_p (ind, 0);
13890
13891 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13892 if (GET_CODE (ind) == POST_INC
13893 || GET_CODE (ind) == PRE_DEC)
13894 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13895
13896 return FALSE;
13897 }
13898
13899 /* Prepares the operands for the VCMLA by lane instruction such that the right
13900 register number is selected. This instruction is special in that it always
13901 requires a D register; however, there is a choice to be made between Dn[0],
13902 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13903
13904 The VCMLA by lane function always selects two values. For instance given D0
13905 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13906 used by the instruction. However given V4SF then index 0 and 1 are valid as
13907 D0[0] or D1[0] are both valid.
13908
13909 This function centralizes that information based on OPERANDS: OPERANDS[3]
13910 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13911 updated to contain the right index. */
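/* For example (an illustration of the logic below): for a V4SF lane operand
   held in q0 (d0/d1), lane 0 selects d0 with index 0, while lane 1 is
   rewritten to d1 with index 0.  */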
13912
13913 rtx *
13914 neon_vcmla_lane_prepare_operands (rtx *operands)
13915 {
13916 int lane = INTVAL (operands[4]);
13917 machine_mode constmode = SImode;
13918 machine_mode mode = GET_MODE (operands[3]);
13919 int regno = REGNO (operands[3]);
13920 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13921 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13922 {
13923 operands[3] = gen_int_mode (regno + 1, constmode);
13924 operands[4]
13925 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13926 }
13927 else
13928 {
13929 operands[3] = gen_int_mode (regno, constmode);
13930 operands[4] = gen_int_mode (lane, constmode);
13931 }
13932 return operands;
13933 }
13934
13935
13936 /* Return true if X is a register that will be eliminated later on. */
13937 int
13938 arm_eliminable_register (rtx x)
13939 {
13940 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13941 || REGNO (x) == ARG_POINTER_REGNUM
13942 || VIRTUAL_REGISTER_P (x));
13943 }
13944
13945 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13946 coprocessor registers. Otherwise return NO_REGS. */
13947
13948 enum reg_class
13949 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13950 {
13951 if (mode == HFmode)
13952 {
13953 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13954 return GENERAL_REGS;
13955 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13956 return NO_REGS;
13957 return GENERAL_REGS;
13958 }
13959
13960 /* The neon move patterns handle all legitimate vector and struct
13961 addresses. */
13962 if (TARGET_NEON
13963 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13964 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13965 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13966 || VALID_NEON_STRUCT_MODE (mode)))
13967 return NO_REGS;
13968
13969 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13970 return NO_REGS;
13971
13972 return GENERAL_REGS;
13973 }
13974
13975 /* Values which must be returned in the most-significant end of the return
13976 register. */
13977
13978 static bool
13979 arm_return_in_msb (const_tree valtype)
13980 {
13981 return (TARGET_AAPCS_BASED
13982 && BYTES_BIG_ENDIAN
13983 && (AGGREGATE_TYPE_P (valtype)
13984 || TREE_CODE (valtype) == COMPLEX_TYPE
13985 || FIXED_POINT_TYPE_P (valtype)));
13986 }
13987
13988 /* Return TRUE if X references a SYMBOL_REF. */
13989 int
13990 symbol_mentioned_p (rtx x)
13991 {
13992 const char * fmt;
13993 int i;
13994
13995 if (SYMBOL_REF_P (x))
13996 return 1;
13997
13998 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13999 are constant offsets, not symbols. */
14000 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14001 return 0;
14002
14003 fmt = GET_RTX_FORMAT (GET_CODE (x));
14004
14005 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14006 {
14007 if (fmt[i] == 'E')
14008 {
14009 int j;
14010
14011 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14012 if (symbol_mentioned_p (XVECEXP (x, i, j)))
14013 return 1;
14014 }
14015 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
14016 return 1;
14017 }
14018
14019 return 0;
14020 }
14021
14022 /* Return TRUE if X references a LABEL_REF. */
14023 int
14024 label_mentioned_p (rtx x)
14025 {
14026 const char * fmt;
14027 int i;
14028
14029 if (LABEL_REF_P (x))
14030 return 1;
14031
14032 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14033 instruction, but they are constant offsets, not symbols. */
14034 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14035 return 0;
14036
14037 fmt = GET_RTX_FORMAT (GET_CODE (x));
14038 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14039 {
14040 if (fmt[i] == 'E')
14041 {
14042 int j;
14043
14044 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14045 if (label_mentioned_p (XVECEXP (x, i, j)))
14046 return 1;
14047 }
14048 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
14049 return 1;
14050 }
14051
14052 return 0;
14053 }
14054
14055 int
14056 tls_mentioned_p (rtx x)
14057 {
14058 switch (GET_CODE (x))
14059 {
14060 case CONST:
14061 return tls_mentioned_p (XEXP (x, 0));
14062
14063 case UNSPEC:
14064 if (XINT (x, 1) == UNSPEC_TLS)
14065 return 1;
14066
14067 /* Fall through. */
14068 default:
14069 return 0;
14070 }
14071 }
14072
14073 /* Must not copy any rtx that uses a pc-relative address.
14074 Also, disallow copying of load-exclusive instructions that
14075 may appear after splitting of compare-and-swap-style operations
14076 so as to prevent those loops from being transformed away from their
14077 canonical forms (see PR 69904). */
14078
14079 static bool
14080 arm_cannot_copy_insn_p (rtx_insn *insn)
14081 {
14082 /* The tls call insn cannot be copied, as it is paired with a data
14083 word. */
14084 if (recog_memoized (insn) == CODE_FOR_tlscall)
14085 return true;
14086
14087 subrtx_iterator::array_type array;
14088 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14089 {
14090 const_rtx x = *iter;
14091 if (GET_CODE (x) == UNSPEC
14092 && (XINT (x, 1) == UNSPEC_PIC_BASE
14093 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14094 return true;
14095 }
14096
14097 rtx set = single_set (insn);
14098 if (set)
14099 {
14100 rtx src = SET_SRC (set);
14101 if (GET_CODE (src) == ZERO_EXTEND)
14102 src = XEXP (src, 0);
14103
14104 /* Catch the load-exclusive and load-acquire operations. */
14105 if (GET_CODE (src) == UNSPEC_VOLATILE
14106 && (XINT (src, 1) == VUNSPEC_LL
14107 || XINT (src, 1) == VUNSPEC_LAX))
14108 return true;
14109 }
14110 return false;
14111 }
14112
14113 enum rtx_code
14114 minmax_code (rtx x)
14115 {
14116 enum rtx_code code = GET_CODE (x);
14117
14118 switch (code)
14119 {
14120 case SMAX:
14121 return GE;
14122 case SMIN:
14123 return LE;
14124 case UMIN:
14125 return LEU;
14126 case UMAX:
14127 return GEU;
14128 default:
14129 gcc_unreachable ();
14130 }
14131 }
14132
14133 /* Match pair of min/max operators that can be implemented via usat/ssat. */
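/* For example, the bounds [0, 255] describe an 8-bit USAT (*MASK == 8,
   *SIGNED_SAT == false), while the bounds [-128, 127] describe an 8-bit
   SSAT (*MASK == 8, *SIGNED_SAT == true).  */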
14134
14135 bool
14136 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14137 int *mask, bool *signed_sat)
14138 {
14139 /* The high bound must be a power of two minus one. */
14140 int log = exact_log2 (INTVAL (hi_bound) + 1);
14141 if (log == -1)
14142 return false;
14143
14144 /* The low bound is either zero (for usat) or one less than the
14145 negation of the high bound (for ssat). */
14146 if (INTVAL (lo_bound) == 0)
14147 {
14148 if (mask)
14149 *mask = log;
14150 if (signed_sat)
14151 *signed_sat = false;
14152
14153 return true;
14154 }
14155
14156 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14157 {
14158 if (mask)
14159 *mask = log + 1;
14160 if (signed_sat)
14161 *signed_sat = true;
14162
14163 return true;
14164 }
14165
14166 return false;
14167 }
14168
14169 /* Return 1 if memory locations are adjacent. */
14170 int
14171 adjacent_mem_locations (rtx a, rtx b)
14172 {
14173 /* We don't guarantee to preserve the order of these memory refs. */
14174 if (volatile_refs_p (a) || volatile_refs_p (b))
14175 return 0;
14176
14177 if ((REG_P (XEXP (a, 0))
14178 || (GET_CODE (XEXP (a, 0)) == PLUS
14179 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14180 && (REG_P (XEXP (b, 0))
14181 || (GET_CODE (XEXP (b, 0)) == PLUS
14182 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14183 {
14184 HOST_WIDE_INT val0 = 0, val1 = 0;
14185 rtx reg0, reg1;
14186 int val_diff;
14187
14188 if (GET_CODE (XEXP (a, 0)) == PLUS)
14189 {
14190 reg0 = XEXP (XEXP (a, 0), 0);
14191 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14192 }
14193 else
14194 reg0 = XEXP (a, 0);
14195
14196 if (GET_CODE (XEXP (b, 0)) == PLUS)
14197 {
14198 reg1 = XEXP (XEXP (b, 0), 0);
14199 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14200 }
14201 else
14202 reg1 = XEXP (b, 0);
14203
14204 /* Don't accept any offset that will require multiple
14205 instructions to handle, since this would cause the
14206 arith_adjacentmem pattern to output an overlong sequence. */
14207 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14208 return 0;
14209
14210 /* Don't allow an eliminable register: register elimination can make
14211 the offset too large. */
14212 if (arm_eliminable_register (reg0))
14213 return 0;
14214
14215 val_diff = val1 - val0;
14216
14217 if (arm_ld_sched)
14218 {
14219 /* If the target has load delay slots, then there's no benefit
14220 to using an ldm instruction unless the offset is zero and
14221 we are optimizing for size. */
14222 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14223 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14224 && (val_diff == 4 || val_diff == -4));
14225 }
14226
14227 return ((REGNO (reg0) == REGNO (reg1))
14228 && (val_diff == 4 || val_diff == -4));
14229 }
14230
14231 return 0;
14232 }
14233
14234 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14235 for load operations, false for store operations. CONSECUTIVE is true
14236 if the register numbers in the operation must be consecutive in the register
14237 bank. RETURN_PC is true if the value is to be loaded into the PC.
14238 The pattern we are trying to match for load is:
14239 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14240 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14241 :
14242 :
14243 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14244 ]
14245 where
14246 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14247 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14248 3. If consecutive is TRUE, then for kth register being loaded,
14249 REGNO (R_dk) = REGNO (R_d0) + k.
14250 The pattern for store is similar. */
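/* As a concrete (illustrative) instance, a pop-style load multiple such as
   ldmia sp!, {r4, r5, r6} is represented roughly as:
     [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
      (set (reg:SI r4) (mem:SI (reg:SI sp)))
      (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
      (set (reg:SI r6) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))]
   i.e. the optional write-back SET comes first and the loaded registers
   appear in ascending order at offsets 0, 4 and 8.  */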
14251 bool
14252 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14253 bool consecutive, bool return_pc)
14254 {
14255 HOST_WIDE_INT count = XVECLEN (op, 0);
14256 rtx reg, mem, addr;
14257 unsigned regno;
14258 unsigned first_regno;
14259 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14260 rtx elt;
14261 bool addr_reg_in_reglist = false;
14262 bool update = false;
14263 int reg_increment;
14264 int offset_adj;
14265 int regs_per_val;
14266
14267 /* If not in SImode, then registers must be consecutive
14268 (e.g., VLDM instructions for DFmode). */
14269 gcc_assert ((mode == SImode) || consecutive);
14270 /* Setting return_pc for stores is illegal. */
14271 gcc_assert (!return_pc || load);
14272
14273 /* Set up the increments and the regs per val based on the mode. */
14274 reg_increment = GET_MODE_SIZE (mode);
14275 regs_per_val = reg_increment / 4;
14276 offset_adj = return_pc ? 1 : 0;
14277
14278 if (count <= 1
14279 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14280 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14281 return false;
14282
14283 /* Check if this is a write-back. */
14284 elt = XVECEXP (op, 0, offset_adj);
14285 if (GET_CODE (SET_SRC (elt)) == PLUS)
14286 {
14287 i++;
14288 base = 1;
14289 update = true;
14290
14291 /* The offset adjustment must be the number of registers being
14292 popped times the size of a single register. */
14293 if (!REG_P (SET_DEST (elt))
14294 || !REG_P (XEXP (SET_SRC (elt), 0))
14295 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14296 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14297 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14298 ((count - 1 - offset_adj) * reg_increment))
14299 return false;
14300 }
14301
14302 i = i + offset_adj;
14303 base = base + offset_adj;
14304 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14305 success depends on the type: VLDM can do just one reg,
14306 LDM must do at least two. */
14307 if ((count <= i) && (mode == SImode))
14308 return false;
14309
14310 elt = XVECEXP (op, 0, i - 1);
14311 if (GET_CODE (elt) != SET)
14312 return false;
14313
14314 if (load)
14315 {
14316 reg = SET_DEST (elt);
14317 mem = SET_SRC (elt);
14318 }
14319 else
14320 {
14321 reg = SET_SRC (elt);
14322 mem = SET_DEST (elt);
14323 }
14324
14325 if (!REG_P (reg) || !MEM_P (mem))
14326 return false;
14327
14328 regno = REGNO (reg);
14329 first_regno = regno;
14330 addr = XEXP (mem, 0);
14331 if (GET_CODE (addr) == PLUS)
14332 {
14333 if (!CONST_INT_P (XEXP (addr, 1)))
14334 return false;
14335
14336 offset = INTVAL (XEXP (addr, 1));
14337 addr = XEXP (addr, 0);
14338 }
14339
14340 if (!REG_P (addr))
14341 return false;
14342
14343 /* Don't allow SP to be loaded unless it is also the base register. It
14344 guarantees that SP is reset correctly when an LDM instruction
14345 is interrupted. Otherwise, we might end up with a corrupt stack. */
14346 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14347 return false;
14348
14349 if (regno == REGNO (addr))
14350 addr_reg_in_reglist = true;
14351
14352 for (; i < count; i++)
14353 {
14354 elt = XVECEXP (op, 0, i);
14355 if (GET_CODE (elt) != SET)
14356 return false;
14357
14358 if (load)
14359 {
14360 reg = SET_DEST (elt);
14361 mem = SET_SRC (elt);
14362 }
14363 else
14364 {
14365 reg = SET_SRC (elt);
14366 mem = SET_DEST (elt);
14367 }
14368
14369 if (!REG_P (reg)
14370 || GET_MODE (reg) != mode
14371 || REGNO (reg) <= regno
14372 || (consecutive
14373 && (REGNO (reg) !=
14374 (unsigned int) (first_regno + regs_per_val * (i - base))))
14375 /* Don't allow SP to be loaded unless it is also the base register. It
14376 guarantees that SP is reset correctly when an LDM instruction
14377 is interrupted. Otherwise, we might end up with a corrupt stack. */
14378 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14379 || !MEM_P (mem)
14380 || GET_MODE (mem) != mode
14381 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14382 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14383 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14384 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14385 offset + (i - base) * reg_increment))
14386 && (!REG_P (XEXP (mem, 0))
14387 || offset + (i - base) * reg_increment != 0)))
14388 return false;
14389
14390 regno = REGNO (reg);
14391 if (regno == REGNO (addr))
14392 addr_reg_in_reglist = true;
14393 }
14394
14395 if (load)
14396 {
14397 if (update && addr_reg_in_reglist)
14398 return false;
14399
14400 /* For Thumb-1, the address register is always modified - either by write-back
14401 or by explicit load. If the pattern does not describe an update,
14402 then the address register must be in the list of loaded registers. */
14403 if (TARGET_THUMB1)
14404 return update || addr_reg_in_reglist;
14405 }
14406
14407 return true;
14408 }
14409
14410 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14411 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14412 following form:
14413
14414 [(set (reg:SI <N>) (const_int 0))
14415 (set (reg:SI <M>) (const_int 0))
14416 ...
14417 (unspec_volatile [(const_int 0)]
14418 VUNSPEC_CLRM_APSR)
14419 (clobber (reg:CC CC_REGNUM))
14420 ]
14421
14422 Any number (including 0) of set expressions is valid, and the volatile unspec is
14423 optional. All registers but SP and PC are allowed and registers must be in
14424 strict increasing order.
14425
14426 To be a valid VSCCLRM pattern, OP must have the following form:
14427
14428 [(unspec_volatile [(const_int 0)]
14429 VUNSPEC_VSCCLRM_VPR)
14430 (set (reg:SF <N>) (const_int 0))
14431 (set (reg:SF <M>) (const_int 0))
14432 ...
14433 ]
14434
14435 As with CLRM, any number (including 0) of set expressions is valid; however,
14436 the volatile unspec is mandatory here. Any VFP single-precision register is
14437 accepted but all registers must be consecutive and in increasing order. */
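/* For instance (illustrative), a CLRM clearing r1, r3 and APSR would be
   matched from a parallel of the form:
     [(set (reg:SI 1) (const_int 0))
      (set (reg:SI 3) (const_int 0))
      (unspec_volatile [(const_int 0)] VUNSPEC_CLRM_APSR)
      (clobber (reg:CC CC_REGNUM))]
   with the register numbers strictly increasing and neither SP nor PC
   present.  */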
14438
14439 bool
14440 clear_operation_p (rtx op, bool vfp)
14441 {
14442 unsigned regno;
14443 unsigned last_regno = INVALID_REGNUM;
14444 rtx elt, reg, zero;
14445 int count = XVECLEN (op, 0);
14446 int first_set = vfp ? 1 : 0;
14447 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14448
14449 for (int i = first_set; i < count; i++)
14450 {
14451 elt = XVECEXP (op, 0, i);
14452
14453 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14454 {
14455 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14456 || XVECLEN (elt, 0) != 1
14457 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14458 || i != count - 2)
14459 return false;
14460
14461 continue;
14462 }
14463
14464 if (GET_CODE (elt) == CLOBBER)
14465 continue;
14466
14467 if (GET_CODE (elt) != SET)
14468 return false;
14469
14470 reg = SET_DEST (elt);
14471 zero = SET_SRC (elt);
14472
14473 if (!REG_P (reg)
14474 || GET_MODE (reg) != expected_mode
14475 || zero != CONST0_RTX (SImode))
14476 return false;
14477
14478 regno = REGNO (reg);
14479
14480 if (vfp)
14481 {
14482 if (i != first_set && regno != last_regno + 1)
14483 return false;
14484 }
14485 else
14486 {
14487 if (regno == SP_REGNUM || regno == PC_REGNUM)
14488 return false;
14489 if (i != first_set && regno <= last_regno)
14490 return false;
14491 }
14492
14493 last_regno = regno;
14494 }
14495
14496 return true;
14497 }
14498
14499 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14500 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14501 instruction. ADD_OFFSET is nonzero if the base address register needs
14502 to be modified with an add instruction before we can use it. */
14503
14504 static bool
14505 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14506 int nops, HOST_WIDE_INT add_offset)
14507 {
14508 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14509 if the offset isn't small enough. The reason 2 ldrs are faster
14510 is because these ARMs are able to do more than one cache access
14511 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14512 whilst the ARM8 has a double bandwidth cache. This means that
14513 these cores can do both an instruction fetch and a data fetch in
14514 a single cycle, so the trick of calculating the address into a
14515 scratch register (one of the result regs) and then doing a load
14516 multiple actually becomes slower (and no smaller in code size).
14517 That is the transformation
14518
14519 ldr rd1, [rbase + offset]
14520 ldr rd2, [rbase + offset + 4]
14521
14522 to
14523
14524 add rd1, rbase, offset
14525 ldmia rd1, {rd1, rd2}
14526
14527 produces worse code -- '3 cycles + any stalls on rd2' instead of
14528 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14529 access per cycle, the first sequence could never complete in less
14530 than 6 cycles, whereas the ldm sequence would only take 5 and
14531 would make better use of sequential accesses if not hitting the
14532 cache.
14533
14534 We cheat here and test 'arm_ld_sched' which we currently know to
14535 only be true for the ARM8, ARM9 and StrongARM. If this ever
14536 changes, then the test below needs to be reworked. */
14537 if (nops == 2 && arm_ld_sched && add_offset != 0)
14538 return false;
14539
14540 /* XScale has load-store double instructions, but they have stricter
14541 alignment requirements than load-store multiple, so we cannot
14542 use them.
14543
14544 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14545 the pipeline until completion.
14546
14547 NREGS CYCLES
14548 1 3
14549 2 4
14550 3 5
14551 4 6
14552
14553 An ldr instruction takes 1-3 cycles, but does not block the
14554 pipeline.
14555
14556 NREGS CYCLES
14557 1 1-3
14558 2 2-6
14559 3 3-9
14560 4 4-12
14561
14562 Best case ldr will always win. However, the more ldr instructions
14563 we issue, the less likely we are to be able to schedule them well.
14564 Using ldr instructions also increases code size.
14565
14566 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14567 for counts of 3 or 4 regs. */
14568 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14569 return false;
14570 return true;
14571 }
14572
14573 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14574 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14575 an array ORDER which describes the sequence to use when accessing the
14576 offsets that produces an ascending order. In this sequence, each
14577 offset must be larger by exactly 4 than the previous one. ORDER[0]
14578 must have been filled in with the lowest offset by the caller.
14579 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14580 we use to verify that ORDER produces an ascending order of registers.
14581 Return true if it was possible to construct such an order, false if
14582 not. */
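/* As a small worked example (not from the code below): with
   UNSORTED_OFFSETS = {8, 0, 4, 12} and ORDER[0] = 1 (the index of offset 0),
   the loop fills ORDER = {1, 2, 0, 3}, visiting the offsets as 0, 4, 8, 12.
   If any step cannot find exactly one offset that is larger than the
   previous one by 4, the function fails.  */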
14583
14584 static bool
14585 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14586 int *unsorted_regs)
14587 {
14588 int i;
14589 for (i = 1; i < nops; i++)
14590 {
14591 int j;
14592
14593 order[i] = order[i - 1];
14594 for (j = 0; j < nops; j++)
14595 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14596 {
14597 /* We must find exactly one offset that is higher than the
14598 previous one by 4. */
14599 if (order[i] != order[i - 1])
14600 return false;
14601 order[i] = j;
14602 }
14603 if (order[i] == order[i - 1])
14604 return false;
14605 /* The register numbers must be ascending. */
14606 if (unsorted_regs != NULL
14607 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14608 return false;
14609 }
14610 return true;
14611 }
14612
14613 /* Used to determine in a peephole whether a sequence of load
14614 instructions can be changed into a load-multiple instruction.
14615 NOPS is the number of separate load instructions we are examining. The
14616 first NOPS entries in OPERANDS are the destination registers, the
14617 next NOPS entries are memory operands. If this function is
14618 successful, *BASE is set to the common base register of the memory
14619 accesses; *LOAD_OFFSET is set to the first memory location's offset
14620 from that base register.
14621 REGS is an array filled in with the destination register numbers.
14622 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14623 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14624 the sequence of registers in REGS matches the loads from ascending memory
14625 locations, and the function verifies that the register numbers are
14626 themselves ascending. If CHECK_REGS is false, the register numbers
14627 are stored in the order they are found in the operands. */
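/* The returned value selects the addressing mode; roughly (illustrative,
   some forms are ARM-only): a lowest offset of 0 maps to ldmia, 4 to ldmib,
   a highest offset of 0 to ldmda and -4 to ldmdb.  For example, offsets
   {0, 4, 8} from r3 can become ldmia r3, {...} while {-12, -8, -4} can
   become ldmdb r3, {...}.  Case 5 means the base must first be adjusted
   with an add.  */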
14628 static int
14629 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14630 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14631 {
14632 int unsorted_regs[MAX_LDM_STM_OPS];
14633 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14634 int order[MAX_LDM_STM_OPS];
14635 int base_reg = -1;
14636 int i, ldm_case;
14637
14638 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14639 easily extended if required. */
14640 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14641
14642 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14643
14644 /* Loop over the operands and check that the memory references are
14645 suitable (i.e. immediate offsets from the same base register). At
14646 the same time, extract the target register, and the memory
14647 offsets. */
14648 for (i = 0; i < nops; i++)
14649 {
14650 rtx reg;
14651 rtx offset;
14652
14653 /* Convert a subreg of a mem into the mem itself. */
14654 if (GET_CODE (operands[nops + i]) == SUBREG)
14655 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14656
14657 gcc_assert (MEM_P (operands[nops + i]));
14658
14659 /* Don't reorder volatile memory references; it doesn't seem worth
14660 looking for the case where the order is ok anyway. */
14661 if (MEM_VOLATILE_P (operands[nops + i]))
14662 return 0;
14663
14664 offset = const0_rtx;
14665
14666 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14667 || (SUBREG_P (reg)
14668 && REG_P (reg = SUBREG_REG (reg))))
14669 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14670 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14671 || (SUBREG_P (reg)
14672 && REG_P (reg = SUBREG_REG (reg))))
14673 && (CONST_INT_P (offset
14674 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14675 {
14676 if (i == 0)
14677 {
14678 base_reg = REGNO (reg);
14679 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14680 return 0;
14681 }
14682 else if (base_reg != (int) REGNO (reg))
14683 /* Not addressed from the same base register. */
14684 return 0;
14685
14686 unsorted_regs[i] = (REG_P (operands[i])
14687 ? REGNO (operands[i])
14688 : REGNO (SUBREG_REG (operands[i])));
14689
14690 /* If it isn't an integer register, or if it overwrites the
14691 base register but isn't the last insn in the list, then
14692 we can't do this. */
14693 if (unsorted_regs[i] < 0
14694 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14695 || unsorted_regs[i] > 14
14696 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14697 return 0;
14698
14699 /* Don't allow SP to be loaded unless it is also the base
14700 register. It guarantees that SP is reset correctly when
14701 an LDM instruction is interrupted. Otherwise, we might
14702 end up with a corrupt stack. */
14703 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14704 return 0;
14705
14706 unsorted_offsets[i] = INTVAL (offset);
14707 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14708 order[0] = i;
14709 }
14710 else
14711 /* Not a suitable memory address. */
14712 return 0;
14713 }
14714
14715 /* All the useful information has now been extracted from the
14716 operands into unsorted_regs and unsorted_offsets; additionally,
14717 order[0] has been set to the lowest offset in the list. Sort
14718 the offsets into order, verifying that they are adjacent, and
14719 check that the register numbers are ascending. */
14720 if (!compute_offset_order (nops, unsorted_offsets, order,
14721 check_regs ? unsorted_regs : NULL))
14722 return 0;
14723
14724 if (saved_order)
14725 memcpy (saved_order, order, sizeof order);
14726
14727 if (base)
14728 {
14729 *base = base_reg;
14730
14731 for (i = 0; i < nops; i++)
14732 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14733
14734 *load_offset = unsorted_offsets[order[0]];
14735 }
14736
14737 if (unsorted_offsets[order[0]] == 0)
14738 ldm_case = 1; /* ldmia */
14739 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14740 ldm_case = 2; /* ldmib */
14741 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14742 ldm_case = 3; /* ldmda */
14743 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14744 ldm_case = 4; /* ldmdb */
14745 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14746 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14747 ldm_case = 5;
14748 else
14749 return 0;
14750
14751 if (!multiple_operation_profitable_p (false, nops,
14752 ldm_case == 5
14753 ? unsorted_offsets[order[0]] : 0))
14754 return 0;
14755
14756 return ldm_case;
14757 }
14758
14759 /* Used to determine in a peephole whether a sequence of store instructions can
14760 be changed into a store-multiple instruction.
14761 NOPS is the number of separate store instructions we are examining.
14762 NOPS_TOTAL is the total number of instructions recognized by the peephole
14763 pattern.
14764 The first NOPS entries in OPERANDS are the source registers, the next
14765 NOPS entries are memory operands. If this function is successful, *BASE is
14766 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14767 to the first memory location's offset from that base register. REGS is an
14768 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14769 likewise filled with the corresponding rtx's.
14770 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14771 numbers to an ascending order of stores.
14772 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14773 from ascending memory locations, and the function verifies that the register
14774 numbers are themselves ascending. If CHECK_REGS is false, the register
14775 numbers are stored in the order they are found in the operands. */
14776 static int
14777 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14778 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14779 HOST_WIDE_INT *load_offset, bool check_regs)
14780 {
14781 int unsorted_regs[MAX_LDM_STM_OPS];
14782 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14783 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14784 int order[MAX_LDM_STM_OPS];
14785 int base_reg = -1;
14786 rtx base_reg_rtx = NULL;
14787 int i, stm_case;
14788
14789 /* Write back of base register is currently only supported for Thumb 1. */
14790 int base_writeback = TARGET_THUMB1;
14791
14792 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14793 easily extended if required. */
14794 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14795
14796 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14797
14798 /* Loop over the operands and check that the memory references are
14799 suitable (i.e. immediate offsets from the same base register). At
14800 the same time, extract the target register, and the memory
14801 offsets. */
14802 for (i = 0; i < nops; i++)
14803 {
14804 rtx reg;
14805 rtx offset;
14806
14807 /* Convert a subreg of a mem into the mem itself. */
14808 if (GET_CODE (operands[nops + i]) == SUBREG)
14809 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14810
14811 gcc_assert (MEM_P (operands[nops + i]));
14812
14813 /* Don't reorder volatile memory references; it doesn't seem worth
14814 looking for the case where the order is ok anyway. */
14815 if (MEM_VOLATILE_P (operands[nops + i]))
14816 return 0;
14817
14818 offset = const0_rtx;
14819
14820 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14821 || (SUBREG_P (reg)
14822 && REG_P (reg = SUBREG_REG (reg))))
14823 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14824 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14825 || (SUBREG_P (reg)
14826 && REG_P (reg = SUBREG_REG (reg))))
14827 && (CONST_INT_P (offset
14828 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14829 {
14830 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14831 ? operands[i] : SUBREG_REG (operands[i]));
14832 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14833
14834 if (i == 0)
14835 {
14836 base_reg = REGNO (reg);
14837 base_reg_rtx = reg;
14838 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14839 return 0;
14840 }
14841 else if (base_reg != (int) REGNO (reg))
14842 /* Not addressed from the same base register. */
14843 return 0;
14844
14845 /* If it isn't an integer register, then we can't do this. */
14846 if (unsorted_regs[i] < 0
14847 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14848 /* The effects are unpredictable if the base register is
14849 both updated and stored. */
14850 || (base_writeback && unsorted_regs[i] == base_reg)
14851 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14852 || unsorted_regs[i] > 14)
14853 return 0;
14854
14855 unsorted_offsets[i] = INTVAL (offset);
14856 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14857 order[0] = i;
14858 }
14859 else
14860 /* Not a suitable memory address. */
14861 return 0;
14862 }
14863
14864 /* All the useful information has now been extracted from the
14865 operands into unsorted_regs and unsorted_offsets; additionally,
14866 order[0] has been set to the lowest offset in the list. Sort
14867 the offsets into order, verifying that they are adjacent, and
14868 check that the register numbers are ascending. */
14869 if (!compute_offset_order (nops, unsorted_offsets, order,
14870 check_regs ? unsorted_regs : NULL))
14871 return 0;
14872
14873 if (saved_order)
14874 memcpy (saved_order, order, sizeof order);
14875
14876 if (base)
14877 {
14878 *base = base_reg;
14879
14880 for (i = 0; i < nops; i++)
14881 {
14882 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14883 if (reg_rtxs)
14884 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14885 }
14886
14887 *load_offset = unsorted_offsets[order[0]];
14888 }
14889
14890 if (TARGET_THUMB1
14891 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14892 return 0;
14893
14894 if (unsorted_offsets[order[0]] == 0)
14895 stm_case = 1; /* stmia */
14896 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14897 stm_case = 2; /* stmib */
14898 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14899 stm_case = 3; /* stmda */
14900 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14901 stm_case = 4; /* stmdb */
14902 else
14903 return 0;
14904
14905 if (!multiple_operation_profitable_p (false, nops, 0))
14906 return 0;
14907
14908 return stm_case;
14909 }
14910 \f
14911 /* Routines for use in generating RTL. */
14912
14913 /* Generate a load-multiple instruction. COUNT is the number of loads in
14914 the instruction; REGS and MEMS are arrays containing the operands.
14915 BASEREG is the base register to be used in addressing the memory operands.
14916 WBACK_OFFSET is nonzero if the instruction should update the base
14917 register. */
14918
14919 static rtx
14920 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14921 HOST_WIDE_INT wback_offset)
14922 {
14923 int i = 0, j;
14924 rtx result;
14925
14926 if (!multiple_operation_profitable_p (false, count, 0))
14927 {
14928 rtx seq;
14929
14930 start_sequence ();
14931
14932 for (i = 0; i < count; i++)
14933 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14934
14935 if (wback_offset != 0)
14936 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14937
14938 seq = get_insns ();
14939 end_sequence ();
14940
14941 return seq;
14942 }
14943
14944 result = gen_rtx_PARALLEL (VOIDmode,
14945 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14946 if (wback_offset != 0)
14947 {
14948 XVECEXP (result, 0, 0)
14949 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14950 i = 1;
14951 count++;
14952 }
14953
14954 for (j = 0; i < count; i++, j++)
14955 XVECEXP (result, 0, i)
14956 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14957
14958 return result;
14959 }
14960
14961 /* Generate a store-multiple instruction. COUNT is the number of stores in
14962 the instruction; REGS and MEMS are arrays containing the operands.
14963 BASEREG is the base register to be used in addressing the memory operands.
14964 WBACK_OFFSET is nonzero if the instruction should update the base
14965 register. */
14966
14967 static rtx
14968 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14969 HOST_WIDE_INT wback_offset)
14970 {
14971 int i = 0, j;
14972 rtx result;
14973
14974 if (GET_CODE (basereg) == PLUS)
14975 basereg = XEXP (basereg, 0);
14976
14977 if (!multiple_operation_profitable_p (false, count, 0))
14978 {
14979 rtx seq;
14980
14981 start_sequence ();
14982
14983 for (i = 0; i < count; i++)
14984 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14985
14986 if (wback_offset != 0)
14987 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14988
14989 seq = get_insns ();
14990 end_sequence ();
14991
14992 return seq;
14993 }
14994
14995 result = gen_rtx_PARALLEL (VOIDmode,
14996 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14997 if (wback_offset != 0)
14998 {
14999 XVECEXP (result, 0, 0)
15000 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
15001 i = 1;
15002 count++;
15003 }
15004
15005 for (j = 0; i < count; i++, j++)
15006 XVECEXP (result, 0, i)
15007 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
15008
15009 return result;
15010 }
15011
15012 /* Generate either a load-multiple or a store-multiple instruction. This
15013 function can be used in situations where we can start with a single MEM
15014 rtx and adjust its address upwards.
15015 COUNT is the number of operations in the instruction, not counting a
15016 possible update of the base register. REGS is an array containing the
15017 register operands.
15018 BASEREG is the base register to be used in addressing the memory operands,
15019 which are constructed from BASEMEM.
15020 WRITE_BACK specifies whether the generated instruction should include an
15021 update of the base register.
15022 OFFSETP is used to pass an offset to and from this function; this offset
15023 is not used when constructing the address (instead BASEMEM should have an
15024 appropriate offset in its address); it is used only for setting
15025 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
15026
15027 static rtx
15028 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
15029 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
15030 {
15031 rtx mems[MAX_LDM_STM_OPS];
15032 HOST_WIDE_INT offset = *offsetp;
15033 int i;
15034
15035 gcc_assert (count <= MAX_LDM_STM_OPS);
15036
15037 if (GET_CODE (basereg) == PLUS)
15038 basereg = XEXP (basereg, 0);
15039
15040 for (i = 0; i < count; i++)
15041 {
15042 rtx addr = plus_constant (Pmode, basereg, i * 4);
15043 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
15044 offset += 4;
15045 }
15046
15047 if (write_back)
15048 *offsetp = offset;
15049
15050 if (is_load)
15051 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15052 write_back ? 4 * count : 0);
15053 else
15054 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15055 write_back ? 4 * count : 0);
15056 }
15057
15058 rtx
15059 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15060 rtx basemem, HOST_WIDE_INT *offsetp)
15061 {
15062 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15063 offsetp);
15064 }
15065
15066 rtx
15067 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15068 rtx basemem, HOST_WIDE_INT *offsetp)
15069 {
15070 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15071 offsetp);
15072 }
15073
15074 /* Called from a peephole2 expander to turn a sequence of loads into an
15075 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15076 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15077 is true if we can reorder the registers because their subsequent uses
15078 are commutative.
15079 Returns true iff we could generate a new instruction. */
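/* A typical transformation (registers and offsets purely illustrative):
     ldr r0, [r4]
     ldr r1, [r4, #4]      =>      ldmia r4, {r0, r1, r2}
     ldr r2, [r4, #8]
   subject to the register numbers being in (or being allowed to be
   reordered into) ascending order.  */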
15080
15081 bool
15082 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15083 {
15084 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15085 rtx mems[MAX_LDM_STM_OPS];
15086 int i, j, base_reg;
15087 rtx base_reg_rtx;
15088 HOST_WIDE_INT offset;
15089 int write_back = FALSE;
15090 int ldm_case;
15091 rtx addr;
15092
15093 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15094 &base_reg, &offset, !sort_regs);
15095
15096 if (ldm_case == 0)
15097 return false;
15098
15099 if (sort_regs)
15100 for (i = 0; i < nops - 1; i++)
15101 for (j = i + 1; j < nops; j++)
15102 if (regs[i] > regs[j])
15103 {
15104 int t = regs[i];
15105 regs[i] = regs[j];
15106 regs[j] = t;
15107 }
15108 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15109
15110 if (TARGET_THUMB1)
15111 {
15112 gcc_assert (ldm_case == 1 || ldm_case == 5);
15113
15114 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15115 write_back = true;
15116 for (i = 0; i < nops; i++)
15117 if (base_reg == regs[i])
15118 write_back = false;
15119
15120 /* Ensure the base is dead if it is updated. */
15121 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15122 return false;
15123 }
15124
15125 if (ldm_case == 5)
15126 {
15127 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15128 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15129 offset = 0;
15130 base_reg_rtx = newbase;
15131 }
15132
15133 for (i = 0; i < nops; i++)
15134 {
15135 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15136 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15137 SImode, addr, 0);
15138 }
15139 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15140 write_back ? offset + i * 4 : 0));
15141 return true;
15142 }
15143
15144 /* Called from a peephole2 expander to turn a sequence of stores into an
15145 STM instruction. OPERANDS are the operands found by the peephole matcher;
15146 NOPS indicates how many separate stores we are trying to combine.
15147 Returns true iff we could generate a new instruction. */
15148
15149 bool
15150 gen_stm_seq (rtx *operands, int nops)
15151 {
15152 int i;
15153 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15154 rtx mems[MAX_LDM_STM_OPS];
15155 int base_reg;
15156 rtx base_reg_rtx;
15157 HOST_WIDE_INT offset;
15158 int write_back = FALSE;
15159 int stm_case;
15160 rtx addr;
15161 bool base_reg_dies;
15162
15163 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15164 mem_order, &base_reg, &offset, true);
15165
15166 if (stm_case == 0)
15167 return false;
15168
15169 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15170
15171 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15172 if (TARGET_THUMB1)
15173 {
15174 gcc_assert (base_reg_dies);
15175 write_back = TRUE;
15176 }
15177
15178 if (stm_case == 5)
15179 {
15180 gcc_assert (base_reg_dies);
15181 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15182 offset = 0;
15183 }
15184
15185 addr = plus_constant (Pmode, base_reg_rtx, offset);
15186
15187 for (i = 0; i < nops; i++)
15188 {
15189 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15190 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15191 SImode, addr, 0);
15192 }
15193 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15194 write_back ? offset + i * 4 : 0));
15195 return true;
15196 }
15197
15198 /* Called from a peephole2 expander to turn a sequence of stores that are
15199 preceded by constant loads into an STM instruction. OPERANDS are the
15200 operands found by the peephole matcher; NOPS indicates how many
15201 separate stores we are trying to combine; there are 2 * NOPS
15202 instructions in the peephole.
15203 Returns true iff we could generate a new instruction. */
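/* A sketch of the intended transformation (illustrative only):
     mov r0, #0
     str r0, [r4]                  mov r0, #0
     mov r1, #1            =>      mov r1, #1
     str r1, [r4, #4]              stmia r4, {r0, r1}
   i.e. the constant loads are re-emitted first and the individual stores
   collapse into a single stm.  */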
15204
15205 bool
15206 gen_const_stm_seq (rtx *operands, int nops)
15207 {
15208 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15209 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15210 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15211 rtx mems[MAX_LDM_STM_OPS];
15212 int base_reg;
15213 rtx base_reg_rtx;
15214 HOST_WIDE_INT offset;
15215 int write_back = FALSE;
15216 int stm_case;
15217 rtx addr;
15218 bool base_reg_dies;
15219 int i, j;
15220 HARD_REG_SET allocated;
15221
15222 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15223 mem_order, &base_reg, &offset, false);
15224
15225 if (stm_case == 0)
15226 return false;
15227
15228 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15229
15230 /* If the same register is used more than once, try to find a free
15231 register. */
15232 CLEAR_HARD_REG_SET (allocated);
15233 for (i = 0; i < nops; i++)
15234 {
15235 for (j = i + 1; j < nops; j++)
15236 if (regs[i] == regs[j])
15237 {
15238 rtx t = peep2_find_free_register (0, nops * 2,
15239 TARGET_THUMB1 ? "l" : "r",
15240 SImode, &allocated);
15241 if (t == NULL_RTX)
15242 return false;
15243 reg_rtxs[i] = t;
15244 regs[i] = REGNO (t);
15245 }
15246 }
15247
15248 /* Compute an ordering that maps the register numbers to an ascending
15249 sequence. */
15250 reg_order[0] = 0;
15251 for (i = 0; i < nops; i++)
15252 if (regs[i] < regs[reg_order[0]])
15253 reg_order[0] = i;
15254
15255 for (i = 1; i < nops; i++)
15256 {
15257 int this_order = reg_order[i - 1];
15258 for (j = 0; j < nops; j++)
15259 if (regs[j] > regs[reg_order[i - 1]]
15260 && (this_order == reg_order[i - 1]
15261 || regs[j] < regs[this_order]))
15262 this_order = j;
15263 reg_order[i] = this_order;
15264 }
15265
15266 /* Ensure that registers that must be live after the instruction end
15267 up with the correct value. */
15268 for (i = 0; i < nops; i++)
15269 {
15270 int this_order = reg_order[i];
15271 if ((this_order != mem_order[i]
15272 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15273 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15274 return false;
15275 }
15276
15277 /* Load the constants. */
15278 for (i = 0; i < nops; i++)
15279 {
15280 rtx op = operands[2 * nops + mem_order[i]];
15281 sorted_regs[i] = regs[reg_order[i]];
15282 emit_move_insn (reg_rtxs[reg_order[i]], op);
15283 }
15284
15285 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15286
15287 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15288 if (TARGET_THUMB1)
15289 {
15290 gcc_assert (base_reg_dies);
15291 write_back = TRUE;
15292 }
15293
15294 if (stm_case == 5)
15295 {
15296 gcc_assert (base_reg_dies);
15297 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15298 offset = 0;
15299 }
15300
15301 addr = plus_constant (Pmode, base_reg_rtx, offset);
15302
15303 for (i = 0; i < nops; i++)
15304 {
15305 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15306 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15307 SImode, addr, 0);
15308 }
15309 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15310 write_back ? offset + i * 4 : 0));
15311 return true;
15312 }
15313
15314 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15315 unaligned copies on processors which support unaligned semantics for those
15316 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15317 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15318 An interleave factor of 1 (the minimum) will perform no interleaving.
15319 Load/store multiple are used for aligned addresses where possible. */
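/* With an interleave factor of 2 and neither side word-aligned, each
   iteration of the copy looks roughly like (illustrative):
     ldr r0, [src]
     ldr r1, [src, #4]
     str r0, [dst]
     str r1, [dst, #4]
   hiding part of the load latency behind the second load before the stores
   are issued.  */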
15320
15321 static void
15322 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15323 HOST_WIDE_INT length,
15324 unsigned int interleave_factor)
15325 {
15326 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15327 int *regnos = XALLOCAVEC (int, interleave_factor);
15328 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15329 HOST_WIDE_INT i, j;
15330 HOST_WIDE_INT remaining = length, words;
15331 rtx halfword_tmp = NULL, byte_tmp = NULL;
15332 rtx dst, src;
15333 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15334 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15335 HOST_WIDE_INT srcoffset, dstoffset;
15336 HOST_WIDE_INT src_autoinc, dst_autoinc;
15337 rtx mem, addr;
15338
15339 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15340
15341 /* Use hard registers if we have aligned source or destination so we can use
15342 load/store multiple with contiguous registers. */
15343 if (dst_aligned || src_aligned)
15344 for (i = 0; i < interleave_factor; i++)
15345 regs[i] = gen_rtx_REG (SImode, i);
15346 else
15347 for (i = 0; i < interleave_factor; i++)
15348 regs[i] = gen_reg_rtx (SImode);
15349
15350 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15351 src = copy_addr_to_reg (XEXP (srcbase, 0));
15352
15353 srcoffset = dstoffset = 0;
15354
15355 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15356 For copying the last bytes we want to subtract this offset again. */
15357 src_autoinc = dst_autoinc = 0;
15358
15359 for (i = 0; i < interleave_factor; i++)
15360 regnos[i] = i;
15361
15362 /* Copy BLOCK_SIZE_BYTES chunks. */
15363
15364 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15365 {
15366 /* Load words. */
15367 if (src_aligned && interleave_factor > 1)
15368 {
15369 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15370 TRUE, srcbase, &srcoffset));
15371 src_autoinc += UNITS_PER_WORD * interleave_factor;
15372 }
15373 else
15374 {
15375 for (j = 0; j < interleave_factor; j++)
15376 {
15377 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15378 - src_autoinc));
15379 mem = adjust_automodify_address (srcbase, SImode, addr,
15380 srcoffset + j * UNITS_PER_WORD);
15381 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15382 }
15383 srcoffset += block_size_bytes;
15384 }
15385
15386 /* Store words. */
15387 if (dst_aligned && interleave_factor > 1)
15388 {
15389 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15390 TRUE, dstbase, &dstoffset));
15391 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15392 }
15393 else
15394 {
15395 for (j = 0; j < interleave_factor; j++)
15396 {
15397 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15398 - dst_autoinc));
15399 mem = adjust_automodify_address (dstbase, SImode, addr,
15400 dstoffset + j * UNITS_PER_WORD);
15401 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15402 }
15403 dstoffset += block_size_bytes;
15404 }
15405
15406 remaining -= block_size_bytes;
15407 }
15408
15409 /* Copy any whole words left (note these aren't interleaved with any
15410 subsequent halfword/byte load/stores in the interests of simplicity). */
15411
15412 words = remaining / UNITS_PER_WORD;
15413
15414 gcc_assert (words < interleave_factor);
15415
15416 if (src_aligned && words > 1)
15417 {
15418 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15419 &srcoffset));
15420 src_autoinc += UNITS_PER_WORD * words;
15421 }
15422 else
15423 {
15424 for (j = 0; j < words; j++)
15425 {
15426 addr = plus_constant (Pmode, src,
15427 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15428 mem = adjust_automodify_address (srcbase, SImode, addr,
15429 srcoffset + j * UNITS_PER_WORD);
15430 if (src_aligned)
15431 emit_move_insn (regs[j], mem);
15432 else
15433 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15434 }
15435 srcoffset += words * UNITS_PER_WORD;
15436 }
15437
15438 if (dst_aligned && words > 1)
15439 {
15440 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15441 &dstoffset));
15442 dst_autoinc += words * UNITS_PER_WORD;
15443 }
15444 else
15445 {
15446 for (j = 0; j < words; j++)
15447 {
15448 addr = plus_constant (Pmode, dst,
15449 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15450 mem = adjust_automodify_address (dstbase, SImode, addr,
15451 dstoffset + j * UNITS_PER_WORD);
15452 if (dst_aligned)
15453 emit_move_insn (mem, regs[j]);
15454 else
15455 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15456 }
15457 dstoffset += words * UNITS_PER_WORD;
15458 }
15459
15460 remaining -= words * UNITS_PER_WORD;
15461
15462 gcc_assert (remaining < 4);
15463
15464 /* Copy a halfword if necessary. */
15465
15466 if (remaining >= 2)
15467 {
15468 halfword_tmp = gen_reg_rtx (SImode);
15469
15470 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15471 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15472 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15473
15474 /* Either write out immediately, or delay until we've loaded the last
15475 byte, depending on interleave factor. */
15476 if (interleave_factor == 1)
15477 {
15478 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15479 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15480 emit_insn (gen_unaligned_storehi (mem,
15481 gen_lowpart (HImode, halfword_tmp)));
15482 halfword_tmp = NULL;
15483 dstoffset += 2;
15484 }
15485
15486 remaining -= 2;
15487 srcoffset += 2;
15488 }
15489
15490 gcc_assert (remaining < 2);
15491
15492 /* Copy last byte. */
15493
15494 if ((remaining & 1) != 0)
15495 {
15496 byte_tmp = gen_reg_rtx (SImode);
15497
15498 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15499 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15500 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15501
15502 if (interleave_factor == 1)
15503 {
15504 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15505 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15506 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15507 byte_tmp = NULL;
15508 dstoffset++;
15509 }
15510
15511 remaining--;
15512 srcoffset++;
15513 }
15514
15515 /* Store last halfword if we haven't done so already. */
15516
15517 if (halfword_tmp)
15518 {
15519 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15520 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15521 emit_insn (gen_unaligned_storehi (mem,
15522 gen_lowpart (HImode, halfword_tmp)));
15523 dstoffset += 2;
15524 }
15525
15526 /* Likewise for last byte. */
15527
15528 if (byte_tmp)
15529 {
15530 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15531 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15532 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15533 dstoffset++;
15534 }
15535
15536 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15537 }
15538
15539 /* From mips_adjust_block_mem:
15540
15541 Helper function for doing a loop-based block operation on memory
15542 reference MEM. Each iteration of the loop will operate on LENGTH
15543 bytes of MEM.
15544
15545 Create a new base register for use within the loop and point it to
15546 the start of MEM. Create a new memory reference that uses this
15547 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15548
15549 static void
15550 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15551 rtx *loop_mem)
15552 {
15553 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15554
15555 /* Although the new mem does not refer to a known location,
15556 it does keep up to LENGTH bytes of alignment. */
15557 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15558 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15559 }
15560
15561 /* From mips_block_move_loop:
15562
15563 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15564 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15565 the memory regions do not overlap. */
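/* Schematically (illustrative), for LENGTH = 100 and BYTES_PER_ITER = 16
   the emitted code is a loop that copies 16 bytes per iteration until
   SRC_REG reaches SRC + 96, followed by a straight-line copy of the 4
   left-over bytes.  */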
15566
15567 static void
15568 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15569 unsigned int interleave_factor,
15570 HOST_WIDE_INT bytes_per_iter)
15571 {
15572 rtx src_reg, dest_reg, final_src, test;
15573 HOST_WIDE_INT leftover;
15574
15575 leftover = length % bytes_per_iter;
15576 length -= leftover;
15577
15578 /* Create registers and memory references for use within the loop. */
15579 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15580 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15581
15582 /* Calculate the value that SRC_REG should have after the last iteration of
15583 the loop. */
15584 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15585 0, 0, OPTAB_WIDEN);
15586
15587 /* Emit the start of the loop. */
15588 rtx_code_label *label = gen_label_rtx ();
15589 emit_label (label);
15590
15591 /* Emit the loop body. */
15592 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15593 interleave_factor);
15594
15595 /* Move on to the next block. */
15596 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15597 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15598
15599 /* Emit the loop condition. */
15600 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15601 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15602
15603 /* Mop up any left-over bytes. */
15604 if (leftover)
15605 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15606 }
15607
15608 /* Emit a block move when either the source or destination is unaligned (not
15609 aligned to a four-byte boundary). This may need further tuning depending on
15610 core type, optimize_size setting, etc. */
15611
15612 static int
15613 arm_cpymemqi_unaligned (rtx *operands)
15614 {
15615 HOST_WIDE_INT length = INTVAL (operands[2]);
15616
15617 if (optimize_size)
15618 {
15619 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15620 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15621 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15622 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15623 or dst_aligned though: allow more interleaving in those cases since the
15624 resulting code can be smaller. */
15625 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15626 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15627
15628 if (length > 12)
15629 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15630 interleave_factor, bytes_per_iter);
15631 else
15632 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15633 interleave_factor);
15634 }
15635 else
15636 {
15637 /* Note that the loop created by arm_block_move_unaligned_loop may be
15638 subject to loop unrolling, which makes tuning this condition a little
15639 redundant. */
15640 if (length > 32)
15641 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15642 else
15643 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15644 }
15645
15646 return 1;
15647 }
15648
15649 int
15650 arm_gen_cpymemqi (rtx *operands)
15651 {
15652 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15653 HOST_WIDE_INT srcoffset, dstoffset;
15654 rtx src, dst, srcbase, dstbase;
15655 rtx part_bytes_reg = NULL;
15656 rtx mem;
15657
15658 if (!CONST_INT_P (operands[2])
15659 || !CONST_INT_P (operands[3])
15660 || INTVAL (operands[2]) > 64)
15661 return 0;
15662
15663 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15664 return arm_cpymemqi_unaligned (operands);
15665
15666 if (INTVAL (operands[3]) & 3)
15667 return 0;
15668
15669 dstbase = operands[0];
15670 srcbase = operands[1];
15671
15672 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15673 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15674
15675 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15676 out_words_to_go = INTVAL (operands[2]) / 4;
15677 last_bytes = INTVAL (operands[2]) & 3;
15678 dstoffset = srcoffset = 0;
15679
15680 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15681 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15682
15683 while (in_words_to_go >= 2)
15684 {
15685 if (in_words_to_go > 4)
15686 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15687 TRUE, srcbase, &srcoffset));
15688 else
15689 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15690 src, FALSE, srcbase,
15691 &srcoffset));
15692
15693 if (out_words_to_go)
15694 {
15695 if (out_words_to_go > 4)
15696 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15697 TRUE, dstbase, &dstoffset));
15698 else if (out_words_to_go != 1)
15699 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15700 out_words_to_go, dst,
15701 (last_bytes == 0
15702 ? FALSE : TRUE),
15703 dstbase, &dstoffset));
15704 else
15705 {
15706 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15707 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15708 if (last_bytes != 0)
15709 {
15710 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15711 dstoffset += 4;
15712 }
15713 }
15714 }
15715
15716 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15717 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15718 }
15719
15720 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15721 if (out_words_to_go)
15722 {
15723 rtx sreg;
15724
15725 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15726 sreg = copy_to_reg (mem);
15727
15728 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15729 emit_move_insn (mem, sreg);
15730 in_words_to_go--;
15731
15732 gcc_assert (!in_words_to_go); /* Sanity check */
15733 }
15734
15735 if (in_words_to_go)
15736 {
15737 gcc_assert (in_words_to_go > 0);
15738
15739 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15740 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15741 }
15742
15743 gcc_assert (!last_bytes || part_bytes_reg);
15744
15745 if (BYTES_BIG_ENDIAN && last_bytes)
15746 {
15747 rtx tmp = gen_reg_rtx (SImode);
15748
15749 /* The bytes we want are in the top end of the word. */
15750 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15751 GEN_INT (8 * (4 - last_bytes))));
15752 part_bytes_reg = tmp;
15753
15754 while (last_bytes)
15755 {
15756 mem = adjust_automodify_address (dstbase, QImode,
15757 plus_constant (Pmode, dst,
15758 last_bytes - 1),
15759 dstoffset + last_bytes - 1);
15760 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15761
15762 if (--last_bytes)
15763 {
15764 tmp = gen_reg_rtx (SImode);
15765 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15766 part_bytes_reg = tmp;
15767 }
15768 }
15769
15770 }
15771 else
15772 {
15773 if (last_bytes > 1)
15774 {
15775 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15776 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15777 last_bytes -= 2;
15778 if (last_bytes)
15779 {
15780 rtx tmp = gen_reg_rtx (SImode);
15781 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15782 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15783 part_bytes_reg = tmp;
15784 dstoffset += 2;
15785 }
15786 }
15787
15788 if (last_bytes)
15789 {
15790 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15791 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15792 }
15793 }
15794
15795 return 1;
15796 }
15797
15798 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15799 by mode size. */
15800 inline static rtx
15801 next_consecutive_mem (rtx mem)
15802 {
15803 machine_mode mode = GET_MODE (mem);
15804 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15805 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15806
15807 return adjust_automodify_address (mem, mode, addr, offset);
15808 }
15809
15810 /* Copy using LDRD/STRD instructions whenever possible.
15811 Returns true upon success. */
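/* For a doubleword-aligned 16-byte copy this reduces roughly to
   (illustrative; the register pair is really a DImode pseudo):
     ldrd r0, r1, [src]
     strd r0, r1, [dst]
     ldrd r0, r1, [src, #8]
     strd r0, r1, [dst, #8]
   while misaligned buffers fall back to pairs of unaligned ldr/str or, if
   neither side is word-aligned, to arm_gen_cpymemqi.  */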
15812 bool
15813 gen_cpymem_ldrd_strd (rtx *operands)
15814 {
15815 unsigned HOST_WIDE_INT len;
15816 HOST_WIDE_INT align;
15817 rtx src, dst, base;
15818 rtx reg0;
15819 bool src_aligned, dst_aligned;
15820 bool src_volatile, dst_volatile;
15821
15822 gcc_assert (CONST_INT_P (operands[2]));
15823 gcc_assert (CONST_INT_P (operands[3]));
15824
15825 len = UINTVAL (operands[2]);
15826 if (len > 64)
15827 return false;
15828
15829 /* Maximum alignment we can assume for both src and dst buffers. */
15830 align = INTVAL (operands[3]);
15831
15832 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15833 return false;
15834
15835 /* Place src and dst addresses in registers
15836 and update the corresponding mem rtx. */
15837 dst = operands[0];
15838 dst_volatile = MEM_VOLATILE_P (dst);
15839 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15840 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15841 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15842
15843 src = operands[1];
15844 src_volatile = MEM_VOLATILE_P (src);
15845 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15846 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15847 src = adjust_automodify_address (src, VOIDmode, base, 0);
15848
15849 if (!unaligned_access && !(src_aligned && dst_aligned))
15850 return false;
15851
15852 if (src_volatile || dst_volatile)
15853 return false;
15854
15855 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15856 if (!(dst_aligned || src_aligned))
15857 return arm_gen_cpymemqi (operands);
15858
15859 /* If either the src or the dst is unaligned, we'll access it as pairs
15860 of unaligned SImode accesses. Otherwise we can generate DImode
15861 ldrd/strd instructions. */
15862 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15863 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15864
15865 while (len >= 8)
15866 {
15867 len -= 8;
15868 reg0 = gen_reg_rtx (DImode);
15869 rtx first_reg = NULL_RTX;
15870 rtx second_reg = NULL_RTX;
15871
15872 if (!src_aligned || !dst_aligned)
15873 {
15874 if (BYTES_BIG_ENDIAN)
15875 {
15876 second_reg = gen_lowpart (SImode, reg0);
15877 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15878 }
15879 else
15880 {
15881 first_reg = gen_lowpart (SImode, reg0);
15882 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15883 }
15884 }
15885 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15886 emit_move_insn (reg0, src);
15887 else if (src_aligned)
15888 emit_insn (gen_unaligned_loaddi (reg0, src));
15889 else
15890 {
15891 emit_insn (gen_unaligned_loadsi (first_reg, src));
15892 src = next_consecutive_mem (src);
15893 emit_insn (gen_unaligned_loadsi (second_reg, src));
15894 }
15895
15896 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15897 emit_move_insn (dst, reg0);
15898 else if (dst_aligned)
15899 emit_insn (gen_unaligned_storedi (dst, reg0));
15900 else
15901 {
15902 emit_insn (gen_unaligned_storesi (dst, first_reg));
15903 dst = next_consecutive_mem (dst);
15904 emit_insn (gen_unaligned_storesi (dst, second_reg));
15905 }
15906
15907 src = next_consecutive_mem (src);
15908 dst = next_consecutive_mem (dst);
15909 }
15910
15911 gcc_assert (len < 8);
15912 if (len >= 4)
15913 {
15914 /* More than a word but less than a double-word to copy. Copy a word. */
15915 reg0 = gen_reg_rtx (SImode);
15916 src = adjust_address (src, SImode, 0);
15917 dst = adjust_address (dst, SImode, 0);
15918 if (src_aligned)
15919 emit_move_insn (reg0, src);
15920 else
15921 emit_insn (gen_unaligned_loadsi (reg0, src));
15922
15923 if (dst_aligned)
15924 emit_move_insn (dst, reg0);
15925 else
15926 emit_insn (gen_unaligned_storesi (dst, reg0));
15927
15928 src = next_consecutive_mem (src);
15929 dst = next_consecutive_mem (dst);
15930 len -= 4;
15931 }
15932
15933 if (len == 0)
15934 return true;
15935
15936 /* Copy the remaining bytes. */
15937 if (len >= 2)
15938 {
15939 dst = adjust_address (dst, HImode, 0);
15940 src = adjust_address (src, HImode, 0);
15941 reg0 = gen_reg_rtx (SImode);
15942 if (src_aligned)
15943 emit_insn (gen_zero_extendhisi2 (reg0, src));
15944 else
15945 emit_insn (gen_unaligned_loadhiu (reg0, src));
15946
15947 if (dst_aligned)
15948 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
15949 else
15950 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15951
15952 src = next_consecutive_mem (src);
15953 dst = next_consecutive_mem (dst);
15954 if (len == 2)
15955 return true;
15956 }
15957
15958 dst = adjust_address (dst, QImode, 0);
15959 src = adjust_address (src, QImode, 0);
15960 reg0 = gen_reg_rtx (QImode);
15961 emit_move_insn (reg0, src);
15962 emit_move_insn (dst, reg0);
15963 return true;
15964 }
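/* As a rough illustration (the exact output depends on alignment, options
   and register allocation; register names below are placeholders), a
   15-byte copy between pointers known to be doubleword aligned decomposes
   as above into

     ldrd  r4, r5, [r1]        @ 8 bytes via one DImode move
     strd  r4, r5, [r0]
     ldr   r4, [r1, #8]        @ remaining word
     str   r4, [r0, #8]
     ldrh  r4, [r1, #12]       @ remaining halfword
     strh  r4, [r0, #12]
     ldrb  r4, [r1, #14]       @ trailing byte
     strb  r4, [r0, #14]

   while word-only alignment with unaligned access enabled falls back to
   pairs of unaligned SImode accesses for each 8-byte chunk.  */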
15965
15966 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15967 into its component 32-bit subregs. OP2 may be an immediate
15968 constant and we want to simplify it in that case. */
15969 void
15970 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15971 rtx *lo_op2, rtx *hi_op2)
15972 {
15973 *lo_op1 = gen_lowpart (SImode, op1);
15974 *hi_op1 = gen_highpart (SImode, op1);
15975 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15976 subreg_lowpart_offset (SImode, DImode));
15977 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15978 subreg_highpart_offset (SImode, DImode));
15979 }
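/* For example (an illustrative sketch): with OP1 a DImode pseudo and
   OP2 = (const_int 0x100000005), the outputs are the two SImode halves of
   OP1 (its lowpart and highpart) together with the folded constant halves,
   (const_int 5) for *LO_OP2 and (const_int 1) for *HI_OP2, rather than
   SUBREGs wrapped around the original constant.  */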
15980
15981 /* Select a dominance comparison mode if possible for a test of the general
15982 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15983 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15984 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15985 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15986 In all cases OP will be either EQ or NE, but we don't need to know which
15987 here. If we are unable to support a dominance comparison we return
15988 CC mode. This will then fail to match for the RTL expressions that
15989 generate this call. */
15990 machine_mode
15991 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15992 {
15993 enum rtx_code cond1, cond2;
15994 int swapped = 0;
15995
15996 /* Currently we will probably get the wrong result if the individual
15997 comparisons are not simple. This also ensures that it is safe to
15998 reverse a comparison if necessary. */
15999 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
16000 != CCmode)
16001 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
16002 != CCmode))
16003 return CCmode;
16004
16005 /* The if_then_else variant of this tests the second condition if the
16006 first passes, but is true if the first fails. Reverse the first
16007 condition to get a true "inclusive-or" expression. */
16008 if (cond_or == DOM_CC_NX_OR_Y)
16009 cond1 = reverse_condition (cond1);
16010
16011 /* If the comparisons are not equal, and one doesn't dominate the other,
16012 then we can't do this. */
16013 if (cond1 != cond2
16014 && !comparison_dominates_p (cond1, cond2)
16015 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
16016 return CCmode;
16017
16018 if (swapped)
16019 std::swap (cond1, cond2);
16020
16021 switch (cond1)
16022 {
16023 case EQ:
16024 if (cond_or == DOM_CC_X_AND_Y)
16025 return CC_DEQmode;
16026
16027 switch (cond2)
16028 {
16029 case EQ: return CC_DEQmode;
16030 case LE: return CC_DLEmode;
16031 case LEU: return CC_DLEUmode;
16032 case GE: return CC_DGEmode;
16033 case GEU: return CC_DGEUmode;
16034 default: gcc_unreachable ();
16035 }
16036
16037 case LT:
16038 if (cond_or == DOM_CC_X_AND_Y)
16039 return CC_DLTmode;
16040
16041 switch (cond2)
16042 {
16043 case LT:
16044 return CC_DLTmode;
16045 case LE:
16046 return CC_DLEmode;
16047 case NE:
16048 return CC_DNEmode;
16049 default:
16050 gcc_unreachable ();
16051 }
16052
16053 case GT:
16054 if (cond_or == DOM_CC_X_AND_Y)
16055 return CC_DGTmode;
16056
16057 switch (cond2)
16058 {
16059 case GT:
16060 return CC_DGTmode;
16061 case GE:
16062 return CC_DGEmode;
16063 case NE:
16064 return CC_DNEmode;
16065 default:
16066 gcc_unreachable ();
16067 }
16068
16069 case LTU:
16070 if (cond_or == DOM_CC_X_AND_Y)
16071 return CC_DLTUmode;
16072
16073 switch (cond2)
16074 {
16075 case LTU:
16076 return CC_DLTUmode;
16077 case LEU:
16078 return CC_DLEUmode;
16079 case NE:
16080 return CC_DNEmode;
16081 default:
16082 gcc_unreachable ();
16083 }
16084
16085 case GTU:
16086 if (cond_or == DOM_CC_X_AND_Y)
16087 return CC_DGTUmode;
16088
16089 switch (cond2)
16090 {
16091 case GTU:
16092 return CC_DGTUmode;
16093 case GEU:
16094 return CC_DGEUmode;
16095 case NE:
16096 return CC_DNEmode;
16097 default:
16098 gcc_unreachable ();
16099 }
16100
16101 /* The remaining cases only occur when both comparisons are the
16102 same. */
16103 case NE:
16104 gcc_assert (cond1 == cond2);
16105 return CC_DNEmode;
16106
16107 case LE:
16108 gcc_assert (cond1 == cond2);
16109 return CC_DLEmode;
16110
16111 case GE:
16112 gcc_assert (cond1 == cond2);
16113 return CC_DGEmode;
16114
16115 case LEU:
16116 gcc_assert (cond1 == cond2);
16117 return CC_DLEUmode;
16118
16119 case GEU:
16120 gcc_assert (cond1 == cond2);
16121 return CC_DGEUmode;
16122
16123 default:
16124 gcc_unreachable ();
16125 }
16126 }
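/* Worked example, following the tables above: for the test
   (ior (lt x1 y1) (le x2 y2)) with COND_OR == DOM_CC_X_OR_Y, cond1 = LT
   dominates cond2 = LE, so CC_DLEmode is returned; the same pair under
   DOM_CC_X_AND_Y yields CC_DLTmode.  If neither condition dominates the
   other (say LT and GEU), CCmode is returned and the caller's pattern
   simply fails to match.  */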
16127
16128 machine_mode
16129 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16130 {
16131 /* All floating point compares return CCFP if it is an equality
16132 comparison, and CCFPE otherwise. */
16133 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16134 {
16135 switch (op)
16136 {
16137 case EQ:
16138 case NE:
16139 case UNORDERED:
16140 case ORDERED:
16141 case UNLT:
16142 case UNLE:
16143 case UNGT:
16144 case UNGE:
16145 case UNEQ:
16146 case LTGT:
16147 return CCFPmode;
16148
16149 case LT:
16150 case LE:
16151 case GT:
16152 case GE:
16153 return CCFPEmode;
16154
16155 default:
16156 gcc_unreachable ();
16157 }
16158 }
16159
16160 /* A compare with a shifted operand. Because of canonicalization, the
16161 comparison will have to be swapped when we emit the assembler. */
16162 if (GET_MODE (y) == SImode
16163 && (REG_P (y) || (SUBREG_P (y)))
16164 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16165 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16166 || GET_CODE (x) == ROTATERT))
16167 return CC_SWPmode;
16168
16169 /* A widened compare of the sum of a value plus a carry against a
16170 constant. This is a representation of RSC. We want to swap the
16171 result of the comparison at output. Not valid if the Z bit is
16172 needed. */
16173 if (GET_MODE (x) == DImode
16174 && GET_CODE (x) == PLUS
16175 && arm_borrow_operation (XEXP (x, 1), DImode)
16176 && CONST_INT_P (y)
16177 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16178 && (op == LE || op == GT))
16179 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16180 && (op == LEU || op == GTU))))
16181 return CC_SWPmode;
16182
16183 /* If X is a constant we want to use CC_RSBmode. This is
16184 non-canonical, but arm_gen_compare_reg uses this to generate the
16185 correct canonical form. */
16186 if (GET_MODE (y) == SImode
16187 && (REG_P (y) || SUBREG_P (y))
16188 && CONST_INT_P (x))
16189 return CC_RSBmode;
16190
16191 /* This operation is performed swapped, but since we only rely on the Z
16192 flag we don't need an additional mode. */
16193 if (GET_MODE (y) == SImode
16194 && (REG_P (y) || (SUBREG_P (y)))
16195 && GET_CODE (x) == NEG
16196 && (op == EQ || op == NE))
16197 return CC_Zmode;
16198
16199 /* This is a special case that is used by combine to allow a
16200 comparison of a shifted byte load to be split into a zero-extend
16201 followed by a comparison of the shifted integer (only valid for
16202 equalities and unsigned inequalities). */
16203 if (GET_MODE (x) == SImode
16204 && GET_CODE (x) == ASHIFT
16205 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16206 && GET_CODE (XEXP (x, 0)) == SUBREG
16207 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16208 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16209 && (op == EQ || op == NE
16210 || op == GEU || op == GTU || op == LTU || op == LEU)
16211 && CONST_INT_P (y))
16212 return CC_Zmode;
16213
16214 /* A construct for a conditional compare, if the false arm contains
16215 0, then both conditions must be true, otherwise either condition
16216 must be true. Not all conditions are possible, so CCmode is
16217 returned if it can't be done. */
16218 if (GET_CODE (x) == IF_THEN_ELSE
16219 && (XEXP (x, 2) == const0_rtx
16220 || XEXP (x, 2) == const1_rtx)
16221 && COMPARISON_P (XEXP (x, 0))
16222 && COMPARISON_P (XEXP (x, 1)))
16223 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16224 INTVAL (XEXP (x, 2)));
16225
16226 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16227 if (GET_CODE (x) == AND
16228 && (op == EQ || op == NE)
16229 && COMPARISON_P (XEXP (x, 0))
16230 && COMPARISON_P (XEXP (x, 1)))
16231 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16232 DOM_CC_X_AND_Y);
16233
16234 if (GET_CODE (x) == IOR
16235 && (op == EQ || op == NE)
16236 && COMPARISON_P (XEXP (x, 0))
16237 && COMPARISON_P (XEXP (x, 1)))
16238 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16239 DOM_CC_X_OR_Y);
16240
16241 /* An operation (on Thumb) where we want to test for a single bit.
16242 This is done by shifting that bit up into the top bit of a
16243 scratch register; we can then branch on the sign bit. */
16244 if (TARGET_THUMB1
16245 && GET_MODE (x) == SImode
16246 && (op == EQ || op == NE)
16247 && GET_CODE (x) == ZERO_EXTRACT
16248 && XEXP (x, 1) == const1_rtx)
16249 return CC_Nmode;
16250
16251 /* For an operation that sets the condition codes as a side-effect, the
16252 V flag is not set correctly, so we can only use comparisons where
16253 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16254 instead.) */
16255 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16256 if (GET_MODE (x) == SImode
16257 && y == const0_rtx
16258 && (op == EQ || op == NE || op == LT || op == GE)
16259 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16260 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16261 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16262 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16263 || GET_CODE (x) == LSHIFTRT
16264 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16265 || GET_CODE (x) == ROTATERT
16266 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16267 return CC_NZmode;
16268
16269 /* A comparison of ~reg with a const is really a special
16270 canonicalization of compare (~const, reg), which is a reverse
16271 subtract operation. We may not get here if CONST is 0, but that
16272 doesn't matter because ~0 isn't a valid immediate for RSB. */
16273 if (GET_MODE (x) == SImode
16274 && GET_CODE (x) == NOT
16275 && CONST_INT_P (y))
16276 return CC_RSBmode;
16277
16278 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16279 return CC_Zmode;
16280
16281 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16282 && GET_CODE (x) == PLUS
16283 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16284 return CC_Cmode;
16285
16286 if (GET_MODE (x) == DImode
16287 && GET_CODE (x) == PLUS
16288 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16289 && CONST_INT_P (y)
16290 && UINTVAL (y) == 0x800000000
16291 && (op == GEU || op == LTU))
16292 return CC_ADCmode;
16293
16294 if (GET_MODE (x) == DImode
16295 && (op == GE || op == LT)
16296 && GET_CODE (x) == SIGN_EXTEND
16297 && ((GET_CODE (y) == PLUS
16298 && arm_borrow_operation (XEXP (y, 0), DImode))
16299 || arm_borrow_operation (y, DImode)))
16300 return CC_NVmode;
16301
16302 if (GET_MODE (x) == DImode
16303 && (op == GEU || op == LTU)
16304 && GET_CODE (x) == ZERO_EXTEND
16305 && ((GET_CODE (y) == PLUS
16306 && arm_borrow_operation (XEXP (y, 0), DImode))
16307 || arm_borrow_operation (y, DImode)))
16308 return CC_Bmode;
16309
16310 if (GET_MODE (x) == DImode
16311 && (op == EQ || op == NE)
16312 && (GET_CODE (x) == PLUS
16313 || GET_CODE (x) == MINUS)
16314 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16315 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16316 && GET_CODE (y) == SIGN_EXTEND
16317 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16318 return CC_Vmode;
16319
16320 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16321 return GET_MODE (x);
16322
16323 return CCmode;
16324 }
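/* A few illustrative mappings of the cases above (not exhaustive; the
   register and constant operands are placeholders):

     (eq (plus r0 r1) (const_int 0))    -> CC_NZmode  (flag-setting ADDS)
     (lt (ashift r0 (const_int 2)) r1)  -> CC_SWPmode (swapped at output)
     (eq (reg:SF s0) (reg:SF s1))       -> CCFPmode
     (lt (reg:SF s0) (reg:SF s1))       -> CCFPEmode
     (eq (not r0) (const_int 10))       -> CC_RSBmode (reverse subtract)
*/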
16325
16326 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16327 the sequence of instructions needed to generate a suitable condition
16328 code register. Return the CC register result. */
16329 static rtx
16330 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16331 {
16332 machine_mode mode;
16333 rtx cc_reg;
16334
16335 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16336 gcc_assert (TARGET_32BIT);
16337 gcc_assert (!CONST_INT_P (x));
16338
16339 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16340 subreg_lowpart_offset (SImode, DImode));
16341 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16342 subreg_highpart_offset (SImode, DImode));
16343 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16344 subreg_lowpart_offset (SImode, DImode));
16345 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16346 subreg_highpart_offset (SImode, DImode));
16347 switch (code)
16348 {
16349 case EQ:
16350 case NE:
16351 {
16352 if (y_lo == const0_rtx || y_hi == const0_rtx)
16353 {
16354 if (y_lo != const0_rtx)
16355 {
16356 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16357
16358 gcc_assert (y_hi == const0_rtx);
16359 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16360 if (!arm_add_operand (y_lo, SImode))
16361 y_lo = force_reg (SImode, y_lo);
16362 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16363 x_lo = scratch2;
16364 }
16365 else if (y_hi != const0_rtx)
16366 {
16367 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16368
16369 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16370 if (!arm_add_operand (y_hi, SImode))
16371 y_hi = force_reg (SImode, y_hi);
16372 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16373 x_hi = scratch2;
16374 }
16375
16376 if (!scratch)
16377 {
16378 gcc_assert (!reload_completed);
16379 scratch = gen_rtx_SCRATCH (SImode);
16380 }
16381
16382 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16383 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16384
16385 rtx set
16386 = gen_rtx_SET (cc_reg,
16387 gen_rtx_COMPARE (CC_NZmode,
16388 gen_rtx_IOR (SImode, x_lo, x_hi),
16389 const0_rtx));
16390 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16391 clobber)));
16392 return cc_reg;
16393 }
16394
16395 if (!arm_add_operand (y_lo, SImode))
16396 y_lo = force_reg (SImode, y_lo);
16397
16398 if (!arm_add_operand (y_hi, SImode))
16399 y_hi = force_reg (SImode, y_hi);
16400
16401 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16402 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16403 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16404 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16405 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16406
16407 emit_insn (gen_rtx_SET (cc_reg,
16408 gen_rtx_COMPARE (mode, conjunction,
16409 const0_rtx)));
16410 return cc_reg;
16411 }
16412
16413 case LT:
16414 case GE:
16415 {
16416 if (y_lo == const0_rtx)
16417 {
16418 /* If the low word of y is 0, then this is simply a normal
16419 compare of the upper words. */
16420 if (!arm_add_operand (y_hi, SImode))
16421 y_hi = force_reg (SImode, y_hi);
16422
16423 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16424 }
16425
16426 if (!arm_add_operand (y_lo, SImode))
16427 y_lo = force_reg (SImode, y_lo);
16428
16429 rtx cmp1
16430 = gen_rtx_LTU (DImode,
16431 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16432 const0_rtx);
16433
16434 if (!scratch)
16435 scratch = gen_rtx_SCRATCH (SImode);
16436
16437 if (!arm_not_operand (y_hi, SImode))
16438 y_hi = force_reg (SImode, y_hi);
16439
16440 rtx_insn *insn;
16441 if (y_hi == const0_rtx)
16442 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16443 cmp1));
16444 else if (CONST_INT_P (y_hi))
16445 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16446 y_hi, cmp1));
16447 else
16448 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16449 cmp1));
16450 return SET_DEST (single_set (insn));
16451 }
16452
16453 case LE:
16454 case GT:
16455 {
16456 /* During expansion, we only expect to get here if y is a
16457 constant that we want to handle, otherwise we should have
16458 swapped the operands already. */
16459 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16460
16461 if (!const_ok_for_arm (INTVAL (y_lo)))
16462 y_lo = force_reg (SImode, y_lo);
16463
16464 /* Perform a reverse subtract and compare. */
16465 rtx cmp1
16466 = gen_rtx_LTU (DImode,
16467 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16468 const0_rtx);
16469 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16470 x_hi, cmp1));
16471 return SET_DEST (single_set (insn));
16472 }
16473
16474 case LTU:
16475 case GEU:
16476 {
16477 if (y_lo == const0_rtx)
16478 {
16479 /* If the low word of y is 0, then this is simply a normal
16480 compare of the upper words. */
16481 if (!arm_add_operand (y_hi, SImode))
16482 y_hi = force_reg (SImode, y_hi);
16483
16484 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16485 }
16486
16487 if (!arm_add_operand (y_lo, SImode))
16488 y_lo = force_reg (SImode, y_lo);
16489
16490 rtx cmp1
16491 = gen_rtx_LTU (DImode,
16492 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16493 const0_rtx);
16494
16495 if (!scratch)
16496 scratch = gen_rtx_SCRATCH (SImode);
16497 if (!arm_not_operand (y_hi, SImode))
16498 y_hi = force_reg (SImode, y_hi);
16499
16500 rtx_insn *insn;
16501 if (y_hi == const0_rtx)
16502 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16503 cmp1));
16504 else if (CONST_INT_P (y_hi))
16505 {
16506 /* Constant is viewed as unsigned when zero-extended. */
16507 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16508 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16509 y_hi, cmp1));
16510 }
16511 else
16512 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16513 cmp1));
16514 return SET_DEST (single_set (insn));
16515 }
16516
16517 case LEU:
16518 case GTU:
16519 {
16520 /* During expansion, we only expect to get here if y is a
16521 constant that we want to handle, otherwise we should have
16522 swapped the operands already. */
16523 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16524
16525 if (!const_ok_for_arm (INTVAL (y_lo)))
16526 y_lo = force_reg (SImode, y_lo);
16527
16528 /* Perform a reverse subtract and compare. */
16529 rtx cmp1
16530 = gen_rtx_LTU (DImode,
16531 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16532 const0_rtx);
16533 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16534 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16535 x_hi, cmp1));
16536 return SET_DEST (single_set (insn));
16537 }
16538
16539 default:
16540 gcc_unreachable ();
16541 }
16542 }
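/* For instance, an EQ/NE test of a DImode value against zero takes the
   first branch above and emits a single flag-setting ORR of the two
   halves, roughly

     orrs  tmp, x_lo, x_hi     @ Z is set iff the whole 64-bit value is zero

   while a signed LT/GE against a non-trivial constant compares the low
   words first and then performs a carry-in (SBC-style) compare of the
   high words.  Register names here are illustrative.  */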
16543
16544 /* X and Y are two things to compare using CODE. Emit the compare insn and
16545 return the rtx for register 0 in the proper mode. */
16546 rtx
16547 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16548 {
16549 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16550 return arm_gen_dicompare_reg (code, x, y, scratch);
16551
16552 machine_mode mode = SELECT_CC_MODE (code, x, y);
16553 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16554 if (mode == CC_RSBmode)
16555 {
16556 if (!scratch)
16557 scratch = gen_rtx_SCRATCH (SImode);
16558 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16559 GEN_INT (~UINTVAL (x)), y));
16560 }
16561 else
16562 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16563
16564 return cc_reg;
16565 }
16566
16567 /* Generate a sequence of insns that will generate the correct return
16568 address mask depending on the physical architecture that the program
16569 is running on. */
16570 rtx
16571 arm_gen_return_addr_mask (void)
16572 {
16573 rtx reg = gen_reg_rtx (Pmode);
16574
16575 emit_insn (gen_return_addr_mask (reg));
16576 return reg;
16577 }
16578
16579 void
16580 arm_reload_in_hi (rtx *operands)
16581 {
16582 rtx ref = operands[1];
16583 rtx base, scratch;
16584 HOST_WIDE_INT offset = 0;
16585
16586 if (SUBREG_P (ref))
16587 {
16588 offset = SUBREG_BYTE (ref);
16589 ref = SUBREG_REG (ref);
16590 }
16591
16592 if (REG_P (ref))
16593 {
16594 /* We have a pseudo which has been spilt onto the stack; there
16595 are two cases here: the first where there is a simple
16596 stack-slot replacement and a second where the stack-slot is
16597 out of range, or is used as a subreg. */
16598 if (reg_equiv_mem (REGNO (ref)))
16599 {
16600 ref = reg_equiv_mem (REGNO (ref));
16601 base = find_replacement (&XEXP (ref, 0));
16602 }
16603 else
16604 /* The slot is out of range, or was dressed up in a SUBREG. */
16605 base = reg_equiv_address (REGNO (ref));
16606
16607 /* PR 62554: If there is no equivalent memory location then just move
16608 the value as an SImode register move. This happens when the target
16609 architecture variant does not have an HImode register move. */
16610 if (base == NULL)
16611 {
16612 gcc_assert (REG_P (operands[0]));
16613 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16614 gen_rtx_SUBREG (SImode, ref, 0)));
16615 return;
16616 }
16617 }
16618 else
16619 base = find_replacement (&XEXP (ref, 0));
16620
16621 /* Handle the case where the address is too complex to be offset by 1. */
16622 if (GET_CODE (base) == MINUS
16623 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16624 {
16625 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16626
16627 emit_set_insn (base_plus, base);
16628 base = base_plus;
16629 }
16630 else if (GET_CODE (base) == PLUS)
16631 {
16632 /* The addend must be CONST_INT, or we would have dealt with it above. */
16633 HOST_WIDE_INT hi, lo;
16634
16635 offset += INTVAL (XEXP (base, 1));
16636 base = XEXP (base, 0);
16637
16638 /* Rework the address into a legal sequence of insns. */
16639 /* Valid range for lo is -4095 -> 4095 */
16640 lo = (offset >= 0
16641 ? (offset & 0xfff)
16642 : -((-offset) & 0xfff));
16643
16644 /* Corner case, if lo is the max offset then we would be out of range
16645 once we have added the additional 1 below, so bump the msb into the
16646 pre-loading insn(s). */
16647 if (lo == 4095)
16648 lo &= 0x7ff;
16649
16650 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16651 ^ (HOST_WIDE_INT) 0x80000000)
16652 - (HOST_WIDE_INT) 0x80000000);
16653
16654 gcc_assert (hi + lo == offset);
16655
16656 if (hi != 0)
16657 {
16658 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16659
16660 /* Get the base address; addsi3 knows how to handle constants
16661 that require more than one insn. */
16662 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16663 base = base_plus;
16664 offset = lo;
16665 }
16666 }
16667
16668 /* Operands[2] may overlap operands[0] (though it won't overlap
16669 operands[1]), that's why we asked for a DImode reg -- so we can
16670 use the bit that does not overlap. */
16671 if (REGNO (operands[2]) == REGNO (operands[0]))
16672 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16673 else
16674 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16675
16676 emit_insn (gen_zero_extendqisi2 (scratch,
16677 gen_rtx_MEM (QImode,
16678 plus_constant (Pmode, base,
16679 offset))));
16680 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16681 gen_rtx_MEM (QImode,
16682 plus_constant (Pmode, base,
16683 offset + 1))));
16684 if (!BYTES_BIG_ENDIAN)
16685 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16686 gen_rtx_IOR (SImode,
16687 gen_rtx_ASHIFT
16688 (SImode,
16689 gen_rtx_SUBREG (SImode, operands[0], 0),
16690 GEN_INT (8)),
16691 scratch));
16692 else
16693 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16694 gen_rtx_IOR (SImode,
16695 gen_rtx_ASHIFT (SImode, scratch,
16696 GEN_INT (8)),
16697 gen_rtx_SUBREG (SImode, operands[0], 0)));
16698 }
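/* On a little-endian target the sequence built above amounts to

     ldrb  scratch, [base, #offset]
     ldrb  dest,    [base, #offset + 1]
     orr   dest, scratch, dest, lsl #8

   i.e. the halfword is reassembled from two byte loads; big-endian swaps
   which byte is shifted.  (Register names are illustrative.)  */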
16699
16700 /* Handle storing a half-word to memory during reload by synthesizing as two
16701 byte stores. Take care not to clobber the input values until after we
16702 have moved them somewhere safe. This code assumes that if the DImode
16703 scratch in operands[2] overlaps either the input value or output address
16704 in some way, then that value must die in this insn (we absolutely need
16705 two scratch registers for some corner cases). */
16706 void
16707 arm_reload_out_hi (rtx *operands)
16708 {
16709 rtx ref = operands[0];
16710 rtx outval = operands[1];
16711 rtx base, scratch;
16712 HOST_WIDE_INT offset = 0;
16713
16714 if (SUBREG_P (ref))
16715 {
16716 offset = SUBREG_BYTE (ref);
16717 ref = SUBREG_REG (ref);
16718 }
16719
16720 if (REG_P (ref))
16721 {
16722 /* We have a pseudo which has been spilt onto the stack; there
16723 are two cases here: the first where there is a simple
16724 stack-slot replacement and a second where the stack-slot is
16725 out of range, or is used as a subreg. */
16726 if (reg_equiv_mem (REGNO (ref)))
16727 {
16728 ref = reg_equiv_mem (REGNO (ref));
16729 base = find_replacement (&XEXP (ref, 0));
16730 }
16731 else
16732 /* The slot is out of range, or was dressed up in a SUBREG. */
16733 base = reg_equiv_address (REGNO (ref));
16734
16735 /* PR 62254: If there is no equivalent memory location then just move
16736 the value as an SImode register move. This happens when the target
16737 architecture variant does not have an HImode register move. */
16738 if (base == NULL)
16739 {
16740 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16741
16742 if (REG_P (outval))
16743 {
16744 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16745 gen_rtx_SUBREG (SImode, outval, 0)));
16746 }
16747 else /* SUBREG_P (outval) */
16748 {
16749 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16750 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16751 SUBREG_REG (outval)));
16752 else
16753 /* FIXME: Handle other cases ? */
16754 gcc_unreachable ();
16755 }
16756 return;
16757 }
16758 }
16759 else
16760 base = find_replacement (&XEXP (ref, 0));
16761
16762 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16763
16764 /* Handle the case where the address is too complex to be offset by 1. */
16765 if (GET_CODE (base) == MINUS
16766 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16767 {
16768 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16769
16770 /* Be careful not to destroy OUTVAL. */
16771 if (reg_overlap_mentioned_p (base_plus, outval))
16772 {
16773 /* Updating base_plus might destroy outval, see if we can
16774 swap the scratch and base_plus. */
16775 if (!reg_overlap_mentioned_p (scratch, outval))
16776 std::swap (scratch, base_plus);
16777 else
16778 {
16779 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16780
16781 /* Be conservative and copy OUTVAL into the scratch now,
16782 this should only be necessary if outval is a subreg
16783 of something larger than a word. */
16784 /* XXX Might this clobber base? I can't see how it can,
16785 since scratch is known to overlap with OUTVAL, and
16786 must be wider than a word. */
16787 emit_insn (gen_movhi (scratch_hi, outval));
16788 outval = scratch_hi;
16789 }
16790 }
16791
16792 emit_set_insn (base_plus, base);
16793 base = base_plus;
16794 }
16795 else if (GET_CODE (base) == PLUS)
16796 {
16797 /* The addend must be CONST_INT, or we would have dealt with it above. */
16798 HOST_WIDE_INT hi, lo;
16799
16800 offset += INTVAL (XEXP (base, 1));
16801 base = XEXP (base, 0);
16802
16803 /* Rework the address into a legal sequence of insns. */
16804 /* Valid range for lo is -4095 -> 4095 */
16805 lo = (offset >= 0
16806 ? (offset & 0xfff)
16807 : -((-offset) & 0xfff));
16808
16809 /* Corner case, if lo is the max offset then we would be out of range
16810 once we have added the additional 1 below, so bump the msb into the
16811 pre-loading insn(s). */
16812 if (lo == 4095)
16813 lo &= 0x7ff;
16814
16815 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16816 ^ (HOST_WIDE_INT) 0x80000000)
16817 - (HOST_WIDE_INT) 0x80000000);
16818
16819 gcc_assert (hi + lo == offset);
16820
16821 if (hi != 0)
16822 {
16823 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16824
16825 /* Be careful not to destroy OUTVAL. */
16826 if (reg_overlap_mentioned_p (base_plus, outval))
16827 {
16828 /* Updating base_plus might destroy outval, see if we
16829 can swap the scratch and base_plus. */
16830 if (!reg_overlap_mentioned_p (scratch, outval))
16831 std::swap (scratch, base_plus);
16832 else
16833 {
16834 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16835
16836 /* Be conservative and copy outval into scratch now,
16837 this should only be necessary if outval is a
16838 subreg of something larger than a word. */
16839 /* XXX Might this clobber base? I can't see how it
16840 can, since scratch is known to overlap with
16841 outval. */
16842 emit_insn (gen_movhi (scratch_hi, outval));
16843 outval = scratch_hi;
16844 }
16845 }
16846
16847 /* Get the base address; addsi3 knows how to handle constants
16848 that require more than one insn. */
16849 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16850 base = base_plus;
16851 offset = lo;
16852 }
16853 }
16854
16855 if (BYTES_BIG_ENDIAN)
16856 {
16857 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16858 plus_constant (Pmode, base,
16859 offset + 1)),
16860 gen_lowpart (QImode, outval)));
16861 emit_insn (gen_lshrsi3 (scratch,
16862 gen_rtx_SUBREG (SImode, outval, 0),
16863 GEN_INT (8)));
16864 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16865 offset)),
16866 gen_lowpart (QImode, scratch)));
16867 }
16868 else
16869 {
16870 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16871 offset)),
16872 gen_lowpart (QImode, outval)));
16873 emit_insn (gen_lshrsi3 (scratch,
16874 gen_rtx_SUBREG (SImode, outval, 0),
16875 GEN_INT (8)));
16876 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16877 plus_constant (Pmode, base,
16878 offset + 1)),
16879 gen_lowpart (QImode, scratch)));
16880 }
16881 }
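/* The mirror image of arm_reload_in_hi: on a little-endian target the
   store is synthesized roughly as

     strb  outval,  [base, #offset]
     lsr   scratch, outval, #8
     strb  scratch, [base, #offset + 1]

   with the two byte stores swapped for big-endian.  (Register names are
   illustrative.)  */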
16882
16883 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16884 (padded to the size of a word) should be passed in a register. */
16885
16886 static bool
16887 arm_must_pass_in_stack (const function_arg_info &arg)
16888 {
16889 if (TARGET_AAPCS_BASED)
16890 return must_pass_in_stack_var_size (arg);
16891 else
16892 return must_pass_in_stack_var_size_or_pad (arg);
16893 }
16894
16895
16896 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16897 byte of a stack argument has useful data. For legacy APCS ABIs we use
16898 the default. For AAPCS-based ABIs small aggregate types are placed
16899 at the lowest memory address. */
16900
16901 static pad_direction
16902 arm_function_arg_padding (machine_mode mode, const_tree type)
16903 {
16904 if (!TARGET_AAPCS_BASED)
16905 return default_function_arg_padding (mode, type);
16906
16907 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16908 return PAD_DOWNWARD;
16909
16910 return PAD_UPWARD;
16911 }
16912
16913
16914 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16915 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16916 register has useful data, and return the opposite if the most
16917 significant byte does. */
16918
16919 bool
16920 arm_pad_reg_upward (machine_mode mode,
16921 tree type, int first ATTRIBUTE_UNUSED)
16922 {
16923 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16924 {
16925 /* For AAPCS, small aggregates, small fixed-point types,
16926 and small complex types are always padded upwards. */
16927 if (type)
16928 {
16929 if ((AGGREGATE_TYPE_P (type)
16930 || TREE_CODE (type) == COMPLEX_TYPE
16931 || FIXED_POINT_TYPE_P (type))
16932 && int_size_in_bytes (type) <= 4)
16933 return true;
16934 }
16935 else
16936 {
16937 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16938 && GET_MODE_SIZE (mode) <= 4)
16939 return true;
16940 }
16941 }
16942
16943 /* Otherwise, use default padding. */
16944 return !BYTES_BIG_ENDIAN;
16945 }
16946
16947 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16948 assuming that the address in the base register is word aligned. */
16949 bool
16950 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16951 {
16952 HOST_WIDE_INT max_offset;
16953
16954 /* Offset must be a multiple of 4 in Thumb mode. */
16955 if (TARGET_THUMB2 && ((offset & 3) != 0))
16956 return false;
16957
16958 if (TARGET_THUMB2)
16959 max_offset = 1020;
16960 else if (TARGET_ARM)
16961 max_offset = 255;
16962 else
16963 return false;
16964
16965 return ((offset <= max_offset) && (offset >= -max_offset));
16966 }
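/* Examples of the check above: offset 1020 is accepted in Thumb-2 but not
   in ARM state (whose limit is 255); offset 2 is accepted in ARM state but
   rejected in Thumb-2 because it is not a multiple of 4.  */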
16967
16968 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16969 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16970 Assumes that the address in the base register RN is word aligned. Pattern
16971 guarantees that both memory accesses use the same base register,
16972 the offsets are constants within the range, and the gap between the offsets is 4.
16973 If reload is complete then check that the registers are legal. WBACK indicates whether
16974 address is updated. LOAD indicates whether memory access is load or store. */
16975 bool
16976 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16977 bool wback, bool load)
16978 {
16979 unsigned int t, t2, n;
16980
16981 if (!reload_completed)
16982 return true;
16983
16984 if (!offset_ok_for_ldrd_strd (offset))
16985 return false;
16986
16987 t = REGNO (rt);
16988 t2 = REGNO (rt2);
16989 n = REGNO (rn);
16990
16991 if ((TARGET_THUMB2)
16992 && ((wback && (n == t || n == t2))
16993 || (t == SP_REGNUM)
16994 || (t == PC_REGNUM)
16995 || (t2 == SP_REGNUM)
16996 || (t2 == PC_REGNUM)
16997 || (!load && (n == PC_REGNUM))
16998 || (load && (t == t2))
16999 /* Triggers Cortex-M3 LDRD errata. */
17000 || (!wback && load && fix_cm3_ldrd && (n == t))))
17001 return false;
17002
17003 if ((TARGET_ARM)
17004 && ((wback && (n == t || n == t2))
17005 || (t2 == PC_REGNUM)
17006 || (t % 2 != 0) /* First destination register is not even. */
17007 || (t2 != t + 1)
17008 /* PC can be used as base register (for offset addressing only),
17009 but it is deprecated. */
17010 || (n == PC_REGNUM)))
17011 return false;
17012
17013 return true;
17014 }
17015
17016 /* Return true if a 64-bit access with alignment ALIGN and with a
17017 constant offset OFFSET from the base pointer is permitted on this
17018 architecture. */
17019 static bool
17020 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
17021 {
17022 return (unaligned_access
17023 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
17024 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
17025 }
17026
17027 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
17028 operand MEM's address contains an immediate offset from the base
17029 register and has no side effects, in which case it sets BASE,
17030 OFFSET and ALIGN accordingly. */
17031 static bool
17032 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
17033 {
17034 rtx addr;
17035
17036 gcc_assert (base != NULL && offset != NULL);
17037
17038 /* TODO: Handle more general memory operand patterns, such as
17039 PRE_DEC and PRE_INC. */
17040
17041 if (side_effects_p (mem))
17042 return false;
17043
17044 /* Can't deal with subregs. */
17045 if (SUBREG_P (mem))
17046 return false;
17047
17048 gcc_assert (MEM_P (mem));
17049
17050 *offset = const0_rtx;
17051 *align = MEM_ALIGN (mem);
17052
17053 addr = XEXP (mem, 0);
17054
17055 /* If addr isn't valid for DImode, then we can't handle it. */
17056 if (!arm_legitimate_address_p (DImode, addr,
17057 reload_in_progress || reload_completed))
17058 return false;
17059
17060 if (REG_P (addr))
17061 {
17062 *base = addr;
17063 return true;
17064 }
17065 else if (GET_CODE (addr) == PLUS)
17066 {
17067 *base = XEXP (addr, 0);
17068 *offset = XEXP (addr, 1);
17069 return (REG_P (*base) && CONST_INT_P (*offset));
17070 }
17071
17072 return false;
17073 }
17074
17075 /* Called from a peephole2 to replace two word-size accesses with a
17076 single LDRD/STRD instruction. Returns true iff we can generate a
17077 new instruction sequence. That is, both accesses use the same base
17078 register and the gap between constant offsets is 4. This function
17079 may reorder its operands to match ldrd/strd RTL templates.
17080 OPERANDS are the operands found by the peephole matcher;
17081 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17082 corresponding memory operands. LOAD indicates whether the access
17083 is load or store. CONST_STORE indicates a store of constant
17084 integer values held in OPERANDS[4,5] and assumes that the pattern
17085 is 4 insns long, for the purpose of checking dead registers.
17086 COMMUTE indicates that register operands may be reordered. */
17087 bool
17088 gen_operands_ldrd_strd (rtx *operands, bool load,
17089 bool const_store, bool commute)
17090 {
17091 int nops = 2;
17092 HOST_WIDE_INT offsets[2], offset, align[2];
17093 rtx base = NULL_RTX;
17094 rtx cur_base, cur_offset, tmp;
17095 int i, gap;
17096 HARD_REG_SET regset;
17097
17098 gcc_assert (!const_store || !load);
17099 /* Check that the memory references are immediate offsets from the
17100 same base register. Extract the base register, the destination
17101 registers, and the corresponding memory offsets. */
17102 for (i = 0; i < nops; i++)
17103 {
17104 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17105 &align[i]))
17106 return false;
17107
17108 if (i == 0)
17109 base = cur_base;
17110 else if (REGNO (base) != REGNO (cur_base))
17111 return false;
17112
17113 offsets[i] = INTVAL (cur_offset);
17114 if (GET_CODE (operands[i]) == SUBREG)
17115 {
17116 tmp = SUBREG_REG (operands[i]);
17117 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17118 operands[i] = tmp;
17119 }
17120 }
17121
17122 /* Make sure there is no dependency between the individual loads. */
17123 if (load && REGNO (operands[0]) == REGNO (base))
17124 return false; /* RAW */
17125
17126 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17127 return false; /* WAW */
17128
17129 /* If the same input register is used in both stores
17130 when storing different constants, try to find a free register.
17131 For example, the code
17132 mov r0, 0
17133 str r0, [r2]
17134 mov r0, 1
17135 str r0, [r2, #4]
17136 can be transformed into
17137 mov r1, 0
17138 mov r0, 1
17139 strd r1, r0, [r2]
17140 in Thumb mode assuming that r1 is free.
17141 For ARM mode do the same but only if the starting register
17142 can be made to be even. */
17143 if (const_store
17144 && REGNO (operands[0]) == REGNO (operands[1])
17145 && INTVAL (operands[4]) != INTVAL (operands[5]))
17146 {
17147 if (TARGET_THUMB2)
17148 {
17149 CLEAR_HARD_REG_SET (regset);
17150 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17151 if (tmp == NULL_RTX)
17152 return false;
17153
17154 /* Use the new register in the first load to ensure that
17155 if the original input register is not dead after peephole,
17156 then it will have the correct constant value. */
17157 operands[0] = tmp;
17158 }
17159 else if (TARGET_ARM)
17160 {
17161 int regno = REGNO (operands[0]);
17162 if (!peep2_reg_dead_p (4, operands[0]))
17163 {
17164 /* When the input register is even and is not dead after the
17165 pattern, it has to hold the second constant but we cannot
17166 form a legal STRD in ARM mode with this register as the second
17167 register. */
17168 if (regno % 2 == 0)
17169 return false;
17170
17171 /* Is regno-1 free? */
17172 SET_HARD_REG_SET (regset);
17173 CLEAR_HARD_REG_BIT (regset, regno - 1);
17174 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17175 if (tmp == NULL_RTX)
17176 return false;
17177
17178 operands[0] = tmp;
17179 }
17180 else
17181 {
17182 /* Find a DImode register. */
17183 CLEAR_HARD_REG_SET (regset);
17184 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17185 if (tmp != NULL_RTX)
17186 {
17187 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17188 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17189 }
17190 else
17191 {
17192 /* Can we use the input register to form a DI register? */
17193 SET_HARD_REG_SET (regset);
17194 CLEAR_HARD_REG_BIT (regset,
17195 regno % 2 == 0 ? regno + 1 : regno - 1);
17196 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17197 if (tmp == NULL_RTX)
17198 return false;
17199 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17200 }
17201 }
17202
17203 gcc_assert (operands[0] != NULL_RTX);
17204 gcc_assert (operands[1] != NULL_RTX);
17205 gcc_assert (REGNO (operands[0]) % 2 == 0);
17206 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17207 }
17208 }
17209
17210 /* Make sure the instructions are ordered with lower memory access first. */
17211 if (offsets[0] > offsets[1])
17212 {
17213 gap = offsets[0] - offsets[1];
17214 offset = offsets[1];
17215
17216 /* Swap the instructions such that lower memory is accessed first. */
17217 std::swap (operands[0], operands[1]);
17218 std::swap (operands[2], operands[3]);
17219 std::swap (align[0], align[1]);
17220 if (const_store)
17221 std::swap (operands[4], operands[5]);
17222 }
17223 else
17224 {
17225 gap = offsets[1] - offsets[0];
17226 offset = offsets[0];
17227 }
17228
17229 /* Make sure accesses are to consecutive memory locations. */
17230 if (gap != GET_MODE_SIZE (SImode))
17231 return false;
17232
17233 if (!align_ok_ldrd_strd (align[0], offset))
17234 return false;
17235
17236 /* Make sure we generate legal instructions. */
17237 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17238 false, load))
17239 return true;
17240
17241 /* In Thumb state, where registers are almost unconstrained, there
17242 is little hope of fixing it. */
17243 if (TARGET_THUMB2)
17244 return false;
17245
17246 if (load && commute)
17247 {
17248 /* Try reordering registers. */
17249 std::swap (operands[0], operands[1]);
17250 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17251 false, load))
17252 return true;
17253 }
17254
17255 if (const_store)
17256 {
17257 /* If input registers are dead after this pattern, they can be
17258 reordered or replaced by other registers that are free in the
17259 current pattern. */
17260 if (!peep2_reg_dead_p (4, operands[0])
17261 || !peep2_reg_dead_p (4, operands[1]))
17262 return false;
17263
17264 /* Try to reorder the input registers. */
17265 /* For example, the code
17266 mov r0, 0
17267 mov r1, 1
17268 str r1, [r2]
17269 str r0, [r2, #4]
17270 can be transformed into
17271 mov r1, 0
17272 mov r0, 1
17273 strd r0, [r2]
17274 */
17275 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17276 false, false))
17277 {
17278 std::swap (operands[0], operands[1]);
17279 return true;
17280 }
17281
17282 /* Try to find a free DI register. */
17283 CLEAR_HARD_REG_SET (regset);
17284 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17285 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17286 while (true)
17287 {
17288 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17289 if (tmp == NULL_RTX)
17290 return false;
17291
17292 /* DREG must be an even-numbered register in DImode.
17293 Split it into SI registers. */
17294 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17295 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17296 gcc_assert (operands[0] != NULL_RTX);
17297 gcc_assert (operands[1] != NULL_RTX);
17298 gcc_assert (REGNO (operands[0]) % 2 == 0);
17299 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17300
17301 return (operands_ok_ldrd_strd (operands[0], operands[1],
17302 base, offset,
17303 false, load));
17304 }
17305 }
17306
17307 return false;
17308 }
17309
17310
17311 /* Return true if parallel execution of the two word-size accesses provided
17312 could be satisfied with a single LDRD/STRD instruction. Two word-size
17313 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17314 register operands and OPERANDS[2,3] are the corresponding memory operands.
17315 */
17316 bool
17317 valid_operands_ldrd_strd (rtx *operands, bool load)
17318 {
17319 int nops = 2;
17320 HOST_WIDE_INT offsets[2], offset, align[2];
17321 rtx base = NULL_RTX;
17322 rtx cur_base, cur_offset;
17323 int i, gap;
17324
17325 /* Check that the memory references are immediate offsets from the
17326 same base register. Extract the base register, the destination
17327 registers, and the corresponding memory offsets. */
17328 for (i = 0; i < nops; i++)
17329 {
17330 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17331 &align[i]))
17332 return false;
17333
17334 if (i == 0)
17335 base = cur_base;
17336 else if (REGNO (base) != REGNO (cur_base))
17337 return false;
17338
17339 offsets[i] = INTVAL (cur_offset);
17340 if (GET_CODE (operands[i]) == SUBREG)
17341 return false;
17342 }
17343
17344 if (offsets[0] > offsets[1])
17345 return false;
17346
17347 gap = offsets[1] - offsets[0];
17348 offset = offsets[0];
17349
17350 /* Make sure accesses are to consecutive memory locations. */
17351 if (gap != GET_MODE_SIZE (SImode))
17352 return false;
17353
17354 if (!align_ok_ldrd_strd (align[0], offset))
17355 return false;
17356
17357 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17358 false, load);
17359 }
17360
17361 \f
17362 /* Print a symbolic form of X to the debug file, F. */
17363 static void
17364 arm_print_value (FILE *f, rtx x)
17365 {
17366 switch (GET_CODE (x))
17367 {
17368 case CONST_INT:
17369 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17370 return;
17371
17372 case CONST_DOUBLE:
17373 {
17374 char fpstr[20];
17375 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17376 sizeof (fpstr), 0, 1);
17377 fputs (fpstr, f);
17378 }
17379 return;
17380
17381 case CONST_VECTOR:
17382 {
17383 int i;
17384
17385 fprintf (f, "<");
17386 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17387 {
17388 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17389 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17390 fputc (',', f);
17391 }
17392 fprintf (f, ">");
17393 }
17394 return;
17395
17396 case CONST_STRING:
17397 fprintf (f, "\"%s\"", XSTR (x, 0));
17398 return;
17399
17400 case SYMBOL_REF:
17401 fprintf (f, "`%s'", XSTR (x, 0));
17402 return;
17403
17404 case LABEL_REF:
17405 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17406 return;
17407
17408 case CONST:
17409 arm_print_value (f, XEXP (x, 0));
17410 return;
17411
17412 case PLUS:
17413 arm_print_value (f, XEXP (x, 0));
17414 fprintf (f, "+");
17415 arm_print_value (f, XEXP (x, 1));
17416 return;
17417
17418 case PC:
17419 fprintf (f, "pc");
17420 return;
17421
17422 default:
17423 fprintf (f, "????");
17424 return;
17425 }
17426 }
17427 \f
17428 /* Routines for manipulation of the constant pool. */
17429
17430 /* Arm instructions cannot load a large constant directly into a
17431 register; they have to come from a pc relative load. The constant
17432 must therefore be placed in the addressable range of the pc
17433 relative load. Depending on the precise pc relative load
17434 instruction the range is somewhere between 256 bytes and 4k. This
17435 means that we often have to dump a constant inside a function, and
17436 generate code to branch around it.
17437
17438 It is important to minimize this, since the branches will slow
17439 things down and make the code larger.
17440
17441 Normally we can hide the table after an existing unconditional
17442 branch so that there is no interruption of the flow, but in the
17443 worst case the code looks like this:
17444
17445 ldr rn, L1
17446 ...
17447 b L2
17448 align
17449 L1: .long value
17450 L2:
17451 ...
17452
17453 ldr rn, L3
17454 ...
17455 b L4
17456 align
17457 L3: .long value
17458 L4:
17459 ...
17460
17461 We fix this by performing a scan after scheduling, which notices
17462 which instructions need to have their operands fetched from the
17463 constant table and builds the table.
17464
17465 The algorithm starts by building a table of all the constants that
17466 need fixing up and all the natural barriers in the function (places
17467 where a constant table can be dropped without breaking the flow).
17468 For each fixup we note how far the pc-relative replacement will be
17469 able to reach and the offset of the instruction into the function.
17470
17471 Having built the table we then group the fixes together to form
17472 tables that are as large as possible (subject to addressing
17473 constraints) and emit each table of constants after the last
17474 barrier that is within range of all the instructions in the group.
17475 If a group does not contain a barrier, then we forcibly create one
17476 by inserting a jump instruction into the flow. Once the table has
17477 been inserted, the insns are then modified to reference the
17478 relevant entry in the pool.
17479
17480 Possible enhancements to the algorithm (not implemented) are:
17481
17482 1) For some processors and object formats, there may be benefit in
17483 aligning the pools to the start of cache lines; this alignment
17484 would need to be taken into account when calculating addressability
17485 of a pool. */
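/* As a concrete illustration of the grouping step (the numbers are invented
   for the example): three fixups at offsets 0, 40 and 400, each with a
   4095-byte forward range, can share a single pool provided it is dropped
   after a barrier that lies beyond offset 400 but no more than 4095 bytes
   from offset 0; if no such barrier exists, an unconditional branch is
   inserted to create one.  */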
17486
17487 /* These typedefs are located at the start of this file, so that
17488 they can be used in the prototypes there. This comment is to
17489 remind readers of that fact so that the following structures
17490 can be understood more easily.
17491
17492 typedef struct minipool_node Mnode;
17493 typedef struct minipool_fixup Mfix; */
17494
17495 struct minipool_node
17496 {
17497 /* Doubly linked chain of entries. */
17498 Mnode * next;
17499 Mnode * prev;
17500 /* The maximum offset into the code at which this entry can be placed. While
17501 pushing fixes for forward references, all entries are sorted in order
17502 of increasing max_address. */
17503 HOST_WIDE_INT max_address;
17504 /* Similarly for an entry inserted for a backwards ref. */
17505 HOST_WIDE_INT min_address;
17506 /* The number of fixes referencing this entry. This can become zero
17507 if we "unpush" an entry. In this case we ignore the entry when we
17508 come to emit the code. */
17509 int refcount;
17510 /* The offset from the start of the minipool. */
17511 HOST_WIDE_INT offset;
17512 /* The value in table. */
17513 rtx value;
17514 /* The mode of value. */
17515 machine_mode mode;
17516 /* The size of the value. With iWMMXt enabled
17517 sizes > 4 also imply an alignment of 8 bytes. */
17518 int fix_size;
17519 };
17520
17521 struct minipool_fixup
17522 {
17523 Mfix * next;
17524 rtx_insn * insn;
17525 HOST_WIDE_INT address;
17526 rtx * loc;
17527 machine_mode mode;
17528 int fix_size;
17529 rtx value;
17530 Mnode * minipool;
17531 HOST_WIDE_INT forwards;
17532 HOST_WIDE_INT backwards;
17533 };
17534
17535 /* Fixes less than a word need padding out to a word boundary. */
17536 #define MINIPOOL_FIX_SIZE(mode) \
17537 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17538
17539 static Mnode * minipool_vector_head;
17540 static Mnode * minipool_vector_tail;
17541 static rtx_code_label *minipool_vector_label;
17542 static int minipool_pad;
17543
17544 /* The linked list of all minipool fixes required for this function. */
17545 Mfix * minipool_fix_head;
17546 Mfix * minipool_fix_tail;
17547 /* The fix entry for the current minipool, once it has been placed. */
17548 Mfix * minipool_barrier;
17549
17550 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17551 #define JUMP_TABLES_IN_TEXT_SECTION 0
17552 #endif
17553
17554 static HOST_WIDE_INT
17555 get_jump_table_size (rtx_jump_table_data *insn)
17556 {
17557 /* ADDR_VECs only take room if read-only data goes into the text
17558 section. */
17559 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17560 {
17561 rtx body = PATTERN (insn);
17562 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17563 HOST_WIDE_INT size;
17564 HOST_WIDE_INT modesize;
17565
17566 modesize = GET_MODE_SIZE (GET_MODE (body));
17567 size = modesize * XVECLEN (body, elt);
17568 switch (modesize)
17569 {
17570 case 1:
17571 /* Round up size of TBB table to a halfword boundary. */
17572 size = (size + 1) & ~HOST_WIDE_INT_1;
17573 break;
17574 case 2:
17575 /* No padding necessary for TBH. */
17576 break;
17577 case 4:
17578 /* Add two bytes for alignment on Thumb. */
17579 if (TARGET_THUMB)
17580 size += 2;
17581 break;
17582 default:
17583 gcc_unreachable ();
17584 }
17585 return size;
17586 }
17587
17588 return 0;
17589 }
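/* For example, assuming jump tables live in the text section: a TBB-style
   QImode ADDR_DIFF_VEC with 5 entries counts as 6 bytes (rounded up to a
   halfword), an HImode (TBH) table with 5 entries as 10 bytes, and an
   SImode table with 5 entries as 20 bytes plus 2 bytes of alignment when
   compiling for Thumb.  */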
17590
17591 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17592 function descriptor) into a register and the GOT address into the
17593 FDPIC register, returning an rtx for the register holding the
17594 function address. */
17595
17596 rtx
17597 arm_load_function_descriptor (rtx funcdesc)
17598 {
17599 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17600 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17601 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17602 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17603
17604 emit_move_insn (fnaddr_reg, fnaddr);
17605
17606 /* The ABI requires the entry point address to be loaded first, but
17607 since we cannot support lazy binding for lack of atomic load of
17608 two 32-bits values, we do not need to bother to prevent the
17609 previous load from being moved after that of the GOT address. */
17610 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17611
17612 return fnaddr_reg;
17613 }
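/* A sketch of the layout this relies on: under the FDPIC ABI a function
   descriptor is a pair of words { entry address, GOT value }, so the code
   above loads word 0 into a fresh pseudo and word 1 into the fixed FDPIC
   register (r9), roughly

     ldr  rX, [rdesc]          @ entry point
     ldr  r9, [rdesc, #4]      @ GOT/FDPIC value

   where rX and rdesc stand in for whatever registers are allocated.  */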
17614
17615 /* Return the maximum amount of padding that will be inserted before
17616 label LABEL. */
17617 static HOST_WIDE_INT
17618 get_label_padding (rtx label)
17619 {
17620 HOST_WIDE_INT align, min_insn_size;
17621
17622 align = 1 << label_to_alignment (label).levels[0].log;
17623 min_insn_size = TARGET_THUMB ? 2 : 4;
17624 return align > min_insn_size ? align - min_insn_size : 0;
17625 }
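/* E.g. a label aligned to 8 bytes can be preceded by up to 6 bytes of
   padding in Thumb (minimum insn size 2) or up to 4 bytes in ARM state
   (minimum insn size 4).  */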
17626
17627 /* Move a minipool fix MP from its current location to before MAX_MP.
17628 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17629 constraints may need updating. */
17630 static Mnode *
17631 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17632 HOST_WIDE_INT max_address)
17633 {
17634 /* The code below assumes these are different. */
17635 gcc_assert (mp != max_mp);
17636
17637 if (max_mp == NULL)
17638 {
17639 if (max_address < mp->max_address)
17640 mp->max_address = max_address;
17641 }
17642 else
17643 {
17644 if (max_address > max_mp->max_address - mp->fix_size)
17645 mp->max_address = max_mp->max_address - mp->fix_size;
17646 else
17647 mp->max_address = max_address;
17648
17649 /* Unlink MP from its current position. Since max_mp is non-null,
17650 mp->prev must be non-null. */
17651 mp->prev->next = mp->next;
17652 if (mp->next != NULL)
17653 mp->next->prev = mp->prev;
17654 else
17655 minipool_vector_tail = mp->prev;
17656
17657 /* Re-insert it before MAX_MP. */
17658 mp->next = max_mp;
17659 mp->prev = max_mp->prev;
17660 max_mp->prev = mp;
17661
17662 if (mp->prev != NULL)
17663 mp->prev->next = mp;
17664 else
17665 minipool_vector_head = mp;
17666 }
17667
17668 /* Save the new entry. */
17669 max_mp = mp;
17670
17671 /* Scan over the preceding entries and adjust their addresses as
17672 required. */
17673 while (mp->prev != NULL
17674 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17675 {
17676 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17677 mp = mp->prev;
17678 }
17679
17680 return max_mp;
17681 }
17682
17683 /* Add a constant to the minipool for a forward reference. Returns the
17684 node added or NULL if the constant will not fit in this pool. */
17685 static Mnode *
17686 add_minipool_forward_ref (Mfix *fix)
17687 {
17688 /* If set, max_mp is the first pool_entry that has a lower
17689 constraint than the one we are trying to add. */
17690 Mnode * max_mp = NULL;
17691 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17692 Mnode * mp;
17693
17694 /* If the minipool starts before the end of FIX->INSN then this FIX
17695 cannot be placed into the current pool. Furthermore, adding the
17696 new constant pool entry may cause the pool to start FIX_SIZE bytes
17697 earlier. */
17698 if (minipool_vector_head &&
17699 (fix->address + get_attr_length (fix->insn)
17700 >= minipool_vector_head->max_address - fix->fix_size))
17701 return NULL;
17702
17703 /* Scan the pool to see if a constant with the same value has
17704 already been added. While we are doing this, also note the
17705 location where we must insert the constant if it doesn't already
17706 exist. */
17707 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17708 {
17709 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17710 && fix->mode == mp->mode
17711 && (!LABEL_P (fix->value)
17712 || (CODE_LABEL_NUMBER (fix->value)
17713 == CODE_LABEL_NUMBER (mp->value)))
17714 && rtx_equal_p (fix->value, mp->value))
17715 {
17716 /* More than one fix references this entry. */
17717 mp->refcount++;
17718 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17719 }
17720
17721 /* Note the insertion point if necessary. */
17722 if (max_mp == NULL
17723 && mp->max_address > max_address)
17724 max_mp = mp;
17725
17726 /* If we are inserting an 8-byte aligned quantity and
17727 we have not already found an insertion point, then
17728 make sure that all such 8-byte aligned quantities are
17729 placed at the start of the pool. */
17730 if (ARM_DOUBLEWORD_ALIGN
17731 && max_mp == NULL
17732 && fix->fix_size >= 8
17733 && mp->fix_size < 8)
17734 {
17735 max_mp = mp;
17736 max_address = mp->max_address;
17737 }
17738 }
17739
17740 /* The value is not currently in the minipool, so we need to create
17741 a new entry for it. If MAX_MP is NULL, the entry will be put on
17742 the end of the list since the placement is less constrained than
17743 any existing entry. Otherwise, we insert the new fix before
17744 MAX_MP and, if necessary, adjust the constraints on the other
17745 entries. */
17746 mp = XNEW (Mnode);
17747 mp->fix_size = fix->fix_size;
17748 mp->mode = fix->mode;
17749 mp->value = fix->value;
17750 mp->refcount = 1;
17751 /* Not yet required for a backwards ref. */
17752 mp->min_address = -65536;
17753
17754 if (max_mp == NULL)
17755 {
17756 mp->max_address = max_address;
17757 mp->next = NULL;
17758 mp->prev = minipool_vector_tail;
17759
17760 if (mp->prev == NULL)
17761 {
17762 minipool_vector_head = mp;
17763 minipool_vector_label = gen_label_rtx ();
17764 }
17765 else
17766 mp->prev->next = mp;
17767
17768 minipool_vector_tail = mp;
17769 }
17770 else
17771 {
17772 if (max_address > max_mp->max_address - mp->fix_size)
17773 mp->max_address = max_mp->max_address - mp->fix_size;
17774 else
17775 mp->max_address = max_address;
17776
17777 mp->next = max_mp;
17778 mp->prev = max_mp->prev;
17779 max_mp->prev = mp;
17780 if (mp->prev != NULL)
17781 mp->prev->next = mp;
17782 else
17783 minipool_vector_head = mp;
17784 }
17785
17786 /* Save the new entry. */
17787 max_mp = mp;
17788
17789 /* Scan over the preceding entries and adjust their addresses as
17790 required. */
17791 while (mp->prev != NULL
17792 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17793 {
17794 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17795 mp = mp->prev;
17796 }
17797
17798 return max_mp;
17799 }
17800
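/* Like move_minipool_fix_forward_ref, but for a backward reference: move
   minipool entry MP to just after MIN_MP, or, if MIN_MP is NULL, leave it in
   place and just tighten its minimum-address constraint to MIN_ADDRESS.
   Recomputes the offsets of all pool entries and returns MP as the new
   insertion point.  */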
17801 static Mnode *
17802 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17803 HOST_WIDE_INT min_address)
17804 {
17805 HOST_WIDE_INT offset;
17806
17807 /* The code below assumes these are different. */
17808 gcc_assert (mp != min_mp);
17809
17810 if (min_mp == NULL)
17811 {
17812 if (min_address > mp->min_address)
17813 mp->min_address = min_address;
17814 }
17815 else
17816 {
17817 /* We will adjust this below if it is too loose. */
17818 mp->min_address = min_address;
17819
17820 /* Unlink MP from its current position. Since min_mp is non-null,
17821 mp->next must be non-null. */
17822 mp->next->prev = mp->prev;
17823 if (mp->prev != NULL)
17824 mp->prev->next = mp->next;
17825 else
17826 minipool_vector_head = mp->next;
17827
17828 /* Reinsert it after MIN_MP. */
17829 mp->prev = min_mp;
17830 mp->next = min_mp->next;
17831 min_mp->next = mp;
17832 if (mp->next != NULL)
17833 mp->next->prev = mp;
17834 else
17835 minipool_vector_tail = mp;
17836 }
17837
17838 min_mp = mp;
17839
17840 offset = 0;
17841 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17842 {
17843 mp->offset = offset;
17844 if (mp->refcount > 0)
17845 offset += mp->fix_size;
17846
17847 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17848 mp->next->min_address = mp->min_address + mp->fix_size;
17849 }
17850
17851 return min_mp;
17852 }
17853
17854 /* Add a constant to the minipool for a backward reference. Returns the
17855 node added or NULL if the constant will not fit in this pool.
17856
17857 Note that the code for insertion for a backwards reference can be
17858 somewhat confusing because the calculated offsets for each fix do
17859 not take into account the size of the pool (which is still under
17860 construction). */
17861 static Mnode *
17862 add_minipool_backward_ref (Mfix *fix)
17863 {
17864 /* If set, min_mp is the last pool_entry that has a lower constraint
17865 than the one we are trying to add. */
17866 Mnode *min_mp = NULL;
17867 /* This can be negative, since it is only a constraint. */
17868 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17869 Mnode *mp;
17870
17871 /* If we can't reach the current pool from this insn, or if we can't
17872 insert this entry at the end of the pool without pushing other
17873 fixes out of range, then we don't try. This ensures that we
17874 can't fail later on. */
17875 if (min_address >= minipool_barrier->address
17876 || (minipool_vector_tail->min_address + fix->fix_size
17877 >= minipool_barrier->address))
17878 return NULL;
17879
17880 /* Scan the pool to see if a constant with the same value has
17881 already been added. While we are doing this, also note the
17882 location where we must insert the constant if it doesn't already
17883 exist. */
17884 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17885 {
17886 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17887 && fix->mode == mp->mode
17888 && (!LABEL_P (fix->value)
17889 || (CODE_LABEL_NUMBER (fix->value)
17890 == CODE_LABEL_NUMBER (mp->value)))
17891 && rtx_equal_p (fix->value, mp->value)
17892 /* Check that there is enough slack to move this entry to the
17893 end of the table (this is conservative). */
17894 && (mp->max_address
17895 > (minipool_barrier->address
17896 + minipool_vector_tail->offset
17897 + minipool_vector_tail->fix_size)))
17898 {
17899 mp->refcount++;
17900 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17901 }
17902
17903 if (min_mp != NULL)
17904 mp->min_address += fix->fix_size;
17905 else
17906 {
17907 /* Note the insertion point if necessary. */
17908 if (mp->min_address < min_address)
17909 {
17910 /* For now, we do not allow the insertion of 8-byte alignment
17911 requiring nodes anywhere but at the start of the pool. */
17912 if (ARM_DOUBLEWORD_ALIGN
17913 && fix->fix_size >= 8 && mp->fix_size < 8)
17914 return NULL;
17915 else
17916 min_mp = mp;
17917 }
17918 else if (mp->max_address
17919 < minipool_barrier->address + mp->offset + fix->fix_size)
17920 {
17921 /* Inserting before this entry would push the fix beyond
17922 its maximum address (which can happen if we have
17923 re-located a forwards fix); force the new fix to come
17924 after it. */
17925 if (ARM_DOUBLEWORD_ALIGN
17926 && fix->fix_size >= 8 && mp->fix_size < 8)
17927 return NULL;
17928 else
17929 {
17930 min_mp = mp;
17931 min_address = mp->min_address + fix->fix_size;
17932 }
17933 }
17934 /* Do not insert a non-8-byte aligned quantity before 8-byte
17935 aligned quantities. */
17936 else if (ARM_DOUBLEWORD_ALIGN
17937 && fix->fix_size < 8
17938 && mp->fix_size >= 8)
17939 {
17940 min_mp = mp;
17941 min_address = mp->min_address + fix->fix_size;
17942 }
17943 }
17944 }
17945
17946 /* We need to create a new entry. */
17947 mp = XNEW (Mnode);
17948 mp->fix_size = fix->fix_size;
17949 mp->mode = fix->mode;
17950 mp->value = fix->value;
17951 mp->refcount = 1;
17952 mp->max_address = minipool_barrier->address + 65536;
17953
17954 mp->min_address = min_address;
17955
17956 if (min_mp == NULL)
17957 {
17958 mp->prev = NULL;
17959 mp->next = minipool_vector_head;
17960
17961 if (mp->next == NULL)
17962 {
17963 minipool_vector_tail = mp;
17964 minipool_vector_label = gen_label_rtx ();
17965 }
17966 else
17967 mp->next->prev = mp;
17968
17969 minipool_vector_head = mp;
17970 }
17971 else
17972 {
17973 mp->next = min_mp->next;
17974 mp->prev = min_mp;
17975 min_mp->next = mp;
17976
17977 if (mp->next != NULL)
17978 mp->next->prev = mp;
17979 else
17980 minipool_vector_tail = mp;
17981 }
17982
17983 /* Save the new entry. */
17984 min_mp = mp;
17985
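/* Start the offset scan from the entry before the new one so that the new
   entry's own offset gets recomputed; if the new entry is at the head of
   the pool its offset is simply zero.  */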
17986 if (mp->prev)
17987 mp = mp->prev;
17988 else
17989 mp->offset = 0;
17990
17991 /* Scan over the following entries and adjust their offsets. */
17992 while (mp->next != NULL)
17993 {
17994 if (mp->next->min_address < mp->min_address + mp->fix_size)
17995 mp->next->min_address = mp->min_address + mp->fix_size;
17996
17997 if (mp->refcount)
17998 mp->next->offset = mp->offset + mp->fix_size;
17999 else
18000 mp->next->offset = mp->offset;
18001
18002 mp = mp->next;
18003 }
18004
18005 return min_mp;
18006 }
18007
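/* Assign an offset to every entry in the minipool that ends at BARRIER,
   skipping entries whose reference count has dropped to zero.  */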
18008 static void
18009 assign_minipool_offsets (Mfix *barrier)
18010 {
18011 HOST_WIDE_INT offset = 0;
18012 Mnode *mp;
18013
18014 minipool_barrier = barrier;
18015
18016 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18017 {
18018 mp->offset = offset;
18019
18020 if (mp->refcount > 0)
18021 offset += mp->fix_size;
18022 }
18023 }
18024
18025 /* Output the accumulated literal table (minipool) after insn SCAN. */
18026 static void
18027 dump_minipool (rtx_insn *scan)
18028 {
18029 Mnode * mp;
18030 Mnode * nmp;
18031 int align64 = 0;
18032
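/* If any entry in the pool needs 8-byte alignment, the whole pool must be
   emitted with 8-byte alignment.  */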
18033 if (ARM_DOUBLEWORD_ALIGN)
18034 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18035 if (mp->refcount > 0 && mp->fix_size >= 8)
18036 {
18037 align64 = 1;
18038 break;
18039 }
18040
18041 if (dump_file)
18042 fprintf (dump_file,
18043 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18044 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
18045
18046 scan = emit_label_after (gen_label_rtx (), scan);
18047 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
18048 scan = emit_label_after (minipool_vector_label, scan);
18049
18050 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
18051 {
18052 if (mp->refcount > 0)
18053 {
18054 if (dump_file)
18055 {
18056 fprintf (dump_file,
18057 ";; Offset %u, min %ld, max %ld ",
18058 (unsigned) mp->offset, (unsigned long) mp->min_address,
18059 (unsigned long) mp->max_address);
18060 arm_print_value (dump_file, mp->value);
18061 fputc ('\n', dump_file);
18062 }
18063
18064 rtx val = copy_rtx (mp->value);
18065
18066 switch (GET_MODE_SIZE (mp->mode))
18067 {
18068 #ifdef HAVE_consttable_1
18069 case 1:
18070 scan = emit_insn_after (gen_consttable_1 (val), scan);
18071 break;
18072
18073 #endif
18074 #ifdef HAVE_consttable_2
18075 case 2:
18076 scan = emit_insn_after (gen_consttable_2 (val), scan);
18077 break;
18078
18079 #endif
18080 #ifdef HAVE_consttable_4
18081 case 4:
18082 scan = emit_insn_after (gen_consttable_4 (val), scan);
18083 break;
18084
18085 #endif
18086 #ifdef HAVE_consttable_8
18087 case 8:
18088 scan = emit_insn_after (gen_consttable_8 (val), scan);
18089 break;
18090
18091 #endif
18092 #ifdef HAVE_consttable_16
18093 case 16:
18094 scan = emit_insn_after (gen_consttable_16 (val), scan);
18095 break;
18096
18097 #endif
18098 default:
18099 gcc_unreachable ();
18100 }
18101 }
18102
18103 nmp = mp->next;
18104 free (mp);
18105 }
18106
18107 minipool_vector_head = minipool_vector_tail = NULL;
18108 scan = emit_insn_after (gen_consttable_end (), scan);
18109 scan = emit_barrier_after (scan);
18110 }
18111
18112 /* Return the cost of forcibly inserting a barrier after INSN. */
18113 static int
18114 arm_barrier_cost (rtx_insn *insn)
18115 {
18116 /* Basing the location of the pool on the loop depth is preferable,
18117 but at the moment, the basic block information seems to be
18118 corrupted by this stage of the compilation. */
18119 int base_cost = 50;
18120 rtx_insn *next = next_nonnote_insn (insn);
18121
18122 if (next != NULL && LABEL_P (next))
18123 base_cost -= 20;
18124
18125 switch (GET_CODE (insn))
18126 {
18127 case CODE_LABEL:
18128 /* It will always be better to place the table before the label, rather
18129 than after it. */
18130 return 50;
18131
18132 case INSN:
18133 case CALL_INSN:
18134 return base_cost;
18135
18136 case JUMP_INSN:
18137 return base_cost - 10;
18138
18139 default:
18140 return base_cost + 10;
18141 }
18142 }
18143
18144 /* Find the best place in the insn stream in the range
18145 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18146 Create the barrier by inserting a jump and add a new fix entry for
18147 it. */
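/* The new jump branches over the spot at which the literal pool will later
   be dumped, so that execution never falls into the pool data.  */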
18148 static Mfix *
18149 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18150 {
18151 HOST_WIDE_INT count = 0;
18152 rtx_barrier *barrier;
18153 rtx_insn *from = fix->insn;
18154 /* The instruction after which we will insert the jump. */
18155 rtx_insn *selected = NULL;
18156 int selected_cost;
18157 /* The address at which the jump instruction will be placed. */
18158 HOST_WIDE_INT selected_address;
18159 Mfix * new_fix;
18160 HOST_WIDE_INT max_count = max_address - fix->address;
18161 rtx_code_label *label = gen_label_rtx ();
18162
18163 selected_cost = arm_barrier_cost (from);
18164 selected_address = fix->address;
18165
18166 while (from && count < max_count)
18167 {
18168 rtx_jump_table_data *tmp;
18169 int new_cost;
18170
18171 /* This code shouldn't have been called if there was a natural barrier
18172 within range. */
18173 gcc_assert (!BARRIER_P (from));
18174
18175 /* Count the length of this insn. This must stay in sync with the
18176 code that pushes minipool fixes. */
18177 if (LABEL_P (from))
18178 count += get_label_padding (from);
18179 else
18180 count += get_attr_length (from);
18181
18182 /* If there is a jump table, add its length. */
18183 if (tablejump_p (from, NULL, &tmp))
18184 {
18185 count += get_jump_table_size (tmp);
18186
18187 /* Jump tables aren't in a basic block, so base the cost on
18188 the dispatch insn. If we select this location, we will
18189 still put the pool after the table. */
18190 new_cost = arm_barrier_cost (from);
18191
18192 if (count < max_count
18193 && (!selected || new_cost <= selected_cost))
18194 {
18195 selected = tmp;
18196 selected_cost = new_cost;
18197 selected_address = fix->address + count;
18198 }
18199
18200 /* Continue after the dispatch table. */
18201 from = NEXT_INSN (tmp);
18202 continue;
18203 }
18204
18205 new_cost = arm_barrier_cost (from);
18206
18207 if (count < max_count
18208 && (!selected || new_cost <= selected_cost))
18209 {
18210 selected = from;
18211 selected_cost = new_cost;
18212 selected_address = fix->address + count;
18213 }
18214
18215 from = NEXT_INSN (from);
18216 }
18217
18218 /* Make sure that we found a place to insert the jump. */
18219 gcc_assert (selected);
18220
18221 /* Create a new JUMP_INSN that branches around a barrier. */
18222 from = emit_jump_insn_after (gen_jump (label), selected);
18223 JUMP_LABEL (from) = label;
18224 barrier = emit_barrier_after (from);
18225 emit_label_after (label, barrier);
18226
18227 /* Create a minipool barrier entry for the new barrier. */
18228 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18229 new_fix->insn = barrier;
18230 new_fix->address = selected_address;
18231 new_fix->next = fix->next;
18232 fix->next = new_fix;
18233
18234 return new_fix;
18235 }
18236
18237 /* Record that there is a natural barrier in the insn stream at
18238 ADDRESS. */
18239 static void
18240 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18241 {
18242 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18243
18244 fix->insn = insn;
18245 fix->address = address;
18246
18247 fix->next = NULL;
18248 if (minipool_fix_head != NULL)
18249 minipool_fix_tail->next = fix;
18250 else
18251 minipool_fix_head = fix;
18252
18253 minipool_fix_tail = fix;
18254 }
18255
18256 /* Record INSN, which will need fixing up to load a value from the
18257 minipool. ADDRESS is the offset of the insn since the start of the
18258 function; LOC is a pointer to the part of the insn which requires
18259 fixing; VALUE is the constant that must be loaded, which is of type
18260 MODE. */
18261 static void
18262 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18263 machine_mode mode, rtx value)
18264 {
18265 gcc_assert (!arm_disable_literal_pool);
18266 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18267
18268 fix->insn = insn;
18269 fix->address = address;
18270 fix->loc = loc;
18271 fix->mode = mode;
18272 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18273 fix->value = value;
18274 fix->forwards = get_attr_pool_range (insn);
18275 fix->backwards = get_attr_neg_pool_range (insn);
18276 fix->minipool = NULL;
18277
18278 /* If an insn doesn't have a range defined for it, then it isn't
18279 expecting to be reworked by this code. Better to stop now than
18280 to generate duff assembly code. */
18281 gcc_assert (fix->forwards || fix->backwards);
18282
18283 /* If an entry requires 8-byte alignment then assume all constant pools
18284 require 4 bytes of padding. Trying to do this later on a per-pool
18285 basis is awkward because existing pool entries have to be modified. */
18286 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18287 minipool_pad = 4;
18288
18289 if (dump_file)
18290 {
18291 fprintf (dump_file,
18292 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18293 GET_MODE_NAME (mode),
18294 INSN_UID (insn), (unsigned long) address,
18295 -1 * (long)fix->backwards, (long)fix->forwards);
18296 arm_print_value (dump_file, fix->value);
18297 fprintf (dump_file, "\n");
18298 }
18299
18300 /* Add it to the chain of fixes. */
18301 fix->next = NULL;
18302
18303 if (minipool_fix_head != NULL)
18304 minipool_fix_tail->next = fix;
18305 else
18306 minipool_fix_head = fix;
18307
18308 minipool_fix_tail = fix;
18309 }
18310
18311 /* Return the maximum allowed cost (in insns) of synthesizing a 64-bit
18312 constant inline; if synthesizing a constant would cost more than this,
18313 it is better to load it from the literal pool instead. */
18314 int
18315 arm_max_const_double_inline_cost ()
18316 {
18317 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18318 }
18319
18320 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18321 Returns the number of insns needed, or 99 if we don't know how to
18322 do it. */
18323 int
18324 arm_const_double_inline_cost (rtx val)
18325 {
18326 rtx lowpart, highpart;
18327 machine_mode mode;
18328
18329 mode = GET_MODE (val);
18330
18331 if (mode == VOIDmode)
18332 mode = DImode;
18333
18334 gcc_assert (GET_MODE_SIZE (mode) == 8);
18335
18336 lowpart = gen_lowpart (SImode, val);
18337 highpart = gen_highpart_mode (SImode, mode, val);
18338
18339 gcc_assert (CONST_INT_P (lowpart));
18340 gcc_assert (CONST_INT_P (highpart));
18341
18342 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18343 NULL_RTX, NULL_RTX, 0, 0)
18344 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18345 NULL_RTX, NULL_RTX, 0, 0));
18346 }
18347
18348 /* Cost of loading a SImode constant. */
18349 static inline int
18350 arm_const_inline_cost (enum rtx_code code, rtx val)
18351 {
18352 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18353 NULL_RTX, NULL_RTX, 1, 0);
18354 }
18355
18356 /* Return true if it is worthwhile to split a 64-bit constant into two
18357 32-bit operations. This is the case if optimizing for size, or
18358 if we have load delay slots, or if one 32-bit part can be done with
18359 a single data operation. */
18360 bool
18361 arm_const_double_by_parts (rtx val)
18362 {
18363 machine_mode mode = GET_MODE (val);
18364 rtx part;
18365
18366 if (optimize_size || arm_ld_sched)
18367 return true;
18368
18369 if (mode == VOIDmode)
18370 mode = DImode;
18371
18372 part = gen_highpart_mode (SImode, mode, val);
18373
18374 gcc_assert (CONST_INT_P (part));
18375
18376 if (const_ok_for_arm (INTVAL (part))
18377 || const_ok_for_arm (~INTVAL (part)))
18378 return true;
18379
18380 part = gen_lowpart (SImode, val);
18381
18382 gcc_assert (CONST_INT_P (part));
18383
18384 if (const_ok_for_arm (INTVAL (part))
18385 || const_ok_for_arm (~INTVAL (part)))
18386 return true;
18387
18388 return false;
18389 }
18390
18391 /* Return true if it is possible to inline both the high and low parts
18392 of a 64-bit constant into 32-bit data processing instructions. */
18393 bool
18394 arm_const_double_by_immediates (rtx val)
18395 {
18396 machine_mode mode = GET_MODE (val);
18397 rtx part;
18398
18399 if (mode == VOIDmode)
18400 mode = DImode;
18401
18402 part = gen_highpart_mode (SImode, mode, val);
18403
18404 gcc_assert (CONST_INT_P (part));
18405
18406 if (!const_ok_for_arm (INTVAL (part)))
18407 return false;
18408
18409 part = gen_lowpart (SImode, val);
18410
18411 gcc_assert (CONST_INT_P (part));
18412
18413 if (!const_ok_for_arm (INTVAL (part)))
18414 return false;
18415
18416 return true;
18417 }
18418
18419 /* Scan INSN and note any of its operands that need fixing.
18420 If DO_PUSHES is false we do not actually push any of the fixups
18421 needed. */
18422 static void
18423 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18424 {
18425 int opno;
18426
18427 extract_constrain_insn (insn);
18428
18429 if (recog_data.n_alternatives == 0)
18430 return;
18431
18432 /* Fill in recog_op_alt with information about the constraints of
18433 this insn. */
18434 preprocess_constraints (insn);
18435
18436 const operand_alternative *op_alt = which_op_alt ();
18437 for (opno = 0; opno < recog_data.n_operands; opno++)
18438 {
18439 /* Things we need to fix can only occur in inputs. */
18440 if (recog_data.operand_type[opno] != OP_IN)
18441 continue;
18442
18443 /* If this alternative is a memory reference, then any mention
18444 of constants in this alternative is really to fool reload
18445 into allowing us to accept one there. We need to fix them up
18446 now so that we output the right code. */
18447 if (op_alt[opno].memory_ok)
18448 {
18449 rtx op = recog_data.operand[opno];
18450
18451 if (CONSTANT_P (op))
18452 {
18453 if (do_pushes)
18454 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18455 recog_data.operand_mode[opno], op);
18456 }
18457 else if (MEM_P (op)
18458 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18459 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18460 {
18461 if (do_pushes)
18462 {
18463 rtx cop = avoid_constant_pool_reference (op);
18464
18465 /* Casting the address of something to a mode narrower
18466 than a word can cause avoid_constant_pool_reference()
18467 to return the pool reference itself. That's no good to
18468 us here. Let's just hope that we can use the
18469 constant pool value directly. */
18470 if (op == cop)
18471 cop = get_pool_constant (XEXP (op, 0));
18472
18473 push_minipool_fix (insn, address,
18474 recog_data.operand_loc[opno],
18475 recog_data.operand_mode[opno], cop);
18476 }
18477
18478 }
18479 }
18480 }
18481
18482 return;
18483 }
18484
18485 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18486 and unions in the context of the ARMv8-M Security Extensions. It is used
18487 as a helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18488 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18489 or four masks, depending on whether it is being computed for a
18490 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18491 respectively. The tree for the type of the argument, or of a field within
18492 an argument, is passed in ARG_TYPE. The register in which this argument or
18493 field starts is kept in *REGNO and updated accordingly. The bit at which the
18494 argument or field starts is passed in STARTING_BIT, and the last bit used so
18495 far is kept in *LAST_USED_BIT, which is also updated accordingly. */
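/* Roughly: for an argument of type  struct { char c; int i; }  passed in
   r0-r1, bits 8-31 of r0 are structure padding and end up flagged in
   padding_bits_to_clear[0], while r0 and r1 are recorded as registers that
   carry argument data and therefore must not be wiped wholesale.  */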
18496
18497 static unsigned HOST_WIDE_INT
18498 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18499 uint32_t * padding_bits_to_clear,
18500 unsigned starting_bit, int * last_used_bit)
18501
18502 {
18503 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18504
18505 if (TREE_CODE (arg_type) == RECORD_TYPE)
18506 {
18507 unsigned current_bit = starting_bit;
18508 tree field;
18509 long int offset, size;
18510
18511
18512 field = TYPE_FIELDS (arg_type);
18513 while (field)
18514 {
18515 /* The offset within a structure is always an offset from
18516 the start of that structure. Make sure we take that into account
18517 in the calculation of the register-based offset that we use here. */
18518 offset = starting_bit;
18519 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18520 offset %= 32;
18521
18522 /* This is the actual size of the field, for bitfields this is the
18523 bitfield width and not the container size. */
18524 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18525
18526 if (*last_used_bit != offset)
18527 {
18528 if (offset < *last_used_bit)
18529 {
18530 /* This field's offset is before the 'last_used_bit', which
18531 means this field goes in the next register. So we need to
18532 pad the rest of the current register and increase the
18533 register number. */
18534 uint32_t mask;
18535 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18536 mask++;
18537
18538 padding_bits_to_clear[*regno] |= mask;
18539 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18540 (*regno)++;
18541 }
18542 else
18543 {
18544 /* Otherwise we pad the bits between the last field's end and
18545 the start of the new field. */
18546 uint32_t mask;
18547
18548 mask = ((uint32_t)-1) >> (32 - offset);
18549 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18550 padding_bits_to_clear[*regno] |= mask;
18551 }
18552 current_bit = offset;
18553 }
18554
18555 /* Calculate further padding bits for inner structs/unions too. */
18556 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18557 {
18558 *last_used_bit = current_bit;
18559 not_to_clear_reg_mask
18560 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18561 padding_bits_to_clear, offset,
18562 last_used_bit);
18563 }
18564 else
18565 {
18566 /* Update 'current_bit' with this field's size. If the
18567 'current_bit' lies in a subsequent register, update 'regno' and
18568 reset 'current_bit' to point to the current bit in that new
18569 register. */
18570 current_bit += size;
18571 while (current_bit >= 32)
18572 {
18573 current_bit-=32;
18574 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18575 (*regno)++;
18576 }
18577 *last_used_bit = current_bit;
18578 }
18579
18580 field = TREE_CHAIN (field);
18581 }
18582 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18583 }
18584 else if (TREE_CODE (arg_type) == UNION_TYPE)
18585 {
18586 tree field, field_t;
18587 int i, regno_t, field_size;
18588 int max_reg = -1;
18589 int max_bit = -1;
18590 uint32_t mask;
18591 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18592 = {-1, -1, -1, -1};
18593
18594 /* To compute the padding bits in a union we only consider bits as
18595 padding bits if, for every field in the union, they are either padding
18596 bits of that field or fall outside that field's size. */
18597 field = TYPE_FIELDS (arg_type);
18598 while (field)
18599 {
18600 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18601 = {0U, 0U, 0U, 0U};
18602 int last_used_bit_t = *last_used_bit;
18603 regno_t = *regno;
18604 field_t = TREE_TYPE (field);
18605
18606 /* If the field's type is either a record or a union make sure to
18607 compute their padding bits too. */
18608 if (RECORD_OR_UNION_TYPE_P (field_t))
18609 not_to_clear_reg_mask
18610 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18611 &padding_bits_to_clear_t[0],
18612 starting_bit, &last_used_bit_t);
18613 else
18614 {
18615 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18616 regno_t = (field_size / 32) + *regno;
18617 last_used_bit_t = (starting_bit + field_size) % 32;
18618 }
18619
18620 for (i = *regno; i < regno_t; i++)
18621 {
18622 /* For all but the last register used by this field, only keep the
18623 padding bits that were padding bits in this field. */
18624 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18625 }
18626
18627 /* For the last register, keep all padding bits that were padding
18628 bits in this field and any padding bits that are still valid
18629 as padding bits but fall outside of this field's size. */
18630 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18631 padding_bits_to_clear_res[regno_t]
18632 &= padding_bits_to_clear_t[regno_t] | mask;
18633
18634 /* Update the maximum size of the fields in terms of registers used
18635 ('max_reg') and the 'last_used_bit' in said register. */
18636 if (max_reg < regno_t)
18637 {
18638 max_reg = regno_t;
18639 max_bit = last_used_bit_t;
18640 }
18641 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18642 max_bit = last_used_bit_t;
18643
18644 field = TREE_CHAIN (field);
18645 }
18646
18647 /* Update the current padding_bits_to_clear using the intersection of the
18648 padding bits of all the fields. */
18649 for (i=*regno; i < max_reg; i++)
18650 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18651
18652 /* Do not keep trailing padding bits; we do not know yet whether this
18653 is the end of the argument. */
18654 mask = ((uint32_t) 1 << max_bit) - 1;
18655 padding_bits_to_clear[max_reg]
18656 |= padding_bits_to_clear_res[max_reg] & mask;
18657
18658 *regno = max_reg;
18659 *last_used_bit = max_bit;
18660 }
18661 else
18662 /* This function should only be used for structs and unions. */
18663 gcc_unreachable ();
18664
18665 return not_to_clear_reg_mask;
18666 }
18667
18668 /* In the context of ARMv8-M Security Extensions, this function is used for both
18669 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute which
18670 registers are used when returning or passing arguments, which is then
18671 returned as a mask. It also computes a mask indicating the padding/unused
18672 bits of each of these registers, and passes this back through the
18673 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18674 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18675 the starting register used to pass this argument or return value is passed
18676 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18677 for struct and union types. */
18678
18679 static unsigned HOST_WIDE_INT
18680 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18681 uint32_t * padding_bits_to_clear)
18682
18683 {
18684 int last_used_bit = 0;
18685 unsigned HOST_WIDE_INT not_to_clear_mask;
18686
18687 if (RECORD_OR_UNION_TYPE_P (arg_type))
18688 {
18689 not_to_clear_mask
18690 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18691 padding_bits_to_clear, 0,
18692 &last_used_bit);
18693
18694
18695 /* If the 'last_used_bit' is not zero, that means we are still using a
18696 part of the last 'regno'. In such cases we must clear the trailing
18697 bits. Otherwise we are not using regno at all and should mark it
18698 for clearing in full. */
18699 if (last_used_bit != 0)
18700 padding_bits_to_clear[regno]
18701 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18702 else
18703 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18704 }
18705 else
18706 {
18707 not_to_clear_mask = 0;
18708 /* We are not dealing with structs or unions, so these arguments may be
18709 passed in floating point registers too. In some cases a BLKmode is
18710 used when returning or passing arguments in multiple VFP registers. */
18711 if (GET_MODE (arg_rtx) == BLKmode)
18712 {
18713 int i, arg_regs;
18714 rtx reg;
18715
18716 /* This should really only occur when dealing with the hard-float
18717 ABI. */
18718 gcc_assert (TARGET_HARD_FLOAT_ABI);
18719
18720 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18721 {
18722 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18723 gcc_assert (REG_P (reg));
18724
18725 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18726
18727 /* If we are dealing with DF mode, make sure we don't
18728 clear either of the registers it addresses. */
18729 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18730 if (arg_regs > 1)
18731 {
18732 unsigned HOST_WIDE_INT mask;
18733 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18734 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18735 not_to_clear_mask |= mask;
18736 }
18737 }
18738 }
18739 else
18740 {
18741 /* Otherwise we can rely on the MODE to determine how many registers
18742 are being used by this argument. */
18743 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18744 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18745 if (arg_regs > 1)
18746 {
18747 unsigned HOST_WIDE_INT
18748 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18749 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18750 not_to_clear_mask |= mask;
18751 }
18752 }
18753 }
18754
18755 return not_to_clear_mask;
18756 }
18757
18758 /* Clear registers that may hold secret values before doing a cmse_nonsecure_call
18759 or returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
18760 which registers are to be fully cleared, using the value in register
18761 CLEARING_REG if more efficient. The PADDING_BITS_LEN-entry array
18762 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
18763 core registers, with SCRATCH_REG used as a scratch register for that clearing.
18764
18765 NOTE: one of the three following conditions must hold:
18766 - SCRATCH_REG is a low register
18767 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18768 in TO_CLEAR_BITMAP)
18769 - CLEARING_REG is a low register. */
18770
18771 static void
18772 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18773 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18774 {
18775 bool saved_clearing = false;
18776 rtx saved_clearing_reg = NULL_RTX;
18777 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18778
18779 gcc_assert (arm_arch_cmse);
18780
18781 if (!bitmap_empty_p (to_clear_bitmap))
18782 {
18783 minregno = bitmap_first_set_bit (to_clear_bitmap);
18784 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18785 }
18786 clearing_regno = REGNO (clearing_reg);
18787
18788 /* Clear padding bits. */
18789 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18790 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18791 {
18792 uint64_t mask;
18793 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18794
18795 if (padding_bits_to_clear[i] == 0)
18796 continue;
18797
18798 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18799 CLEARING_REG as scratch. */
18800 if (TARGET_THUMB1
18801 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18802 {
18803 /* clearing_reg is not to be cleared, so copy its value into scratch_reg
18804 such that we can use clearing_reg to clear the unused bits in the
18805 arguments. */
18806 if ((clearing_regno > maxregno
18807 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18808 && !saved_clearing)
18809 {
18810 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18811 emit_move_insn (scratch_reg, clearing_reg);
18812 saved_clearing = true;
18813 saved_clearing_reg = scratch_reg;
18814 }
18815 scratch_reg = clearing_reg;
18816 }
18817
18818 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18819 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18820 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18821
18822 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18823 mask = (~padding_bits_to_clear[i]) >> 16;
18824 rtx16 = gen_int_mode (16, SImode);
18825 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18826 if (mask)
18827 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18828
18829 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18830 }
18831 if (saved_clearing)
18832 emit_move_insn (clearing_reg, saved_clearing_reg);
18833
18834
18835 /* Clear full registers. */
18836
18837 if (TARGET_HAVE_FPCXT_CMSE)
18838 {
18839 rtvec vunspec_vec;
18840 int i, j, k, nb_regs;
18841 rtx use_seq, par, reg, set, vunspec;
18842 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18843 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18844 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18845
18846 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18847 {
18848 /* Find next register to clear and exit if none. */
18849 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18850 if (i > maxregno)
18851 break;
18852
18853 /* Compute number of consecutive registers to clear. */
18854 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18855 j++);
18856 nb_regs = j - i;
18857
18858 /* Create VSCCLRM RTX pattern. */
18859 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18860 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18861 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18862 VUNSPEC_VSCCLRM_VPR);
18863 XVECEXP (par, 0, 0) = vunspec;
18864
18865 /* Insert VFP register clearing RTX in the pattern. */
18866 start_sequence ();
18867 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18868 {
18869 if (!bitmap_bit_p (to_clear_bitmap, j))
18870 continue;
18871
18872 reg = gen_rtx_REG (SFmode, j);
18873 set = gen_rtx_SET (reg, const0_rtx);
18874 XVECEXP (par, 0, k++) = set;
18875 emit_use (reg);
18876 }
18877 use_seq = get_insns ();
18878 end_sequence ();
18879
18880 emit_insn_after (use_seq, emit_insn (par));
18881 }
18882
18883 /* Get set of core registers to clear. */
18884 bitmap_clear (core_regs_bitmap);
18885 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18886 IP_REGNUM - R0_REGNUM + 1);
18887 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18888 core_regs_bitmap);
18889 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18890
18891 if (bitmap_empty_p (to_clear_core_bitmap))
18892 return;
18893
18894 /* Create clrm RTX pattern. */
18895 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18896 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18897
18898 /* Insert core register clearing RTX in the pattern. */
18899 start_sequence ();
18900 for (j = 0, i = minregno; j < nb_regs; i++)
18901 {
18902 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18903 continue;
18904
18905 reg = gen_rtx_REG (SImode, i);
18906 set = gen_rtx_SET (reg, const0_rtx);
18907 XVECEXP (par, 0, j++) = set;
18908 emit_use (reg);
18909 }
18910
18911 /* Insert APSR register clearing RTX in the pattern
18912 along with clobbering CC. */
18913 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18914 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18915 VUNSPEC_CLRM_APSR);
18916
18917 XVECEXP (par, 0, j++) = vunspec;
18918
18919 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18920 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18921 XVECEXP (par, 0, j) = clobber;
18922
18923 use_seq = get_insns ();
18924 end_sequence ();
18925
18926 emit_insn_after (use_seq, emit_insn (par));
18927 }
18928 else
18929 {
18930 /* If not marked for clearing, clearing_reg already does not contain
18931 any secret. */
18932 if (clearing_regno <= maxregno
18933 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18934 {
18935 emit_move_insn (clearing_reg, const0_rtx);
18936 emit_use (clearing_reg);
18937 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18938 }
18939
18940 for (regno = minregno; regno <= maxregno; regno++)
18941 {
18942 if (!bitmap_bit_p (to_clear_bitmap, regno))
18943 continue;
18944
18945 if (IS_VFP_REGNUM (regno))
18946 {
18947 /* If regno is an even vfp register and its successor is also to
18948 be cleared, use vmov. */
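/* Note that the registers are overwritten with 1.0 rather than 0.0:
   any constant wipes the old (potentially secret) contents, and unlike
   0.0 the value 1.0 is encodable as a VFP vmov immediate, which
   presumably motivates the choice.  */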
18949 if (TARGET_VFP_DOUBLE
18950 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18951 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18952 {
18953 emit_move_insn (gen_rtx_REG (DFmode, regno),
18954 CONST1_RTX (DFmode));
18955 emit_use (gen_rtx_REG (DFmode, regno));
18956 regno++;
18957 }
18958 else
18959 {
18960 emit_move_insn (gen_rtx_REG (SFmode, regno),
18961 CONST1_RTX (SFmode));
18962 emit_use (gen_rtx_REG (SFmode, regno));
18963 }
18964 }
18965 else
18966 {
18967 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18968 emit_use (gen_rtx_REG (SImode, regno));
18969 }
18970 }
18971 }
18972 }
18973
18974 /* Clear core and caller-saved VFP registers not used to pass arguments before
18975 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18976 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18977 libgcc/config/arm/cmse_nonsecure_call.S. */
18978
18979 static void
18980 cmse_nonsecure_call_inline_register_clear (void)
18981 {
18982 basic_block bb;
18983
18984 FOR_EACH_BB_FN (bb, cfun)
18985 {
18986 rtx_insn *insn;
18987
18988 FOR_BB_INSNS (bb, insn)
18989 {
18990 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18991 /* frame = VFP regs + FPSCR + VPR. */
18992 unsigned lazy_store_stack_frame_size
18993 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18994 unsigned long callee_saved_mask
18995 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18996 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18997 unsigned address_regnum, regno;
18998 unsigned max_int_regno
18999 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
19000 unsigned max_fp_regno
19001 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
19002 unsigned maxregno
19003 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
19004 auto_sbitmap to_clear_bitmap (maxregno + 1);
19005 rtx_insn *seq;
19006 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
19007 rtx address;
19008 CUMULATIVE_ARGS args_so_far_v;
19009 cumulative_args_t args_so_far;
19010 tree arg_type, fntype;
19011 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
19012 function_args_iterator args_iter;
19013 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
19014
19015 if (!NONDEBUG_INSN_P (insn))
19016 continue;
19017
19018 if (!CALL_P (insn))
19019 continue;
19020
19021 pat = PATTERN (insn);
19022 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
19023 call = XVECEXP (pat, 0, 0);
19024
19025 /* Get the real call RTX if the insn sets a value, ie. returns. */
19026 if (GET_CODE (call) == SET)
19027 call = SET_SRC (call);
19028
19029 /* Check if it is a cmse_nonsecure_call. */
19030 unspec = XEXP (call, 0);
19031 if (GET_CODE (unspec) != UNSPEC
19032 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
19033 continue;
19034
19035 /* Mark registers that need to be cleared. Those that hold a
19036 parameter are removed from the set further below. */
19037 bitmap_clear (to_clear_bitmap);
19038 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
19039 max_int_regno - R0_REGNUM + 1);
19040
19041 /* Only look at the caller-saved floating point registers in case of
19042 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19043 lazy store and loads which clear both caller- and callee-saved
19044 registers. */
19045 if (!lazy_fpclear)
19046 {
19047 auto_sbitmap float_bitmap (maxregno + 1);
19048
19049 bitmap_clear (float_bitmap);
19050 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
19051 max_fp_regno - FIRST_VFP_REGNUM + 1);
19052 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
19053 }
19054
19055 /* Make sure the register used to hold the function address is not
19056 cleared. */
19057 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19058 gcc_assert (MEM_P (address));
19059 gcc_assert (REG_P (XEXP (address, 0)));
19060 address_regnum = REGNO (XEXP (address, 0));
19061 if (address_regnum <= max_int_regno)
19062 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19063
19064 /* Set basic block of call insn so that df rescan is performed on
19065 insns inserted here. */
19066 set_block_for_insn (insn, bb);
19067 df_set_flags (DF_DEFER_INSN_RESCAN);
19068 start_sequence ();
19069
19070 /* Make sure the scheduler doesn't schedule other insns beyond
19071 here. */
19072 emit_insn (gen_blockage ());
19073
19074 /* Walk through all arguments and clear registers
19075 appropriately. */
19076 fntype = TREE_TYPE (MEM_EXPR (address));
19077 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19078 NULL_TREE);
19079 args_so_far = pack_cumulative_args (&args_so_far_v);
19080 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19081 {
19082 rtx arg_rtx;
19083 uint64_t to_clear_args_mask;
19084
19085 if (VOID_TYPE_P (arg_type))
19086 continue;
19087
19088 function_arg_info arg (arg_type, /*named=*/true);
19089 if (!first_param)
19090 /* ??? We should advance after processing the argument and pass
19091 the argument we're advancing past. */
19092 arm_function_arg_advance (args_so_far, arg);
19093
19094 arg_rtx = arm_function_arg (args_so_far, arg);
19095 gcc_assert (REG_P (arg_rtx));
19096 to_clear_args_mask
19097 = compute_not_to_clear_mask (arg_type, arg_rtx,
19098 REGNO (arg_rtx),
19099 &padding_bits_to_clear[0]);
19100 if (to_clear_args_mask)
19101 {
19102 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19103 {
19104 if (to_clear_args_mask & (1ULL << regno))
19105 bitmap_clear_bit (to_clear_bitmap, regno);
19106 }
19107 }
19108
19109 first_param = false;
19110 }
19111
19112 /* We use right shift and left shift to clear the LSB of the address
19113 we jump to instead of using bic, to avoid having to use an extra
19114 register on Thumb-1. */
19115 clearing_reg = XEXP (address, 0);
19116 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19117 emit_insn (gen_rtx_SET (clearing_reg, shift));
19118 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19119 emit_insn (gen_rtx_SET (clearing_reg, shift));
19120
19121 if (clear_callee_saved)
19122 {
19123 rtx push_insn =
19124 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19125 /* Disable frame debug info in push because it needs to be
19126 disabled for pop (see below). */
19127 RTX_FRAME_RELATED_P (push_insn) = 0;
19128
19129 /* Lazy store multiple. */
19130 if (lazy_fpclear)
19131 {
19132 rtx imm;
19133 rtx_insn *add_insn;
19134
19135 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19136 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19137 stack_pointer_rtx, imm));
19138 /* If we have the frame pointer, then it will be the
19139 CFA reg. Otherwise, the stack pointer is the CFA
19140 reg, so we need to emit a CFA adjust. */
19141 if (!frame_pointer_needed)
19142 arm_add_cfa_adjust_cfa_note (add_insn,
19143 - lazy_store_stack_frame_size,
19144 stack_pointer_rtx,
19145 stack_pointer_rtx);
19146 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19147 }
19148 /* Save VFP callee-saved registers. */
19149 else
19150 {
19151 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19152 (max_fp_regno - D7_VFP_REGNUM) / 2);
19153 /* Disable frame debug info in push because it needs to be
19154 disabled for vpop (see below). */
19155 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19156 }
19157 }
19158
19159 /* Clear caller-saved registers that leak before doing a non-secure
19160 call. */
19161 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19162 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19163 NUM_ARG_REGS, ip_reg, clearing_reg);
19164
19165 seq = get_insns ();
19166 end_sequence ();
19167 emit_insn_before (seq, insn);
19168
19169 if (TARGET_HAVE_FPCXT_CMSE)
19170 {
19171 rtx_insn *last, *pop_insn, *after = insn;
19172
19173 start_sequence ();
19174
19175 /* Lazy load multiple done as part of libcall in Armv8-M. */
19176 if (lazy_fpclear)
19177 {
19178 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19179 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19180 rtx_insn *add_insn =
19181 emit_insn (gen_addsi3 (stack_pointer_rtx,
19182 stack_pointer_rtx, imm));
19183 if (!frame_pointer_needed)
19184 arm_add_cfa_adjust_cfa_note (add_insn,
19185 lazy_store_stack_frame_size,
19186 stack_pointer_rtx,
19187 stack_pointer_rtx);
19188 }
19189 /* Restore VFP callee-saved registers. */
19190 else
19191 {
19192 int nb_callee_saved_vfp_regs =
19193 (max_fp_regno - D7_VFP_REGNUM) / 2;
19194 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19195 nb_callee_saved_vfp_regs,
19196 stack_pointer_rtx);
19197 /* Disable frame debug info in vpop because the SP adjustment
19198 is made using a CFA adjustment note while CFA used is
19199 sometimes R7. This then causes an assert failure in the
19200 CFI note creation code. */
19201 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19202 }
19203
19204 arm_emit_multi_reg_pop (callee_saved_mask);
19205 pop_insn = get_last_insn ();
19206
19207 /* Disable frame debug info in pop because the notes reset the state
19208 of popped registers to what it was at the beginning of the
19209 function, before the prologue. This leads to incorrect state
19210 when doing the pop after the nonsecure call for registers that
19211 are pushed both in the prologue and before the nonsecure call.
19212
19213 It also occasionally triggers an assert failure in CFI note
19214 creation code when there are two codepaths to the epilogue,
19215 one of which does not go through the nonsecure call.
19216 Obviously this means that debugging between the push and pop is
19217 not reliable. */
19218 RTX_FRAME_RELATED_P (pop_insn) = 0;
19219
19220 seq = get_insns ();
19221 last = get_last_insn ();
19222 end_sequence ();
19223
19224 emit_insn_after (seq, after);
19225
19226 /* Skip pop we have just inserted after nonsecure call, we know
19227 it does not contain a nonsecure call. */
19228 insn = last;
19229 }
19230 }
19231 }
19232 }
19233
19234 /* Rewrite a move insn into a subtract of 0 if the condition codes will
19235 be useful in the next conditional jump insn. */
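/* For example (Thumb-1):

        movs    r1, r0                 subs    r1, r0, #0
        ...                   -->      ...
        cmp     r1, #0                 bne     .L1
        bne     .L1

   SUBS with #0 copies the value while also setting the N and Z flags,
   so the explicit compare against zero can typically be omitted when
   the branch is output.  */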
19236
19237 static void
19238 thumb1_reorg (void)
19239 {
19240 basic_block bb;
19241
19242 FOR_EACH_BB_FN (bb, cfun)
19243 {
19244 rtx dest, src;
19245 rtx cmp, op0, op1, set = NULL;
19246 rtx_insn *prev, *insn = BB_END (bb);
19247 bool insn_clobbered = false;
19248
19249 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19250 insn = PREV_INSN (insn);
19251
19252 /* Find the last cbranchsi4_insn in basic block BB. */
19253 if (insn == BB_HEAD (bb)
19254 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19255 continue;
19256
19257 /* Get the register with which we are comparing. */
19258 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19259 op0 = XEXP (cmp, 0);
19260 op1 = XEXP (cmp, 1);
19261
19262 /* Check that comparison is against ZERO. */
19263 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19264 continue;
19265
19266 /* Find the first flag setting insn before INSN in basic block BB. */
19267 gcc_assert (insn != BB_HEAD (bb));
19268 for (prev = PREV_INSN (insn);
19269 (!insn_clobbered
19270 && prev != BB_HEAD (bb)
19271 && (NOTE_P (prev)
19272 || DEBUG_INSN_P (prev)
19273 || ((set = single_set (prev)) != NULL
19274 && get_attr_conds (prev) == CONDS_NOCOND)));
19275 prev = PREV_INSN (prev))
19276 {
19277 if (reg_set_p (op0, prev))
19278 insn_clobbered = true;
19279 }
19280
19281 /* Skip if op0 is clobbered by insn other than prev. */
19282 if (insn_clobbered)
19283 continue;
19284
19285 if (!set)
19286 continue;
19287
19288 dest = SET_DEST (set);
19289 src = SET_SRC (set);
19290 if (!low_register_operand (dest, SImode)
19291 || !low_register_operand (src, SImode))
19292 continue;
19293
19294 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19295 in INSN. Both src and dest of the move insn are checked. */
19296 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19297 {
19298 dest = copy_rtx (dest);
19299 src = copy_rtx (src);
19300 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19301 PATTERN (prev) = gen_rtx_SET (dest, src);
19302 INSN_CODE (prev) = -1;
19303 /* Set test register in INSN to dest. */
19304 XEXP (cmp, 0) = copy_rtx (dest);
19305 INSN_CODE (insn) = -1;
19306 }
19307 }
19308 }
19309
19310 /* Convert instructions to their cc-clobbering variant if possible, since
19311 that allows us to use smaller encodings. */
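/* For instance, a 32-bit Thumb-2 "add r0, r0, r1" can become the 16-bit
   flag-setting "adds r0, r0, r1" once the condition codes are known to be
   dead at that point.  */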
19312
19313 static void
19314 thumb2_reorg (void)
19315 {
19316 basic_block bb;
19317 regset_head live;
19318
19319 INIT_REG_SET (&live);
19320
19321 /* We are freeing block_for_insn in the toplev to keep compatibility
19322 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19323 compute_bb_for_insn ();
19324 df_analyze ();
19325
19326 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19327
19328 FOR_EACH_BB_FN (bb, cfun)
19329 {
19330 if ((current_tune->disparage_flag_setting_t16_encodings
19331 == tune_params::DISPARAGE_FLAGS_ALL)
19332 && optimize_bb_for_speed_p (bb))
19333 continue;
19334
19335 rtx_insn *insn;
19336 Convert_Action action = SKIP;
19337 Convert_Action action_for_partial_flag_setting
19338 = ((current_tune->disparage_flag_setting_t16_encodings
19339 != tune_params::DISPARAGE_FLAGS_NEITHER)
19340 && optimize_bb_for_speed_p (bb))
19341 ? SKIP : CONV;
19342
19343 COPY_REG_SET (&live, DF_LR_OUT (bb));
19344 df_simulate_initialize_backwards (bb, &live);
19345 FOR_BB_INSNS_REVERSE (bb, insn)
19346 {
19347 if (NONJUMP_INSN_P (insn)
19348 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19349 && GET_CODE (PATTERN (insn)) == SET)
19350 {
19351 action = SKIP;
19352 rtx pat = PATTERN (insn);
19353 rtx dst = XEXP (pat, 0);
19354 rtx src = XEXP (pat, 1);
19355 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19356
19357 if (UNARY_P (src) || BINARY_P (src))
19358 op0 = XEXP (src, 0);
19359
19360 if (BINARY_P (src))
19361 op1 = XEXP (src, 1);
19362
19363 if (low_register_operand (dst, SImode))
19364 {
19365 switch (GET_CODE (src))
19366 {
19367 case PLUS:
19368 /* Adding two registers and storing the result
19369 in the first source is already a 16-bit
19370 operation. */
19371 if (rtx_equal_p (dst, op0)
19372 && register_operand (op1, SImode))
19373 break;
19374
19375 if (low_register_operand (op0, SImode))
19376 {
19377 /* ADDS <Rd>,<Rn>,<Rm> */
19378 if (low_register_operand (op1, SImode))
19379 action = CONV;
19380 /* ADDS <Rdn>,#<imm8> */
19381 /* SUBS <Rdn>,#<imm8> */
19382 else if (rtx_equal_p (dst, op0)
19383 && CONST_INT_P (op1)
19384 && IN_RANGE (INTVAL (op1), -255, 255))
19385 action = CONV;
19386 /* ADDS <Rd>,<Rn>,#<imm3> */
19387 /* SUBS <Rd>,<Rn>,#<imm3> */
19388 else if (CONST_INT_P (op1)
19389 && IN_RANGE (INTVAL (op1), -7, 7))
19390 action = CONV;
19391 }
19392 /* ADCS <Rd>, <Rn> */
19393 else if (GET_CODE (XEXP (src, 0)) == PLUS
19394 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19395 && low_register_operand (XEXP (XEXP (src, 0), 1),
19396 SImode)
19397 && COMPARISON_P (op1)
19398 && cc_register (XEXP (op1, 0), VOIDmode)
19399 && maybe_get_arm_condition_code (op1) == ARM_CS
19400 && XEXP (op1, 1) == const0_rtx)
19401 action = CONV;
19402 break;
19403
19404 case MINUS:
19405 /* RSBS <Rd>,<Rn>,#0
19406 Not handled here: see NEG below. */
19407 /* SUBS <Rd>,<Rn>,#<imm3>
19408 SUBS <Rdn>,#<imm8>
19409 Not handled here: see PLUS above. */
19410 /* SUBS <Rd>,<Rn>,<Rm> */
19411 if (low_register_operand (op0, SImode)
19412 && low_register_operand (op1, SImode))
19413 action = CONV;
19414 break;
19415
19416 case MULT:
19417 /* MULS <Rdm>,<Rn>,<Rdm>
19418 As an exception to the rule, this is only used
19419 when optimizing for size since MULS is slow on all
19420 known implementations. We do not even want to use
19421 MULS in cold code, if optimizing for speed, so we
19422 test the global flag here. */
19423 if (!optimize_size)
19424 break;
19425 /* Fall through. */
19426 case AND:
19427 case IOR:
19428 case XOR:
19429 /* ANDS <Rdn>,<Rm> */
19430 if (rtx_equal_p (dst, op0)
19431 && low_register_operand (op1, SImode))
19432 action = action_for_partial_flag_setting;
19433 else if (rtx_equal_p (dst, op1)
19434 && low_register_operand (op0, SImode))
19435 action = action_for_partial_flag_setting == SKIP
19436 ? SKIP : SWAP_CONV;
19437 break;
19438
19439 case ASHIFTRT:
19440 case ASHIFT:
19441 case LSHIFTRT:
19442 /* ASRS <Rdn>,<Rm> */
19443 /* LSRS <Rdn>,<Rm> */
19444 /* LSLS <Rdn>,<Rm> */
19445 if (rtx_equal_p (dst, op0)
19446 && low_register_operand (op1, SImode))
19447 action = action_for_partial_flag_setting;
19448 /* ASRS <Rd>,<Rm>,#<imm5> */
19449 /* LSRS <Rd>,<Rm>,#<imm5> */
19450 /* LSLS <Rd>,<Rm>,#<imm5> */
19451 else if (low_register_operand (op0, SImode)
19452 && CONST_INT_P (op1)
19453 && IN_RANGE (INTVAL (op1), 0, 31))
19454 action = action_for_partial_flag_setting;
19455 break;
19456
19457 case ROTATERT:
19458 /* RORS <Rdn>,<Rm> */
19459 if (rtx_equal_p (dst, op0)
19460 && low_register_operand (op1, SImode))
19461 action = action_for_partial_flag_setting;
19462 break;
19463
19464 case NOT:
19465 /* MVNS <Rd>,<Rm> */
19466 if (low_register_operand (op0, SImode))
19467 action = action_for_partial_flag_setting;
19468 break;
19469
19470 case NEG:
19471 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19472 if (low_register_operand (op0, SImode))
19473 action = CONV;
19474 break;
19475
19476 case CONST_INT:
19477 /* MOVS <Rd>,#<imm8> */
19478 if (CONST_INT_P (src)
19479 && IN_RANGE (INTVAL (src), 0, 255))
19480 action = action_for_partial_flag_setting;
19481 break;
19482
19483 case REG:
19484 /* MOVS and MOV<c> with registers have different
19485 encodings, so are not relevant here. */
19486 break;
19487
19488 default:
19489 break;
19490 }
19491 }
19492
19493 if (action != SKIP)
19494 {
19495 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19496 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19497 rtvec vec;
19498
19499 if (action == SWAP_CONV)
19500 {
19501 src = copy_rtx (src);
19502 XEXP (src, 0) = op1;
19503 XEXP (src, 1) = op0;
19504 pat = gen_rtx_SET (dst, src);
19505 vec = gen_rtvec (2, pat, clobber);
19506 }
19507 else /* action == CONV */
19508 vec = gen_rtvec (2, pat, clobber);
19509
19510 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19511 INSN_CODE (insn) = -1;
19512 }
19513 }
19514
19515 if (NONDEBUG_INSN_P (insn))
19516 df_simulate_one_insn_backwards (bb, insn, &live);
19517 }
19518 }
19519
19520 CLEAR_REG_SET (&live);
19521 }
19522
19523 /* GCC puts the pool in the wrong place for ARM, since we can only
19524 load addresses a limited distance around the pc. We do some
19525 special munging to move the constant pool values to the correct
19526 point in the code. */
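/* For example, an ARM-state literal load such as

        ldr     r0, .Lpool_entry

   only encodes a 12-bit PC-relative offset (roughly +/- 4KB), so each
   pool entry must be placed within that distance of every insn that
   references it -- typically just after an existing barrier, or behind
   a branch inserted specifically to jump around the pool.  */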
19527 static void
19528 arm_reorg (void)
19529 {
19530 rtx_insn *insn;
19531 HOST_WIDE_INT address = 0;
19532 Mfix * fix;
19533
19534 if (use_cmse)
19535 cmse_nonsecure_call_inline_register_clear ();
19536
19537 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19538 if (cfun->is_thunk)
19539 ;
19540 else if (TARGET_THUMB1)
19541 thumb1_reorg ();
19542 else if (TARGET_THUMB2)
19543 thumb2_reorg ();
19544
19545 /* Ensure all insns that must be split have been split at this point.
19546 Otherwise, the pool placement code below may compute incorrect
19547 insn lengths. Note that when optimizing, all insns have already
19548 been split at this point. */
19549 if (!optimize)
19550 split_all_insns_noflow ();
19551
19552 /* Make sure we do not attempt to create a literal pool even though it should
19553 no longer be necessary to create any. */
19554 if (arm_disable_literal_pool)
19555 return;
19556
19557 minipool_fix_head = minipool_fix_tail = NULL;
19558
19559 /* The first insn must always be a note, or the code below won't
19560 scan it properly. */
19561 insn = get_insns ();
19562 gcc_assert (NOTE_P (insn));
19563 minipool_pad = 0;
19564
19565 /* Scan all the insns and record the operands that will need fixing. */
19566 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19567 {
19568 if (BARRIER_P (insn))
19569 push_minipool_barrier (insn, address);
19570 else if (INSN_P (insn))
19571 {
19572 rtx_jump_table_data *table;
19573
19574 note_invalid_constants (insn, address, true);
19575 address += get_attr_length (insn);
19576
19577 /* If the insn is a vector jump, add the size of the table
19578 and skip the table. */
19579 if (tablejump_p (insn, NULL, &table))
19580 {
19581 address += get_jump_table_size (table);
19582 insn = table;
19583 }
19584 }
19585 else if (LABEL_P (insn))
19586 /* Add the worst-case padding due to alignment. We don't add
19587 the _current_ padding because the minipool insertions
19588 themselves might change it. */
19589 address += get_label_padding (insn);
19590 }
19591
19592 fix = minipool_fix_head;
19593
19594 /* Now scan the fixups and perform the required changes. */
19595 while (fix)
19596 {
19597 Mfix * ftmp;
19598 Mfix * fdel;
19599 Mfix * last_added_fix;
19600 Mfix * last_barrier = NULL;
19601 Mfix * this_fix;
19602
19603 /* Skip any further barriers before the next fix. */
19604 while (fix && BARRIER_P (fix->insn))
19605 fix = fix->next;
19606
19607 /* No more fixes. */
19608 if (fix == NULL)
19609 break;
19610
19611 last_added_fix = NULL;
19612
19613 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19614 {
19615 if (BARRIER_P (ftmp->insn))
19616 {
19617 if (ftmp->address >= minipool_vector_head->max_address)
19618 break;
19619
19620 last_barrier = ftmp;
19621 }
19622 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19623 break;
19624
19625 last_added_fix = ftmp; /* Keep track of the last fix added. */
19626 }
19627
19628 /* If we found a barrier, drop back to that; any fixes that we
19629 could have reached but come after the barrier will now go in
19630 the next mini-pool. */
19631 if (last_barrier != NULL)
19632 {
19633 /* Reduce the refcount for those fixes that won't go into this
19634 pool after all. */
19635 for (fdel = last_barrier->next;
19636 fdel && fdel != ftmp;
19637 fdel = fdel->next)
19638 {
19639 fdel->minipool->refcount--;
19640 fdel->minipool = NULL;
19641 }
19642
19643 ftmp = last_barrier;
19644 }
19645 else
19646 {
19647 /* ftmp is the first fix that we can't fit into this pool and
19648 there are no natural barriers that we could use. Insert a
19649 new barrier in the code somewhere between the previous
19650 fix and this one, and arrange to jump around it. */
19651 HOST_WIDE_INT max_address;
19652
19653 /* The last item on the list of fixes must be a barrier, so
19654 we can never run off the end of the list of fixes without
19655 last_barrier being set. */
19656 gcc_assert (ftmp);
19657
19658 max_address = minipool_vector_head->max_address;
19659 /* Check that there isn't another fix that is in range that
19660 we couldn't fit into this pool because the pool was
19661 already too large: we need to put the pool before such an
19662 instruction. The pool itself may come just after the
19663 fix because create_fix_barrier also allows space for a
19664 jump instruction. */
19665 if (ftmp->address < max_address)
19666 max_address = ftmp->address + 1;
19667
19668 last_barrier = create_fix_barrier (last_added_fix, max_address);
19669 }
19670
19671 assign_minipool_offsets (last_barrier);
19672
19673 while (ftmp)
19674 {
19675 if (!BARRIER_P (ftmp->insn)
19676 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19677 == NULL))
19678 break;
19679
19680 ftmp = ftmp->next;
19681 }
19682
19683 /* Scan over the fixes we have identified for this pool, fixing them
19684 up and adding the constants to the pool itself. */
19685 for (this_fix = fix; this_fix && ftmp != this_fix;
19686 this_fix = this_fix->next)
19687 if (!BARRIER_P (this_fix->insn))
19688 {
19689 rtx addr
19690 = plus_constant (Pmode,
19691 gen_rtx_LABEL_REF (VOIDmode,
19692 minipool_vector_label),
19693 this_fix->minipool->offset);
19694 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19695 }
19696
19697 dump_minipool (last_barrier->insn);
19698 fix = ftmp;
19699 }
19700
19701 /* From now on we must synthesize any constants that we can't handle
19702 directly. This can happen if the RTL gets split during final
19703 instruction generation. */
19704 cfun->machine->after_arm_reorg = 1;
19705
19706 /* Free the minipool memory. */
19707 obstack_free (&minipool_obstack, minipool_startobj);
19708 }
19709 \f
19710 /* Routines to output assembly language. */
19711
19712 /* Return string representation of passed in real value. */
19713 static const char *
19714 fp_const_from_val (REAL_VALUE_TYPE *r)
19715 {
19716 if (!fp_consts_inited)
19717 init_fp_table ();
19718
19719 gcc_assert (real_equal (r, &value_fp0));
19720 return "0";
19721 }
19722
19723 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19724 OPERANDS[1] is the base register, RETURN_PC is true iff the return
19725 insn is in the list, and UPDATE is true iff the list contains an
19726 explicit update of the base register. */
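/* For example, restoring r4, r5 and the return address with a base
   update on SP is emitted as

        pop     {r4, r5, pc}

   while the same restore through another base register without
   writeback uses the LDM form, e.g.

        ldm     r3, {r4, r5, pc}  */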
19727 void
19728 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19729 bool update)
19730 {
19731 int i;
19732 char pattern[100];
19733 int offset;
19734 const char *conditional;
19735 int num_saves = XVECLEN (operands[0], 0);
19736 unsigned int regno;
19737 unsigned int regno_base = REGNO (operands[1]);
19738 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19739
19740 offset = 0;
19741 offset += update ? 1 : 0;
19742 offset += return_pc ? 1 : 0;
19743
19744 /* Is the base register in the list? */
19745 for (i = offset; i < num_saves; i++)
19746 {
19747 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19748 /* If SP is in the list, then the base register must be SP. */
19749 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19750 /* If base register is in the list, there must be no explicit update. */
19751 if (regno == regno_base)
19752 gcc_assert (!update);
19753 }
19754
19755 conditional = reverse ? "%?%D0" : "%?%d0";
19756 /* Can't use POP if returning from an interrupt. */
19757 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19758 sprintf (pattern, "pop%s\t{", conditional);
19759 else
19760 {
19761 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19762 It's just a convention; their semantics are identical. */
19763 if (regno_base == SP_REGNUM)
19764 sprintf (pattern, "ldmfd%s\t", conditional);
19765 else if (update)
19766 sprintf (pattern, "ldmia%s\t", conditional);
19767 else
19768 sprintf (pattern, "ldm%s\t", conditional);
19769
19770 strcat (pattern, reg_names[regno_base]);
19771 if (update)
19772 strcat (pattern, "!, {");
19773 else
19774 strcat (pattern, ", {");
19775 }
19776
19777 /* Output the first destination register. */
19778 strcat (pattern,
19779 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19780
19781 /* Output the rest of the destination registers. */
19782 for (i = offset + 1; i < num_saves; i++)
19783 {
19784 strcat (pattern, ", ");
19785 strcat (pattern,
19786 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19787 }
19788
19789 strcat (pattern, "}");
19790
19791 if (interrupt_p && return_pc)
19792 strcat (pattern, "^");
19793
19794 output_asm_insn (pattern, &cond);
19795 }
19796
19797
19798 /* Output the assembly for a store multiple. */
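/* For example, storing three D registers starting at d8 is emitted as

        vpush.64        {d8, d9, d10}

   when the address register is SP, and as

        vstmdb.64       r4!, {d8, d9, d10}

   (with r4 standing in for the actual base register) otherwise.  */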
19799
19800 const char *
19801 vfp_output_vstmd (rtx * operands)
19802 {
19803 char pattern[100];
19804 int p;
19805 int base;
19806 int i;
19807 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19808 ? XEXP (operands[0], 0)
19809 : XEXP (XEXP (operands[0], 0), 0);
19810 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19811
19812 if (push_p)
19813 strcpy (pattern, "vpush%?.64\t{%P1");
19814 else
19815 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19816
19817 p = strlen (pattern);
19818
19819 gcc_assert (REG_P (operands[1]));
19820
19821 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19822 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19823 {
19824 p += sprintf (&pattern[p], ", d%d", base + i);
19825 }
19826 strcpy (&pattern[p], "}");
19827
19828 output_asm_insn (pattern, operands);
19829 return "";
19830 }
19831
19832
19833 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19834 number of bytes pushed. */
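/* For example, a call with BASE_REG corresponding to d8 and COUNT == 3
   emits one store-multiple that drops SP by 24 bytes, records the three
   DFmode slot stores in the unwind information, and returns 24.  On
   cores without arm_arch6 the VFPr1 workaround below silently widens a
   request for exactly two pairs into three.  */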
19835
19836 static int
19837 vfp_emit_fstmd (int base_reg, int count)
19838 {
19839 rtx par;
19840 rtx dwarf;
19841 rtx tmp, reg;
19842 int i;
19843
19844 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19845 register pairs are stored by a store multiple insn. We avoid this
19846 by pushing an extra pair. */
19847 if (count == 2 && !arm_arch6)
19848 {
19849 if (base_reg == LAST_VFP_REGNUM - 3)
19850 base_reg -= 2;
19851 count++;
19852 }
19853
19854 /* FSTMD may not store more than 16 doubleword registers at once. Split
19855 larger stores into multiple parts (up to a maximum of two, in
19856 practice). */
19857 if (count > 16)
19858 {
19859 int saved;
19860 /* NOTE: base_reg is an internal register number, so each D register
19861 counts as 2. */
19862 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19863 saved += vfp_emit_fstmd (base_reg, 16);
19864 return saved;
19865 }
19866
19867 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19868 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19869
19870 reg = gen_rtx_REG (DFmode, base_reg);
19871 base_reg += 2;
19872
19873 XVECEXP (par, 0, 0)
19874 = gen_rtx_SET (gen_frame_mem
19875 (BLKmode,
19876 gen_rtx_PRE_MODIFY (Pmode,
19877 stack_pointer_rtx,
19878 plus_constant
19879 (Pmode, stack_pointer_rtx,
19880 - (count * 8)))
19881 ),
19882 gen_rtx_UNSPEC (BLKmode,
19883 gen_rtvec (1, reg),
19884 UNSPEC_PUSH_MULT));
19885
19886 tmp = gen_rtx_SET (stack_pointer_rtx,
19887 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19888 RTX_FRAME_RELATED_P (tmp) = 1;
19889 XVECEXP (dwarf, 0, 0) = tmp;
19890
19891 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19892 RTX_FRAME_RELATED_P (tmp) = 1;
19893 XVECEXP (dwarf, 0, 1) = tmp;
19894
19895 for (i = 1; i < count; i++)
19896 {
19897 reg = gen_rtx_REG (DFmode, base_reg);
19898 base_reg += 2;
19899 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19900
19901 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19902 plus_constant (Pmode,
19903 stack_pointer_rtx,
19904 i * 8)),
19905 reg);
19906 RTX_FRAME_RELATED_P (tmp) = 1;
19907 XVECEXP (dwarf, 0, i + 1) = tmp;
19908 }
19909
19910 par = emit_insn (par);
19911 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19912 RTX_FRAME_RELATED_P (par) = 1;
19913
19914 return count * 8;
19915 }
19916
19917 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
19918 has the cmse_nonsecure_call attribute; returns false otherwise. */
19919
19920 bool
19921 detect_cmse_nonsecure_call (tree addr)
19922 {
19923 if (!addr)
19924 return FALSE;
19925
19926 tree fntype = TREE_TYPE (addr);
19927 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19928 TYPE_ATTRIBUTES (fntype)))
19929 return TRUE;
19930 return FALSE;
19931 }
19932
19933
19934 /* Emit a call instruction with pattern PAT. ADDR is the address of
19935 the call target. */
19936
19937 void
19938 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19939 {
19940 rtx insn;
19941
19942 insn = emit_call_insn (pat);
19943
19944 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19945 If the call might use such an entry, add a use of the PIC register
19946 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19947 if (TARGET_VXWORKS_RTP
19948 && flag_pic
19949 && !sibcall
19950 && SYMBOL_REF_P (addr)
19951 && (SYMBOL_REF_DECL (addr)
19952 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19953 : !SYMBOL_REF_LOCAL_P (addr)))
19954 {
19955 require_pic_register (NULL_RTX, false /*compute_now*/);
19956 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19957 }
19958
19959 if (TARGET_FDPIC)
19960 {
19961 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19962 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19963 }
19964
19965 if (TARGET_AAPCS_BASED)
19966 {
19967 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19968 linker. We need to add an IP clobber to allow setting
19969 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19970 is not needed since it's a fixed register. */
19971 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19972 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19973 }
19974 }
19975
19976 /* Output a 'call' insn. */
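/* The emitted sequence saves the return address by hand and then
   branches; e.g. for a call through r3 on an interworking-capable
   target:

        mov     lr, pc
        bx      r3

   with "mov pc, r3" replacing the BX on older, non-interworking
   targets.  */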
19977 const char *
19978 output_call (rtx *operands)
19979 {
19980 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19981
19982 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19983 if (REGNO (operands[0]) == LR_REGNUM)
19984 {
19985 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19986 output_asm_insn ("mov%?\t%0, %|lr", operands);
19987 }
19988
19989 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19990
19991 if (TARGET_INTERWORK || arm_arch4t)
19992 output_asm_insn ("bx%?\t%0", operands);
19993 else
19994 output_asm_insn ("mov%?\t%|pc, %0", operands);
19995
19996 return "";
19997 }
19998
19999 /* Output a move from ARM registers to ARM registers of a long double.
20000 OPERANDS[0] is the destination.
20001 OPERANDS[1] is the source. */
20002 const char *
20003 output_mov_long_double_arm_from_arm (rtx *operands)
20004 {
20005 /* We have to be careful here because the two might overlap. */
20006 int dest_start = REGNO (operands[0]);
20007 int src_start = REGNO (operands[1]);
20008 rtx ops[2];
20009 int i;
20010
20011 if (dest_start < src_start)
20012 {
20013 for (i = 0; i < 3; i++)
20014 {
20015 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20016 ops[1] = gen_rtx_REG (SImode, src_start + i);
20017 output_asm_insn ("mov%?\t%0, %1", ops);
20018 }
20019 }
20020 else
20021 {
20022 for (i = 2; i >= 0; i--)
20023 {
20024 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20025 ops[1] = gen_rtx_REG (SImode, src_start + i);
20026 output_asm_insn ("mov%?\t%0, %1", ops);
20027 }
20028 }
20029
20030 return "";
20031 }
20032
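/* Emit the instruction pair used to load a 32-bit value into DEST: for
   a constant, set the low halfword and then, if the high halfword is
   non-zero, insert it with a ZERO_EXTRACT (the movw/movt idiom); for a
   symbolic SRC, emit a HIGH/LO_SUM pair.  When two insns are needed, a
   REG_EQUAL note for the full value is attached to the last one.  */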
20033 void
20034 arm_emit_movpair (rtx dest, rtx src)
20035 {
20036 /* If the src is an immediate, simplify it. */
20037 if (CONST_INT_P (src))
20038 {
20039 HOST_WIDE_INT val = INTVAL (src);
20040 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
20041 if ((val >> 16) & 0x0000ffff)
20042 {
20043 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
20044 GEN_INT (16)),
20045 GEN_INT ((val >> 16) & 0x0000ffff));
20046 rtx_insn *insn = get_last_insn ();
20047 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20048 }
20049 return;
20050 }
20051 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
20052 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
20053 rtx_insn *insn = get_last_insn ();
20054 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20055 }
20056
20057 /* Output a move between double words. It must be REG<-MEM
20058 or MEM<-REG. */
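/* For example, a DImode load from a plain register address becomes

        ldrd    r0, [r2]

   when LDRD is usable (TARGET_LDRD and, in ARM state, an even first
   destination register), and falls back to

        ldmia   r2, {r0, r1}

   otherwise.  */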
20059 const char *
20060 output_move_double (rtx *operands, bool emit, int *count)
20061 {
20062 enum rtx_code code0 = GET_CODE (operands[0]);
20063 enum rtx_code code1 = GET_CODE (operands[1]);
20064 rtx otherops[3];
20065 if (count)
20066 *count = 1;
20067
20068 /* The only case when this might happen is when
20069 you are looking at the length of a DImode instruction
20070 that has an invalid constant in it. */
20071 if (code0 == REG && code1 != MEM)
20072 {
20073 gcc_assert (!emit);
20074 *count = 2;
20075 return "";
20076 }
20077
20078 if (code0 == REG)
20079 {
20080 unsigned int reg0 = REGNO (operands[0]);
20081 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20082
20083 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20084
20085 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20086
20087 switch (GET_CODE (XEXP (operands[1], 0)))
20088 {
20089 case REG:
20090
20091 if (emit)
20092 {
20093 if (can_ldrd
20094 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
20095 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20096 else
20097 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20098 }
20099 break;
20100
20101 case PRE_INC:
20102 gcc_assert (can_ldrd);
20103 if (emit)
20104 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20105 break;
20106
20107 case PRE_DEC:
20108 if (emit)
20109 {
20110 if (can_ldrd)
20111 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20112 else
20113 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20114 }
20115 break;
20116
20117 case POST_INC:
20118 if (emit)
20119 {
20120 if (can_ldrd)
20121 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20122 else
20123 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20124 }
20125 break;
20126
20127 case POST_DEC:
20128 gcc_assert (can_ldrd);
20129 if (emit)
20130 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20131 break;
20132
20133 case PRE_MODIFY:
20134 case POST_MODIFY:
20135 /* Autoincrement addressing modes should never have overlapping
20136 base and destination registers, and overlapping index registers
20137 are already prohibited, so this doesn't need to worry about
20138 fix_cm3_ldrd. */
20139 otherops[0] = operands[0];
20140 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20141 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20142
20143 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20144 {
20145 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20146 {
20147 /* Registers overlap so split out the increment. */
20148 if (emit)
20149 {
20150 gcc_assert (can_ldrd);
20151 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20152 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20153 }
20154 if (count)
20155 *count = 2;
20156 }
20157 else
20158 {
20159 /* Use a single insn if we can.
20160 FIXME: IWMMXT allows offsets larger than ldrd can
20161 handle, fix these up with a pair of ldr. */
20162 if (can_ldrd
20163 && (TARGET_THUMB2
20164 || !CONST_INT_P (otherops[2])
20165 || (INTVAL (otherops[2]) > -256
20166 && INTVAL (otherops[2]) < 256)))
20167 {
20168 if (emit)
20169 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20170 }
20171 else
20172 {
20173 if (emit)
20174 {
20175 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20176 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20177 }
20178 if (count)
20179 *count = 2;
20180
20181 }
20182 }
20183 }
20184 else
20185 {
20186 /* Use a single insn if we can.
20187 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20188 fix these up with a pair of ldr. */
20189 if (can_ldrd
20190 && (TARGET_THUMB2
20191 || !CONST_INT_P (otherops[2])
20192 || (INTVAL (otherops[2]) > -256
20193 && INTVAL (otherops[2]) < 256)))
20194 {
20195 if (emit)
20196 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20197 }
20198 else
20199 {
20200 if (emit)
20201 {
20202 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20203 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20204 }
20205 if (count)
20206 *count = 2;
20207 }
20208 }
20209 break;
20210
20211 case LABEL_REF:
20212 case CONST:
20213 /* We might be able to use ldrd %0, %1 here. However the range is
20214 different to ldr/adr, and it is broken on some ARMv7-M
20215 implementations. */
20216 /* Use the second register of the pair to avoid problematic
20217 overlap. */
20218 otherops[1] = operands[1];
20219 if (emit)
20220 output_asm_insn ("adr%?\t%0, %1", otherops);
20221 operands[1] = otherops[0];
20222 if (emit)
20223 {
20224 if (can_ldrd)
20225 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20226 else
20227 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20228 }
20229
20230 if (count)
20231 *count = 2;
20232 break;
20233
20234 /* ??? This needs checking for thumb2. */
20235 default:
20236 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20237 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20238 {
20239 otherops[0] = operands[0];
20240 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20241 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20242
20243 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20244 {
20245 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20246 {
20247 switch ((int) INTVAL (otherops[2]))
20248 {
20249 case -8:
20250 if (emit)
20251 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20252 return "";
20253 case -4:
20254 if (TARGET_THUMB2)
20255 break;
20256 if (emit)
20257 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20258 return "";
20259 case 4:
20260 if (TARGET_THUMB2)
20261 break;
20262 if (emit)
20263 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20264 return "";
20265 }
20266 }
20267 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20268 operands[1] = otherops[0];
20269 if (can_ldrd
20270 && (REG_P (otherops[2])
20271 || TARGET_THUMB2
20272 || (CONST_INT_P (otherops[2])
20273 && INTVAL (otherops[2]) > -256
20274 && INTVAL (otherops[2]) < 256)))
20275 {
20276 if (reg_overlap_mentioned_p (operands[0],
20277 otherops[2]))
20278 {
20279 /* Swap base and index registers over to
20280 avoid a conflict. */
20281 std::swap (otherops[1], otherops[2]);
20282 }
20283 /* If both registers conflict, it will usually
20284 have been fixed by a splitter. */
20285 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20286 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20287 {
20288 if (emit)
20289 {
20290 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20291 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20292 }
20293 if (count)
20294 *count = 2;
20295 }
20296 else
20297 {
20298 otherops[0] = operands[0];
20299 if (emit)
20300 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20301 }
20302 return "";
20303 }
20304
20305 if (CONST_INT_P (otherops[2]))
20306 {
20307 if (emit)
20308 {
20309 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20310 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20311 else
20312 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20313 }
20314 }
20315 else
20316 {
20317 if (emit)
20318 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20319 }
20320 }
20321 else
20322 {
20323 if (emit)
20324 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20325 }
20326
20327 if (count)
20328 *count = 2;
20329
20330 if (can_ldrd)
20331 return "ldrd%?\t%0, [%1]";
20332
20333 return "ldmia%?\t%1, %M0";
20334 }
20335 else
20336 {
20337 otherops[1] = adjust_address (operands[1], SImode, 4);
20338 /* Take care of overlapping base/data reg. */
20339 if (reg_mentioned_p (operands[0], operands[1]))
20340 {
20341 if (emit)
20342 {
20343 output_asm_insn ("ldr%?\t%0, %1", otherops);
20344 output_asm_insn ("ldr%?\t%0, %1", operands);
20345 }
20346 if (count)
20347 *count = 2;
20348
20349 }
20350 else
20351 {
20352 if (emit)
20353 {
20354 output_asm_insn ("ldr%?\t%0, %1", operands);
20355 output_asm_insn ("ldr%?\t%0, %1", otherops);
20356 }
20357 if (count)
20358 *count = 2;
20359 }
20360 }
20361 }
20362 }
20363 else
20364 {
20365 /* Constraints should ensure this. */
20366 gcc_assert (code0 == MEM && code1 == REG);
20367 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20368 || (TARGET_ARM && TARGET_LDRD));
20369
20370 /* For TARGET_ARM the first source register of an STRD
20371 must be even. This is usually the case for double-word
20372 values but user assembly constraints can force an odd
20373 starting register. */
20374 bool allow_strd = TARGET_LDRD
20375 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20376 switch (GET_CODE (XEXP (operands[0], 0)))
20377 {
20378 case REG:
20379 if (emit)
20380 {
20381 if (allow_strd)
20382 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20383 else
20384 output_asm_insn ("stm%?\t%m0, %M1", operands);
20385 }
20386 break;
20387
20388 case PRE_INC:
20389 gcc_assert (allow_strd);
20390 if (emit)
20391 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20392 break;
20393
20394 case PRE_DEC:
20395 if (emit)
20396 {
20397 if (allow_strd)
20398 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20399 else
20400 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20401 }
20402 break;
20403
20404 case POST_INC:
20405 if (emit)
20406 {
20407 if (allow_strd)
20408 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20409 else
20410 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20411 }
20412 break;
20413
20414 case POST_DEC:
20415 gcc_assert (allow_strd);
20416 if (emit)
20417 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20418 break;
20419
20420 case PRE_MODIFY:
20421 case POST_MODIFY:
20422 otherops[0] = operands[1];
20423 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20424 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20425
20426 /* IWMMXT allows offsets larger than strd can handle,
20427 fix these up with a pair of str. */
20428 if (!TARGET_THUMB2
20429 && CONST_INT_P (otherops[2])
20430 && (INTVAL(otherops[2]) <= -256
20431 || INTVAL(otherops[2]) >= 256))
20432 {
20433 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20434 {
20435 if (emit)
20436 {
20437 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20438 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20439 }
20440 if (count)
20441 *count = 2;
20442 }
20443 else
20444 {
20445 if (emit)
20446 {
20447 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20448 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20449 }
20450 if (count)
20451 *count = 2;
20452 }
20453 }
20454 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20455 {
20456 if (emit)
20457 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20458 }
20459 else
20460 {
20461 if (emit)
20462 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20463 }
20464 break;
20465
20466 case PLUS:
20467 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20468 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20469 {
20470 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20471 {
20472 case -8:
20473 if (emit)
20474 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20475 return "";
20476
20477 case -4:
20478 if (TARGET_THUMB2)
20479 break;
20480 if (emit)
20481 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20482 return "";
20483
20484 case 4:
20485 if (TARGET_THUMB2)
20486 break;
20487 if (emit)
20488 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20489 return "";
20490 }
20491 }
20492 if (allow_strd
20493 && (REG_P (otherops[2])
20494 || TARGET_THUMB2
20495 || (CONST_INT_P (otherops[2])
20496 && INTVAL (otherops[2]) > -256
20497 && INTVAL (otherops[2]) < 256)))
20498 {
20499 otherops[0] = operands[1];
20500 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20501 if (emit)
20502 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20503 return "";
20504 }
20505 /* Fall through */
20506
20507 default:
20508 otherops[0] = adjust_address (operands[0], SImode, 4);
20509 otherops[1] = operands[1];
20510 if (emit)
20511 {
20512 output_asm_insn ("str%?\t%1, %0", operands);
20513 output_asm_insn ("str%?\t%H1, %0", otherops);
20514 }
20515 if (count)
20516 *count = 2;
20517 }
20518 }
20519
20520 return "";
20521 }
20522
20523 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20524 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
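/* For example, a quad-word load through a register address is emitted
   as

        ldmia   r2, {r4-r7}

   and the corresponding store as

        stm     r2, {r4-r7}  */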
20525
20526 const char *
20527 output_move_quad (rtx *operands)
20528 {
20529 if (REG_P (operands[0]))
20530 {
20531 /* Load, or reg->reg move. */
20532
20533 if (MEM_P (operands[1]))
20534 {
20535 switch (GET_CODE (XEXP (operands[1], 0)))
20536 {
20537 case REG:
20538 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20539 break;
20540
20541 case LABEL_REF:
20542 case CONST:
20543 output_asm_insn ("adr%?\t%0, %1", operands);
20544 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20545 break;
20546
20547 default:
20548 gcc_unreachable ();
20549 }
20550 }
20551 else
20552 {
20553 rtx ops[2];
20554 int dest, src, i;
20555
20556 gcc_assert (REG_P (operands[1]));
20557
20558 dest = REGNO (operands[0]);
20559 src = REGNO (operands[1]);
20560
20561 /* This seems pretty dumb, but hopefully GCC won't try to do it
20562 very often. */
20563 if (dest < src)
20564 for (i = 0; i < 4; i++)
20565 {
20566 ops[0] = gen_rtx_REG (SImode, dest + i);
20567 ops[1] = gen_rtx_REG (SImode, src + i);
20568 output_asm_insn ("mov%?\t%0, %1", ops);
20569 }
20570 else
20571 for (i = 3; i >= 0; i--)
20572 {
20573 ops[0] = gen_rtx_REG (SImode, dest + i);
20574 ops[1] = gen_rtx_REG (SImode, src + i);
20575 output_asm_insn ("mov%?\t%0, %1", ops);
20576 }
20577 }
20578 }
20579 else
20580 {
20581 gcc_assert (MEM_P (operands[0]));
20582 gcc_assert (REG_P (operands[1]));
20583 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20584
20585 switch (GET_CODE (XEXP (operands[0], 0)))
20586 {
20587 case REG:
20588 output_asm_insn ("stm%?\t%m0, %M1", operands);
20589 break;
20590
20591 default:
20592 gcc_unreachable ();
20593 }
20594 }
20595
20596 return "";
20597 }
20598
20599 /* Output a VFP load or store instruction. */
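/* For example, a double-precision load from a simple address is printed
   as

        vldr.64 d1, [r0]

   with the ".64"/".32"/".16" size suffix taken from the operand mode;
   pre-decrement and post-increment addresses use the equivalent
   vstmdb / vldmia forms with a single register in the list.  */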
20600
20601 const char *
20602 output_move_vfp (rtx *operands)
20603 {
20604 rtx reg, mem, addr, ops[2];
20605 int load = REG_P (operands[0]);
20606 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20607 int sp = (!TARGET_VFP_FP16INST
20608 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20609 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20610 const char *templ;
20611 char buff[50];
20612 machine_mode mode;
20613
20614 reg = operands[!load];
20615 mem = operands[load];
20616
20617 mode = GET_MODE (reg);
20618
20619 gcc_assert (REG_P (reg));
20620 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20621 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20622 || mode == SFmode
20623 || mode == DFmode
20624 || mode == HImode
20625 || mode == SImode
20626 || mode == DImode
20627 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20628 gcc_assert (MEM_P (mem));
20629
20630 addr = XEXP (mem, 0);
20631
20632 switch (GET_CODE (addr))
20633 {
20634 case PRE_DEC:
20635 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20636 ops[0] = XEXP (addr, 0);
20637 ops[1] = reg;
20638 break;
20639
20640 case POST_INC:
20641 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20642 ops[0] = XEXP (addr, 0);
20643 ops[1] = reg;
20644 break;
20645
20646 default:
20647 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20648 ops[0] = reg;
20649 ops[1] = mem;
20650 break;
20651 }
20652
20653 sprintf (buff, templ,
20654 load ? "ld" : "st",
20655 dp ? "64" : sp ? "32" : "16",
20656 dp ? "P" : "",
20657 integer_p ? "\t%@ int" : "");
20658 output_asm_insn (buff, ops);
20659
20660 return "";
20661 }
20662
20663 /* Output a Neon double-word or quad-word load or store, or a load
20664 or store for larger structure modes.
20665
20666 WARNING: The ordering of elements is weird in big-endian mode,
20667 because the EABI requires that vectors stored in memory appear
20668 as though they were stored by a VSTM instruction.
20669 GCC RTL defines element ordering based on in-memory order.
20670 This can be different from the architectural ordering of elements
20671 within a NEON register. The intrinsics defined in arm_neon.h use the
20672 NEON register element ordering, not the GCC RTL element ordering.
20673
20674 For example, the in-memory ordering of a big-endian quadword
20675 vector with 16-bit elements when stored from register pair {d0,d1}
20676 will be (lowest address first, d0[N] is NEON register element N):
20677
20678 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20679
20680 When necessary, quadword registers (dN, dN+1) are moved to ARM
20681 registers from rN in the order:
20682
20683 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20684
20685 So that STM/LDM can be used on vectors in ARM registers, and the
20686 same memory layout will result as if VSTM/VLDM were used.
20687
20688 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20689 possible, which allows use of appropriate alignment tags.
20690 Note that the choice of "64" is independent of the actual vector
20691 element size; this size simply ensures that the behavior is
20692 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20693
20694 Due to limitations of those instructions, use of VST1.64/VLD1.64
20695 is not possible if:
20696 - the address contains PRE_DEC, or
20697 - the mode refers to more than 4 double-word registers
20698
20699 In those cases, it would be possible to replace VSTM/VLDM by a
20700 sequence of instructions; this is not currently implemented since
20701 this is not certain to actually improve performance. */
20702
20703 const char *
20704 output_move_neon (rtx *operands)
20705 {
20706 rtx reg, mem, addr, ops[2];
20707 int regno, nregs, load = REG_P (operands[0]);
20708 const char *templ;
20709 char buff[50];
20710 machine_mode mode;
20711
20712 reg = operands[!load];
20713 mem = operands[load];
20714
20715 mode = GET_MODE (reg);
20716
20717 gcc_assert (REG_P (reg));
20718 regno = REGNO (reg);
20719 nregs = REG_NREGS (reg) / 2;
20720 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20721 || NEON_REGNO_OK_FOR_QUAD (regno));
20722 gcc_assert (VALID_NEON_DREG_MODE (mode)
20723 || VALID_NEON_QREG_MODE (mode)
20724 || VALID_NEON_STRUCT_MODE (mode));
20725 gcc_assert (MEM_P (mem));
20726
20727 addr = XEXP (mem, 0);
20728
20729 /* Strip off const from addresses like (const (plus (...))). */
20730 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20731 addr = XEXP (addr, 0);
20732
20733 switch (GET_CODE (addr))
20734 {
20735 case POST_INC:
20736 /* We have to use vldm / vstm for too-large modes. */
20737 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20738 {
20739 templ = "v%smia%%?\t%%0!, %%h1";
20740 ops[0] = XEXP (addr, 0);
20741 }
20742 else
20743 {
20744 templ = "v%s1.64\t%%h1, %%A0";
20745 ops[0] = mem;
20746 }
20747 ops[1] = reg;
20748 break;
20749
20750 case PRE_DEC:
20751 /* We have to use vldm / vstm in this case, since there is no
20752 pre-decrement form of the vld1 / vst1 instructions. */
20753 templ = "v%smdb%%?\t%%0!, %%h1";
20754 ops[0] = XEXP (addr, 0);
20755 ops[1] = reg;
20756 break;
20757
20758 case POST_MODIFY:
20759 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20760 gcc_unreachable ();
20761
20762 case REG:
20763 /* We have to use vldm / vstm for too-large modes. */
20764 if (nregs > 1)
20765 {
20766 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20767 templ = "v%smia%%?\t%%m0, %%h1";
20768 else
20769 templ = "v%s1.64\t%%h1, %%A0";
20770
20771 ops[0] = mem;
20772 ops[1] = reg;
20773 break;
20774 }
20775 /* Fall through. */
20776 case PLUS:
20777 if (GET_CODE (addr) == PLUS)
20778 addr = XEXP (addr, 0);
20779 /* Fall through. */
20780 case LABEL_REF:
20781 {
20782 int i;
20783 int overlap = -1;
20784 for (i = 0; i < nregs; i++)
20785 {
20786 /* We're only using DImode here because it's a convenient
20787 size. */
20788 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20789 ops[1] = adjust_address (mem, DImode, 8 * i);
20790 if (reg_overlap_mentioned_p (ops[0], mem))
20791 {
20792 gcc_assert (overlap == -1);
20793 overlap = i;
20794 }
20795 else
20796 {
20797 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20798 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20799 else
20800 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20801 output_asm_insn (buff, ops);
20802 }
20803 }
20804 if (overlap != -1)
20805 {
20806 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20807 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20808 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20809 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20810 else
20811 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20812 output_asm_insn (buff, ops);
20813 }
20814
20815 return "";
20816 }
20817
20818 default:
20819 gcc_unreachable ();
20820 }
20821
20822 sprintf (buff, templ, load ? "ld" : "st");
20823 output_asm_insn (buff, ops);
20824
20825 return "";
20826 }
20827
20828 /* Compute and return the length of neon_mov<mode>, where <mode> is
20829 one of VSTRUCT modes: EI, OI, CI or XI. */
20830 int
20831 arm_attr_length_move_neon (rtx_insn *insn)
20832 {
20833 rtx reg, mem, addr;
20834 int load;
20835 machine_mode mode;
20836
20837 extract_insn_cached (insn);
20838
20839 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20840 {
20841 mode = GET_MODE (recog_data.operand[0]);
20842 switch (mode)
20843 {
20844 case E_EImode:
20845 case E_OImode:
20846 return 8;
20847 case E_CImode:
20848 return 12;
20849 case E_XImode:
20850 return 16;
20851 default:
20852 gcc_unreachable ();
20853 }
20854 }
20855
20856 load = REG_P (recog_data.operand[0]);
20857 reg = recog_data.operand[!load];
20858 mem = recog_data.operand[load];
20859
20860 gcc_assert (MEM_P (mem));
20861
20862 addr = XEXP (mem, 0);
20863
20864 /* Strip off const from addresses like (const (plus (...))). */
20865 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20866 addr = XEXP (addr, 0);
20867
20868 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20869 {
20870 int insns = REG_NREGS (reg) / 2;
20871 return insns * 4;
20872 }
20873 else
20874 return 4;
20875 }
20876
20877 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20878 return zero. */
20879
20880 int
20881 arm_address_offset_is_imm (rtx_insn *insn)
20882 {
20883 rtx mem, addr;
20884
20885 extract_insn_cached (insn);
20886
20887 if (REG_P (recog_data.operand[0]))
20888 return 0;
20889
20890 mem = recog_data.operand[0];
20891
20892 gcc_assert (MEM_P (mem));
20893
20894 addr = XEXP (mem, 0);
20895
20896 if (REG_P (addr)
20897 || (GET_CODE (addr) == PLUS
20898 && REG_P (XEXP (addr, 0))
20899 && CONST_INT_P (XEXP (addr, 1))))
20900 return 1;
20901 else
20902 return 0;
20903 }
20904
20905 /* Output an ADD r, s, #n where n may be too big for one instruction.
20906 If n is zero and r and s are the same register, output nothing. */
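/* For example, n = 0x10004 is not a valid ARM immediate, so it is split
   into two immediates that are, giving

        add     r0, r1, #4
        add     r0, r0, #65536  */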
20907 const char *
20908 output_add_immediate (rtx *operands)
20909 {
20910 HOST_WIDE_INT n = INTVAL (operands[2]);
20911
20912 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20913 {
20914 if (n < 0)
20915 output_multi_immediate (operands,
20916 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20917 -n);
20918 else
20919 output_multi_immediate (operands,
20920 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20921 n);
20922 }
20923
20924 return "";
20925 }
20926
20927 /* Output a multiple immediate operation.
20928 OPERANDS is the vector of operands referred to in the output patterns.
20929 INSTR1 is the output pattern to use for the first constant.
20930 INSTR2 is the output pattern to use for subsequent constants.
20931 IMMED_OP is the index of the constant slot in OPERANDS.
20932 N is the constant value. */
20933 static const char *
20934 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20935 int immed_op, HOST_WIDE_INT n)
20936 {
20937 #if HOST_BITS_PER_WIDE_INT > 32
20938 n &= 0xffffffff;
20939 #endif
20940
20941 if (n == 0)
20942 {
20943 /* Quick and easy output. */
20944 operands[immed_op] = const0_rtx;
20945 output_asm_insn (instr1, operands);
20946 }
20947 else
20948 {
20949 int i;
20950 const char * instr = instr1;
20951
20952 /* Note that n is never zero here (which would give no output). */
20953 for (i = 0; i < 32; i += 2)
20954 {
20955 if (n & (3 << i))
20956 {
20957 operands[immed_op] = GEN_INT (n & (255 << i));
20958 output_asm_insn (instr, operands);
20959 instr = instr2;
20960 i += 6;
20961 }
20962 }
20963 }
20964
20965 return "";
20966 }
20967
20968 /* Return the name of a shifter operation. */
20969 static const char *
20970 arm_shift_nmem(enum rtx_code code)
20971 {
20972 switch (code)
20973 {
20974 case ASHIFT:
20975 return ARM_LSL_NAME;
20976
20977 case ASHIFTRT:
20978 return "asr";
20979
20980 case LSHIFTRT:
20981 return "lsr";
20982
20983 case ROTATERT:
20984 return "ror";
20985
20986 default:
20987 abort();
20988 }
20989 }
20990
20991 /* Return the appropriate ARM instruction for the operation code.
20992 The returned result should not be overwritten. OP is the rtx of the
20993 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20994 was shifted. */
20995 const char *
20996 arithmetic_instr (rtx op, int shift_first_arg)
20997 {
20998 switch (GET_CODE (op))
20999 {
21000 case PLUS:
21001 return "add";
21002
21003 case MINUS:
21004 return shift_first_arg ? "rsb" : "sub";
21005
21006 case IOR:
21007 return "orr";
21008
21009 case XOR:
21010 return "eor";
21011
21012 case AND:
21013 return "and";
21014
21015 case ASHIFT:
21016 case ASHIFTRT:
21017 case LSHIFTRT:
21018 case ROTATERT:
21019 return arm_shift_nmem(GET_CODE(op));
21020
21021 default:
21022 gcc_unreachable ();
21023 }
21024 }
21025
21026 /* Ensure valid constant shifts and return the appropriate shift mnemonic
21027 for the operation code. The returned result should not be overwritten.
21028 OP is the rtx of the shift.
21029 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
21030 will hold the constant shift amount. */
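/* For example, (mult (reg) (const_int 8)) -- the canonical form of a
   left shift by 3 when it appears inside another operation -- returns
   the LSL mnemonic with *AMOUNTP set to 3, while (ashiftrt (reg) (reg))
   returns "asr" with *AMOUNTP set to -1.  */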
21031 static const char *
21032 shift_op (rtx op, HOST_WIDE_INT *amountp)
21033 {
21034 const char * mnem;
21035 enum rtx_code code = GET_CODE (op);
21036
21037 switch (code)
21038 {
21039 case ROTATE:
21040 if (!CONST_INT_P (XEXP (op, 1)))
21041 {
21042 output_operand_lossage ("invalid shift operand");
21043 return NULL;
21044 }
21045
21046 code = ROTATERT;
21047 *amountp = 32 - INTVAL (XEXP (op, 1));
21048 mnem = "ror";
21049 break;
21050
21051 case ASHIFT:
21052 case ASHIFTRT:
21053 case LSHIFTRT:
21054 case ROTATERT:
21055 mnem = arm_shift_nmem(code);
21056 if (CONST_INT_P (XEXP (op, 1)))
21057 {
21058 *amountp = INTVAL (XEXP (op, 1));
21059 }
21060 else if (REG_P (XEXP (op, 1)))
21061 {
21062 *amountp = -1;
21063 return mnem;
21064 }
21065 else
21066 {
21067 output_operand_lossage ("invalid shift operand");
21068 return NULL;
21069 }
21070 break;
21071
21072 case MULT:
21073 /* We never have to worry about the amount being other than a
21074 power of 2, since this case can never be reloaded from a reg. */
21075 if (!CONST_INT_P (XEXP (op, 1)))
21076 {
21077 output_operand_lossage ("invalid shift operand");
21078 return NULL;
21079 }
21080
21081 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21082
21083 /* Amount must be a power of two. */
21084 if (*amountp & (*amountp - 1))
21085 {
21086 output_operand_lossage ("invalid shift operand");
21087 return NULL;
21088 }
21089
21090 *amountp = exact_log2 (*amountp);
21091 gcc_assert (IN_RANGE (*amountp, 0, 31));
21092 return ARM_LSL_NAME;
21093
21094 default:
21095 output_operand_lossage ("invalid shift operand");
21096 return NULL;
21097 }
21098
21099 /* This is not 100% correct, but follows from the desire to merge
21100 multiplication by a power of 2 with the recognizer for a
21101 shift. >=32 is not a valid shift for "lsl", so we must try to
21102 output a shift that produces the correct arithmetical result.
21103 Using lsr #32 is identical except for the fact that the carry bit
21104 is not set correctly if we set the flags; but we never use the
21105 carry bit from such an operation, so we can ignore that. */
21106 if (code == ROTATERT)
21107 /* Rotate is just modulo 32. */
21108 *amountp &= 31;
21109 else if (*amountp != (*amountp & 31))
21110 {
21111 if (code == ASHIFT)
21112 mnem = "lsr";
21113 *amountp = 32;
21114 }
21115
21116 /* Shifts of 0 are no-ops. */
21117 if (*amountp == 0)
21118 return NULL;
21119
21120 return mnem;
21121 }
21122
21123 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21124 because /bin/as is horribly restrictive. The judgement about
21125 whether or not each character is 'printable' (and can be output as
21126 is) or not (and must be printed with an octal escape) must be made
21127 with reference to the *host* character set -- the situation is
21128 similar to that discussed in the comments above pp_c_char in
21129 c-pretty-print.cc. */
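/* For example, the three input bytes 'a', '"' and '\n' are emitted as

        .ascii  "a\"\012"

   with the quote backslash-escaped and the non-printable newline
   written as an octal escape.  */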
21130
21131 #define MAX_ASCII_LEN 51
21132
21133 void
21134 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21135 {
21136 int i;
21137 int len_so_far = 0;
21138
21139 fputs ("\t.ascii\t\"", stream);
21140
21141 for (i = 0; i < len; i++)
21142 {
21143 int c = p[i];
21144
21145 if (len_so_far >= MAX_ASCII_LEN)
21146 {
21147 fputs ("\"\n\t.ascii\t\"", stream);
21148 len_so_far = 0;
21149 }
21150
21151 if (ISPRINT (c))
21152 {
21153 if (c == '\\' || c == '\"')
21154 {
21155 putc ('\\', stream);
21156 len_so_far++;
21157 }
21158 putc (c, stream);
21159 len_so_far++;
21160 }
21161 else
21162 {
21163 fprintf (stream, "\\%03o", c);
21164 len_so_far += 4;
21165 }
21166 }
21167
21168 fputs ("\"\n", stream);
21169 }
21170 \f
21171
21172 /* Compute the register save mask for registers 0 through 12
21173 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21174
21175 static unsigned long
21176 arm_compute_save_reg0_reg12_mask (void)
21177 {
21178 unsigned long func_type = arm_current_func_type ();
21179 unsigned long save_reg_mask = 0;
21180 unsigned int reg;
21181
21182 if (IS_INTERRUPT (func_type))
21183 {
21184 unsigned int max_reg;
21185 /* Interrupt functions must not corrupt any registers,
21186 even call clobbered ones. If this is a leaf function
21187 we can just examine the registers used by the RTL, but
21188 otherwise we have to assume that whatever function is
21189 called might clobber anything, and so we have to save
21190 all the call-clobbered registers as well. */
21191 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21192 /* FIQ handlers have registers r8 - r12 banked, so
21193 we only need to check r0 - r7; normal ISRs only
21194 bank r14 and r15, so we must check up to r12.
21195 r13 is the stack pointer which is always preserved,
21196 so we do not need to consider it here. */
21197 max_reg = 7;
21198 else
21199 max_reg = 12;
21200
21201 for (reg = 0; reg <= max_reg; reg++)
21202 if (reg_needs_saving_p (reg))
21203 save_reg_mask |= (1 << reg);
21204
21205 /* Also save the pic base register if necessary. */
21206 if (PIC_REGISTER_MAY_NEED_SAVING
21207 && crtl->uses_pic_offset_table)
21208 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21209 }
21210 else if (IS_VOLATILE(func_type))
21211 {
21212 /* For noreturn functions we historically omitted register saves
21213 altogether. However this really messes up debugging. As a
21214 compromise save just the frame pointers. Combined with the link
21215 register saved elsewhere this should be sufficient to get
21216 a backtrace. */
21217 if (frame_pointer_needed)
21218 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21219 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21220 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21221 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21222 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21223 }
21224 else
21225 {
21226 /* In the normal case we only need to save those registers
21227 which are call saved and which are used by this function. */
21228 for (reg = 0; reg <= 11; reg++)
21229 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21230 save_reg_mask |= (1 << reg);
21231
21232 /* Handle the frame pointer as a special case. */
21233 if (frame_pointer_needed)
21234 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21235
21236 /* If we aren't loading the PIC register,
21237 don't stack it even though it may be live. */
21238 if (PIC_REGISTER_MAY_NEED_SAVING
21239 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21240 || crtl->uses_pic_offset_table))
21241 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21242
21243 /* The prologue will copy SP into R0, so save it. */
21244 if (IS_STACKALIGN (func_type))
21245 save_reg_mask |= 1;
21246 }
21247
21248 /* Save registers so the exception handler can modify them. */
21249 if (crtl->calls_eh_return)
21250 {
21251 unsigned int i;
21252
21253 for (i = 0; ; i++)
21254 {
21255 reg = EH_RETURN_DATA_REGNO (i);
21256 if (reg == INVALID_REGNUM)
21257 break;
21258 save_reg_mask |= 1 << reg;
21259 }
21260 }
21261
21262 return save_reg_mask;
21263 }
21264
21265 /* Return true if r3 is live at the start of the function. */
21266
21267 static bool
21268 arm_r3_live_at_start_p (void)
21269 {
21270 /* Just look at cfg info, which is still close enough to correct at this
21271 point. This gives false positives for broken functions that might use
21272 uninitialized data that happens to be allocated in r3, but who cares? */
21273 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21274 }
21275
21276 /* Compute the number of bytes used to store the static chain register on the
21277 stack, above the stack frame. We need to know this accurately to get the
21278 alignment of the rest of the stack frame correct. */
21279
21280 static int
21281 arm_compute_static_chain_stack_bytes (void)
21282 {
21283 /* Once the value is updated from the init value of -1, do not
21284 re-compute. */
21285 if (cfun->machine->static_chain_stack_bytes != -1)
21286 return cfun->machine->static_chain_stack_bytes;
21287
21288 /* See the defining assertion in arm_expand_prologue. */
21289 if (IS_NESTED (arm_current_func_type ())
21290 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21291 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21292 || flag_stack_clash_protection)
21293 && !df_regs_ever_live_p (LR_REGNUM)))
21294 && arm_r3_live_at_start_p ()
21295 && crtl->args.pretend_args_size == 0)
21296 return 4;
21297
21298 return 0;
21299 }
21300
21301 /* Compute a bit mask of which core registers need to be
21302 saved on the stack for the current function.
21303 This is used by arm_compute_frame_layout, which may add extra registers. */
21304
21305 static unsigned long
21306 arm_compute_save_core_reg_mask (void)
21307 {
21308 unsigned int save_reg_mask = 0;
21309 unsigned long func_type = arm_current_func_type ();
21310 unsigned int reg;
21311
21312 if (IS_NAKED (func_type))
21313 /* This should never really happen. */
21314 return 0;
21315
21316 /* If we are creating a stack frame, then we must save the frame pointer,
21317 IP (which will hold the old stack pointer), LR and the PC. */
21318 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21319 save_reg_mask |=
21320 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21321 | (1 << IP_REGNUM)
21322 | (1 << LR_REGNUM)
21323 | (1 << PC_REGNUM);
21324
21325 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21326
21327 if (arm_current_function_pac_enabled_p ())
21328 save_reg_mask |= 1 << IP_REGNUM;
21329
21330 /* Decide if we need to save the link register.
21331 Interrupt routines have their own banked link register,
21332 so they never need to save it.
21333 Otherwise if we do not use the link register we do not need to save
21334 it. If we are pushing other registers onto the stack however, we
21335 can save an instruction in the epilogue by pushing the link register
21336 now and then popping it back into the PC. This incurs extra memory
21337 accesses though, so we only do it when optimizing for size, and only
21338 if we know that we will not need a fancy return sequence. */
21339 if (df_regs_ever_live_p (LR_REGNUM)
21340 || (save_reg_mask
21341 && optimize_size
21342 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21343 && !crtl->tail_call_emit
21344 && !crtl->calls_eh_return))
21345 save_reg_mask |= 1 << LR_REGNUM;
21346
21347 if (cfun->machine->lr_save_eliminated)
21348 save_reg_mask &= ~ (1 << LR_REGNUM);
21349
21350 if (TARGET_REALLY_IWMMXT
21351 && ((bit_count (save_reg_mask)
21352 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21353 arm_compute_static_chain_stack_bytes())
21354 ) % 2) != 0)
21355 {
21356 /* The total number of registers that are going to be pushed
21357 onto the stack is odd. We need to ensure that the stack
21358 is 64-bit aligned before we start to save iWMMXt registers,
21359 and also before we start to create locals. (A local variable
21360 might be a double or long long which we will load/store using
21361 an iWMMXt instruction). Therefore we need to push another
21362 ARM register, so that the stack will be 64-bit aligned. We
21363 try to avoid using the arg registers (r0 - r3) as they might be
21364 used to pass values in a tail call. */
21365 for (reg = 4; reg <= 12; reg++)
21366 if ((save_reg_mask & (1 << reg)) == 0)
21367 break;
21368
21369 if (reg <= 12)
21370 save_reg_mask |= (1 << reg);
21371 else
21372 {
21373 cfun->machine->sibcall_blocked = 1;
21374 save_reg_mask |= (1 << 3);
21375 }
21376 }
21377
21378 /* We may need to push an additional register for use initializing the
21379 PIC base register. */
21380 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21381 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21382 {
21383 reg = thumb_find_work_register (1 << 4);
21384 if (!call_used_or_fixed_reg_p (reg))
21385 save_reg_mask |= (1 << reg);
21386 }
21387
21388 return save_reg_mask;
21389 }
21390
21391 /* Compute a bit mask of which core registers need to be
21392 saved on the stack for the current function. */
21393 static unsigned long
21394 thumb1_compute_save_core_reg_mask (void)
21395 {
21396 unsigned long mask;
21397 unsigned reg;
21398
21399 mask = 0;
21400 for (reg = 0; reg < 12; reg ++)
21401 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21402 mask |= 1 << reg;
21403
21404 /* Handle the frame pointer as a special case. */
21405 if (frame_pointer_needed)
21406 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21407
21408 if (flag_pic
21409 && !TARGET_SINGLE_PIC_BASE
21410 && arm_pic_register != INVALID_REGNUM
21411 && crtl->uses_pic_offset_table)
21412 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21413
21414 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21415 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21416 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21417
21418 /* LR will also be pushed if any lo regs are pushed. */
21419 if (mask & 0xff || thumb_force_lr_save ())
21420 mask |= (1 << LR_REGNUM);
21421
21422 bool call_clobbered_scratch
21423 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21424 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21425
21426 /* Make sure we have a low work register if we need one. We will
21427 need one if we are going to push a high register, but we are not
21428 currently intending to push a low register. However if both the
21429 prologue and epilogue have a spare call-clobbered low register,
21430 then we won't need to find an additional work register. It does
21431 not need to be the same register in the prologue and
21432 epilogue. */
21433 if ((mask & 0xff) == 0
21434 && !call_clobbered_scratch
21435 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21436 {
21437 /* Use thumb_find_work_register to choose which register
21438 we will use. If the register is live then we will
21439 have to push it. Use LAST_LO_REGNUM as our fallback
21440 choice for the register to select. */
21441 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21442 /* Make sure the register returned by thumb_find_work_register is
21443 not part of the return value. */
21444 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21445 reg = LAST_LO_REGNUM;
21446
21447 if (callee_saved_reg_p (reg))
21448 mask |= 1 << reg;
21449 }
21450
21451 /* The 504 below is 8 bytes less than 512 because there are two possible
21452 alignment words. We can't tell here if they will be present or not so we
21453 have to play it safe and assume that they are. */
21454 if ((CALLER_INTERWORKING_SLOT_SIZE +
21455 ROUND_UP_WORD (get_frame_size ()) +
21456 crtl->outgoing_args_size) >= 504)
21457 {
21458 /* This is the same as the code in thumb1_expand_prologue() which
21459 determines which register to use for stack decrement. */
21460 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21461 if (mask & (1 << reg))
21462 break;
21463
21464 if (reg > LAST_LO_REGNUM)
21465 {
21466 /* Make sure we have a register available for stack decrement. */
21467 mask |= 1 << LAST_LO_REGNUM;
21468 }
21469 }
21470
21471 return mask;
21472 }
21473
21474 /* Return the number of bytes required to save VFP registers. */
21475 static int
21476 arm_get_vfp_saved_size (void)
21477 {
21478 unsigned int regno;
21479 int count;
21480 int saved;
21481
21482 saved = 0;
21483 /* Space for saved VFP registers. */
21484 if (TARGET_VFP_BASE)
21485 {
21486 count = 0;
21487 for (regno = FIRST_VFP_REGNUM;
21488 regno < LAST_VFP_REGNUM;
21489 regno += 2)
21490 {
21491 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21492 {
21493 if (count > 0)
21494 {
21495 /* Workaround ARM10 VFPr1 bug. */
21496 if (count == 2 && !arm_arch6)
21497 count++;
21498 saved += count * 8;
21499 }
21500 count = 0;
21501 }
21502 else
21503 count++;
21504 }
21505 if (count > 0)
21506 {
21507 if (count == 2 && !arm_arch6)
21508 count++;
21509 saved += count * 8;
21510 }
21511 }
21512 return saved;
21513 }
21514
21515
21516 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21517 everything bar the final return instruction. If simple_return is true,
21518 then do not output the epilogue, because it has already been emitted in RTL.
21519
21520 Note: do not forget to update the length attribute of the corresponding insn
21521 pattern when changing assembly output (e.g. the length attribute of
21522 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21523 register clearing sequences). */
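/* For a rough illustration (register choice is hypothetical): a plain
   ARM-mode function that saved {r4, r5, lr} typically gets

	pop	{r4, r5, pc}

   while a function that saved nothing returns with "bx lr" (or
   "mov pc, lr" on cores without BX).  */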
21524 const char *
21525 output_return_instruction (rtx operand, bool really_return, bool reverse,
21526 bool simple_return)
21527 {
21528 char conditional[10];
21529 char instr[100];
21530 unsigned reg;
21531 unsigned long live_regs_mask;
21532 unsigned long func_type;
21533 arm_stack_offsets *offsets;
21534
21535 func_type = arm_current_func_type ();
21536
21537 if (IS_NAKED (func_type))
21538 return "";
21539
21540 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21541 {
21542 /* If this function was declared non-returning, and we have
21543 found a tail call, then we have to trust that the called
21544 function won't return. */
21545 if (really_return)
21546 {
21547 rtx ops[2];
21548
21549 /* Otherwise, trap an attempted return by aborting. */
21550 ops[0] = operand;
21551 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21552 : "abort");
21553 assemble_external_libcall (ops[1]);
21554 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21555 }
21556
21557 return "";
21558 }
21559
21560 gcc_assert (!cfun->calls_alloca || really_return);
21561
21562 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21563
21564 cfun->machine->return_used_this_function = 1;
21565
21566 offsets = arm_get_frame_offsets ();
21567 live_regs_mask = offsets->saved_regs_mask;
21568
21569 if (!simple_return && live_regs_mask)
21570 {
21571 const char * return_reg;
21572
21573 /* If we do not have any special requirements for function exit
21574 (e.g. interworking) then we can load the return address
21575 directly into the PC. Otherwise we must load it into LR. */
21576 if (really_return
21577 && !IS_CMSE_ENTRY (func_type)
21578 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21579 return_reg = reg_names[PC_REGNUM];
21580 else
21581 return_reg = reg_names[LR_REGNUM];
21582
21583 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21584 {
21585 /* There are three possible reasons for the IP register
21586 being saved: 1) a stack frame was created, in which case
21587 IP contains the old stack pointer, or 2) an ISR routine
21588 corrupted it, or 3) it was saved to align the stack on
21589 iWMMXt. In case 1, restore IP into SP, otherwise just
21590 restore IP. */
21591 if (frame_pointer_needed)
21592 {
21593 live_regs_mask &= ~ (1 << IP_REGNUM);
21594 live_regs_mask |= (1 << SP_REGNUM);
21595 }
21596 else
21597 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21598 }
21599
21600 /* On some ARM architectures it is faster to use LDR rather than
21601 LDM to load a single register. On other architectures, the
21602 cost is the same. In 26 bit mode, or for exception handlers,
21603 we have to use LDM to load the PC so that the CPSR is also
21604 restored. */
21605 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21606 if (live_regs_mask == (1U << reg))
21607 break;
21608
21609 if (reg <= LAST_ARM_REGNUM
21610 && (reg != LR_REGNUM
21611 || ! really_return
21612 || ! IS_INTERRUPT (func_type)))
21613 {
21614 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21615 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21616 }
21617 else
21618 {
21619 char *p;
21620 int first = 1;
21621
21622 /* Generate the load multiple instruction to restore the
21623 registers. Note we can get here, even if
21624 frame_pointer_needed is true, but only if sp already
21625 points to the base of the saved core registers. */
21626 if (live_regs_mask & (1 << SP_REGNUM))
21627 {
21628 unsigned HOST_WIDE_INT stack_adjust;
21629
21630 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21631 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21632
21633 if (stack_adjust && arm_arch5t && TARGET_ARM)
21634 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21635 else
21636 {
21637 /* If we can't use ldmib (SA110 bug),
21638 then try to pop r3 instead. */
21639 if (stack_adjust)
21640 live_regs_mask |= 1 << 3;
21641
21642 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21643 }
21644 }
21645 /* For interrupt returns we have to use an LDM rather than
21646 a POP so that we can use the exception return variant. */
21647 else if (IS_INTERRUPT (func_type))
21648 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21649 else
21650 sprintf (instr, "pop%s\t{", conditional);
21651
21652 p = instr + strlen (instr);
21653
21654 for (reg = 0; reg <= SP_REGNUM; reg++)
21655 if (live_regs_mask & (1 << reg))
21656 {
21657 int l = strlen (reg_names[reg]);
21658
21659 if (first)
21660 first = 0;
21661 else
21662 {
21663 memcpy (p, ", ", 2);
21664 p += 2;
21665 }
21666
21667 memcpy (p, "%|", 2);
21668 memcpy (p + 2, reg_names[reg], l);
21669 p += l + 2;
21670 }
21671
21672 if (live_regs_mask & (1 << LR_REGNUM))
21673 {
21674 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21675 /* If returning from an interrupt, restore the CPSR. */
21676 if (IS_INTERRUPT (func_type))
21677 strcat (p, "^");
21678 }
21679 else
21680 strcpy (p, "}");
21681 }
21682
21683 output_asm_insn (instr, & operand);
21684
21685 /* See if we need to generate an extra instruction to
21686 perform the actual function return. */
21687 if (really_return
21688 && func_type != ARM_FT_INTERWORKED
21689 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21690 {
21691 /* The return has already been handled
21692 by loading the LR into the PC. */
21693 return "";
21694 }
21695 }
21696
21697 if (really_return)
21698 {
21699 switch ((int) ARM_FUNC_TYPE (func_type))
21700 {
21701 case ARM_FT_ISR:
21702 case ARM_FT_FIQ:
21703 /* ??? This is wrong for unified assembly syntax. */
21704 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21705 break;
21706
21707 case ARM_FT_INTERWORKED:
21708 gcc_assert (arm_arch5t || arm_arch4t);
21709 sprintf (instr, "bx%s\t%%|lr", conditional);
21710 break;
21711
21712 case ARM_FT_EXCEPTION:
21713 /* ??? This is wrong for unified assembly syntax. */
21714 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21715 break;
21716
21717 default:
21718 if (IS_CMSE_ENTRY (func_type))
21719 {
21720 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21721 emitted by cmse_nonsecure_entry_clear_before_return () and the
21722 VSTR/VLDR instructions in the prologue and epilogue. */
21723 if (!TARGET_HAVE_FPCXT_CMSE)
21724 {
21725 /* Check if we have to clear the 'GE bits' which is only used if
21726 parallel add and subtraction instructions are available. */
21727 if (TARGET_INT_SIMD)
21728 snprintf (instr, sizeof (instr),
21729 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21730 else
21731 snprintf (instr, sizeof (instr),
21732 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21733
21734 output_asm_insn (instr, & operand);
21735 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21736 care of it. */
21737 if (TARGET_HARD_FLOAT)
21738 {
21739 /* Clear the cumulative exception-status bits (0-4,7) and
21740 the condition code bits (28-31) of the FPSCR. We need
21741 to remember to clear the first scratch register used
21742 (IP) and save and restore the second (r4).
21743
21744 Important note: the length of the
21745 thumb2_cmse_entry_return insn pattern must account for
21746 the size of the below instructions. */
21747 output_asm_insn ("push\t{%|r4}", & operand);
21748 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21749 output_asm_insn ("movw\t%|r4, #65376", & operand);
21750 output_asm_insn ("movt\t%|r4, #4095", & operand);
21751 output_asm_insn ("and\t%|ip, %|r4", & operand);
21752 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21753 output_asm_insn ("pop\t{%|r4}", & operand);
21754 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21755 }
21756 }
21757 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21758 }
21759 /* Use bx if it's available. */
21760 else if (arm_arch5t || arm_arch4t)
21761 sprintf (instr, "bx%s\t%%|lr", conditional);
21762 else
21763 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21764 break;
21765 }
21766
21767 output_asm_insn (instr, & operand);
21768 }
21769
21770 return "";
21771 }
21772
21773 /* Output in FILE asm statements needed to declare the NAME of the function
21774 defined by its DECL node. */
21775
21776 void
21777 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21778 {
21779 size_t cmse_name_len;
21780 char *cmse_name = 0;
21781 char cmse_prefix[] = "__acle_se_";
21782
21783 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21784 extra function label for each function with the 'cmse_nonsecure_entry'
21785 attribute. This extra function label should be prepended with
21786 '__acle_se_', telling the linker that it needs to create secure gateway
21787 veneers for this function. */
21788 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21789 DECL_ATTRIBUTES (decl)))
21790 {
21791 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21792 cmse_name = XALLOCAVEC (char, cmse_name_len);
21793 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21794 targetm.asm_out.globalize_label (file, cmse_name);
21795
21796 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21797 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21798 }
21799
21800 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21801 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21802 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21803 ASM_OUTPUT_LABEL (file, name);
21804
21805 if (cmse_name)
21806 ASM_OUTPUT_LABEL (file, cmse_name);
21807
21808 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21809 }
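/* As a sketch of the effect described above, a function "foo" (name
   hypothetical) compiled with CMSE and marked cmse_nonsecure_entry is
   emitted with an extra global alias, roughly:

	.global	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
   foo:
   __acle_se_foo:

   so that the linker can create the secure gateway veneer for it.  */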
21810
21811 /* Write the function name into the code section, directly preceding
21812 the function prologue.
21813
21814 Code will be output similar to this:
21815 t0
21816 .ascii "arm_poke_function_name", 0
21817 .align
21818 t1
21819 .word 0xff000000 + (t1 - t0)
21820 arm_poke_function_name
21821 mov ip, sp
21822 stmfd sp!, {fp, ip, lr, pc}
21823 sub fp, ip, #4
21824
21825 When performing a stack backtrace, code can inspect the value
21826 of 'pc' stored at 'fp' + 0. If the trace function then looks
21827 at location pc - 12 and the top 8 bits are set, then we know
21828 that there is a function name embedded immediately preceding this
21829 location and has length ((pc[-3]) & 0xff000000).
21830
21831 We assume that pc is declared as a pointer to an unsigned long.
21832
21833 It is of no benefit to output the function name if we are assembling
21834 a leaf function. These function types will not contain a stack
21835 backtrace structure, therefore it is not possible to determine the
21836 function name. */
21837 void
21838 arm_poke_function_name (FILE *stream, const char *name)
21839 {
21840 unsigned long alignlength;
21841 unsigned long length;
21842 rtx x;
21843
21844 length = strlen (name) + 1;
21845 alignlength = ROUND_UP_WORD (length);
21846
21847 ASM_OUTPUT_ASCII (stream, name, length);
21848 ASM_OUTPUT_ALIGN (stream, 2);
21849 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21850 assemble_aligned_integer (UNITS_PER_WORD, x);
21851 }
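/* For illustration only (not part of the compiler): a backtracer walking
   the frame described above could recover the embedded name roughly as

	unsigned long word = pc[-3];
	if ((word & 0xff000000) == 0xff000000)
	  {
	    unsigned long len = word & 0x00ffffff;	/* padded length */
	    const char *name = (const char *) &pc[-3] - len;
	  }

   where pc is the saved program counter taken from the backtrace
   structure.  */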
21852
21853 /* Place some comments into the assembler stream
21854 describing the current function. */
21855 static void
21856 arm_output_function_prologue (FILE *f)
21857 {
21858 unsigned long func_type;
21859
21860 /* Sanity check. */
21861 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21862
21863 func_type = arm_current_func_type ();
21864
21865 switch ((int) ARM_FUNC_TYPE (func_type))
21866 {
21867 default:
21868 case ARM_FT_NORMAL:
21869 break;
21870 case ARM_FT_INTERWORKED:
21871 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21872 break;
21873 case ARM_FT_ISR:
21874 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21875 break;
21876 case ARM_FT_FIQ:
21877 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21878 break;
21879 case ARM_FT_EXCEPTION:
21880 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21881 break;
21882 }
21883
21884 if (IS_NAKED (func_type))
21885 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21886
21887 if (IS_VOLATILE (func_type))
21888 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21889
21890 if (IS_NESTED (func_type))
21891 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21892 if (IS_STACKALIGN (func_type))
21893 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21894 if (IS_CMSE_ENTRY (func_type))
21895 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21896
21897 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21898 (HOST_WIDE_INT) crtl->args.size,
21899 crtl->args.pretend_args_size,
21900 (HOST_WIDE_INT) get_frame_size ());
21901
21902 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21903 frame_pointer_needed,
21904 cfun->machine->uses_anonymous_args);
21905
21906 if (cfun->machine->lr_save_eliminated)
21907 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21908
21909 if (crtl->calls_eh_return)
21910 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21911
21912 }
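/* For a plain function the comments produced above typically look like:

	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.

   (values are illustrative; the exact lines depend on the function).  */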
21913
21914 static void
21915 arm_output_function_epilogue (FILE *)
21916 {
21917 arm_stack_offsets *offsets;
21918
21919 if (TARGET_THUMB1)
21920 {
21921 int regno;
21922
21923 /* Emit any call-via-reg trampolines that are needed for v4t support
21924 of call_reg and call_value_reg type insns. */
21925 for (regno = 0; regno < LR_REGNUM; regno++)
21926 {
21927 rtx label = cfun->machine->call_via[regno];
21928
21929 if (label != NULL)
21930 {
21931 switch_to_section (function_section (current_function_decl));
21932 targetm.asm_out.internal_label (asm_out_file, "L",
21933 CODE_LABEL_NUMBER (label));
21934 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21935 }
21936 }
21937
21938 /* ??? Probably not safe to set this here, since it assumes that a
21939 function will be emitted as assembly immediately after we generate
21940 RTL for it. This does not happen for inline functions. */
21941 cfun->machine->return_used_this_function = 0;
21942 }
21943 else /* TARGET_32BIT */
21944 {
21945 /* We need to take into account any stack-frame rounding. */
21946 offsets = arm_get_frame_offsets ();
21947
21948 gcc_assert (!use_return_insn (FALSE, NULL)
21949 || (cfun->machine->return_used_this_function != 0)
21950 || offsets->saved_regs == offsets->outgoing_args
21951 || frame_pointer_needed);
21952 }
21953 }
21954
21955 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21956 STR and STRD. If an even number of registers is being pushed, one
21957 or more STRD patterns are created, one for each register pair. If an
21958 odd number of registers is pushed, emit an initial STR followed by
21959 as many STRD instructions as are needed. This works best when the
21960 stack is initially 64-bit aligned (the normal case), since it
21961 ensures that each STRD is also 64-bit aligned. */
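/* As a hedged illustration of the scheme above, pushing the (hypothetical)
   odd set {r4, r5, r6} would be emitted as something like

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. the single STR performs the whole stack allocation and the STRD
   then lands on a doubleword-aligned address.  */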
21962 static void
21963 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21964 {
21965 int num_regs = 0;
21966 int i;
21967 int regno;
21968 rtx par = NULL_RTX;
21969 rtx dwarf = NULL_RTX;
21970 rtx tmp;
21971 bool first = true;
21972
21973 num_regs = bit_count (saved_regs_mask);
21974
21975 /* Must be at least one register to save, and can't save SP or PC. */
21976 gcc_assert (num_regs > 0 && num_regs <= 14);
21977 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21978 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21979
21980 /* Create sequence for DWARF info. All the frame-related data for
21981 debugging is held in this wrapper. */
21982 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21983
21984 /* Describe the stack adjustment. */
21985 tmp = gen_rtx_SET (stack_pointer_rtx,
21986 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21987 RTX_FRAME_RELATED_P (tmp) = 1;
21988 XVECEXP (dwarf, 0, 0) = tmp;
21989
21990 /* Find the first register. */
21991 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21992 ;
21993
21994 i = 0;
21995
21996 /* If there's an odd number of registers to push, start off by
21997 pushing a single register. This ensures that subsequent strd
21998 operations are dword aligned (assuming that SP was originally
21999 64-bit aligned). */
22000 if ((num_regs & 1) != 0)
22001 {
22002 rtx reg, mem, insn;
22003
22004 reg = gen_rtx_REG (SImode, regno);
22005 if (num_regs == 1)
22006 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
22007 stack_pointer_rtx));
22008 else
22009 mem = gen_frame_mem (Pmode,
22010 gen_rtx_PRE_MODIFY
22011 (Pmode, stack_pointer_rtx,
22012 plus_constant (Pmode, stack_pointer_rtx,
22013 -4 * num_regs)));
22014
22015 tmp = gen_rtx_SET (mem, reg);
22016 RTX_FRAME_RELATED_P (tmp) = 1;
22017 insn = emit_insn (tmp);
22018 RTX_FRAME_RELATED_P (insn) = 1;
22019 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22020 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
22021 RTX_FRAME_RELATED_P (tmp) = 1;
22022 i++;
22023 regno++;
22024 XVECEXP (dwarf, 0, i) = tmp;
22025 first = false;
22026 }
22027
22028 while (i < num_regs)
22029 if (saved_regs_mask & (1 << regno))
22030 {
22031 rtx reg1, reg2, mem1, mem2;
22032 rtx tmp0, tmp1, tmp2;
22033 int regno2;
22034
22035 /* Find the register to pair with this one. */
22036 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
22037 regno2++)
22038 ;
22039
22040 reg1 = gen_rtx_REG (SImode, regno);
22041 reg2 = gen_rtx_REG (SImode, regno2);
22042
22043 if (first)
22044 {
22045 rtx insn;
22046
22047 first = false;
22048 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22049 stack_pointer_rtx,
22050 -4 * num_regs));
22051 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22052 stack_pointer_rtx,
22053 -4 * (num_regs - 1)));
22054 tmp0 = gen_rtx_SET (stack_pointer_rtx,
22055 plus_constant (Pmode, stack_pointer_rtx,
22056 -4 * (num_regs)));
22057 tmp1 = gen_rtx_SET (mem1, reg1);
22058 tmp2 = gen_rtx_SET (mem2, reg2);
22059 RTX_FRAME_RELATED_P (tmp0) = 1;
22060 RTX_FRAME_RELATED_P (tmp1) = 1;
22061 RTX_FRAME_RELATED_P (tmp2) = 1;
22062 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22063 XVECEXP (par, 0, 0) = tmp0;
22064 XVECEXP (par, 0, 1) = tmp1;
22065 XVECEXP (par, 0, 2) = tmp2;
22066 insn = emit_insn (par);
22067 RTX_FRAME_RELATED_P (insn) = 1;
22068 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22069 }
22070 else
22071 {
22072 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22073 stack_pointer_rtx,
22074 4 * i));
22075 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22076 stack_pointer_rtx,
22077 4 * (i + 1)));
22078 tmp1 = gen_rtx_SET (mem1, reg1);
22079 tmp2 = gen_rtx_SET (mem2, reg2);
22080 RTX_FRAME_RELATED_P (tmp1) = 1;
22081 RTX_FRAME_RELATED_P (tmp2) = 1;
22082 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22083 XVECEXP (par, 0, 0) = tmp1;
22084 XVECEXP (par, 0, 1) = tmp2;
22085 emit_insn (par);
22086 }
22087
22088 /* Create unwind information. This is an approximation. */
22089 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22090 plus_constant (Pmode,
22091 stack_pointer_rtx,
22092 4 * i)),
22093 reg1);
22094 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22095 plus_constant (Pmode,
22096 stack_pointer_rtx,
22097 4 * (i + 1))),
22098 reg2);
22099
22100 RTX_FRAME_RELATED_P (tmp1) = 1;
22101 RTX_FRAME_RELATED_P (tmp2) = 1;
22102 XVECEXP (dwarf, 0, i + 1) = tmp1;
22103 XVECEXP (dwarf, 0, i + 2) = tmp2;
22104 i += 2;
22105 regno = regno2 + 1;
22106 }
22107 else
22108 regno++;
22109
22110 return;
22111 }
22112
22113 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22114 whenever possible, otherwise it emits single-word stores. The first store
22115 also allocates stack space for all saved registers, using pre-indexed
22116 addressing with writeback. All other stores use offset addressing. If no STRD
22117 can be emitted, this function emits a sequence of single-word stores,
22118 and not an STM as before, because single-word stores provide more
22119 scheduling freedom and can be turned into an STM by peephole optimizations. */
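/* A hedged example of the above for the (hypothetical) mask {r4, r5, r7}:

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   The ARM-mode STRD needs the consecutive even/odd pair r4/r5; r7 has no
   partner in the mask and therefore falls back to a single STR.  */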
22120 static void
22121 arm_emit_strd_push (unsigned long saved_regs_mask)
22122 {
22123 int num_regs = 0;
22124 int i, j, dwarf_index = 0;
22125 int offset = 0;
22126 rtx dwarf = NULL_RTX;
22127 rtx insn = NULL_RTX;
22128 rtx tmp, mem;
22129
22130 /* TODO: More efficient code can be emitted by changing the
22131 layout, e.g., first push all pairs that can use STRD to keep the
22132 stack aligned, and then push all other registers. */
22133 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22134 if (saved_regs_mask & (1 << i))
22135 num_regs++;
22136
22137 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22138 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22139 gcc_assert (num_regs > 0);
22140
22141 /* Create sequence for DWARF info. */
22142 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22143
22144 /* For dwarf info, we generate explicit stack update. */
22145 tmp = gen_rtx_SET (stack_pointer_rtx,
22146 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22147 RTX_FRAME_RELATED_P (tmp) = 1;
22148 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22149
22150 /* Save registers. */
22151 offset = - 4 * num_regs;
22152 j = 0;
22153 while (j <= LAST_ARM_REGNUM)
22154 if (saved_regs_mask & (1 << j))
22155 {
22156 if ((j % 2 == 0)
22157 && (saved_regs_mask & (1 << (j + 1))))
22158 {
22159 /* Current register and next register form a register pair for
22160 which STRD can be generated. */
22161 if (offset < 0)
22162 {
22163 /* Allocate stack space for all saved registers. */
22164 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22165 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22166 mem = gen_frame_mem (DImode, tmp);
22167 offset = 0;
22168 }
22169 else if (offset > 0)
22170 mem = gen_frame_mem (DImode,
22171 plus_constant (Pmode,
22172 stack_pointer_rtx,
22173 offset));
22174 else
22175 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22176
22177 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22178 RTX_FRAME_RELATED_P (tmp) = 1;
22179 tmp = emit_insn (tmp);
22180
22181 /* Record the first store insn. */
22182 if (dwarf_index == 1)
22183 insn = tmp;
22184
22185 /* Generate dwarf info. */
22186 mem = gen_frame_mem (SImode,
22187 plus_constant (Pmode,
22188 stack_pointer_rtx,
22189 offset));
22190 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22191 RTX_FRAME_RELATED_P (tmp) = 1;
22192 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22193
22194 mem = gen_frame_mem (SImode,
22195 plus_constant (Pmode,
22196 stack_pointer_rtx,
22197 offset + 4));
22198 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22199 RTX_FRAME_RELATED_P (tmp) = 1;
22200 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22201
22202 offset += 8;
22203 j += 2;
22204 }
22205 else
22206 {
22207 /* Emit a single word store. */
22208 if (offset < 0)
22209 {
22210 /* Allocate stack space for all saved registers. */
22211 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22212 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22213 mem = gen_frame_mem (SImode, tmp);
22214 offset = 0;
22215 }
22216 else if (offset > 0)
22217 mem = gen_frame_mem (SImode,
22218 plus_constant (Pmode,
22219 stack_pointer_rtx,
22220 offset));
22221 else
22222 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22223
22224 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22225 RTX_FRAME_RELATED_P (tmp) = 1;
22226 tmp = emit_insn (tmp);
22227
22228 /* Record the first store insn. */
22229 if (dwarf_index == 1)
22230 insn = tmp;
22231
22232 /* Generate dwarf info. */
22233 mem = gen_frame_mem (SImode,
22234 plus_constant(Pmode,
22235 stack_pointer_rtx,
22236 offset));
22237 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22238 RTX_FRAME_RELATED_P (tmp) = 1;
22239 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22240
22241 offset += 4;
22242 j += 1;
22243 }
22244 }
22245 else
22246 j++;
22247
22248 /* Attach dwarf info to the first insn we generate. */
22249 gcc_assert (insn != NULL_RTX);
22250 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22251 RTX_FRAME_RELATED_P (insn) = 1;
22252 }
22253
22254 /* Generate and emit an insn that we will recognize as a push_multi.
22255 Unfortunately, since this insn does not reflect very well the actual
22256 semantics of the operation, we need to annotate the insn for the benefit
22257 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22258 MASK for registers that should be annotated for DWARF2 frame unwind
22259 information. */
22260 static rtx
22261 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22262 {
22263 int num_regs = 0;
22264 int num_dwarf_regs = 0;
22265 int i, j;
22266 rtx par;
22267 rtx dwarf;
22268 int dwarf_par_index;
22269 rtx tmp, reg;
22270
22271 /* We don't record the PC in the dwarf frame information. */
22272 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22273
22274 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22275 {
22276 if (mask & (1 << i))
22277 num_regs++;
22278 if (dwarf_regs_mask & (1 << i))
22279 num_dwarf_regs++;
22280 }
22281
22282 gcc_assert (num_regs && num_regs <= 16);
22283 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22284
22285 /* For the body of the insn we are going to generate an UNSPEC in
22286 parallel with several USEs. This allows the insn to be recognized
22287 by the push_multi pattern in the arm.md file.
22288
22289 The body of the insn looks something like this:
22290
22291 (parallel [
22292 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22293 (const_int:SI <num>)))
22294 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22295 (use (reg:SI XX))
22296 (use (reg:SI YY))
22297 ...
22298 ])
22299
22300 For the frame note however, we try to be more explicit and actually
22301 show each register being stored into the stack frame, plus a (single)
22302 decrement of the stack pointer. We do it this way in order to be
22303 friendly to the stack unwinding code, which only wants to see a single
22304 stack decrement per instruction. The RTL we generate for the note looks
22305 something like this:
22306
22307 (sequence [
22308 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22309 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22310 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22311 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22312 ...
22313 ])
22314
22315 FIXME: In an ideal world the PRE_MODIFY would not exist and
22316 instead we'd have a parallel expression detailing all
22317 the stores to the various memory addresses so that debug
22318 information is more up-to-date. Remember however while writing
22319 this to take care of the constraints with the push instruction.
22320
22321 Note also that this has to be taken care of for the VFP registers.
22322
22323 For more see PR43399. */
22324
22325 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22326 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22327 dwarf_par_index = 1;
22328
22329 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22330 {
22331 if (mask & (1 << i))
22332 {
22333 /* NOTE: The dwarf code emitter handles reg-reg copies correctly; in the
22334 following example the reg-reg copy of SP to the IP register is handled
22335 through the .cfi_def_cfa_register directive and the .cfi_offset
22336 directive for the IP register is skipped by the dwarf code emitter.
22337 Example:
22338 mov ip, sp
22339 .cfi_def_cfa_register 12
22340 push {fp, ip, lr, pc}
22341 .cfi_offset 11, -16
22342 .cfi_offset 13, -12
22343 .cfi_offset 14, -8
22344
22345 The Arm-specific .save directive handling, however, differs from that
22346 of the dwarf code emitter and doesn't consider reg-reg copies while
22347 updating the register list. When PACBTI is enabled we manually
22348 update the .save directive register list to use "ra_auth_code"
22349 (pseudo register 143) instead of the IP register, as shown in the
22350 following pseudo code.
22351 Example:
22352 pacbti ip, lr, sp
22353 .cfi_register 143, 12
22354 push {r3, r7, ip, lr}
22355 .save {r3, r7, ra_auth_code, lr}
22356 */
22357 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22358 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22359 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22360
22361 XVECEXP (par, 0, 0)
22362 = gen_rtx_SET (gen_frame_mem
22363 (BLKmode,
22364 gen_rtx_PRE_MODIFY (Pmode,
22365 stack_pointer_rtx,
22366 plus_constant
22367 (Pmode, stack_pointer_rtx,
22368 -4 * num_regs))
22369 ),
22370 gen_rtx_UNSPEC (BLKmode,
22371 gen_rtvec (1, reg),
22372 UNSPEC_PUSH_MULT));
22373
22374 if (dwarf_regs_mask & (1 << i))
22375 {
22376 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22377 dwarf_reg);
22378 RTX_FRAME_RELATED_P (tmp) = 1;
22379 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22380 }
22381
22382 break;
22383 }
22384 }
22385
22386 for (j = 1, i++; j < num_regs; i++)
22387 {
22388 if (mask & (1 << i))
22389 {
22390 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22391 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22392 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22393
22394 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22395
22396 if (dwarf_regs_mask & (1 << i))
22397 {
22398 tmp
22399 = gen_rtx_SET (gen_frame_mem
22400 (SImode,
22401 plus_constant (Pmode, stack_pointer_rtx,
22402 4 * j)),
22403 dwarf_reg);
22404 RTX_FRAME_RELATED_P (tmp) = 1;
22405 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22406 }
22407
22408 j++;
22409 }
22410 }
22411
22412 par = emit_insn (par);
22413
22414 tmp = gen_rtx_SET (stack_pointer_rtx,
22415 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22416 RTX_FRAME_RELATED_P (tmp) = 1;
22417 XVECEXP (dwarf, 0, 0) = tmp;
22418
22419 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22420
22421 return par;
22422 }
22423
22424 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22425 SIZE is the offset to be adjusted.
22426 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22427 static void
22428 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22429 {
22430 rtx dwarf;
22431
22432 RTX_FRAME_RELATED_P (insn) = 1;
22433 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22434 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22435 }
22436
22437 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22438 SAVED_REGS_MASK shows which registers need to be restored.
22439
22440 Unfortunately, since this insn does not reflect very well the actual
22441 semantics of the operation, we need to annotate the insn for the benefit
22442 of DWARF2 frame unwind information. */
22443 static void
22444 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22445 {
22446 int num_regs = 0;
22447 int i, j;
22448 rtx par;
22449 rtx dwarf = NULL_RTX;
22450 rtx tmp, reg;
22451 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22452 int offset_adj;
22453 int emit_update;
22454
22455 offset_adj = return_in_pc ? 1 : 0;
22456 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22457 if (saved_regs_mask & (1 << i))
22458 num_regs++;
22459
22460 gcc_assert (num_regs && num_regs <= 16);
22461
22462 /* If SP is in reglist, then we don't emit SP update insn. */
22463 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22464
22465 /* The parallel needs to hold num_regs SETs
22466 and one SET for the stack update. */
22467 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22468
22469 if (return_in_pc)
22470 XVECEXP (par, 0, 0) = ret_rtx;
22471
22472 if (emit_update)
22473 {
22474 /* Increment the stack pointer, based on there being
22475 num_regs 4-byte registers to restore. */
22476 tmp = gen_rtx_SET (stack_pointer_rtx,
22477 plus_constant (Pmode,
22478 stack_pointer_rtx,
22479 4 * num_regs));
22480 RTX_FRAME_RELATED_P (tmp) = 1;
22481 XVECEXP (par, 0, offset_adj) = tmp;
22482 }
22483
22484 /* Now restore every reg, which may include PC. */
22485 for (j = 0, i = 0; j < num_regs; i++)
22486 if (saved_regs_mask & (1 << i))
22487 {
22488 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22489 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22490 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22491 if ((num_regs == 1) && emit_update && !return_in_pc)
22492 {
22493 /* Emit single load with writeback. */
22494 tmp = gen_frame_mem (SImode,
22495 gen_rtx_POST_INC (Pmode,
22496 stack_pointer_rtx));
22497 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22498 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
22499 dwarf);
22500 return;
22501 }
22502
22503 tmp = gen_rtx_SET (reg,
22504 gen_frame_mem
22505 (SImode,
22506 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22507 RTX_FRAME_RELATED_P (tmp) = 1;
22508 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22509
22510 /* We need to maintain a sequence for DWARF info too. As dwarf info
22511 should not have PC, skip PC. */
22512 if (i != PC_REGNUM)
22513 dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
22514
22515 j++;
22516 }
22517
22518 if (return_in_pc)
22519 par = emit_jump_insn (par);
22520 else
22521 par = emit_insn (par);
22522
22523 REG_NOTES (par) = dwarf;
22524 if (!return_in_pc)
22525 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22526 stack_pointer_rtx, stack_pointer_rtx);
22527 }
22528
22529 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22530 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22531
22532 Unfortunately, since this insn does not reflect very well the actual
22533 semantics of the operation, we need to annotate the insn for the benefit
22534 of DWARF2 frame unwind information. */
22535 static void
22536 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22537 {
22538 int i, j;
22539 rtx par;
22540 rtx dwarf = NULL_RTX;
22541 rtx tmp, reg;
22542
22543 gcc_assert (num_regs && num_regs <= 32);
22544
22545 /* Workaround ARM10 VFPr1 bug. */
22546 if (num_regs == 2 && !arm_arch6)
22547 {
22548 if (first_reg == 15)
22549 first_reg--;
22550
22551 num_regs++;
22552 }
22553
22554 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22555 there could be up to 32 D-registers to restore.
22556 If there are more than 16 D-registers, make two recursive calls,
22557 each of which emits one pop_multi instruction. */
22558 if (num_regs > 16)
22559 {
22560 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22561 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22562 return;
22563 }
22564
22565 /* The parallel needs to hold num_regs SETs
22566 and one SET for the stack update. */
22567 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22568
22569 /* Increment the stack pointer, based on there being
22570 num_regs 8-byte registers to restore. */
22571 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22572 RTX_FRAME_RELATED_P (tmp) = 1;
22573 XVECEXP (par, 0, 0) = tmp;
22574
22575 /* Now show every reg that will be restored, using a SET for each. */
22576 for (j = 0, i=first_reg; j < num_regs; i += 2)
22577 {
22578 reg = gen_rtx_REG (DFmode, i);
22579
22580 tmp = gen_rtx_SET (reg,
22581 gen_frame_mem
22582 (DFmode,
22583 plus_constant (Pmode, base_reg, 8 * j)));
22584 RTX_FRAME_RELATED_P (tmp) = 1;
22585 XVECEXP (par, 0, j + 1) = tmp;
22586
22587 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22588
22589 j++;
22590 }
22591
22592 par = emit_insn (par);
22593 REG_NOTES (par) = dwarf;
22594
22595 /* Make sure the CFA is not left based on IP_REGNUM, to allow unwinding from FP. */
22596 if (REGNO (base_reg) == IP_REGNUM)
22597 {
22598 RTX_FRAME_RELATED_P (par) = 1;
22599 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22600 }
22601 else
22602 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22603 base_reg, base_reg);
22604 }
22605
22606 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
22607 even number of registers is being popped, multiple LDRD patterns are created for
22608 all register pairs. If an odd number of registers is popped, the last register is
22609 loaded using an LDR pattern. */
22610 static void
22611 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22612 {
22613 int num_regs = 0;
22614 int i, j;
22615 rtx par = NULL_RTX;
22616 rtx dwarf = NULL_RTX;
22617 rtx tmp, reg, tmp1;
22618 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22619
22620 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22621 if (saved_regs_mask & (1 << i))
22622 num_regs++;
22623
22624 gcc_assert (num_regs && num_regs <= 16);
22625
22626 /* We cannot generate ldrd for PC, so reduce the count if PC is
22627 to be popped. If num_regs was even it now becomes odd,
22628 and we can generate a pop with PC. If num_regs was odd, it is
22629 now even, and an ldr with return can be generated for PC. */
22630 if (return_in_pc)
22631 num_regs--;
22632
22633 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22634
22635 /* Var j iterates over all the registers to find those set in
22636 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
22637 A PARALLEL RTX of register-pair is created here, so that pattern for
22638 LDRD can be matched. As PC is always last register to be popped, and
22639 we have already decremented num_regs if PC, we don't have to worry
22640 about PC in this loop. */
22641 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22642 if (saved_regs_mask & (1 << j))
22643 {
22644 /* Create RTX for memory load. */
22645 reg = gen_rtx_REG (SImode, j);
22646 tmp = gen_rtx_SET (reg,
22647 gen_frame_mem (SImode,
22648 plus_constant (Pmode,
22649 stack_pointer_rtx, 4 * i)));
22650 RTX_FRAME_RELATED_P (tmp) = 1;
22651
22652 if (i % 2 == 0)
22653 {
22654 /* When saved-register index (i) is even, the RTX to be emitted is
22655 yet to be created. Hence create it first. The LDRD pattern we
22656 are generating is :
22657 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22658 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22659 where target registers need not be consecutive. */
22660 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22661 dwarf = NULL_RTX;
22662 }
22663
22664 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22665 added as 0th element and if i is odd, reg_i is added as 1st element
22666 of LDRD pattern shown above. */
22667 XVECEXP (par, 0, (i % 2)) = tmp;
22668 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22669
22670 if ((i % 2) == 1)
22671 {
22672 /* When saved-register index (i) is odd, RTXs for both the registers
22673 to be loaded are generated in above given LDRD pattern, and the
22674 pattern can be emitted now. */
22675 par = emit_insn (par);
22676 REG_NOTES (par) = dwarf;
22677 RTX_FRAME_RELATED_P (par) = 1;
22678 }
22679
22680 i++;
22681 }
22682
22683 /* If the number of registers pushed is odd AND return_in_pc is false, OR
22684 the number of registers is even AND return_in_pc is true, the last register is
22685 popped using LDR. It can be PC as well. Hence, adjust the stack first and
22686 then load it using LDR with post-increment. */
22687
22688 /* Increment the stack pointer, based on there being
22689 num_regs 4-byte registers to restore. */
22690 tmp = gen_rtx_SET (stack_pointer_rtx,
22691 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22692 RTX_FRAME_RELATED_P (tmp) = 1;
22693 tmp = emit_insn (tmp);
22694 if (!return_in_pc)
22695 {
22696 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22697 stack_pointer_rtx, stack_pointer_rtx);
22698 }
22699
22700 dwarf = NULL_RTX;
22701
22702 if (((num_regs % 2) == 1 && !return_in_pc)
22703 || ((num_regs % 2) == 0 && return_in_pc))
22704 {
22705 /* Scan for the single register to be popped. Skip until the saved
22706 register is found. */
22707 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22708
22709 /* Gen LDR with post increment here. */
22710 tmp1 = gen_rtx_MEM (SImode,
22711 gen_rtx_POST_INC (SImode,
22712 stack_pointer_rtx));
22713 set_mem_alias_set (tmp1, get_frame_alias_set ());
22714
22715 reg = gen_rtx_REG (SImode, j);
22716 tmp = gen_rtx_SET (reg, tmp1);
22717 RTX_FRAME_RELATED_P (tmp) = 1;
22718 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22719
22720 if (return_in_pc)
22721 {
22722 /* If return_in_pc, j must be PC_REGNUM. */
22723 gcc_assert (j == PC_REGNUM);
22724 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22725 XVECEXP (par, 0, 0) = ret_rtx;
22726 XVECEXP (par, 0, 1) = tmp;
22727 par = emit_jump_insn (par);
22728 }
22729 else
22730 {
22731 par = emit_insn (tmp);
22732 REG_NOTES (par) = dwarf;
22733 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22734 stack_pointer_rtx, stack_pointer_rtx);
22735 }
22736
22737 }
22738 else if ((num_regs % 2) == 1 && return_in_pc)
22739 {
22740 /* There are 2 registers to be popped. So, generate the pattern
22741 pop_multiple_with_stack_update_and_return to pop in PC. */
22742 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22743 }
22744
22745 return;
22746 }
22747
22748 /* LDRD in ARM mode needs consecutive registers as operands. This function
22749 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22750 offset addressing and then generates one separate stack update. This provides
22751 more scheduling freedom, compared to writeback on every load. However,
22752 if the function returns using load into PC directly
22753 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22754 before the last load. TODO: Add a peephole optimization to recognize
22755 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22756 peephole optimization to merge the load at stack-offset zero
22757 with the stack update instruction using load with writeback
22758 in post-index addressing mode. */
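/* A hedged example of the above for the (hypothetical) mask {r4, r5, r6}:

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   All loads use offset addressing and a single SP adjustment follows; if
   PC were in the mask it would instead be loaded last, after the SP
   update, with "ldr pc, [sp], #4".  */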
22759 static void
22760 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22761 {
22762 int j = 0;
22763 int offset = 0;
22764 rtx par = NULL_RTX;
22765 rtx dwarf = NULL_RTX;
22766 rtx tmp, mem;
22767
22768 /* Restore saved registers. */
22769 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22770 j = 0;
22771 while (j <= LAST_ARM_REGNUM)
22772 if (saved_regs_mask & (1 << j))
22773 {
22774 if ((j % 2) == 0
22775 && (saved_regs_mask & (1 << (j + 1)))
22776 && (j + 1) != PC_REGNUM)
22777 {
22778 /* Current register and next register form a register pair for which
22779 LDRD can be generated. PC is always the last register popped, and
22780 we handle it separately. */
22781 if (offset > 0)
22782 mem = gen_frame_mem (DImode,
22783 plus_constant (Pmode,
22784 stack_pointer_rtx,
22785 offset));
22786 else
22787 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22788
22789 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22790 tmp = emit_insn (tmp);
22791 RTX_FRAME_RELATED_P (tmp) = 1;
22792
22793 /* Generate dwarf info. */
22794
22795 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22796 gen_rtx_REG (SImode, j),
22797 NULL_RTX);
22798 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22799 gen_rtx_REG (SImode, j + 1),
22800 dwarf);
22801
22802 REG_NOTES (tmp) = dwarf;
22803
22804 offset += 8;
22805 j += 2;
22806 }
22807 else if (j != PC_REGNUM)
22808 {
22809 /* Emit a single word load. */
22810 if (offset > 0)
22811 mem = gen_frame_mem (SImode,
22812 plus_constant (Pmode,
22813 stack_pointer_rtx,
22814 offset));
22815 else
22816 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22817
22818 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22819 tmp = emit_insn (tmp);
22820 RTX_FRAME_RELATED_P (tmp) = 1;
22821
22822 /* Generate dwarf info. */
22823 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22824 gen_rtx_REG (SImode, j),
22825 NULL_RTX);
22826
22827 offset += 4;
22828 j += 1;
22829 }
22830 else /* j == PC_REGNUM */
22831 j++;
22832 }
22833 else
22834 j++;
22835
22836 /* Update the stack. */
22837 if (offset > 0)
22838 {
22839 tmp = gen_rtx_SET (stack_pointer_rtx,
22840 plus_constant (Pmode,
22841 stack_pointer_rtx,
22842 offset));
22843 tmp = emit_insn (tmp);
22844 arm_add_cfa_adjust_cfa_note (tmp, offset,
22845 stack_pointer_rtx, stack_pointer_rtx);
22846 offset = 0;
22847 }
22848
22849 if (saved_regs_mask & (1 << PC_REGNUM))
22850 {
22851 /* Only PC is to be popped. */
22852 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22853 XVECEXP (par, 0, 0) = ret_rtx;
22854 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22855 gen_frame_mem (SImode,
22856 gen_rtx_POST_INC (SImode,
22857 stack_pointer_rtx)));
22858 RTX_FRAME_RELATED_P (tmp) = 1;
22859 XVECEXP (par, 0, 1) = tmp;
22860 par = emit_jump_insn (par);
22861
22862 /* Generate dwarf info. */
22863 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22864 gen_rtx_REG (SImode, PC_REGNUM),
22865 NULL_RTX);
22866 REG_NOTES (par) = dwarf;
22867 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22868 stack_pointer_rtx, stack_pointer_rtx);
22869 }
22870 }
22871
22872 /* Calculate the size of the return value that is passed in registers. */
22873 static unsigned
22874 arm_size_return_regs (void)
22875 {
22876 machine_mode mode;
22877
22878 if (crtl->return_rtx != 0)
22879 mode = GET_MODE (crtl->return_rtx);
22880 else
22881 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22882
22883 return GET_MODE_SIZE (mode);
22884 }
22885
22886 /* Return true if the current function needs to save/restore LR. */
22887 static bool
22888 thumb_force_lr_save (void)
22889 {
22890 return !cfun->machine->lr_save_eliminated
22891 && (!crtl->is_leaf
22892 || thumb_far_jump_used_p ()
22893 || df_regs_ever_live_p (LR_REGNUM));
22894 }
22895
22896 /* Return true if CALL is an indirect tail call, in which case
22897 we do not know whether r3 will be available, since the call
22898 address may live in any register, including r3. */
22899 static bool
22900 is_indirect_tailcall_p (rtx call)
22901 {
22902 rtx pat = PATTERN (call);
22903
22904 /* Indirect tail call. */
22905 pat = XVECEXP (pat, 0, 0);
22906 if (GET_CODE (pat) == SET)
22907 pat = SET_SRC (pat);
22908
22909 pat = XEXP (XEXP (pat, 0), 0);
22910 return REG_P (pat);
22911 }
22912
22913 /* Return true if r3 is used by any of the tail call insns in the
22914 current function. */
22915 static bool
22916 any_sibcall_could_use_r3 (void)
22917 {
22918 edge_iterator ei;
22919 edge e;
22920
22921 if (!crtl->tail_call_emit)
22922 return false;
22923 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22924 if (e->flags & EDGE_SIBCALL)
22925 {
22926 rtx_insn *call = BB_END (e->src);
22927 if (!CALL_P (call))
22928 call = prev_nonnote_nondebug_insn (call);
22929 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22930 if (find_regno_fusage (call, USE, 3)
22931 || is_indirect_tailcall_p (call))
22932 return true;
22933 }
22934 return false;
22935 }
22936
22937
22938 /* Compute the distance from register FROM to register TO.
22939 These can be the arg pointer (26), the soft frame pointer (25),
22940 the stack pointer (13) or the hard frame pointer (11).
22941 In thumb mode r7 is used as the soft frame pointer, if needed.
22942 Typical stack layout looks like this:
22943
22944 old stack pointer -> | |
22945 ----
22946 | | \
22947 | | saved arguments for
22948 | | vararg functions
22949 | | /
22950 --
22951 hard FP & arg pointer -> | | \
22952 | | stack
22953 | | frame
22954 | | /
22955 --
22956 | | \
22957 | | call saved
22958 | | registers
22959 soft frame pointer -> | | /
22960 --
22961 | | \
22962 | | local
22963 | | variables
22964 locals base pointer -> | | /
22965 --
22966 | | \
22967 | | outgoing
22968 | | arguments
22969 current stack pointer -> | | /
22970 --
22971
22972 For a given function some or all of these stack components
22973 may not be needed, giving rise to the possibility of
22974 eliminating some of the registers.
22975
22976 The values returned by this function must reflect the behavior
22977 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22978
22979 The sign of the number returned reflects the direction of stack
22980 growth, so the values are positive for all eliminations except
22981 from the soft frame pointer to the hard frame pointer.
22982
22983 SFP may point just inside the local variables block to ensure correct
22984 alignment. */
22985
22986
22987 /* Return cached stack offsets. */
22988
22989 static arm_stack_offsets *
22990 arm_get_frame_offsets (void)
22991 {
22992 struct arm_stack_offsets *offsets;
22993
22994 offsets = &cfun->machine->stack_offsets;
22995
22996 return offsets;
22997 }
22998
22999
23000 /* Calculate stack offsets. These are used to calculate register elimination
23001 offsets and in prologue/epilogue code. Also calculates which registers
23002 should be saved. */
23003
23004 static void
23005 arm_compute_frame_layout (void)
23006 {
23007 struct arm_stack_offsets *offsets;
23008 unsigned long func_type;
23009 int saved;
23010 int core_saved;
23011 HOST_WIDE_INT frame_size;
23012 int i;
23013
23014 offsets = &cfun->machine->stack_offsets;
23015
23016 /* Initially this is the size of the local variables. It will be translated
23017 into an offset once we have determined the size of preceding data. */
23018 frame_size = ROUND_UP_WORD (get_frame_size ());
23019
23020 /* Space for variadic functions. */
23021 offsets->saved_args = crtl->args.pretend_args_size;
23022
23023 /* In Thumb mode this is incorrect, but never used. */
23024 offsets->frame
23025 = (offsets->saved_args
23026 + arm_compute_static_chain_stack_bytes ()
23027 + (frame_pointer_needed ? 4 : 0));
23028
23029 if (TARGET_32BIT)
23030 {
23031 unsigned int regno;
23032
23033 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
23034 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23035 saved = core_saved;
23036
23037 /* We know that SP will be doubleword aligned on entry, and we must
23038 preserve that condition at any subroutine call. We also require the
23039 soft frame pointer to be doubleword aligned. */
23040
23041 if (TARGET_REALLY_IWMMXT)
23042 {
23043 /* Check for the call-saved iWMMXt registers. */
23044 for (regno = FIRST_IWMMXT_REGNUM;
23045 regno <= LAST_IWMMXT_REGNUM;
23046 regno++)
23047 if (reg_needs_saving_p (regno))
23048 saved += 8;
23049 }
23050
23051 func_type = arm_current_func_type ();
23052 /* Space for saved VFP registers. */
23053 if (! IS_VOLATILE (func_type)
23054 && TARGET_VFP_BASE)
23055 saved += arm_get_vfp_saved_size ();
23056
23057 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
23058 nonsecure entry functions with VSTR/VLDR. */
23059 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23060 saved += 4;
23061 }
23062 else /* TARGET_THUMB1 */
23063 {
23064 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
23065 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23066 saved = core_saved;
23067 if (TARGET_BACKTRACE)
23068 saved += 16;
23069 }
23070
23071 /* Saved registers include the stack frame. */
23072 offsets->saved_regs
23073 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
23074 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
23075
23076 /* A leaf function does not need any stack alignment if it has nothing
23077 on the stack. */
23078 if (crtl->is_leaf && frame_size == 0
23079 /* However if it calls alloca(), we have a dynamically allocated
23080 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23081 && ! cfun->calls_alloca)
23082 {
23083 offsets->outgoing_args = offsets->soft_frame;
23084 offsets->locals_base = offsets->soft_frame;
23085 return;
23086 }
23087
23088 /* Ensure SFP has the correct alignment. */
23089 if (ARM_DOUBLEWORD_ALIGN
23090 && (offsets->soft_frame & 7))
23091 {
23092 offsets->soft_frame += 4;
23093 /* Try to align stack by pushing an extra reg. Don't bother doing this
23094 when there is a stack frame as the alignment will be rolled into
23095 the normal stack adjustment. */
23096 if (frame_size + crtl->outgoing_args_size == 0)
23097 {
23098 int reg = -1;
23099
23100 /* Register r3 is caller-saved. Normally it does not need to be
23101 saved on entry by the prologue. However if we choose to save
23102 it for padding then we may confuse the compiler into thinking
23103 a prologue sequence is required when in fact it is not. This
23104 will occur when shrink-wrapping if r3 is used as a scratch
23105 register and there are no other callee-saved writes.
23106
23107 This situation can be avoided when other callee-saved registers
23108 are available and r3 is not mandatory, by choosing a callee-saved
23109 register for the padding instead.
23110 bool prefer_callee_reg_p = false;
23111
23112 /* If it is safe to use r3, then do so. This sometimes
23113 generates better code on Thumb-2 by avoiding the need to
23114 use 32-bit push/pop instructions. */
23115 if (! any_sibcall_could_use_r3 ()
23116 && arm_size_return_regs () <= 12
23117 && (offsets->saved_regs_mask & (1 << 3)) == 0
23118 && (TARGET_THUMB2
23119 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23120 {
23121 reg = 3;
23122 if (!TARGET_THUMB2)
23123 prefer_callee_reg_p = true;
23124 }
23125 if (reg == -1
23126 || prefer_callee_reg_p)
23127 {
23128 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23129 {
23130 /* Avoid fixed registers; they may be changed at
23131 arbitrary times so it's unsafe to restore them
23132 during the epilogue. */
23133 if (!fixed_regs[i]
23134 && (offsets->saved_regs_mask & (1 << i)) == 0)
23135 {
23136 reg = i;
23137 break;
23138 }
23139 }
23140 }
23141
23142 if (reg != -1)
23143 {
23144 offsets->saved_regs += 4;
23145 offsets->saved_regs_mask |= (1 << reg);
23146 }
23147 }
23148 }
23149
23150 offsets->locals_base = offsets->soft_frame + frame_size;
23151 offsets->outgoing_args = (offsets->locals_base
23152 + crtl->outgoing_args_size);
23153
23154 if (ARM_DOUBLEWORD_ALIGN)
23155 {
23156 /* Ensure SP remains doubleword aligned. */
23157 if (offsets->outgoing_args & 7)
23158 offsets->outgoing_args += 4;
23159 gcc_assert (!(offsets->outgoing_args & 7));
23160 }
23161 }
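/* As a rough worked example of the offsets computed above (all values are
   hypothetical), an ARM function with 16 bytes of pretend arguments, no
   static chain, four core registers to save, 24 bytes of locals and 8
   bytes of outgoing arguments ends up with

     saved_args	   = 16
     frame	   = 16 (+4 if a frame pointer is needed)
     saved_regs	   = 16 + 4 * 4 = 32
     soft_frame	   = saved_regs + CALLER_INTERWORKING_SLOT_SIZE
     locals_base   = soft_frame + 24
     outgoing_args = locals_base + 8, rounded up so that SP stays
		     doubleword aligned when ARM_DOUBLEWORD_ALIGN is set.  */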
23162
23163
23164 /* Calculate the relative offsets for the different stack pointers. Positive
23165 offsets are in the direction of stack growth. */
23166
23167 HOST_WIDE_INT
23168 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23169 {
23170 arm_stack_offsets *offsets;
23171
23172 offsets = arm_get_frame_offsets ();
23173
23174 /* OK, now we have enough information to compute the distances.
23175 There must be an entry in these switch tables for each pair
23176 of registers in ELIMINABLE_REGS, even if some of the entries
23177 seem to be redundant or useless. */
23178 switch (from)
23179 {
23180 case ARG_POINTER_REGNUM:
23181 switch (to)
23182 {
23183 case THUMB_HARD_FRAME_POINTER_REGNUM:
23184 return 0;
23185
23186 case FRAME_POINTER_REGNUM:
23187 /* This is the reverse of the soft frame pointer
23188 to hard frame pointer elimination below. */
23189 return offsets->soft_frame - offsets->saved_args;
23190
23191 case ARM_HARD_FRAME_POINTER_REGNUM:
23192 /* This is only non-zero in the case where the static chain register
23193 is stored above the frame. */
23194 return offsets->frame - offsets->saved_args - 4;
23195
23196 case STACK_POINTER_REGNUM:
23197 /* If nothing has been pushed on the stack at all
23198 then this will return -4. This *is* correct! */
23199 return offsets->outgoing_args - (offsets->saved_args + 4);
23200
23201 default:
23202 gcc_unreachable ();
23203 }
23204 gcc_unreachable ();
23205
23206 case FRAME_POINTER_REGNUM:
23207 switch (to)
23208 {
23209 case THUMB_HARD_FRAME_POINTER_REGNUM:
23210 return 0;
23211
23212 case ARM_HARD_FRAME_POINTER_REGNUM:
23213 /* The hard frame pointer points to the top entry in the
23214 stack frame. The soft frame pointer points to the bottom entry
23215 in the stack frame. If there is no stack frame at all,
23216 then they are identical. */
23217
23218 return offsets->frame - offsets->soft_frame;
23219
23220 case STACK_POINTER_REGNUM:
23221 return offsets->outgoing_args - offsets->soft_frame;
23222
23223 default:
23224 gcc_unreachable ();
23225 }
23226 gcc_unreachable ();
23227
23228 default:
23229 /* You cannot eliminate from the stack pointer.
23230 In theory you could eliminate from the hard frame
23231 pointer to the stack pointer, but this will never
23232 happen, since if a stack frame is not needed the
23233 hard frame pointer will never be used. */
23234 gcc_unreachable ();
23235 }
23236 }
23237
23238 /* Given FROM and TO register numbers, say whether this elimination is
23239 allowed. Frame pointer elimination is automatically handled.
23240
23241 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23242 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23243 pointer, we must eliminate FRAME_POINTER_REGNUM into
23244 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23245 ARG_POINTER_REGNUM. */
23246
23247 bool
23248 arm_can_eliminate (const int from, const int to)
23249 {
23250 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23251 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23252 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23253 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23254 true);
23255 }
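/* Summarizing the chained conditions above:

     ARG_POINTER -> FRAME_POINTER		never allowed
     any	 -> STACK_POINTER		only if no frame pointer is needed
     any	 -> ARM_HARD_FRAME_POINTER	only when not compiling for Thumb
     any	 -> THUMB_HARD_FRAME_POINTER	only when not compiling for ARM
     all other combinations			allowed.  */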
23256
23257 /* Emit RTL to save coprocessor registers on function entry. Returns the
23258 number of bytes pushed. */
23259
23260 static int
23261 arm_save_coproc_regs(void)
23262 {
23263 int saved_size = 0;
23264 unsigned reg;
23265 unsigned start_reg;
23266 rtx insn;
23267
23268 if (TARGET_REALLY_IWMMXT)
23269 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23270 if (reg_needs_saving_p (reg))
23271 {
23272 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23273 insn = gen_rtx_MEM (V2SImode, insn);
23274 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23275 RTX_FRAME_RELATED_P (insn) = 1;
23276 saved_size += 8;
23277 }
23278
23279 if (TARGET_VFP_BASE)
23280 {
23281 start_reg = FIRST_VFP_REGNUM;
23282
23283 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23284 {
23285 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23286 {
23287 if (start_reg != reg)
23288 saved_size += vfp_emit_fstmd (start_reg,
23289 (reg - start_reg) / 2);
23290 start_reg = reg + 2;
23291 }
23292 }
23293 if (start_reg != reg)
23294 saved_size += vfp_emit_fstmd (start_reg,
23295 (reg - start_reg) / 2);
23296 }
23297 return saved_size;
23298 }
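/* Note that the VFP loop above batches contiguous runs of live D registers
   into single store-multiple instructions: for instance (purely
   illustrative), if only d8-d11 need saving, a single vfp_emit_fstmd call
   covering those four registers is emitted rather than four separate
   stores.  */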
23299
23300
23301 /* Set the Thumb frame pointer from the stack pointer. */
23302
23303 static void
23304 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23305 {
23306 HOST_WIDE_INT amount;
23307 rtx insn, dwarf;
23308
23309 amount = offsets->outgoing_args - offsets->locals_base;
23310 if (amount < 1024)
23311 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23312 stack_pointer_rtx, GEN_INT (amount)));
23313 else
23314 {
23315 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23316 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23317 expects the first two operands to be the same. */
23318 if (TARGET_THUMB2)
23319 {
23320 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23321 stack_pointer_rtx,
23322 hard_frame_pointer_rtx));
23323 }
23324 else
23325 {
23326 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23327 hard_frame_pointer_rtx,
23328 stack_pointer_rtx));
23329 }
23330 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23331 plus_constant (Pmode, stack_pointer_rtx, amount));
23332 RTX_FRAME_RELATED_P (dwarf) = 1;
23333 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23334 }
23335
23336 RTX_FRAME_RELATED_P (insn) = 1;
23337 }
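/* A sketch of what the code above emits ("hfp" stands for the Thumb hard
   frame pointer; the offset is illustrative):

	;; amount < 1024
	add	hfp, sp, #amount

	;; larger amounts, Thumb-2 (Thumb-1 uses add hfp, hfp, sp)
	mov	hfp, #amount
	add	hfp, sp, hfp

   with, in the second case, a REG_FRAME_RELATED_EXPR note recording
   hfp = sp + amount for the unwinder.  */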
23338
23339 struct scratch_reg {
23340 rtx reg;
23341 bool saved;
23342 };
23343
23344 /* Return a short-lived scratch register for use as a 2nd scratch register on
23345 function entry after the registers are saved in the prologue. This register
23346 must be released by means of release_scratch_register_on_entry. IP is not
23347 considered since it is always used as the 1st scratch register if available.
23348
23349 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23350 mask of live registers. */
23351
23352 static void
23353 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23354 unsigned long live_regs)
23355 {
23356 int regno = -1;
23357
23358 sr->saved = false;
23359
23360 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23361 regno = LR_REGNUM;
23362 else
23363 {
23364 unsigned int i;
23365
23366 for (i = 4; i < 11; i++)
23367 if (regno1 != i && (live_regs & (1 << i)) != 0)
23368 {
23369 regno = i;
23370 break;
23371 }
23372
23373 if (regno < 0)
23374 {
23375 /* If IP is used as the 1st scratch register for a nested function,
23376 then either r3 wasn't available or is used to preserve IP. */
23377 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23378 regno1 = 3;
23379 regno = (regno1 == 3 ? 2 : 3);
23380 sr->saved
23381 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23382 regno);
23383 }
23384 }
23385
23386 sr->reg = gen_rtx_REG (SImode, regno);
23387 if (sr->saved)
23388 {
23389 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23390 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23391 rtx x = gen_rtx_SET (stack_pointer_rtx,
23392 plus_constant (Pmode, stack_pointer_rtx, -4));
23393 RTX_FRAME_RELATED_P (insn) = 1;
23394 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23395 }
23396 }
23397
23398 /* Release a scratch register obtained from the preceding function. */
23399
23400 static void
23401 release_scratch_register_on_entry (struct scratch_reg *sr)
23402 {
23403 if (sr->saved)
23404 {
23405 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23406 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23407 rtx x = gen_rtx_SET (stack_pointer_rtx,
23408 plus_constant (Pmode, stack_pointer_rtx, 4));
23409 RTX_FRAME_RELATED_P (insn) = 1;
23410 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23411 }
23412 }
23413
23414 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23415
23416 #if PROBE_INTERVAL > 4096
23417 #error Cannot use indexed addressing mode for stack probing
23418 #endif
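/* For example, with the default STACK_CHECK_PROBE_INTERVAL_EXP of 12
   (assuming the target does not override it), PROBE_INTERVAL is 4096
   bytes, which still fits the 12-bit offset range checked for below
   (the "rem > 4095" tests).  */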
23419
23420 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23421 inclusive. These are offsets from the current stack pointer. REGNO1
23422 is the index number of the 1st scratch register and LIVE_REGS is the
23423 mask of live registers. */
23424
23425 static void
23426 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23427 unsigned int regno1, unsigned long live_regs)
23428 {
23429 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23430
23431 /* See if we have a constant small number of probes to generate. If so,
23432 that's the easy case. */
23433 if (size <= PROBE_INTERVAL)
23434 {
23435 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23436 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23437 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23438 }
23439
23440 /* The run-time loop is made up of 10 insns in the generic case while the
23441 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
23442 else if (size <= 5 * PROBE_INTERVAL)
23443 {
23444 HOST_WIDE_INT i, rem;
23445
23446 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23447 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23448 emit_stack_probe (reg1);
23449
23450 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23451 it exceeds SIZE. If only two probes are needed, this will not
23452 generate any code. Then probe at FIRST + SIZE. */
23453 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23454 {
23455 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23456 emit_stack_probe (reg1);
23457 }
23458
23459 rem = size - (i - PROBE_INTERVAL);
23460 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23461 {
23462 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23463 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23464 }
23465 else
23466 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23467 }
23468
23469 /* Otherwise, do the same as above, but in a loop. Note that we must be
23470 extra careful with variables wrapping around because we might be at
23471 the very top (or the very bottom) of the address space and we have
23472 to be able to handle this case properly; in particular, we use an
23473 equality test for the loop condition. */
23474 else
23475 {
23476 HOST_WIDE_INT rounded_size;
23477 struct scratch_reg sr;
23478
23479 get_scratch_register_on_entry (&sr, regno1, live_regs);
23480
23481 emit_move_insn (reg1, GEN_INT (first));
23482
23483
23484 /* Step 1: round SIZE to the previous multiple of the interval. */
23485
23486 rounded_size = size & -PROBE_INTERVAL;
23487 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23488
23489
23490 /* Step 2: compute initial and final value of the loop counter. */
23491
23492 /* TEST_ADDR = SP + FIRST. */
23493 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23494
23495 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23496 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23497
23498
23499 /* Step 3: the loop
23500
23501 do
23502 {
23503 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23504 probe at TEST_ADDR
23505 }
23506 while (TEST_ADDR != LAST_ADDR)
23507
23508 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23509 until it is equal to ROUNDED_SIZE. */
23510
23511 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23512
23513
23514 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23515 that SIZE is equal to ROUNDED_SIZE. */
23516
23517 if (size != rounded_size)
23518 {
23519 HOST_WIDE_INT rem = size - rounded_size;
23520
23521 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23522 {
23523 emit_set_insn (sr.reg,
23524 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23525 emit_stack_probe (plus_constant (Pmode, sr.reg,
23526 PROBE_INTERVAL - rem));
23527 }
23528 else
23529 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23530 }
23531
23532 release_scratch_register_on_entry (&sr);
23533 }
23534
23535 /* Make sure nothing is scheduled before we are done. */
23536 emit_insn (gen_blockage ());
23537 }
23538
23539 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23540 absolute addresses. */
23541
23542 const char *
23543 output_probe_stack_range (rtx reg1, rtx reg2)
23544 {
23545 static int labelno = 0;
23546 char loop_lab[32];
23547 rtx xops[2];
23548
23549 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23550
23551 /* Loop. */
23552 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23553
23554 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23555 xops[0] = reg1;
23556 xops[1] = GEN_INT (PROBE_INTERVAL);
23557 output_asm_insn ("sub\t%0, %0, %1", xops);
23558
23559 /* Probe at TEST_ADDR. */
23560 output_asm_insn ("str\tr0, [%0, #0]", xops);
23561
23562 /* Test if TEST_ADDR == LAST_ADDR. */
23563 xops[1] = reg2;
23564 output_asm_insn ("cmp\t%0, %1", xops);
23565
23566 /* Branch. */
23567 fputs ("\tbne\t", asm_out_file);
23568 assemble_name_raw (asm_out_file, loop_lab);
23569 fputc ('\n', asm_out_file);
23570
23571 return "";
23572 }
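/* As an illustration, with a 4096-byte PROBE_INTERVAL and the scratch
   registers r4 (TEST_ADDR) and r5 (LAST_ADDR), the template above expands
   to something like:

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0

   (register names and the label number are illustrative).  */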
23573
23574 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23575 function. */
23576 void
23577 arm_expand_prologue (void)
23578 {
23579 rtx amount;
23580 rtx insn;
23581 rtx ip_rtx;
23582 unsigned long live_regs_mask;
23583 unsigned long func_type;
23584 int fp_offset = 0;
23585 int saved_pretend_args = 0;
23586 int saved_regs = 0;
23587 unsigned HOST_WIDE_INT args_to_push;
23588 HOST_WIDE_INT size;
23589 arm_stack_offsets *offsets;
23590 bool clobber_ip;
23591
23592 func_type = arm_current_func_type ();
23593
23594 /* Naked functions don't have prologues. */
23595 if (IS_NAKED (func_type))
23596 {
23597 if (flag_stack_usage_info)
23598 current_function_static_stack_size = 0;
23599 return;
23600 }
23601
23602 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
23603 args_to_push = crtl->args.pretend_args_size;
23604
23605 /* Compute which registers we will have to save onto the stack. */
23606 offsets = arm_get_frame_offsets ();
23607 live_regs_mask = offsets->saved_regs_mask;
23608
23609 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23610
23611 if (IS_STACKALIGN (func_type))
23612 {
23613 rtx r0, r1;
23614
23615 /* Handle a word-aligned stack pointer. We generate the following:
23616
23617 mov r0, sp
23618 bic r1, r0, #7
23619 mov sp, r1
23620 <save and restore r0 in normal prologue/epilogue>
23621 mov sp, r0
23622 bx lr
23623
23624 The unwinder doesn't need to know about the stack realignment.
23625 Just tell it we saved SP in r0. */
23626 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23627
23628 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23629 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23630
23631 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23632 RTX_FRAME_RELATED_P (insn) = 1;
23633 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23634
23635 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23636
23637 /* ??? The CFA changes here, which may cause GDB to conclude that it
23638 has entered a different function. That said, the unwind info is
23639 correct, individually, before and after this instruction because
23640 we've described the save of SP, which will override the default
23641 handling of SP as restoring from the CFA. */
23642 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23643 }
23644
23645 /* Let's compute the static_chain_stack_bytes required and store it. Right
23646 now the value must be -1 as stored by arm_init_machine_status (). */
23647 cfun->machine->static_chain_stack_bytes
23648 = arm_compute_static_chain_stack_bytes ();
23649
23650 /* The static chain register is the same as the IP register. If it is
23651 clobbered when creating the frame, we need to save and restore it. */
23652 clobber_ip = (IS_NESTED (func_type)
23653 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23654 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23655 || flag_stack_clash_protection)
23656 && !df_regs_ever_live_p (LR_REGNUM)
23657 && arm_r3_live_at_start_p ()))
23658 || arm_current_function_pac_enabled_p ()));
23659
23660 /* Find somewhere to store IP whilst the frame is being created.
23661 We try the following places in order:
23662
23663 1. The last argument register r3 if it is available.
23664 2. A slot on the stack above the frame if there are no
23665 arguments to push onto the stack.
23666 3. Register r3 again, after pushing the argument registers
23667 onto the stack, if this is a varargs function.
23668 4. The last slot on the stack created for the arguments to
23669 push, if this isn't a varargs function.
23670
23671 Note - we only need to tell the dwarf2 backend about the SP
23672 adjustment in the second variant; the static chain register
23673 doesn't need to be unwound, as it doesn't contain a value
23674 inherited from the caller. */
23675 if (clobber_ip)
23676 {
23677 if (!arm_r3_live_at_start_p ())
23678 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23679 else if (args_to_push == 0)
23680 {
23681 rtx addr, dwarf;
23682
23683 saved_regs += 4;
23684
23685 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23686 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23687 fp_offset = 4;
23688
23689 /* Just tell the dwarf backend that we adjusted SP. */
23690 dwarf = gen_rtx_SET (stack_pointer_rtx,
23691 plus_constant (Pmode, stack_pointer_rtx,
23692 -fp_offset));
23693 RTX_FRAME_RELATED_P (insn) = 1;
23694 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23695 if (arm_current_function_pac_enabled_p ())
23696 cfun->machine->pacspval_needed = 1;
23697 }
23698 else
23699 {
23700 /* Store the args on the stack. */
23701 if (cfun->machine->uses_anonymous_args)
23702 {
23703 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23704 (0xf0 >> (args_to_push / 4)) & 0xf);
23705 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23706 saved_pretend_args = 1;
23707 }
23708 else
23709 {
23710 rtx addr, dwarf;
23711
23712 if (args_to_push == 4)
23713 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23714 else
23715 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23716 plus_constant (Pmode,
23717 stack_pointer_rtx,
23718 -args_to_push));
23719
23720 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23721
23722 /* Just tell the dwarf backend that we adjusted SP. */
23723 dwarf = gen_rtx_SET (stack_pointer_rtx,
23724 plus_constant (Pmode, stack_pointer_rtx,
23725 -args_to_push));
23726 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23727 }
23728
23729 RTX_FRAME_RELATED_P (insn) = 1;
23730 fp_offset = args_to_push;
23731 args_to_push = 0;
23732 if (arm_current_function_pac_enabled_p ())
23733 cfun->machine->pacspval_needed = 1;
23734 }
23735 }
23736
23737 if (arm_current_function_pac_enabled_p ())
23738 {
23739 /* If IP was clobbered we only emit a PAC instruction as the BTI
23740 one will be added before the push of the clobbered IP (if
23741 necessary) by the bti pass. */
23742 if (aarch_bti_enabled () && !clobber_ip)
23743 insn = emit_insn (gen_pacbti_nop ());
23744 else
23745 insn = emit_insn (gen_pac_nop ());
23746
23747 rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
23748 RTX_FRAME_RELATED_P (insn) = 1;
23749 add_reg_note (insn, REG_CFA_REGISTER, dwarf);
23750 }
23751
23752 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23753 {
23754 if (IS_INTERRUPT (func_type))
23755 {
23756 /* Interrupt functions must not corrupt any registers.
23757 Creating a frame pointer, however, corrupts the IP
23758 register, so we must push it first. */
23759 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23760
23761 /* Do not set RTX_FRAME_RELATED_P on this insn.
23762 The dwarf stack unwinding code only wants to see one
23763 stack decrement per function, and this is not it. If
23764 this instruction is labeled as being part of the frame
23765 creation sequence then dwarf2out_frame_debug_expr will
23766 die when it encounters the assignment of IP to FP
23767 later on, since the use of SP here establishes SP as
23768 the CFA register and not IP.
23769
23770 Anyway this instruction is not really part of the stack
23771 frame creation although it is part of the prologue. */
23772 }
23773
23774 insn = emit_set_insn (ip_rtx,
23775 plus_constant (Pmode, stack_pointer_rtx,
23776 fp_offset));
23777 RTX_FRAME_RELATED_P (insn) = 1;
23778 }
23779
23780 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23781 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23782 {
23783 saved_regs += 4;
23784 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23785 GEN_INT (FPCXTNS_ENUM)));
23786 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23787 plus_constant (Pmode, stack_pointer_rtx, -4));
23788 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23789 RTX_FRAME_RELATED_P (insn) = 1;
23790 }
23791
23792 if (args_to_push)
23793 {
23794 /* Push the argument registers, or reserve space for them. */
23795 if (cfun->machine->uses_anonymous_args)
23796 insn = emit_multi_reg_push
23797 ((0xf0 >> (args_to_push / 4)) & 0xf,
23798 (0xf0 >> (args_to_push / 4)) & 0xf);
23799 else
23800 insn = emit_insn
23801 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23802 GEN_INT (- args_to_push)));
23803 RTX_FRAME_RELATED_P (insn) = 1;
23804 }
23805
23806 /* If this is an interrupt service routine, and the link register
23807 is going to be pushed, and we're not generating an extra
23808 push of IP (needed when a frame is needed and the frame layout is APCS),
23809 subtracting four from LR now will mean that the function return
23810 can be done with a single instruction. */
23811 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23812 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23813 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23814 && TARGET_ARM)
23815 {
23816 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23817
23818 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23819 }
23820
23821 if (live_regs_mask)
23822 {
23823 unsigned long dwarf_regs_mask = live_regs_mask;
23824
23825 saved_regs += bit_count (live_regs_mask) * 4;
23826 if (optimize_size && !frame_pointer_needed
23827 && saved_regs == offsets->saved_regs - offsets->saved_args)
23828 {
23829 /* If no coprocessor registers are being pushed and we don't have
23830 to worry about a frame pointer then push extra registers to
23831 create the stack frame. This is done in a way that does not
23832 alter the frame layout, so is independent of the epilogue. */
23833 int n;
23834 int frame;
23835 n = 0;
23836 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23837 n++;
23838 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23839 if (frame && n * 4 >= frame)
23840 {
23841 n = frame / 4;
23842 live_regs_mask |= (1 << n) - 1;
23843 saved_regs += frame;
23844 }
23845 }
23846
23847 if (TARGET_LDRD
23848 && current_tune->prefer_ldrd_strd
23849 && !optimize_function_for_size_p (cfun))
23850 {
23851 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23852 if (TARGET_THUMB2)
23853 thumb2_emit_strd_push (live_regs_mask);
23854 else if (TARGET_ARM
23855 && !TARGET_APCS_FRAME
23856 && !IS_INTERRUPT (func_type))
23857 arm_emit_strd_push (live_regs_mask);
23858 else
23859 {
23860 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23861 RTX_FRAME_RELATED_P (insn) = 1;
23862 }
23863 }
23864 else
23865 {
23866 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23867 RTX_FRAME_RELATED_P (insn) = 1;
23868 }
23869 }
23870
23871 if (! IS_VOLATILE (func_type))
23872 saved_regs += arm_save_coproc_regs ();
23873
23874 if (frame_pointer_needed && TARGET_ARM)
23875 {
23876 /* Create the new frame pointer. */
23877 if (TARGET_APCS_FRAME)
23878 {
23879 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23880 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23881 RTX_FRAME_RELATED_P (insn) = 1;
23882 }
23883 else
23884 {
23885 insn = GEN_INT (saved_regs - (4 + fp_offset));
23886 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23887 stack_pointer_rtx, insn));
23888 RTX_FRAME_RELATED_P (insn) = 1;
23889 }
23890 }
23891
23892 size = offsets->outgoing_args - offsets->saved_args;
23893 if (flag_stack_usage_info)
23894 current_function_static_stack_size = size;
23895
23896 /* If this isn't an interrupt service routine and we have a frame, then do
23897 stack checking. We use IP as the first scratch register, except for the
23898 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23899 if (!IS_INTERRUPT (func_type)
23900 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23901 || flag_stack_clash_protection))
23902 {
23903 unsigned int regno;
23904
23905 if (!IS_NESTED (func_type) || clobber_ip)
23906 regno = IP_REGNUM;
23907 else if (df_regs_ever_live_p (LR_REGNUM))
23908 regno = LR_REGNUM;
23909 else
23910 regno = 3;
23911
23912 if (crtl->is_leaf && !cfun->calls_alloca)
23913 {
23914 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23915 arm_emit_probe_stack_range (get_stack_check_protect (),
23916 size - get_stack_check_protect (),
23917 regno, live_regs_mask);
23918 }
23919 else if (size > 0)
23920 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23921 regno, live_regs_mask);
23922 }
23923
23924 /* Recover the static chain register. */
23925 if (clobber_ip)
23926 {
23927 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23928 insn = gen_rtx_REG (SImode, 3);
23929 else
23930 {
23931 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23932 insn = gen_frame_mem (SImode, insn);
23933 }
23934 emit_set_insn (ip_rtx, insn);
23935 emit_insn (gen_force_register_use (ip_rtx));
23936 }
23937
23938 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23939 {
23940 /* This add can produce multiple insns for a large constant, so we
23941 need to get tricky. */
23942 rtx_insn *last = get_last_insn ();
23943
23944 amount = GEN_INT (offsets->saved_args + saved_regs
23945 - offsets->outgoing_args);
23946
23947 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23948 amount));
23949 do
23950 {
23951 last = last ? NEXT_INSN (last) : get_insns ();
23952 RTX_FRAME_RELATED_P (last) = 1;
23953 }
23954 while (last != insn);
23955
23956 /* If the frame pointer is needed, emit a special barrier that
23957 will prevent the scheduler from moving stores to the frame
23958 before the stack adjustment. */
23959 if (frame_pointer_needed)
23960 emit_insn (gen_stack_tie (stack_pointer_rtx,
23961 hard_frame_pointer_rtx));
23962 }
23963
23964
23965 if (frame_pointer_needed && TARGET_THUMB2)
23966 thumb_set_frame_pointer (offsets);
23967
23968 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23969 {
23970 unsigned long mask;
23971
23972 mask = live_regs_mask;
23973 mask &= THUMB2_WORK_REGS;
23974 if (!IS_NESTED (func_type))
23975 mask |= (1 << IP_REGNUM);
23976 arm_load_pic_register (mask, NULL_RTX);
23977 }
23978
23979 /* If we are profiling, make sure no instructions are scheduled before
23980 the call to mcount. Similarly if the user has requested no
23981 scheduling in the prolog. Similarly if we want non-call exceptions
23982 using the EABI unwinder, to prevent faulting instructions from being
23983 swapped with a stack adjustment. */
23984 if (crtl->profile || !TARGET_SCHED_PROLOG
23985 || (arm_except_unwind_info (&global_options) == UI_TARGET
23986 && cfun->can_throw_non_call_exceptions))
23987 emit_insn (gen_blockage ());
23988
23989 /* If the link register is being kept alive, with the return address in it,
23990 then make sure that it does not get reused by the ce2 pass. */
23991 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23992 cfun->machine->lr_save_eliminated = 1;
23993 }
23994 \f
23995 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23996 static void
23997 arm_print_condition (FILE *stream)
23998 {
23999 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
24000 {
24001 /* Branch conversion is not implemented for Thumb-2. */
24002 if (TARGET_THUMB)
24003 {
24004 output_operand_lossage ("predicated Thumb instruction");
24005 return;
24006 }
24007 if (current_insn_predicate != NULL)
24008 {
24009 output_operand_lossage
24010 ("predicated instruction in conditional sequence");
24011 return;
24012 }
24013
24014 fputs (arm_condition_codes[arm_current_cc], stream);
24015 }
24016 else if (current_insn_predicate)
24017 {
24018 enum arm_cond_code code;
24019
24020 if (TARGET_THUMB1)
24021 {
24022 output_operand_lossage ("predicated Thumb instruction");
24023 return;
24024 }
24025
24026 code = get_arm_condition_code (current_insn_predicate);
24027 fputs (arm_condition_codes[code], stream);
24028 }
24029 }
24030
24031
24032 /* Globally reserved letters: acln
24033 Punctuation letters currently used: @_|?().!#
24034 Lower case letters currently used: bcdefhimpqtvwxyz
24035 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
24036 Letters previously used, but now deprecated/obsolete: sWXYZ.
24037
24038 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
24039
24040 If CODE is 'd', then the X is a condition operand and the instruction
24041 should only be executed if the condition is true.
24042 If CODE is 'D', then the X is a condition operand and the instruction
24043 should only be executed if the condition is false: however, if the mode
24044 of the comparison is CCFPEmode, then always execute the instruction -- we
24045 do this because in these circumstances !GE does not necessarily imply LT;
24046 in these cases the instruction pattern will take care to make sure that
24047 an instruction containing %d will follow, thereby undoing the effects of
24048 doing this instruction unconditionally.
24049 If CODE is 'N' then X is a floating point operand that must be negated
24050 before output.
24051 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24052 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24053 If CODE is 'V', then the operand must be a CONST_INT representing
24054 the bits to preserve in the modified register (Rd) of a BFI or BFC
24055 instruction: print out both the width and lsb (shift) fields. */
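/* A few worked examples of the codes handled below (operand values are
   purely illustrative): if operand 0 is the CONST_INT 5, "%B0" prints -6
   (the bitwise inverse, sign-extended) and "%x0" prints #0x5; if operand 0
   is the CONST_INT 8, "%b0" prints #3 (its log2).  */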
24056 static void
24057 arm_print_operand (FILE *stream, rtx x, int code)
24058 {
24059 switch (code)
24060 {
24061 case '@':
24062 fputs (ASM_COMMENT_START, stream);
24063 return;
24064
24065 case '_':
24066 fputs (user_label_prefix, stream);
24067 return;
24068
24069 case '|':
24070 fputs (REGISTER_PREFIX, stream);
24071 return;
24072
24073 case '?':
24074 arm_print_condition (stream);
24075 return;
24076
24077 case '.':
24078 /* The current condition code for a condition code setting instruction.
24079 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24080 fputc('s', stream);
24081 arm_print_condition (stream);
24082 return;
24083
24084 case '!':
24085 /* If the instruction is conditionally executed then print
24086 the current condition code, otherwise print 's'. */
24087 gcc_assert (TARGET_THUMB2);
24088 if (current_insn_predicate)
24089 arm_print_condition (stream);
24090 else
24091 fputc('s', stream);
24092 break;
24093
24094 /* %# is a "break" sequence. It doesn't output anything, but is used to
24095 separate e.g. operand numbers from following text, if that text consists
24096 of further digits which we don't want to be part of the operand
24097 number. */
24098 case '#':
24099 return;
24100
24101 case 'N':
24102 {
24103 REAL_VALUE_TYPE r;
24104 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
24105 fprintf (stream, "%s", fp_const_from_val (&r));
24106 }
24107 return;
24108
24109 /* An integer or symbol address without a preceding # sign. */
24110 case 'c':
24111 switch (GET_CODE (x))
24112 {
24113 case CONST_INT:
24114 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24115 break;
24116
24117 case SYMBOL_REF:
24118 output_addr_const (stream, x);
24119 break;
24120
24121 case CONST:
24122 if (GET_CODE (XEXP (x, 0)) == PLUS
24123 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24124 {
24125 output_addr_const (stream, x);
24126 break;
24127 }
24128 /* Fall through. */
24129
24130 default:
24131 output_operand_lossage ("Unsupported operand for code '%c'", code);
24132 }
24133 return;
24134
24135 /* An integer that we want to print in HEX. */
24136 case 'x':
24137 switch (GET_CODE (x))
24138 {
24139 case CONST_INT:
24140 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24141 break;
24142
24143 default:
24144 output_operand_lossage ("Unsupported operand for code '%c'", code);
24145 }
24146 return;
24147
24148 case 'B':
24149 if (CONST_INT_P (x))
24150 {
24151 HOST_WIDE_INT val;
24152 val = ARM_SIGN_EXTEND (~INTVAL (x));
24153 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24154 }
24155 else
24156 {
24157 putc ('~', stream);
24158 output_addr_const (stream, x);
24159 }
24160 return;
24161
24162 case 'b':
24163 /* Print the log2 of a CONST_INT. */
24164 {
24165 HOST_WIDE_INT val;
24166
24167 if (!CONST_INT_P (x)
24168 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24169 output_operand_lossage ("Unsupported operand for code '%c'", code);
24170 else
24171 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24172 }
24173 return;
24174
24175 case 'L':
24176 /* The low 16 bits of an immediate constant. */
24177 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24178 return;
24179
24180 case 'i':
24181 fprintf (stream, "%s", arithmetic_instr (x, 1));
24182 return;
24183
24184 case 'I':
24185 fprintf (stream, "%s", arithmetic_instr (x, 0));
24186 return;
24187
24188 case 'S':
24189 {
24190 HOST_WIDE_INT val;
24191 const char *shift;
24192
24193 shift = shift_op (x, &val);
24194
24195 if (shift)
24196 {
24197 fprintf (stream, ", %s ", shift);
24198 if (val == -1)
24199 arm_print_operand (stream, XEXP (x, 1), 0);
24200 else
24201 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24202 }
24203 }
24204 return;
24205
24206 /* An explanation of the 'Q', 'R' and 'H' register operands:
24207
24208 In a pair of registers containing a DI or DF value the 'Q'
24209 operand returns the register number of the register containing
24210 the least significant part of the value. The 'R' operand returns
24211 the register number of the register containing the most
24212 significant part of the value.
24213
24214 The 'H' operand returns the higher of the two register numbers.
24215 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24216 same as the 'Q' operand, since the most significant part of the
24217 value is held in the lower number register. The reverse is true
24218 on systems where WORDS_BIG_ENDIAN is false.
24219
24220 The purpose of these operands is to distinguish between cases
24221 where the endian-ness of the values is important (for example
24222 when they are added together), and cases where the endian-ness
24223 is irrelevant, but the order of register operations is important.
24224 For example when loading a value from memory into a register
24225 pair, the endian-ness does not matter. Provided that the value
24226 from the lower memory address is put into the lower numbered
24227 register, and the value from the higher address is put into the
24228 higher numbered register, the load will work regardless of whether
24229 the value being loaded is big-wordian or little-wordian. The
24230 order of the two register loads can matter however, if the address
24231 of the memory location is actually held in one of the registers
24232 being overwritten by the load.
24233
24234 The 'Q' and 'R' constraints are also available for 64-bit
24235 constants. */
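    /* For example, for a DImode value held in the register pair {r0, r1}
       on a target where WORDS_BIG_ENDIAN is false, 'Q' prints r0 while 'R'
       and 'H' both print r1; when WORDS_BIG_ENDIAN is true, 'Q' and 'R'
       swap roles and 'H' still prints r1.  */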
24236 case 'Q':
24237 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24238 {
24239 rtx part = gen_lowpart (SImode, x);
24240 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24241 return;
24242 }
24243
24244 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24245 {
24246 output_operand_lossage ("invalid operand for code '%c'", code);
24247 return;
24248 }
24249
24250 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24251 return;
24252
24253 case 'R':
24254 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24255 {
24256 machine_mode mode = GET_MODE (x);
24257 rtx part;
24258
24259 if (mode == VOIDmode)
24260 mode = DImode;
24261 part = gen_highpart_mode (SImode, mode, x);
24262 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24263 return;
24264 }
24265
24266 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24267 {
24268 output_operand_lossage ("invalid operand for code '%c'", code);
24269 return;
24270 }
24271
24272 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24273 return;
24274
24275 case 'H':
24276 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24277 {
24278 output_operand_lossage ("invalid operand for code '%c'", code);
24279 return;
24280 }
24281
24282 asm_fprintf (stream, "%r", REGNO (x) + 1);
24283 return;
24284
24285 case 'J':
24286 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24287 {
24288 output_operand_lossage ("invalid operand for code '%c'", code);
24289 return;
24290 }
24291
24292 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24293 return;
24294
24295 case 'K':
24296 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24297 {
24298 output_operand_lossage ("invalid operand for code '%c'", code);
24299 return;
24300 }
24301
24302 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24303 return;
24304
24305 case 'm':
24306 asm_fprintf (stream, "%r",
24307 REG_P (XEXP (x, 0))
24308 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24309 return;
24310
24311 case 'M':
24312 asm_fprintf (stream, "{%r-%r}",
24313 REGNO (x),
24314 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24315 return;
24316
24317 /* Like 'M', but writing doubleword vector registers, for use by Neon
24318 insns. */
24319 case 'h':
24320 {
24321 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24322 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24323 if (numregs == 1)
24324 asm_fprintf (stream, "{d%d}", regno);
24325 else
24326 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24327 }
24328 return;
24329
24330 case 'd':
24331 /* CONST_TRUE_RTX means always -- that's the default. */
24332 if (x == const_true_rtx)
24333 return;
24334
24335 if (!COMPARISON_P (x))
24336 {
24337 output_operand_lossage ("invalid operand for code '%c'", code);
24338 return;
24339 }
24340
24341 fputs (arm_condition_codes[get_arm_condition_code (x)],
24342 stream);
24343 return;
24344
24345 case 'D':
24346 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24347 want to do that. */
24348 if (x == const_true_rtx)
24349 {
24350 output_operand_lossage ("instruction never executed");
24351 return;
24352 }
24353 if (!COMPARISON_P (x))
24354 {
24355 output_operand_lossage ("invalid operand for code '%c'", code);
24356 return;
24357 }
24358
24359 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24360 (get_arm_condition_code (x))],
24361 stream);
24362 return;
24363
24364 case 'V':
24365 {
24366 /* Output the LSB (shift) and width for a bitmask instruction
24367 based on a literal mask. The LSB is printed first,
24368 followed by the width.
24369
24370 Eg. For 0b1...1110001, the result is #1, #3. */
24371 if (!CONST_INT_P (x))
24372 {
24373 output_operand_lossage ("invalid operand for code '%c'", code);
24374 return;
24375 }
24376
24377 unsigned HOST_WIDE_INT val
24378 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24379 int lsb = exact_log2 (val & -val);
24380 asm_fprintf (stream, "#%d, #%d", lsb,
24381 (exact_log2 (val + (val & -val)) - lsb));
24382 }
24383 return;
24384
24385 case 's':
24386 case 'W':
24387 case 'X':
24388 case 'Y':
24389 case 'Z':
24390 /* Former Maverick support, removed after GCC-4.7. */
24391 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24392 return;
24393
24394 case 'U':
24395 if (!REG_P (x)
24396 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24397 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24398 /* Bad value for wCG register number. */
24399 {
24400 output_operand_lossage ("invalid operand for code '%c'", code);
24401 return;
24402 }
24403
24404 else
24405 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24406 return;
24407
24408 /* Print an iWMMXt control register name. */
24409 case 'w':
24410 if (!CONST_INT_P (x)
24411 || INTVAL (x) < 0
24412 || INTVAL (x) >= 16)
24413 /* Bad value for wC register number. */
24414 {
24415 output_operand_lossage ("invalid operand for code '%c'", code);
24416 return;
24417 }
24418
24419 else
24420 {
24421 static const char * wc_reg_names [16] =
24422 {
24423 "wCID", "wCon", "wCSSF", "wCASF",
24424 "wC4", "wC5", "wC6", "wC7",
24425 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24426 "wC12", "wC13", "wC14", "wC15"
24427 };
24428
24429 fputs (wc_reg_names [INTVAL (x)], stream);
24430 }
24431 return;
24432
24433 /* Print the high single-precision register of a VFP double-precision
24434 register. */
24435 case 'p':
24436 {
24437 machine_mode mode = GET_MODE (x);
24438 int regno;
24439
24440 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24441 {
24442 output_operand_lossage ("invalid operand for code '%c'", code);
24443 return;
24444 }
24445
24446 regno = REGNO (x);
24447 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24448 {
24449 output_operand_lossage ("invalid operand for code '%c'", code);
24450 return;
24451 }
24452
24453 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24454 }
24455 return;
24456
24457 /* Print a VFP/Neon double precision or quad precision register name. */
24458 case 'P':
24459 case 'q':
24460 {
24461 machine_mode mode = GET_MODE (x);
24462 int is_quad = (code == 'q');
24463 int regno;
24464
24465 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24466 {
24467 output_operand_lossage ("invalid operand for code '%c'", code);
24468 return;
24469 }
24470
24471 if (!REG_P (x)
24472 || !IS_VFP_REGNUM (REGNO (x)))
24473 {
24474 output_operand_lossage ("invalid operand for code '%c'", code);
24475 return;
24476 }
24477
24478 regno = REGNO (x);
24479 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24480 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24481 {
24482 output_operand_lossage ("invalid operand for code '%c'", code);
24483 return;
24484 }
24485
24486 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24487 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24488 }
24489 return;
24490
24491 /* These two codes print the low/high doubleword register of a Neon quad
24492 register, respectively. For pair-structure types, can also print
24493 low/high quadword registers. */
24494 case 'e':
24495 case 'f':
24496 {
24497 machine_mode mode = GET_MODE (x);
24498 int regno;
24499
24500 if ((GET_MODE_SIZE (mode) != 16
24501 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24502 {
24503 output_operand_lossage ("invalid operand for code '%c'", code);
24504 return;
24505 }
24506
24507 regno = REGNO (x);
24508 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24509 {
24510 output_operand_lossage ("invalid operand for code '%c'", code);
24511 return;
24512 }
24513
24514 if (GET_MODE_SIZE (mode) == 16)
24515 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24516 + (code == 'f' ? 1 : 0));
24517 else
24518 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24519 + (code == 'f' ? 1 : 0));
24520 }
24521 return;
24522
24523 /* Print a VFPv3 floating-point constant, represented as an integer
24524 index. */
24525 case 'G':
24526 {
24527 int index = vfp3_const_double_index (x);
24528 gcc_assert (index != -1);
24529 fprintf (stream, "%d", index);
24530 }
24531 return;
24532
24533 /* Print bits representing opcode features for Neon.
24534
24535 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24536 and polynomials as unsigned.
24537
24538 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24539
24540 Bit 2 is 1 for rounding functions, 0 otherwise. */
24541
24542 /* Identify the type as 's', 'u', 'p' or 'f'. */
24543 case 'T':
24544 {
24545 HOST_WIDE_INT bits = INTVAL (x);
24546 fputc ("uspf"[bits & 3], stream);
24547 }
24548 return;
24549
24550 /* Likewise, but signed and unsigned integers are both 'i'. */
24551 case 'F':
24552 {
24553 HOST_WIDE_INT bits = INTVAL (x);
24554 fputc ("iipf"[bits & 3], stream);
24555 }
24556 return;
24557
24558 /* As for 'T', but emit 'u' instead of 'p'. */
24559 case 't':
24560 {
24561 HOST_WIDE_INT bits = INTVAL (x);
24562 fputc ("usuf"[bits & 3], stream);
24563 }
24564 return;
24565
24566 /* Bit 2: rounding (vs none). */
24567 case 'O':
24568 {
24569 HOST_WIDE_INT bits = INTVAL (x);
24570 fputs ((bits & 4) != 0 ? "r" : "", stream);
24571 }
24572 return;
24573
24574 /* Memory operand for vld1/vst1 instruction. */
24575 case 'A':
24576 {
24577 rtx addr;
24578 bool postinc = FALSE;
24579 rtx postinc_reg = NULL;
24580 unsigned align, memsize, align_bits;
24581
24582 gcc_assert (MEM_P (x));
24583 addr = XEXP (x, 0);
24584 if (GET_CODE (addr) == POST_INC)
24585 {
24586 postinc = 1;
24587 addr = XEXP (addr, 0);
24588 }
24589 if (GET_CODE (addr) == POST_MODIFY)
24590 {
24591 postinc_reg = XEXP (XEXP (addr, 1), 1);
24592 addr = XEXP (addr, 0);
24593 }
24594 asm_fprintf (stream, "[%r", REGNO (addr));
24595
24596 /* We know the alignment of this access, so we can emit a hint in the
24597 instruction (for some alignments) as an aid to the memory subsystem
24598 of the target. */
24599 align = MEM_ALIGN (x) >> 3;
24600 memsize = MEM_SIZE (x);
24601
24602 /* Only certain alignment specifiers are supported by the hardware. */
24603 if (memsize == 32 && (align % 32) == 0)
24604 align_bits = 256;
24605 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24606 align_bits = 128;
24607 else if (memsize >= 8 && (align % 8) == 0)
24608 align_bits = 64;
24609 else
24610 align_bits = 0;
24611
24612 if (align_bits != 0)
24613 asm_fprintf (stream, ":%d", align_bits);
24614
24615 asm_fprintf (stream, "]");
24616
24617 if (postinc)
24618 fputs("!", stream);
24619 if (postinc_reg)
24620 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24621 }
24622 return;
24623
24624 /* To print a memory operand with an "Ux" or "Uj" constraint. Depending on
24625 the rtx_code of the address, the output takes one of these forms:
24626 1. [Rn], #+/-<imm>
24627 2. [Rn, #+/-<imm>]!
24628 3. [Rn, #+/-<imm>]
24629 4. [Rn] (concrete examples are sketched just after this comment). */
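    /* For a 4-byte access (register names and offsets are purely
       illustrative): a POST_INC address prints as "[r0], #4", a PRE_DEC
       address as "[r0, #-4]!", a PLUS of r0 and the constant 8 as
       "[r0, #8]", and a plain register address as "[r0]".  */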
24630 case 'E':
24631 {
24632 rtx addr;
24633 rtx postinc_reg = NULL;
24634 unsigned inc_val = 0;
24635 enum rtx_code code;
24636
24637 gcc_assert (MEM_P (x));
24638 addr = XEXP (x, 0);
24639 code = GET_CODE (addr);
24640 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24641 || code == PRE_DEC)
24642 {
24643 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24644 inc_val = GET_MODE_SIZE (GET_MODE (x));
24645 if (code == POST_INC || code == POST_DEC)
24646 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24647 ? "": "-", inc_val);
24648 else
24649 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24650 ? "": "-", inc_val);
24651 }
24652 else if (code == POST_MODIFY || code == PRE_MODIFY)
24653 {
24654 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24655 postinc_reg = XEXP (XEXP (addr, 1), 1);
24656 if (postinc_reg && CONST_INT_P (postinc_reg))
24657 {
24658 if (code == POST_MODIFY)
24659 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24660 else
24661 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24662 }
24663 }
24664 else if (code == PLUS)
24665 {
24666 rtx base = XEXP (addr, 0);
24667 rtx index = XEXP (addr, 1);
24668
24669 gcc_assert (REG_P (base) && CONST_INT_P (index));
24670
24671 HOST_WIDE_INT offset = INTVAL (index);
24672 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24673 }
24674 else
24675 {
24676 gcc_assert (REG_P (addr));
24677 asm_fprintf (stream, "[%r]",REGNO (addr));
24678 }
24679 }
24680 return;
24681
24682 case 'C':
24683 {
24684 rtx addr;
24685
24686 gcc_assert (MEM_P (x));
24687 addr = XEXP (x, 0);
24688 gcc_assert (REG_P (addr));
24689 asm_fprintf (stream, "[%r]", REGNO (addr));
24690 }
24691 return;
24692
24693 /* Translate an S register number into a D register number and element index. */
24694 case 'y':
24695 {
24696 machine_mode mode = GET_MODE (x);
24697 int regno;
24698
24699 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24700 {
24701 output_operand_lossage ("invalid operand for code '%c'", code);
24702 return;
24703 }
24704
24705 regno = REGNO (x);
24706 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24707 {
24708 output_operand_lossage ("invalid operand for code '%c'", code);
24709 return;
24710 }
24711
24712 regno = regno - FIRST_VFP_REGNUM;
24713 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24714 }
24715 return;
24716
24717 case 'v':
24718 gcc_assert (CONST_DOUBLE_P (x));
24719 int result;
24720 result = vfp3_const_double_for_fract_bits (x);
24721 if (result == 0)
24722 result = vfp3_const_double_for_bits (x);
24723 fprintf (stream, "#%d", result);
24724 return;
24725
24726 /* Register specifier for vld1.16/vst1.16. Translate the S register
24727 number into a D register number and element index. */
24728 case 'z':
24729 {
24730 machine_mode mode = GET_MODE (x);
24731 int regno;
24732
24733 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24734 {
24735 output_operand_lossage ("invalid operand for code '%c'", code);
24736 return;
24737 }
24738
24739 regno = REGNO (x);
24740 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24741 {
24742 output_operand_lossage ("invalid operand for code '%c'", code);
24743 return;
24744 }
24745
24746 regno = regno - FIRST_VFP_REGNUM;
24747 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24748 }
24749 return;
24750
24751 default:
24752 if (x == 0)
24753 {
24754 output_operand_lossage ("missing operand");
24755 return;
24756 }
24757
24758 switch (GET_CODE (x))
24759 {
24760 case REG:
24761 asm_fprintf (stream, "%r", REGNO (x));
24762 break;
24763
24764 case MEM:
24765 output_address (GET_MODE (x), XEXP (x, 0));
24766 break;
24767
24768 case CONST_DOUBLE:
24769 {
24770 char fpstr[20];
24771 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24772 sizeof (fpstr), 0, 1);
24773 fprintf (stream, "#%s", fpstr);
24774 }
24775 break;
24776
24777 default:
24778 gcc_assert (GET_CODE (x) != NEG);
24779 fputc ('#', stream);
24780 if (GET_CODE (x) == HIGH)
24781 {
24782 fputs (":lower16:", stream);
24783 x = XEXP (x, 0);
24784 }
24785
24786 output_addr_const (stream, x);
24787 break;
24788 }
24789 }
24790 }
24791 \f
24792 /* Target hook for printing a memory address. */
24793 static void
24794 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24795 {
24796 if (TARGET_32BIT)
24797 {
24798 int is_minus = GET_CODE (x) == MINUS;
24799
24800 if (REG_P (x))
24801 asm_fprintf (stream, "[%r]", REGNO (x));
24802 else if (GET_CODE (x) == PLUS || is_minus)
24803 {
24804 rtx base = XEXP (x, 0);
24805 rtx index = XEXP (x, 1);
24806 HOST_WIDE_INT offset = 0;
24807 if (!REG_P (base)
24808 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24809 {
24810 /* Ensure that BASE is a register. */
24811 /* (one of them must be). */
24812 /* Also ensure the SP is not used as an index register. */
24813 std::swap (base, index);
24814 }
24815 switch (GET_CODE (index))
24816 {
24817 case CONST_INT:
24818 offset = INTVAL (index);
24819 if (is_minus)
24820 offset = -offset;
24821 asm_fprintf (stream, "[%r, #%wd]",
24822 REGNO (base), offset);
24823 break;
24824
24825 case REG:
24826 asm_fprintf (stream, "[%r, %s%r]",
24827 REGNO (base), is_minus ? "-" : "",
24828 REGNO (index));
24829 break;
24830
24831 case MULT:
24832 case ASHIFTRT:
24833 case LSHIFTRT:
24834 case ASHIFT:
24835 case ROTATERT:
24836 {
24837 asm_fprintf (stream, "[%r, %s%r",
24838 REGNO (base), is_minus ? "-" : "",
24839 REGNO (XEXP (index, 0)));
24840 arm_print_operand (stream, index, 'S');
24841 fputs ("]", stream);
24842 break;
24843 }
24844
24845 default:
24846 gcc_unreachable ();
24847 }
24848 }
24849 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24850 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24851 {
24852 gcc_assert (REG_P (XEXP (x, 0)));
24853
24854 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24855 asm_fprintf (stream, "[%r, #%s%d]!",
24856 REGNO (XEXP (x, 0)),
24857 GET_CODE (x) == PRE_DEC ? "-" : "",
24858 GET_MODE_SIZE (mode));
24859 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24860 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24861 else
24862 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24863 GET_CODE (x) == POST_DEC ? "-" : "",
24864 GET_MODE_SIZE (mode));
24865 }
24866 else if (GET_CODE (x) == PRE_MODIFY)
24867 {
24868 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24869 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24870 asm_fprintf (stream, "#%wd]!",
24871 INTVAL (XEXP (XEXP (x, 1), 1)));
24872 else
24873 asm_fprintf (stream, "%r]!",
24874 REGNO (XEXP (XEXP (x, 1), 1)));
24875 }
24876 else if (GET_CODE (x) == POST_MODIFY)
24877 {
24878 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24879 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24880 asm_fprintf (stream, "#%wd",
24881 INTVAL (XEXP (XEXP (x, 1), 1)));
24882 else
24883 asm_fprintf (stream, "%r",
24884 REGNO (XEXP (XEXP (x, 1), 1)));
24885 }
24886 else output_addr_const (stream, x);
24887 }
24888 else
24889 {
24890 if (REG_P (x))
24891 asm_fprintf (stream, "[%r]", REGNO (x));
24892 else if (GET_CODE (x) == POST_INC)
24893 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24894 else if (GET_CODE (x) == PLUS)
24895 {
24896 gcc_assert (REG_P (XEXP (x, 0)));
24897 if (CONST_INT_P (XEXP (x, 1)))
24898 asm_fprintf (stream, "[%r, #%wd]",
24899 REGNO (XEXP (x, 0)),
24900 INTVAL (XEXP (x, 1)));
24901 else
24902 asm_fprintf (stream, "[%r, %r]",
24903 REGNO (XEXP (x, 0)),
24904 REGNO (XEXP (x, 1)));
24905 }
24906 else
24907 output_addr_const (stream, x);
24908 }
24909 }
24910 \f
24911 /* Target hook for indicating whether a punctuation character for
24912 TARGET_PRINT_OPERAND is valid. */
24913 static bool
24914 arm_print_operand_punct_valid_p (unsigned char code)
24915 {
24916 return (code == '@' || code == '|' || code == '.'
24917 || code == '(' || code == ')' || code == '#'
24918 || (TARGET_32BIT && (code == '?'))
24919 || (TARGET_THUMB2 && (code == '!'))
24920 || (TARGET_THUMB && (code == '_')));
24921 }
24922 \f
24923 /* Target hook for assembling integer objects. The ARM version needs to
24924 handle word-sized values specially. */
24925 static bool
24926 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24927 {
24928 machine_mode mode;
24929
24930 if (size == UNITS_PER_WORD && aligned_p)
24931 {
24932 fputs ("\t.word\t", asm_out_file);
24933 output_addr_const (asm_out_file, x);
24934
24935 /* Mark symbols as position independent. We only do this in the
24936 .text segment, not in the .data segment. */
24937 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24938 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24939 {
24940 /* See legitimize_pic_address for an explanation of the
24941 TARGET_VXWORKS_RTP check. */
24942 /* References to weak symbols cannot be resolved locally:
24943 they may be overridden by a non-weak definition at link
24944 time. */
24945 if (!arm_pic_data_is_text_relative
24946 || (SYMBOL_REF_P (x)
24947 && (!SYMBOL_REF_LOCAL_P (x)
24948 || (SYMBOL_REF_DECL (x)
24949 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24950 || (SYMBOL_REF_FUNCTION_P (x)
24951 && !arm_fdpic_local_funcdesc_p (x)))))
24952 {
24953 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24954 fputs ("(GOTFUNCDESC)", asm_out_file);
24955 else
24956 fputs ("(GOT)", asm_out_file);
24957 }
24958 else
24959 {
24960 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24961 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24962 else
24963 {
24964 bool is_readonly;
24965
24966 if (!TARGET_FDPIC
24967 || arm_is_segment_info_known (x, &is_readonly))
24968 fputs ("(GOTOFF)", asm_out_file);
24969 else
24970 fputs ("(GOT)", asm_out_file);
24971 }
24972 }
24973 }
24974
24975 /* For FDPIC we also have to mark the symbol for the .data section. */
24976 if (TARGET_FDPIC
24977 && !making_const_table
24978 && SYMBOL_REF_P (x)
24979 && SYMBOL_REF_FUNCTION_P (x))
24980 fputs ("(FUNCDESC)", asm_out_file);
24981
24982 fputc ('\n', asm_out_file);
24983 return true;
24984 }
24985
24986 mode = GET_MODE (x);
24987
24988 if (arm_vector_mode_supported_p (mode))
24989 {
24990 int i, units;
24991
24992 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24993
24994 units = CONST_VECTOR_NUNITS (x);
24995 size = GET_MODE_UNIT_SIZE (mode);
24996
24997 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24998 for (i = 0; i < units; i++)
24999 {
25000 rtx elt = CONST_VECTOR_ELT (x, i);
25001 assemble_integer
25002 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
25003 }
25004 else
25005 for (i = 0; i < units; i++)
25006 {
25007 rtx elt = CONST_VECTOR_ELT (x, i);
25008 assemble_real
25009 (*CONST_DOUBLE_REAL_VALUE (elt),
25010 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
25011 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
25012 }
25013
25014 return true;
25015 }
25016
25017 return default_assemble_integer (x, size, aligned_p);
25018 }
25019
25020 static void
25021 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
25022 {
25023 section *s;
25024
25025 if (!TARGET_AAPCS_BASED)
25026 {
25027 (is_ctor ?
25028 default_named_section_asm_out_constructor
25029 : default_named_section_asm_out_destructor) (symbol, priority);
25030 return;
25031 }
25032
25033 /* Put these in the .init_array section, using a special relocation. */
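/* Worked example (illustrative only, not part of the original sources): a
   constructor registered with __attribute__((constructor (100))) lands in a
   section named ".init_array.00100" via the sprintf below, and the word
   emitted for it carries a (target1) annotation so the linker applies the
   R_ARM_TARGET1 relocation expected by AAPCS-based toolchains.  */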
25034 if (priority != DEFAULT_INIT_PRIORITY)
25035 {
25036 char buf[18];
25037 sprintf (buf, "%s.%.5u",
25038 is_ctor ? ".init_array" : ".fini_array",
25039 priority);
25040 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
25041 }
25042 else if (is_ctor)
25043 s = ctors_section;
25044 else
25045 s = dtors_section;
25046
25047 switch_to_section (s);
25048 assemble_align (POINTER_SIZE);
25049 fputs ("\t.word\t", asm_out_file);
25050 output_addr_const (asm_out_file, symbol);
25051 fputs ("(target1)\n", asm_out_file);
25052 }
25053
25054 /* Add a function to the list of static constructors. */
25055
25056 static void
25057 arm_elf_asm_constructor (rtx symbol, int priority)
25058 {
25059 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
25060 }
25061
25062 /* Add a function to the list of static destructors. */
25063
25064 static void
25065 arm_elf_asm_destructor (rtx symbol, int priority)
25066 {
25067 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
25068 }
25069 \f
25070 /* A finite state machine takes care of noticing whether or not instructions
25071 can be conditionally executed, and thus decrease execution time and code
25072 size by deleting branch instructions. The fsm is controlled by
25073 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25074
25075 /* The state of the fsm controlling condition codes are:
25076 0: normal, do nothing special
25077 1: make ASM_OUTPUT_OPCODE not output this instruction
25078 2: make ASM_OUTPUT_OPCODE not output this instruction
25079 3: make instructions conditional
25080 4: make instructions conditional
25081
25082 State transitions (state->state by whom under condition):
25083 0 -> 1 final_prescan_insn if the `target' is a label
25084 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25085 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25086 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25087 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25088 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25089 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25090 (the target insn is arm_target_insn).
25091
25092 If the jump clobbers the conditions then we use states 2 and 4.
25093
25094 A similar thing can be done with conditional return insns.
25095
25096 XXX In case the `target' is an unconditional branch, this conditionalising
25097 of the instructions always reduces code size, but not always execution
25098 time. But then, I want to reduce the code size to somewhere near what
25099 /bin/cc produces. */
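/* Illustrative sketch (not part of the original sources): on typical input
   the FSM turns a branch around a single instruction, e.g.

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1

   by suppressing the branch (states 1/3) and conditionalising the skipped
   instruction with the inverse of the branch condition.  */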
25100
25101 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25102 instructions. When a COND_EXEC instruction is seen the subsequent
25103 instructions are scanned so that multiple conditional instructions can be
25104 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25105 specify the length and true/false mask for the IT block. These will be
25106 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
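/* Worked example (illustrative only): three COND_EXEC insns predicated
   EQ, EQ, NE give arm_condexec_masklen == 3 and arm_condexec_mask == 0b011,
   so thumb2_asm_output_opcode prints "itte eq" before the first
   instruction of the block.  */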
25107
25108 /* Returns the index of the ARM condition code string in
25109 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25110 COMPARISON should be an rtx like `(eq (...) (...))'. */
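/* For instance (illustrative only), a comparison such as
   (eq (reg:CC_NZ CC_REGNUM) (const_int 0)) selects the E_CC_NZmode case
   below and yields ARM_EQ, while an unordered FP comparison such as
   (uneq ...) has no single condition code and returns ARM_NV.  */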
25111
25112 enum arm_cond_code
25113 maybe_get_arm_condition_code (rtx comparison)
25114 {
25115 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25116 enum arm_cond_code code;
25117 enum rtx_code comp_code = GET_CODE (comparison);
25118
25119 if (GET_MODE_CLASS (mode) != MODE_CC)
25120 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25121 XEXP (comparison, 1));
25122
25123 switch (mode)
25124 {
25125 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25126 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25127 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25128 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25129 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25130 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25131 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25132 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25133 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25134 case E_CC_DLTUmode: code = ARM_CC;
25135
25136 dominance:
25137 if (comp_code == EQ)
25138 return ARM_INVERSE_CONDITION_CODE (code);
25139 if (comp_code == NE)
25140 return code;
25141 return ARM_NV;
25142
25143 case E_CC_NZmode:
25144 switch (comp_code)
25145 {
25146 case NE: return ARM_NE;
25147 case EQ: return ARM_EQ;
25148 case GE: return ARM_PL;
25149 case LT: return ARM_MI;
25150 default: return ARM_NV;
25151 }
25152
25153 case E_CC_Zmode:
25154 switch (comp_code)
25155 {
25156 case NE: return ARM_NE;
25157 case EQ: return ARM_EQ;
25158 default: return ARM_NV;
25159 }
25160
25161 case E_CC_Nmode:
25162 switch (comp_code)
25163 {
25164 case NE: return ARM_MI;
25165 case EQ: return ARM_PL;
25166 default: return ARM_NV;
25167 }
25168
25169 case E_CCFPEmode:
25170 case E_CCFPmode:
25171 /* We can handle all cases except UNEQ and LTGT. */
25172 switch (comp_code)
25173 {
25174 case GE: return ARM_GE;
25175 case GT: return ARM_GT;
25176 case LE: return ARM_LS;
25177 case LT: return ARM_MI;
25178 case NE: return ARM_NE;
25179 case EQ: return ARM_EQ;
25180 case ORDERED: return ARM_VC;
25181 case UNORDERED: return ARM_VS;
25182 case UNLT: return ARM_LT;
25183 case UNLE: return ARM_LE;
25184 case UNGT: return ARM_HI;
25185 case UNGE: return ARM_PL;
25186 /* UNEQ and LTGT do not have a representation. */
25187 case UNEQ: /* Fall through. */
25188 case LTGT: /* Fall through. */
25189 default: return ARM_NV;
25190 }
25191
25192 case E_CC_SWPmode:
25193 switch (comp_code)
25194 {
25195 case NE: return ARM_NE;
25196 case EQ: return ARM_EQ;
25197 case GE: return ARM_LE;
25198 case GT: return ARM_LT;
25199 case LE: return ARM_GE;
25200 case LT: return ARM_GT;
25201 case GEU: return ARM_LS;
25202 case GTU: return ARM_CC;
25203 case LEU: return ARM_CS;
25204 case LTU: return ARM_HI;
25205 default: return ARM_NV;
25206 }
25207
25208 case E_CC_Cmode:
25209 switch (comp_code)
25210 {
25211 case LTU: return ARM_CS;
25212 case GEU: return ARM_CC;
25213 default: return ARM_NV;
25214 }
25215
25216 case E_CC_NVmode:
25217 switch (comp_code)
25218 {
25219 case GE: return ARM_GE;
25220 case LT: return ARM_LT;
25221 default: return ARM_NV;
25222 }
25223
25224 case E_CC_Bmode:
25225 switch (comp_code)
25226 {
25227 case GEU: return ARM_CS;
25228 case LTU: return ARM_CC;
25229 default: return ARM_NV;
25230 }
25231
25232 case E_CC_Vmode:
25233 switch (comp_code)
25234 {
25235 case NE: return ARM_VS;
25236 case EQ: return ARM_VC;
25237 default: return ARM_NV;
25238 }
25239
25240 case E_CC_ADCmode:
25241 switch (comp_code)
25242 {
25243 case GEU: return ARM_CS;
25244 case LTU: return ARM_CC;
25245 default: return ARM_NV;
25246 }
25247
25248 case E_CCmode:
25249 case E_CC_RSBmode:
25250 switch (comp_code)
25251 {
25252 case NE: return ARM_NE;
25253 case EQ: return ARM_EQ;
25254 case GE: return ARM_GE;
25255 case GT: return ARM_GT;
25256 case LE: return ARM_LE;
25257 case LT: return ARM_LT;
25258 case GEU: return ARM_CS;
25259 case GTU: return ARM_HI;
25260 case LEU: return ARM_LS;
25261 case LTU: return ARM_CC;
25262 default: return ARM_NV;
25263 }
25264
25265 default: gcc_unreachable ();
25266 }
25267 }
25268
25269 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25270 static enum arm_cond_code
25271 get_arm_condition_code (rtx comparison)
25272 {
25273 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25274 gcc_assert (code != ARM_NV);
25275 return code;
25276 }
25277
25278 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25279 code registers when not targeting Thumb1. The VFP condition register
25280 only exists when generating hard-float code. */
25281 static bool
25282 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25283 {
25284 if (!TARGET_32BIT)
25285 return false;
25286
25287 *p1 = CC_REGNUM;
25288 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25289 return true;
25290 }
25291
25292 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25293 instructions. */
25294 void
25295 thumb2_final_prescan_insn (rtx_insn *insn)
25296 {
25297 rtx_insn *first_insn = insn;
25298 rtx body = PATTERN (insn);
25299 rtx predicate;
25300 enum arm_cond_code code;
25301 int n;
25302 int mask;
25303 int max;
25304
25305 /* max_insns_skipped in the tune was already taken into account in the
25306 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
25307 just emit the IT blocks as best we can. It does not make sense to split
25308 the IT blocks. */
25309 max = MAX_INSN_PER_IT_BLOCK;
25310
25311 /* Remove the previous insn from the count of insns to be output. */
25312 if (arm_condexec_count)
25313 arm_condexec_count--;
25314
25315 /* Nothing to do if we are already inside a conditional block. */
25316 if (arm_condexec_count)
25317 return;
25318
25319 if (GET_CODE (body) != COND_EXEC)
25320 return;
25321
25322 /* Conditional jumps are implemented directly. */
25323 if (JUMP_P (insn))
25324 return;
25325
25326 predicate = COND_EXEC_TEST (body);
25327 arm_current_cc = get_arm_condition_code (predicate);
25328
25329 n = get_attr_ce_count (insn);
25330 arm_condexec_count = 1;
25331 arm_condexec_mask = (1 << n) - 1;
25332 arm_condexec_masklen = n;
25333 /* See if subsequent instructions can be combined into the same block. */
25334 for (;;)
25335 {
25336 insn = next_nonnote_insn (insn);
25337
25338 /* Jumping into the middle of an IT block is illegal, so a label or
25339 barrier terminates the block. */
25340 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25341 break;
25342
25343 body = PATTERN (insn);
25344 /* USE and CLOBBER aren't really insns, so just skip them. */
25345 if (GET_CODE (body) == USE
25346 || GET_CODE (body) == CLOBBER)
25347 continue;
25348
25349 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25350 if (GET_CODE (body) != COND_EXEC)
25351 break;
25352 /* Maximum number of conditionally executed instructions in a block. */
25353 n = get_attr_ce_count (insn);
25354 if (arm_condexec_masklen + n > max)
25355 break;
25356
25357 predicate = COND_EXEC_TEST (body);
25358 code = get_arm_condition_code (predicate);
25359 mask = (1 << n) - 1;
25360 if (arm_current_cc == code)
25361 arm_condexec_mask |= (mask << arm_condexec_masklen);
25362 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25363 break;
25364
25365 arm_condexec_count++;
25366 arm_condexec_masklen += n;
25367
25368 /* A jump must be the last instruction in a conditional block. */
25369 if (JUMP_P (insn))
25370 break;
25371 }
25372 /* Restore recog_data (getting the attributes of other insns can
25373 destroy this array, but final.cc assumes that it remains intact
25374 across this call). */
25375 extract_constrain_insn_cached (first_insn);
25376 }
25377
25378 void
25379 arm_final_prescan_insn (rtx_insn *insn)
25380 {
25381 /* BODY will hold the body of INSN. */
25382 rtx body = PATTERN (insn);
25383
25384 /* This will be 1 if trying to repeat the trick, and things need to be
25385 reversed if it appears to fail. */
25386 int reverse = 0;
25387
25388 /* If we start with a return insn, we only succeed if we find another one. */
25389 int seeking_return = 0;
25390 enum rtx_code return_code = UNKNOWN;
25391
25392 /* START_INSN will hold the insn from where we start looking. This is the
25393 first insn after the following code_label if REVERSE is true. */
25394 rtx_insn *start_insn = insn;
25395
25396 /* If in state 4, check if the target branch is reached, in order to
25397 change back to state 0. */
25398 if (arm_ccfsm_state == 4)
25399 {
25400 if (insn == arm_target_insn)
25401 {
25402 arm_target_insn = NULL;
25403 arm_ccfsm_state = 0;
25404 }
25405 return;
25406 }
25407
25408 /* If in state 3, it is possible to repeat the trick, if this insn is an
25409 unconditional branch to a label, and immediately following this branch
25410 is the previous target label which is only used once, and the label this
25411 branch jumps to is not too far off. */
25412 if (arm_ccfsm_state == 3)
25413 {
25414 if (simplejump_p (insn))
25415 {
25416 start_insn = next_nonnote_insn (start_insn);
25417 if (BARRIER_P (start_insn))
25418 {
25419 /* XXX Isn't this always a barrier? */
25420 start_insn = next_nonnote_insn (start_insn);
25421 }
25422 if (LABEL_P (start_insn)
25423 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25424 && LABEL_NUSES (start_insn) == 1)
25425 reverse = TRUE;
25426 else
25427 return;
25428 }
25429 else if (ANY_RETURN_P (body))
25430 {
25431 start_insn = next_nonnote_insn (start_insn);
25432 if (BARRIER_P (start_insn))
25433 start_insn = next_nonnote_insn (start_insn);
25434 if (LABEL_P (start_insn)
25435 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25436 && LABEL_NUSES (start_insn) == 1)
25437 {
25438 reverse = TRUE;
25439 seeking_return = 1;
25440 return_code = GET_CODE (body);
25441 }
25442 else
25443 return;
25444 }
25445 else
25446 return;
25447 }
25448
25449 gcc_assert (!arm_ccfsm_state || reverse);
25450 if (!JUMP_P (insn))
25451 return;
25452
25453 /* This jump might be part of a PARALLEL with a clobber of the condition
25454 codes; the jump should always come first. */
25455 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25456 body = XVECEXP (body, 0, 0);
25457
25458 if (reverse
25459 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25460 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25461 {
25462 int insns_skipped;
25463 int fail = FALSE, succeed = FALSE;
25464 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25465 int then_not_else = TRUE;
25466 rtx_insn *this_insn = start_insn;
25467 rtx label = 0;
25468
25469 /* Register the insn jumped to. */
25470 if (reverse)
25471 {
25472 if (!seeking_return)
25473 label = XEXP (SET_SRC (body), 0);
25474 }
25475 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25476 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25477 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25478 {
25479 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25480 then_not_else = FALSE;
25481 }
25482 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25483 {
25484 seeking_return = 1;
25485 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25486 }
25487 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25488 {
25489 seeking_return = 1;
25490 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25491 then_not_else = FALSE;
25492 }
25493 else
25494 gcc_unreachable ();
25495
25496 /* See how many insns this branch skips, and what kind of insns. If all
25497 insns are okay, and the label or unconditional branch to the same
25498 label is not too far away, succeed. */
25499 for (insns_skipped = 0;
25500 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25501 {
25502 rtx scanbody;
25503
25504 this_insn = next_nonnote_insn (this_insn);
25505 if (!this_insn)
25506 break;
25507
25508 switch (GET_CODE (this_insn))
25509 {
25510 case CODE_LABEL:
25511 /* Succeed if it is the target label, otherwise fail since
25512 control falls in from somewhere else. */
25513 if (this_insn == label)
25514 {
25515 arm_ccfsm_state = 1;
25516 succeed = TRUE;
25517 }
25518 else
25519 fail = TRUE;
25520 break;
25521
25522 case BARRIER:
25523 /* Succeed if the following insn is the target label.
25524 Otherwise fail.
25525 If return insns are used then the last insn in a function
25526 will be a barrier. */
25527 this_insn = next_nonnote_insn (this_insn);
25528 if (this_insn && this_insn == label)
25529 {
25530 arm_ccfsm_state = 1;
25531 succeed = TRUE;
25532 }
25533 else
25534 fail = TRUE;
25535 break;
25536
25537 case CALL_INSN:
25538 /* The AAPCS says that conditional calls should not be
25539 used since they make interworking inefficient (the
25540 linker can't transform BL<cond> into BLX). That's
25541 only a problem if the machine has BLX. */
25542 if (arm_arch5t)
25543 {
25544 fail = TRUE;
25545 break;
25546 }
25547
25548 /* Succeed if the following insn is the target label, or
25549 if the following two insns are a barrier and the
25550 target label. */
25551 this_insn = next_nonnote_insn (this_insn);
25552 if (this_insn && BARRIER_P (this_insn))
25553 this_insn = next_nonnote_insn (this_insn);
25554
25555 if (this_insn && this_insn == label
25556 && insns_skipped < max_insns_skipped)
25557 {
25558 arm_ccfsm_state = 1;
25559 succeed = TRUE;
25560 }
25561 else
25562 fail = TRUE;
25563 break;
25564
25565 case JUMP_INSN:
25566 /* If this is an unconditional branch to the same label, succeed.
25567 If it is to another label, do nothing. If it is conditional,
25568 fail. */
25569 /* XXX Probably, the tests for SET and the PC are
25570 unnecessary. */
25571
25572 scanbody = PATTERN (this_insn);
25573 if (GET_CODE (scanbody) == SET
25574 && GET_CODE (SET_DEST (scanbody)) == PC)
25575 {
25576 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25577 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25578 {
25579 arm_ccfsm_state = 2;
25580 succeed = TRUE;
25581 }
25582 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25583 fail = TRUE;
25584 }
25585 /* Fail if a conditional return is undesirable (e.g. on a
25586 StrongARM), but still allow this if optimizing for size. */
25587 else if (GET_CODE (scanbody) == return_code
25588 && !use_return_insn (TRUE, NULL)
25589 && !optimize_size)
25590 fail = TRUE;
25591 else if (GET_CODE (scanbody) == return_code)
25592 {
25593 arm_ccfsm_state = 2;
25594 succeed = TRUE;
25595 }
25596 else if (GET_CODE (scanbody) == PARALLEL)
25597 {
25598 switch (get_attr_conds (this_insn))
25599 {
25600 case CONDS_NOCOND:
25601 break;
25602 default:
25603 fail = TRUE;
25604 break;
25605 }
25606 }
25607 else
25608 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25609
25610 break;
25611
25612 case INSN:
25613 /* Instructions using or affecting the condition codes make it
25614 fail. */
25615 scanbody = PATTERN (this_insn);
25616 if (!(GET_CODE (scanbody) == SET
25617 || GET_CODE (scanbody) == PARALLEL)
25618 || get_attr_conds (this_insn) != CONDS_NOCOND)
25619 fail = TRUE;
25620 break;
25621
25622 default:
25623 break;
25624 }
25625 }
25626 if (succeed)
25627 {
25628 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25629 arm_target_label = CODE_LABEL_NUMBER (label);
25630 else
25631 {
25632 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25633
25634 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25635 {
25636 this_insn = next_nonnote_insn (this_insn);
25637 gcc_assert (!this_insn
25638 || (!BARRIER_P (this_insn)
25639 && !LABEL_P (this_insn)));
25640 }
25641 if (!this_insn)
25642 {
25643 /* Oh, dear! We ran off the end; give up. */
25644 extract_constrain_insn_cached (insn);
25645 arm_ccfsm_state = 0;
25646 arm_target_insn = NULL;
25647 return;
25648 }
25649 arm_target_insn = this_insn;
25650 }
25651
25652 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25653 what it was. */
25654 if (!reverse)
25655 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25656
25657 if (reverse || then_not_else)
25658 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25659 }
25660
25661 /* Restore recog_data (getting the attributes of other insns can
25662 destroy this array, but final.cc assumes that it remains intact
25663 across this call). */
25664 extract_constrain_insn_cached (insn);
25665 }
25666 }
25667
25668 /* Output IT instructions. */
25669 void
25670 thumb2_asm_output_opcode (FILE * stream)
25671 {
25672 char buff[5];
25673 int n;
25674
25675 if (arm_condexec_mask)
25676 {
25677 for (n = 0; n < arm_condexec_masklen; n++)
25678 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25679 buff[n] = 0;
25680 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25681 arm_condition_codes[arm_current_cc]);
25682 arm_condexec_mask = 0;
25683 }
25684 }
25685
25686 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25687 UNITS_PER_WORD bytes wide. */
25688 static unsigned int
25689 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25690 {
25691 if (IS_VPR_REGNUM (regno))
25692 return CEIL (GET_MODE_SIZE (mode), 2);
25693
25694 if (TARGET_32BIT
25695 && regno > PC_REGNUM
25696 && regno != FRAME_POINTER_REGNUM
25697 && regno != ARG_POINTER_REGNUM
25698 && !IS_VFP_REGNUM (regno))
25699 return 1;
25700
25701 return ARM_NUM_REGS (mode);
25702 }
25703
25704 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25705 static bool
25706 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25707 {
25708 if (GET_MODE_CLASS (mode) == MODE_CC)
25709 return (regno == CC_REGNUM
25710 || (TARGET_VFP_BASE
25711 && regno == VFPCC_REGNUM));
25712
25713 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25714 return false;
25715
25716 if (IS_VPR_REGNUM (regno))
25717 return VALID_MVE_PRED_MODE (mode);
25718
25719 if (TARGET_THUMB1)
25720 /* For the Thumb we only allow values bigger than SImode in
25721 registers 0 - 6, so that there is always a second low
25722 register available to hold the upper part of the value.
25723 We probably ought to ensure that the register is the
25724 start of an even numbered register pair. */
25725 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25726
25727 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25728 {
25729 if (mode == DFmode || mode == DImode)
25730 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25731
25732 if (mode == HFmode || mode == BFmode || mode == HImode
25733 || mode == SFmode || mode == SImode)
25734 return VFP_REGNO_OK_FOR_SINGLE (regno);
25735
25736 if (TARGET_NEON)
25737 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25738 || (VALID_NEON_QREG_MODE (mode)
25739 && NEON_REGNO_OK_FOR_QUAD (regno))
25740 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25741 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25742 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25743 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25744 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25745 if (TARGET_HAVE_MVE)
25746 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25747 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25748 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25749
25750 return false;
25751 }
25752
25753 if (TARGET_REALLY_IWMMXT)
25754 {
25755 if (IS_IWMMXT_GR_REGNUM (regno))
25756 return mode == SImode;
25757
25758 if (IS_IWMMXT_REGNUM (regno))
25759 return VALID_IWMMXT_REG_MODE (mode);
25760 }
25761
25762 /* We allow almost any value to be stored in the general registers.
25763 Restrict doubleword quantities to even register pairs in ARM state
25764 so that we can use ldrd. The same restriction applies for MVE
25765 in order to support Armv8.1-M Mainline instructions.
25766 Do not allow very large Neon structure opaque modes in general
25767 registers; they would use too many. */
25768 if (regno <= LAST_ARM_REGNUM)
25769 {
25770 if (ARM_NUM_REGS (mode) > 4)
25771 return false;
25772
25773 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25774 return true;
25775
25776 return !((TARGET_LDRD || TARGET_CDE)
25777 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25778 }
25779
25780 if (regno == FRAME_POINTER_REGNUM
25781 || regno == ARG_POINTER_REGNUM)
25782 /* We only allow integers in the fake hard registers. */
25783 return GET_MODE_CLASS (mode) == MODE_INT;
25784
25785 return false;
25786 }
25787
25788 /* Implement TARGET_MODES_TIEABLE_P. */
25789
25790 static bool
25791 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25792 {
25793 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25794 return true;
25795
25796 if (TARGET_HAVE_MVE
25797 && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
25798 return true;
25799
25800 /* We specifically want to allow elements of "structure" modes to
25801 be tieable to the structure. This more general condition allows
25802 other rarer situations too. */
25803 if ((TARGET_NEON
25804 && (VALID_NEON_DREG_MODE (mode1)
25805 || VALID_NEON_QREG_MODE (mode1)
25806 || VALID_NEON_STRUCT_MODE (mode1))
25807 && (VALID_NEON_DREG_MODE (mode2)
25808 || VALID_NEON_QREG_MODE (mode2)
25809 || VALID_NEON_STRUCT_MODE (mode2)))
25810 || (TARGET_HAVE_MVE
25811 && (VALID_MVE_MODE (mode1)
25812 || VALID_MVE_STRUCT_MODE (mode1))
25813 && (VALID_MVE_MODE (mode2)
25814 || VALID_MVE_STRUCT_MODE (mode2))))
25815 return true;
25816
25817 return false;
25818 }
25819
25820 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25821 not used in arm mode. */
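/* For instance (illustrative only): in Arm state r0-r14 all map to
   GENERAL_REGS, whereas under Thumb-2 r0-r7 map to LO_REGS and r8-r14 to
   HI_REGS; PC_REGNUM is always NO_REGS.  */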
25822
25823 enum reg_class
25824 arm_regno_class (int regno)
25825 {
25826 if (regno == PC_REGNUM)
25827 return NO_REGS;
25828
25829 if (IS_VPR_REGNUM (regno))
25830 return VPR_REG;
25831
25832 if (IS_PAC_REGNUM (regno))
25833 return PAC_REG;
25834
25835 if (TARGET_THUMB1)
25836 {
25837 if (regno == STACK_POINTER_REGNUM)
25838 return STACK_REG;
25839 if (regno == CC_REGNUM)
25840 return CC_REG;
25841 if (regno < 8)
25842 return LO_REGS;
25843 return HI_REGS;
25844 }
25845
25846 if (TARGET_THUMB2 && regno < 8)
25847 return LO_REGS;
25848
25849 if ( regno <= LAST_ARM_REGNUM
25850 || regno == FRAME_POINTER_REGNUM
25851 || regno == ARG_POINTER_REGNUM)
25852 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25853
25854 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25855 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25856
25857 if (IS_VFP_REGNUM (regno))
25858 {
25859 if (regno <= D7_VFP_REGNUM)
25860 return VFP_D0_D7_REGS;
25861 else if (regno <= LAST_LO_VFP_REGNUM)
25862 return VFP_LO_REGS;
25863 else
25864 return VFP_HI_REGS;
25865 }
25866
25867 if (IS_IWMMXT_REGNUM (regno))
25868 return IWMMXT_REGS;
25869
25870 if (IS_IWMMXT_GR_REGNUM (regno))
25871 return IWMMXT_GR_REGS;
25872
25873 return NO_REGS;
25874 }
25875
25876 /* Handle a special case when computing the offset
25877 of an argument from the frame pointer. */
25878 int
25879 arm_debugger_arg_offset (int value, rtx addr)
25880 {
25881 rtx_insn *insn;
25882
25883 /* We are only interested if dbxout_parms() failed to compute the offset. */
25884 if (value != 0)
25885 return 0;
25886
25887 /* We can only cope with the case where the address is held in a register. */
25888 if (!REG_P (addr))
25889 return 0;
25890
25891 /* If we are using the frame pointer to point at the argument, then
25892 an offset of 0 is correct. */
25893 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25894 return 0;
25895
25896 /* If we are using the stack pointer to point at the
25897 argument, then an offset of 0 is correct. */
25898 /* ??? Check this is consistent with thumb2 frame layout. */
25899 if ((TARGET_THUMB || !frame_pointer_needed)
25900 && REGNO (addr) == SP_REGNUM)
25901 return 0;
25902
25903 /* Oh dear. The argument is pointed to by a register rather
25904 than being held in a register, or being stored at a known
25905 offset from the frame pointer. Since GDB only understands
25906 those two kinds of argument we must translate the address
25907 held in the register into an offset from the frame pointer.
25908 We do this by searching through the insns for the function
25909 looking to see where this register gets its value. If the
25910 register is initialized from the frame pointer plus an offset
25911 then we are in luck and we can continue, otherwise we give up.
25912
25913 This code is exercised by producing debugging information
25914 for a function with arguments like this:
25915
25916 double func (double a, double b, int c, double d) {return d;}
25917
25918 Without this code the stab for parameter 'd' will be set to
25919 an offset of 0 from the frame pointer, rather than 8. */
25920
25921 /* The if() statement says:
25922
25923 If the insn is a normal instruction
25924 and if the insn is setting the value in a register
25925 and if the register being set is the register holding the address of the argument
25926 and if the address is computed by an addition
25927 that involves adding to a register
25928 which is the frame pointer
25929 a constant integer
25930
25931 then... */
25932
25933 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25934 {
25935 if ( NONJUMP_INSN_P (insn)
25936 && GET_CODE (PATTERN (insn)) == SET
25937 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25938 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25939 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25940 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25941 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25942 )
25943 {
25944 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25945
25946 break;
25947 }
25948 }
25949
25950 if (value == 0)
25951 {
25952 debug_rtx (addr);
25953 warning (0, "unable to compute real location of stacked parameter");
25954 value = 8; /* XXX magic hack */
25955 }
25956
25957 return value;
25958 }
25959 \f
25960 /* Implement TARGET_PROMOTED_TYPE. */
25961
25962 static tree
25963 arm_promoted_type (const_tree t)
25964 {
25965 if (SCALAR_FLOAT_TYPE_P (t)
25966 && TYPE_PRECISION (t) == 16
25967 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25968 return float_type_node;
25969 return NULL_TREE;
25970 }
25971
25972 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25973 This simply adds HFmode as a supported mode; even though we don't
25974 implement arithmetic on this type directly, it's supported by
25975 optabs conversions, much the way the double-word arithmetic is
25976 special-cased in the default hook. */
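/* For example (illustrative only): with -mfp16-format=none the __fp16 type
   is unavailable and HFmode is rejected here, while fixed-point modes such
   as SQmode are always accepted.  */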
25977
25978 static bool
25979 arm_scalar_mode_supported_p (scalar_mode mode)
25980 {
25981 if (mode == HFmode)
25982 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25983 else if (ALL_FIXED_POINT_MODE_P (mode))
25984 return true;
25985 else
25986 return default_scalar_mode_supported_p (mode);
25987 }
25988
25989 /* Set the value of FLT_EVAL_METHOD.
25990 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25991
25992 0: evaluate all operations and constants, whose semantic type has at
25993 most the range and precision of type float, to the range and
25994 precision of float; evaluate all other operations and constants to
25995 the range and precision of the semantic type;
25996
25997 N, where _FloatN is a supported interchange floating type
25998 evaluate all operations and constants, whose semantic type has at
25999 most the range and precision of _FloatN type, to the range and
26000 precision of the _FloatN type; evaluate all other operations and
26001 constants to the range and precision of the semantic type;
26002
26003 If we have the ARMv8.2-A extensions then we support _Float16 in native
26004 precision, so we should set this to 16. Otherwise, we support the type,
26005 but want to evaluate expressions in float precision, so set this to
26006 0. */
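/* A minimal sketch of the effect (the flags are illustrative assumptions):
   with -mfp16-format=ieee but without the Armv8.2-A FP16 instructions
   (+fp16), the multiply below is performed in 'float' and rounded back on
   return (FLT_EVAL_METHOD == 0); with -march=armv8.2-a+fp16 it is performed
   directly in _Float16 (FLT_EVAL_METHOD == 16).

       _Float16 scale (_Float16 x, _Float16 y) { return x * y; }  */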
26007
26008 static enum flt_eval_method
26009 arm_excess_precision (enum excess_precision_type type)
26010 {
26011 switch (type)
26012 {
26013 case EXCESS_PRECISION_TYPE_FAST:
26014 case EXCESS_PRECISION_TYPE_STANDARD:
26015 /* We can calculate either in 16-bit range and precision or
26016 32-bit range and precision. Make that decision based on whether
26017 we have native support for the ARMv8.2-A 16-bit floating-point
26018 instructions or not. */
26019 return (TARGET_VFP_FP16INST
26020 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26021 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
26022 case EXCESS_PRECISION_TYPE_IMPLICIT:
26023 case EXCESS_PRECISION_TYPE_FLOAT16:
26024 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26025 default:
26026 gcc_unreachable ();
26027 }
26028 return FLT_EVAL_METHOD_UNPREDICTABLE;
26029 }
26030
26031
26032 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
26033 _Float16 if we are using anything other than ieee format for 16-bit
26034 floating point. Otherwise, punt to the default implementation. */
26035 static opt_scalar_float_mode
26036 arm_floatn_mode (int n, bool extended)
26037 {
26038 if (!extended && n == 16)
26039 {
26040 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
26041 return HFmode;
26042 return opt_scalar_float_mode ();
26043 }
26044
26045 return default_floatn_mode (n, extended);
26046 }
26047
26048
26049 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26050 not to early-clobber SRC registers in the process.
26051
26052 We assume that the operands described by SRC and DEST represent a
26053 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26054 number of components into which the copy has been decomposed. */
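/* Sketch of the overlap case (illustrative only): for a two-register copy
   where DEST is { d1, d2 } and SRC is { d0, d1 }, emitting d1 := d0 first
   would clobber the d1 that still has to be read, so the loop below
   reverses the order and emits d2 := d1 followed by d1 := d0.  */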
26055 void
26056 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
26057 {
26058 unsigned int i;
26059
26060 if (!reg_overlap_mentioned_p (operands[0], operands[1])
26061 || REGNO (operands[0]) < REGNO (operands[1]))
26062 {
26063 for (i = 0; i < count; i++)
26064 {
26065 operands[2 * i] = dest[i];
26066 operands[2 * i + 1] = src[i];
26067 }
26068 }
26069 else
26070 {
26071 for (i = 0; i < count; i++)
26072 {
26073 operands[2 * i] = dest[count - i - 1];
26074 operands[2 * i + 1] = src[count - i - 1];
26075 }
26076 }
26077 }
26078
26079 /* Split operands into moves from op[1] + op[2] into op[0]. */
26080
26081 void
26082 neon_split_vcombine (rtx operands[3])
26083 {
26084 unsigned int dest = REGNO (operands[0]);
26085 unsigned int src1 = REGNO (operands[1]);
26086 unsigned int src2 = REGNO (operands[2]);
26087 machine_mode halfmode = GET_MODE (operands[1]);
26088 unsigned int halfregs = REG_NREGS (operands[1]);
26089 rtx destlo, desthi;
26090
26091 if (src1 == dest && src2 == dest + halfregs)
26092 {
26093 /* No-op move. Can't split to nothing; emit something. */
26094 emit_note (NOTE_INSN_DELETED);
26095 return;
26096 }
26097
26098 /* Preserve register attributes for variable tracking. */
26099 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
26100 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26101 GET_MODE_SIZE (halfmode));
26102
26103 /* Special case of reversed high/low parts. Use VSWP. */
26104 if (src2 == dest && src1 == dest + halfregs)
26105 {
26106 rtx x = gen_rtx_SET (destlo, operands[1]);
26107 rtx y = gen_rtx_SET (desthi, operands[2]);
26108 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26109 return;
26110 }
26111
26112 if (!reg_overlap_mentioned_p (operands[2], destlo))
26113 {
26114 /* Try to avoid unnecessary moves if part of the result
26115 is in the right place already. */
26116 if (src1 != dest)
26117 emit_move_insn (destlo, operands[1]);
26118 if (src2 != dest + halfregs)
26119 emit_move_insn (desthi, operands[2]);
26120 }
26121 else
26122 {
26123 if (src2 != dest + halfregs)
26124 emit_move_insn (desthi, operands[2]);
26125 if (src1 != dest)
26126 emit_move_insn (destlo, operands[1]);
26127 }
26128 }
26129 \f
26130 /* Return the number (counting from 0) of
26131 the least significant set bit in MASK. */
26132
26133 inline static int
26134 number_of_first_bit_set (unsigned mask)
26135 {
26136 return ctz_hwi (mask);
26137 }
26138
26139 /* Like emit_multi_reg_push, but allowing for a different set of
26140 registers to be described as saved. MASK is the set of registers
26141 to be saved; REAL_REGS is the set of registers to be described as
26142 saved. If REAL_REGS is 0, only describe the stack adjustment. */
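/* Example (illustrative only): MASK == (1 << 4) | (1 << LR_REGNUM) emits
   "push {r4, lr}"; if REAL_REGS is the same mask the unwind note records
   both stores, whereas REAL_REGS == 0 records only the 8-byte stack
   adjustment.  */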
26143
26144 static rtx_insn *
26145 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26146 {
26147 unsigned long regno;
26148 rtx par[10], tmp, reg;
26149 rtx_insn *insn;
26150 int i, j;
26151
26152 /* Build the parallel of the registers actually being stored. */
26153 for (i = 0; mask; ++i, mask &= mask - 1)
26154 {
26155 regno = ctz_hwi (mask);
26156 reg = gen_rtx_REG (SImode, regno);
26157
26158 if (i == 0)
26159 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26160 else
26161 tmp = gen_rtx_USE (VOIDmode, reg);
26162
26163 par[i] = tmp;
26164 }
26165
26166 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26167 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26168 tmp = gen_frame_mem (BLKmode, tmp);
26169 tmp = gen_rtx_SET (tmp, par[0]);
26170 par[0] = tmp;
26171
26172 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26173 insn = emit_insn (tmp);
26174
26175 /* Always build the stack adjustment note for unwind info. */
26176 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26177 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26178 par[0] = tmp;
26179
26180 /* Build the parallel of the registers recorded as saved for unwind. */
26181 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26182 {
26183 regno = ctz_hwi (real_regs);
26184 reg = gen_rtx_REG (SImode, regno);
26185
26186 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26187 tmp = gen_frame_mem (SImode, tmp);
26188 tmp = gen_rtx_SET (tmp, reg);
26189 RTX_FRAME_RELATED_P (tmp) = 1;
26190 par[j + 1] = tmp;
26191 }
26192
26193 if (j == 0)
26194 tmp = par[0];
26195 else
26196 {
26197 RTX_FRAME_RELATED_P (par[0]) = 1;
26198 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26199 }
26200
26201 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26202
26203 return insn;
26204 }
26205
26206 /* Emit code to push or pop registers to or from the stack. F is the
26207 assembly file. MASK is the registers to pop. */
26208 static void
26209 thumb_pop (FILE *f, unsigned long mask)
26210 {
26211 int regno;
26212 int lo_mask = mask & 0xFF;
26213
26214 gcc_assert (mask);
26215
26216 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26217 {
26218 /* Special case. Do not generate a POP PC statement here; do it in
26219 thumb_exit (). */
26220 thumb_exit (f, -1);
26221 return;
26222 }
26223
26224 fprintf (f, "\tpop\t{");
26225
26226 /* Look at the low registers first. */
26227 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26228 {
26229 if (lo_mask & 1)
26230 {
26231 asm_fprintf (f, "%r", regno);
26232
26233 if ((lo_mask & ~1) != 0)
26234 fprintf (f, ", ");
26235 }
26236 }
26237
26238 if (mask & (1 << PC_REGNUM))
26239 {
26240 /* Catch popping the PC. */
26241 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26242 || IS_CMSE_ENTRY (arm_current_func_type ()))
26243 {
26244 /* The PC is never popped directly; instead
26245 it is popped into r3 and then BX is used. */
26246 fprintf (f, "}\n");
26247
26248 thumb_exit (f, -1);
26249
26250 return;
26251 }
26252 else
26253 {
26254 if (mask & 0xFF)
26255 fprintf (f, ", ");
26256
26257 asm_fprintf (f, "%r", PC_REGNUM);
26258 }
26259 }
26260
26261 fprintf (f, "}\n");
26262 }
26263
26264 /* Generate code to return from a thumb function.
26265 If 'reg_containing_return_addr' is -1, then the return address is
26266 actually on the stack, at the stack pointer.
26267
26268 Note: do not forget to update the length attribute of the corresponding insn
26269 pattern when changing assembly output (e.g. the length attribute of epilogue_insns when
26270 updating Armv8-M Baseline Security Extensions register clearing
26271 sequences). */
26272 static void
26273 thumb_exit (FILE *f, int reg_containing_return_addr)
26274 {
26275 unsigned regs_available_for_popping;
26276 unsigned regs_to_pop;
26277 int pops_needed;
26278 unsigned available;
26279 unsigned required;
26280 machine_mode mode;
26281 int size;
26282 int restore_a4 = FALSE;
26283
26284 /* Compute the registers we need to pop. */
26285 regs_to_pop = 0;
26286 pops_needed = 0;
26287
26288 if (reg_containing_return_addr == -1)
26289 {
26290 regs_to_pop |= 1 << LR_REGNUM;
26291 ++pops_needed;
26292 }
26293
26294 if (TARGET_BACKTRACE)
26295 {
26296 /* Restore the (ARM) frame pointer and stack pointer. */
26297 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26298 pops_needed += 2;
26299 }
26300
26301 /* If there is nothing to pop then just emit the BX instruction and
26302 return. */
26303 if (pops_needed == 0)
26304 {
26305 if (crtl->calls_eh_return)
26306 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26307
26308 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26309 {
26310 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26311 emitted by cmse_nonsecure_entry_clear_before_return (). */
26312 if (!TARGET_HAVE_FPCXT_CMSE)
26313 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26314 reg_containing_return_addr);
26315 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26316 }
26317 else
26318 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26319 return;
26320 }
26321 /* Otherwise if we are not supporting interworking and we have not created
26322 a backtrace structure and the function was not entered in ARM mode then
26323 just pop the return address straight into the PC. */
26324 else if (!TARGET_INTERWORK
26325 && !TARGET_BACKTRACE
26326 && !is_called_in_ARM_mode (current_function_decl)
26327 && !crtl->calls_eh_return
26328 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26329 {
26330 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26331 return;
26332 }
26333
26334 /* Find out how many of the (return) argument registers we can corrupt. */
26335 regs_available_for_popping = 0;
26336
26337 /* If returning via __builtin_eh_return, the bottom three registers
26338 all contain information needed for the return. */
26339 if (crtl->calls_eh_return)
26340 size = 12;
26341 else
26342 {
26343 /* If possible, deduce the registers used from the function's
26344 return value. This is more reliable than examining
26345 df_regs_ever_live_p () because that will be set if the register is
26346 ever used in the function, not just if the register is used
26347 to hold a return value. */
26348
26349 if (crtl->return_rtx != 0)
26350 mode = GET_MODE (crtl->return_rtx);
26351 else
26352 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26353
26354 size = GET_MODE_SIZE (mode);
26355
26356 if (size == 0)
26357 {
26358 /* In a void function we can use any argument register.
26359 In a function that returns a structure on the stack
26360 we can use the second and third argument registers. */
26361 if (mode == VOIDmode)
26362 regs_available_for_popping =
26363 (1 << ARG_REGISTER (1))
26364 | (1 << ARG_REGISTER (2))
26365 | (1 << ARG_REGISTER (3));
26366 else
26367 regs_available_for_popping =
26368 (1 << ARG_REGISTER (2))
26369 | (1 << ARG_REGISTER (3));
26370 }
26371 else if (size <= 4)
26372 regs_available_for_popping =
26373 (1 << ARG_REGISTER (2))
26374 | (1 << ARG_REGISTER (3));
26375 else if (size <= 8)
26376 regs_available_for_popping =
26377 (1 << ARG_REGISTER (3));
26378 }
26379
26380 /* Match registers to be popped with registers into which we pop them. */
26381 for (available = regs_available_for_popping,
26382 required = regs_to_pop;
26383 required != 0 && available != 0;
26384 available &= ~(available & - available),
26385 required &= ~(required & - required))
26386 -- pops_needed;
26387
26388 /* If we have any popping registers left over, remove them. */
26389 if (available > 0)
26390 regs_available_for_popping &= ~available;
26391
26392 /* Otherwise if we need another popping register we can use
26393 the fourth argument register. */
26394 else if (pops_needed)
26395 {
26396 /* If we have not found any free argument registers and
26397 reg a4 contains the return address, we must move it. */
26398 if (regs_available_for_popping == 0
26399 && reg_containing_return_addr == LAST_ARG_REGNUM)
26400 {
26401 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26402 reg_containing_return_addr = LR_REGNUM;
26403 }
26404 else if (size > 12)
26405 {
26406 /* Register a4 is being used to hold part of the return value,
26407 but we have dire need of a free, low register. */
26408 restore_a4 = TRUE;
26409
26410 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26411 }
26412
26413 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26414 {
26415 /* The fourth argument register is available. */
26416 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26417
26418 --pops_needed;
26419 }
26420 }
26421
26422 /* Pop as many registers as we can. */
26423 thumb_pop (f, regs_available_for_popping);
26424
26425 /* Process the registers we popped. */
26426 if (reg_containing_return_addr == -1)
26427 {
26428 /* The return address was popped into the lowest numbered register. */
26429 regs_to_pop &= ~(1 << LR_REGNUM);
26430
26431 reg_containing_return_addr =
26432 number_of_first_bit_set (regs_available_for_popping);
26433
26434 /* Remove this register from the mask of available registers, so that
26435 the return address will not be corrupted by further pops. */
26436 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26437 }
26438
26439 /* If we popped other registers then handle them here. */
26440 if (regs_available_for_popping)
26441 {
26442 int frame_pointer;
26443
26444 /* Work out which register currently contains the frame pointer. */
26445 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26446
26447 /* Move it into the correct place. */
26448 asm_fprintf (f, "\tmov\t%r, %r\n",
26449 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26450
26451 /* (Temporarily) remove it from the mask of popped registers. */
26452 regs_available_for_popping &= ~(1 << frame_pointer);
26453 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26454
26455 if (regs_available_for_popping)
26456 {
26457 int stack_pointer;
26458
26459 /* We popped the stack pointer as well;
26460 find the register that contains it. */
26461 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26462
26463 /* Move it into the stack register. */
26464 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26465
26466 /* At this point we have popped all necessary registers, so
26467 do not worry about restoring regs_available_for_popping
26468 to its correct value:
26469
26470 assert (pops_needed == 0)
26471 assert (regs_available_for_popping == (1 << frame_pointer))
26472 assert (regs_to_pop == (1 << STACK_POINTER)) */
26473 }
26474 else
26475 {
26476 /* Since we have just moved the popped value into the frame
26477 pointer, the popping register is available for reuse, and
26478 we know that we still have the stack pointer left to pop. */
26479 regs_available_for_popping |= (1 << frame_pointer);
26480 }
26481 }
26482
26483 /* If we still have registers left on the stack, but we no longer have
26484 any registers into which we can pop them, then we must move the return
26485 address into the link register and make available the register that
26486 contained it. */
26487 if (regs_available_for_popping == 0 && pops_needed > 0)
26488 {
26489 regs_available_for_popping |= 1 << reg_containing_return_addr;
26490
26491 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26492 reg_containing_return_addr);
26493
26494 reg_containing_return_addr = LR_REGNUM;
26495 }
26496
26497 /* If we have registers left on the stack then pop some more.
26498 We know that at most we will want to pop FP and SP. */
26499 if (pops_needed > 0)
26500 {
26501 int popped_into;
26502 int move_to;
26503
26504 thumb_pop (f, regs_available_for_popping);
26505
26506 /* We have popped either FP or SP.
26507 Move whichever one it is into the correct register. */
26508 popped_into = number_of_first_bit_set (regs_available_for_popping);
26509 move_to = number_of_first_bit_set (regs_to_pop);
26510
26511 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26512 --pops_needed;
26513 }
26514
26515 /* If we still have not popped everything then we must have only
26516 had one register available to us and we are now popping the SP. */
26517 if (pops_needed > 0)
26518 {
26519 int popped_into;
26520
26521 thumb_pop (f, regs_available_for_popping);
26522
26523 popped_into = number_of_first_bit_set (regs_available_for_popping);
26524
26525 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26526 /*
26527 assert (regs_to_pop == (1 << STACK_POINTER))
26528 assert (pops_needed == 1)
26529 */
26530 }
26531
26532 /* If necessary restore the a4 register. */
26533 if (restore_a4)
26534 {
26535 if (reg_containing_return_addr != LR_REGNUM)
26536 {
26537 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26538 reg_containing_return_addr = LR_REGNUM;
26539 }
26540
26541 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26542 }
26543
26544 if (crtl->calls_eh_return)
26545 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26546
26547 /* Return to caller. */
26548 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26549 {
26550 /* This is for the cases where LR is not being used to contain the return
26551 address. It may therefore contain information that we might not want
26552 to leak, hence it must be cleared. The value in R0 will never be a
26553 secret at this point, so it is safe to use it, see the clearing code
26554 in cmse_nonsecure_entry_clear_before_return (). */
26555 if (reg_containing_return_addr != LR_REGNUM)
26556 asm_fprintf (f, "\tmov\tlr, r0\n");
26557
26558 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26559 by cmse_nonsecure_entry_clear_before_return (). */
26560 if (!TARGET_HAVE_FPCXT_CMSE)
26561 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26562 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26563 }
26564 else
26565 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26566 }
26567 \f
26568 /* Scan INSN just before assembler is output for it.
26569 For Thumb-1, we track the status of the condition codes; this
26570 information is used in the cbranchsi4_insn pattern. */
26571 void
26572 thumb1_final_prescan_insn (rtx_insn *insn)
26573 {
26574 if (flag_print_asm_name)
26575 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26576 INSN_ADDRESSES (INSN_UID (insn)));
26577 /* Don't overwrite the previous setter when we get to a cbranch. */
26578 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26579 {
26580 enum attr_conds conds;
26581
26582 if (cfun->machine->thumb1_cc_insn)
26583 {
26584 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26585 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26586 CC_STATUS_INIT;
26587 }
26588 conds = get_attr_conds (insn);
26589 if (conds == CONDS_SET)
26590 {
26591 rtx set = single_set (insn);
26592 cfun->machine->thumb1_cc_insn = insn;
26593 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26594 cfun->machine->thumb1_cc_op1 = const0_rtx;
26595 cfun->machine->thumb1_cc_mode = CC_NZmode;
26596 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26597 {
26598 rtx src1 = XEXP (SET_SRC (set), 1);
26599 if (src1 == const0_rtx)
26600 cfun->machine->thumb1_cc_mode = CCmode;
26601 }
26602 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26603 {
26604 /* Record the src register operand instead of dest because
26605 cprop_hardreg pass propagates src. */
26606 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26607 }
26608 }
26609 else if (conds != CONDS_NOCOND)
26610 cfun->machine->thumb1_cc_insn = NULL_RTX;
26611 }
26612
26613 /* Check if unexpected far jump is used. */
26614 if (cfun->machine->lr_save_eliminated
26615 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26616 internal_error("Unexpected thumb1 far jump");
26617 }
26618
26619 int
26620 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26621 {
26622 unsigned HOST_WIDE_INT mask = 0xff;
26623 int i;
26624
26625 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26626 if (val == 0) /* XXX */
26627 return 0;
26628
26629 for (i = 0; i < 25; i++)
26630 if ((val & (mask << i)) == val)
26631 return 1;
26632
26633 return 0;
26634 }
26635
26636 /* Returns nonzero if the current function contains,
26637 or might contain a far jump. */
26638 static int
26639 thumb_far_jump_used_p (void)
26640 {
26641 rtx_insn *insn;
26642 bool far_jump = false;
26643 unsigned int func_size = 0;
26644
26645 /* If we have already decided that far jumps may be used,
26646 do not bother checking again, and always return true even if
26647 it turns out that they are not being used. Once we have made
26648 the decision that far jumps are present (and that hence the link
26649 register will be pushed onto the stack) we cannot go back on it. */
26650 if (cfun->machine->far_jump_used)
26651 return 1;
26652
26653 /* If this function is not being called from the prologue/epilogue
26654 generation code then it must be being called from the
26655 INITIAL_ELIMINATION_OFFSET macro. */
26656 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26657 {
26658 /* In this case we know that we are being asked about the elimination
26659 of the arg pointer register. If that register is not being used,
26660 then there are no arguments on the stack, and we do not have to
26661 worry that a far jump might force the prologue to push the link
26662 register, changing the stack offsets. In this case we can just
26663 return false, since the presence of far jumps in the function will
26664 not affect stack offsets.
26665
26666 If the arg pointer is live (or if it was live, but has now been
26667 eliminated and so set to dead) then we do have to test to see if
26668 the function might contain a far jump. This test can lead to some
26669 false negatives, since before reload is completed, the length of
26670 branch instructions is not known, so gcc defaults to returning their
26671 longest length, which in turn sets the far jump attribute to true.
26672
26673 A false negative will not result in bad code being generated, but it
26674 will result in a needless push and pop of the link register. We
26675 hope that this does not occur too often.
26676
26677 If we need doubleword stack alignment this could affect the other
26678 elimination offsets so we can't risk getting it wrong. */
26679 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26680 cfun->machine->arg_pointer_live = 1;
26681 else if (!cfun->machine->arg_pointer_live)
26682 return 0;
26683 }
26684
26685 /* We should not change far_jump_used during or after reload, as there is
26686 no chance to change stack frame layout. */
26687 if (reload_in_progress || reload_completed)
26688 return 0;
26689
26690 /* Check to see if the function contains a branch
26691 insn with the far jump attribute set. */
26692 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26693 {
26694 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26695 {
26696 far_jump = true;
26697 }
26698 func_size += get_attr_length (insn);
26699 }
26700
26701 /* Attribute far_jump will always be true for thumb1 before
26702 shorten_branch pass. So checking far_jump attribute before
26703 shorten_branch isn't very useful.
26704
26705 The following heuristic tries to estimate more accurately whether a far jump
26706 may finally be used. The heuristic is very conservative, as there is
26707 no chance to roll back the decision not to use a far jump.
26708
26709 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26710    2-byte insn is associated with a 4-byte constant pool entry. Using
26711 function size 2048/3 as the threshold is conservative enough. */
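   /* In other words: with the factor-of-3 worst case above, far jumps are
      assumed as soon as the summed insn lengths reach about 683 bytes,
      the smallest size for which size * 3 >= 2048, which could exceed the
      2046-byte forward reach of a Thumb-1 branch.  */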
26712 if (far_jump)
26713 {
26714 if ((func_size * 3) >= 2048)
26715 {
26716 /* Record the fact that we have decided that
26717 the function does use far jumps. */
26718 cfun->machine->far_jump_used = 1;
26719 return 1;
26720 }
26721 }
26722
26723 return 0;
26724 }
26725
26726 /* Return nonzero if FUNC must be entered in ARM mode. */
26727 static bool
26728 is_called_in_ARM_mode (tree func)
26729 {
26730 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26731
26732 /* Ignore the problem about functions whose address is taken. */
26733 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26734 return true;
26735
26736 #ifdef ARM_PE
26737 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26738 #else
26739 return false;
26740 #endif
26741 }
26742
26743 /* Given the stack offsets and register mask in OFFSETS, decide how
26744 many additional registers to push instead of subtracting a constant
26745 from SP. For epilogues the principle is the same except we use pop.
26746 FOR_PROLOGUE indicates which we're generating. */
26747 static int
26748 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26749 {
26750 HOST_WIDE_INT amount;
26751 unsigned long live_regs_mask = offsets->saved_regs_mask;
26752 /* Extract a mask of the ones we can give to the Thumb's push/pop
26753 instruction. */
26754 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26755 /* Then count how many other high registers will need to be pushed. */
26756 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26757 int n_free, reg_base, size;
26758
26759 if (!for_prologue && frame_pointer_needed)
26760 amount = offsets->locals_base - offsets->saved_regs;
26761 else
26762 amount = offsets->outgoing_args - offsets->saved_regs;
26763
26764 /* If the stack frame size is 512 exactly, we can save one load
26765 instruction, which should make this a win even when optimizing
26766 for speed. */
26767 if (!optimize_size && amount != 512)
26768 return 0;
26769
26770 /* Can't do this if there are high registers to push. */
26771 if (high_regs_pushed != 0)
26772 return 0;
26773
26774 /* Shouldn't do it in the prologue if no registers would normally
26775 be pushed at all. In the epilogue, also allow it if we'll have
26776 a pop insn for the PC. */
26777 if (l_mask == 0
26778 && (for_prologue
26779 || TARGET_BACKTRACE
26780 || (live_regs_mask & 1 << LR_REGNUM) == 0
26781 || TARGET_INTERWORK
26782 || crtl->args.pretend_args_size != 0))
26783 return 0;
26784
26785 /* Don't do this if thumb_expand_prologue wants to emit instructions
26786 between the push and the stack frame allocation. */
26787 if (for_prologue
26788 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26789 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26790 return 0;
26791
26792 reg_base = 0;
26793 n_free = 0;
26794 if (!for_prologue)
26795 {
26796 size = arm_size_return_regs ();
26797 reg_base = ARM_NUM_INTS (size);
26798 live_regs_mask >>= reg_base;
26799 }
26800
26801 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26802 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26803 {
26804 live_regs_mask >>= 1;
26805 n_free++;
26806 }
26807
26808 if (n_free == 0)
26809 return 0;
26810 gcc_assert (amount / 4 * 4 == amount);
26811
26812 if (amount >= 512 && (amount - n_free * 4) < 512)
26813 return (amount - 508) / 4;
26814 if (amount <= n_free * 4)
26815 return amount / 4;
26816 return 0;
26817 }
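/* Worked example (only relevant when optimizing for size): with
   AMOUNT == 516 and two suitable free low registers (n_free == 2), the
   function above returns (516 - 508) / 4 == 2, so two extra registers are
   pushed/popped and the remaining adjustment of 508 bytes still fits a
   single add/sub immediate.  When not optimizing for size only
   AMOUNT == 512 qualifies, as the early exit above shows.  */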
26818
26819 /* The bits which aren't usefully expanded as rtl. */
26820 const char *
26821 thumb1_unexpanded_epilogue (void)
26822 {
26823 arm_stack_offsets *offsets;
26824 int regno;
26825 unsigned long live_regs_mask = 0;
26826 int high_regs_pushed = 0;
26827 int extra_pop;
26828 int had_to_push_lr;
26829 int size;
26830
26831 if (cfun->machine->return_used_this_function != 0)
26832 return "";
26833
26834 if (IS_NAKED (arm_current_func_type ()))
26835 return "";
26836
26837 offsets = arm_get_frame_offsets ();
26838 live_regs_mask = offsets->saved_regs_mask;
26839 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26840
26841   /* We can deduce the registers used from the function's return value.
26842      This is more reliable than examining df_regs_ever_live_p () because that
26843 will be set if the register is ever used in the function, not just if
26844 the register is used to hold a return value. */
26845 size = arm_size_return_regs ();
26846
26847 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26848 if (extra_pop > 0)
26849 {
26850 unsigned long extra_mask = (1 << extra_pop) - 1;
26851 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26852 }
26853
26854 /* The prolog may have pushed some high registers to use as
26855 work registers. e.g. the testsuite file:
26856 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26857 compiles to produce:
26858 push {r4, r5, r6, r7, lr}
26859 mov r7, r9
26860 mov r6, r8
26861 push {r6, r7}
26862 as part of the prolog. We have to undo that pushing here. */
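  /* For the example above, the undo sequence generated below ends up
     looking roughly like
	pop	{r2, r3}
	mov	r9, r3
	mov	r8, r2
     although the exact low registers used depend on the mask computed
     below.  */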
26863
26864 if (high_regs_pushed)
26865 {
26866 unsigned long mask = live_regs_mask & 0xff;
26867 int next_hi_reg;
26868
26869 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26870
26871 if (mask == 0)
26872 /* Oh dear! We have no low registers into which we can pop
26873 high registers! */
26874 internal_error
26875 ("no low registers available for popping high registers");
26876
26877 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26878 if (live_regs_mask & (1 << next_hi_reg))
26879 break;
26880
26881 while (high_regs_pushed)
26882 {
26883 /* Find lo register(s) into which the high register(s) can
26884 be popped. */
26885 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26886 {
26887 if (mask & (1 << regno))
26888 high_regs_pushed--;
26889 if (high_regs_pushed == 0)
26890 break;
26891 }
26892
26893 if (high_regs_pushed == 0 && regno >= 0)
26894 mask &= ~((1 << regno) - 1);
26895
26896 /* Pop the values into the low register(s). */
26897 thumb_pop (asm_out_file, mask);
26898
26899 /* Move the value(s) into the high registers. */
26900 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26901 {
26902 if (mask & (1 << regno))
26903 {
26904 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26905 regno);
26906
26907 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26908 next_hi_reg--)
26909 if (live_regs_mask & (1 << next_hi_reg))
26910 break;
26911 }
26912 }
26913 }
26914 live_regs_mask &= ~0x0f00;
26915 }
26916
26917 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26918 live_regs_mask &= 0xff;
26919
26920 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26921 {
26922 /* Pop the return address into the PC. */
26923 if (had_to_push_lr)
26924 live_regs_mask |= 1 << PC_REGNUM;
26925
26926 /* Either no argument registers were pushed or a backtrace
26927 structure was created which includes an adjusted stack
26928 pointer, so just pop everything. */
26929 if (live_regs_mask)
26930 thumb_pop (asm_out_file, live_regs_mask);
26931
26932 /* We have either just popped the return address into the
26933 	 PC or it was kept in LR for the entire function.
26934 Note that thumb_pop has already called thumb_exit if the
26935 PC was in the list. */
26936 if (!had_to_push_lr)
26937 thumb_exit (asm_out_file, LR_REGNUM);
26938 }
26939 else
26940 {
26941 /* Pop everything but the return address. */
26942 if (live_regs_mask)
26943 thumb_pop (asm_out_file, live_regs_mask);
26944
26945 if (had_to_push_lr)
26946 {
26947 if (size > 12)
26948 {
26949 /* We have no free low regs, so save one. */
26950 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26951 LAST_ARG_REGNUM);
26952 }
26953
26954 /* Get the return address into a temporary register. */
26955 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26956
26957 if (size > 12)
26958 {
26959 /* Move the return address to lr. */
26960 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26961 LAST_ARG_REGNUM);
26962 /* Restore the low register. */
26963 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26964 IP_REGNUM);
26965 regno = LR_REGNUM;
26966 }
26967 else
26968 regno = LAST_ARG_REGNUM;
26969 }
26970 else
26971 regno = LR_REGNUM;
26972
26973 /* Remove the argument registers that were pushed onto the stack. */
26974 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26975 SP_REGNUM, SP_REGNUM,
26976 crtl->args.pretend_args_size);
26977
26978 thumb_exit (asm_out_file, regno);
26979 }
26980
26981 return "";
26982 }
26983
26984 /* Functions to save and restore machine-specific function data. */
26985 static struct machine_function *
26986 arm_init_machine_status (void)
26987 {
26988 struct machine_function *machine;
26989 machine = ggc_cleared_alloc<machine_function> ();
26990
26991 #if ARM_FT_UNKNOWN != 0
26992 machine->func_type = ARM_FT_UNKNOWN;
26993 #endif
26994 machine->static_chain_stack_bytes = -1;
26995 machine->pacspval_needed = 0;
26996 return machine;
26997 }
26998
26999 /* Return an RTX indicating where the return address to the
27000 calling function can be found. */
27001 rtx
27002 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27003 {
27004 if (count != 0)
27005 return NULL_RTX;
27006
27007 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27008 }
27009
27010 /* Do anything needed before RTL is emitted for each function. */
27011 void
27012 arm_init_expanders (void)
27013 {
27014 /* Arrange to initialize and mark the machine per-function status. */
27015 init_machine_status = arm_init_machine_status;
27016
27017 /* This is to stop the combine pass optimizing away the alignment
27018 adjustment of va_arg. */
27019 /* ??? It is claimed that this should not be necessary. */
27020 if (cfun)
27021 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27022 }
27023
27024 /* Return true if FUNC is to be compiled in a different instruction-set mode (ARM vs Thumb) from the one currently selected. */
27025
27026 bool
27027 arm_change_mode_p (tree func)
27028 {
27029 if (TREE_CODE (func) != FUNCTION_DECL)
27030 return false;
27031
27032 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
27033
27034 if (!callee_tree)
27035 callee_tree = target_option_default_node;
27036
27037 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
27038 int flags = callee_opts->x_target_flags;
27039
27040 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
27041 }
27042
27043 /* Like arm_compute_initial_elimination_offset. Simpler because there
27044 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27045 to point at the base of the local variables after static stack
27046 space for a function has been allocated. */
27047
27048 HOST_WIDE_INT
27049 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27050 {
27051 arm_stack_offsets *offsets;
27052
27053 offsets = arm_get_frame_offsets ();
27054
27055 switch (from)
27056 {
27057 case ARG_POINTER_REGNUM:
27058 switch (to)
27059 {
27060 case STACK_POINTER_REGNUM:
27061 return offsets->outgoing_args - offsets->saved_args;
27062
27063 case FRAME_POINTER_REGNUM:
27064 return offsets->soft_frame - offsets->saved_args;
27065
27066 case ARM_HARD_FRAME_POINTER_REGNUM:
27067 return offsets->saved_regs - offsets->saved_args;
27068
27069 case THUMB_HARD_FRAME_POINTER_REGNUM:
27070 return offsets->locals_base - offsets->saved_args;
27071
27072 default:
27073 gcc_unreachable ();
27074 }
27075 break;
27076
27077 case FRAME_POINTER_REGNUM:
27078 switch (to)
27079 {
27080 case STACK_POINTER_REGNUM:
27081 return offsets->outgoing_args - offsets->soft_frame;
27082
27083 case ARM_HARD_FRAME_POINTER_REGNUM:
27084 return offsets->saved_regs - offsets->soft_frame;
27085
27086 case THUMB_HARD_FRAME_POINTER_REGNUM:
27087 return offsets->locals_base - offsets->soft_frame;
27088
27089 default:
27090 gcc_unreachable ();
27091 }
27092 break;
27093
27094 default:
27095 gcc_unreachable ();
27096 }
27097 }
27098
27099 /* Generate the function's prologue. */
27100
27101 void
27102 thumb1_expand_prologue (void)
27103 {
27104 rtx_insn *insn;
27105
27106 HOST_WIDE_INT amount;
27107 HOST_WIDE_INT size;
27108 arm_stack_offsets *offsets;
27109 unsigned long func_type;
27110 int regno;
27111 unsigned long live_regs_mask;
27112 unsigned long l_mask;
27113 unsigned high_regs_pushed = 0;
27114 bool lr_needs_saving;
27115
27116 func_type = arm_current_func_type ();
27117
27118 /* Naked functions don't have prologues. */
27119 if (IS_NAKED (func_type))
27120 {
27121 if (flag_stack_usage_info)
27122 current_function_static_stack_size = 0;
27123 return;
27124 }
27125
27126 if (IS_INTERRUPT (func_type))
27127 {
27128 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27129 return;
27130 }
27131
27132 if (is_called_in_ARM_mode (current_function_decl))
27133 emit_insn (gen_prologue_thumb1_interwork ());
27134
27135 offsets = arm_get_frame_offsets ();
27136 live_regs_mask = offsets->saved_regs_mask;
27137 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27138
27139 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27140 l_mask = live_regs_mask & 0x40ff;
27141 /* Then count how many other high registers will need to be pushed. */
27142 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27143
27144 if (crtl->args.pretend_args_size)
27145 {
27146 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27147
27148 if (cfun->machine->uses_anonymous_args)
27149 {
27150 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27151 unsigned long mask;
27152
27153 mask = 1ul << (LAST_ARG_REGNUM + 1);
27154 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27155
27156 insn = thumb1_emit_multi_reg_push (mask, 0);
27157 }
27158 else
27159 {
27160 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27161 stack_pointer_rtx, x));
27162 }
27163 RTX_FRAME_RELATED_P (insn) = 1;
27164 }
27165
27166 if (TARGET_BACKTRACE)
27167 {
27168 HOST_WIDE_INT offset = 0;
27169 unsigned work_register;
27170 rtx work_reg, x, arm_hfp_rtx;
27171
27172 /* We have been asked to create a stack backtrace structure.
27173 The code looks like this:
27174
27175 0 .align 2
27176 0 func:
27177 0 sub SP, #16 Reserve space for 4 registers.
27178 2 push {R7} Push low registers.
27179 4 add R7, SP, #20 Get the stack pointer before the push.
27180 6 str R7, [SP, #8] Store the stack pointer
27181 (before reserving the space).
27182 8 mov R7, PC Get hold of the start of this code + 12.
27183 10 str R7, [SP, #16] Store it.
27184 12 mov R7, FP Get hold of the current frame pointer.
27185 14 str R7, [SP, #4] Store it.
27186 16 mov R7, LR Get hold of the current return address.
27187 18 str R7, [SP, #12] Store it.
27188 20 add R7, SP, #16 Point at the start of the
27189 backtrace structure.
27190 22 mov FP, R7 Put this value into the frame pointer. */
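	 /* After this sequence the new frame pointer points at the saved PC
	    slot of the backtrace structure, with the saved LR at FP - 4, the
	    pre-push stack pointer at FP - 8 and the caller's frame pointer
	    at FP - 12.  */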
27191
27192 work_register = thumb_find_work_register (live_regs_mask);
27193 work_reg = gen_rtx_REG (SImode, work_register);
27194 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27195
27196 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27197 stack_pointer_rtx, GEN_INT (-16)));
27198 RTX_FRAME_RELATED_P (insn) = 1;
27199
27200 if (l_mask)
27201 {
27202 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27203 RTX_FRAME_RELATED_P (insn) = 1;
27204 lr_needs_saving = false;
27205
27206 offset = bit_count (l_mask) * UNITS_PER_WORD;
27207 }
27208
27209 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27210 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27211
27212 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27213 x = gen_frame_mem (SImode, x);
27214 emit_move_insn (x, work_reg);
27215
27216 /* Make sure that the instruction fetching the PC is in the right place
27217 to calculate "start of backtrace creation code + 12". */
27218 /* ??? The stores using the common WORK_REG ought to be enough to
27219 prevent the scheduler from doing anything weird. Failing that
27220 we could always move all of the following into an UNSPEC_VOLATILE. */
27221 if (l_mask)
27222 {
27223 x = gen_rtx_REG (SImode, PC_REGNUM);
27224 emit_move_insn (work_reg, x);
27225
27226 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27227 x = gen_frame_mem (SImode, x);
27228 emit_move_insn (x, work_reg);
27229
27230 emit_move_insn (work_reg, arm_hfp_rtx);
27231
27232 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27233 x = gen_frame_mem (SImode, x);
27234 emit_move_insn (x, work_reg);
27235 }
27236 else
27237 {
27238 emit_move_insn (work_reg, arm_hfp_rtx);
27239
27240 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27241 x = gen_frame_mem (SImode, x);
27242 emit_move_insn (x, work_reg);
27243
27244 x = gen_rtx_REG (SImode, PC_REGNUM);
27245 emit_move_insn (work_reg, x);
27246
27247 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27248 x = gen_frame_mem (SImode, x);
27249 emit_move_insn (x, work_reg);
27250 }
27251
27252 x = gen_rtx_REG (SImode, LR_REGNUM);
27253 emit_move_insn (work_reg, x);
27254
27255 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27256 x = gen_frame_mem (SImode, x);
27257 emit_move_insn (x, work_reg);
27258
27259 x = GEN_INT (offset + 12);
27260 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27261
27262 emit_move_insn (arm_hfp_rtx, work_reg);
27263 }
27264 /* Optimization: If we are not pushing any low registers but we are going
27265 to push some high registers then delay our first push. This will just
27266 be a push of LR and we can combine it with the push of the first high
27267 register. */
27268 else if ((l_mask & 0xff) != 0
27269 || (high_regs_pushed == 0 && lr_needs_saving))
27270 {
27271 unsigned long mask = l_mask;
27272 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27273 insn = thumb1_emit_multi_reg_push (mask, mask);
27274 RTX_FRAME_RELATED_P (insn) = 1;
27275 lr_needs_saving = false;
27276 }
27277
27278 if (high_regs_pushed)
27279 {
27280 unsigned pushable_regs;
27281 unsigned next_hi_reg;
27282 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27283 : crtl->args.info.nregs;
27284 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27285
27286 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27287 if (live_regs_mask & (1 << next_hi_reg))
27288 break;
27289
27290 /* Here we need to mask out registers used for passing arguments
27291 	    even if they could be pushed.  This avoids using them to stash
27292 	    the high registers, since such stashing could clobber argument
27293 	    values that are still live.  */
27294 pushable_regs = l_mask & (~arg_regs_mask);
27295 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27296
27297 /* Normally, LR can be used as a scratch register once it has been
27298 saved; but if the function examines its own return address then
27299 the value is still live and we need to avoid using it. */
27300 bool return_addr_live
27301 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27302 LR_REGNUM);
27303
27304 if (lr_needs_saving || return_addr_live)
27305 pushable_regs &= ~(1 << LR_REGNUM);
27306
27307 if (pushable_regs == 0)
27308 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27309
27310 while (high_regs_pushed > 0)
27311 {
27312 unsigned long real_regs_mask = 0;
27313 unsigned long push_mask = 0;
27314
27315 for (regno = LR_REGNUM; regno >= 0; regno --)
27316 {
27317 if (pushable_regs & (1 << regno))
27318 {
27319 emit_move_insn (gen_rtx_REG (SImode, regno),
27320 gen_rtx_REG (SImode, next_hi_reg));
27321
27322 high_regs_pushed --;
27323 real_regs_mask |= (1 << next_hi_reg);
27324 push_mask |= (1 << regno);
27325
27326 if (high_regs_pushed)
27327 {
27328 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27329 next_hi_reg --)
27330 if (live_regs_mask & (1 << next_hi_reg))
27331 break;
27332 }
27333 else
27334 break;
27335 }
27336 }
27337
27338 /* If we had to find a work register and we have not yet
27339 saved the LR then add it to the list of regs to push. */
27340 if (lr_needs_saving)
27341 {
27342 push_mask |= 1 << LR_REGNUM;
27343 real_regs_mask |= 1 << LR_REGNUM;
27344 lr_needs_saving = false;
27345 /* If the return address is not live at this point, we
27346 can add LR to the list of registers that we can use
27347 for pushes. */
27348 if (!return_addr_live)
27349 pushable_regs |= 1 << LR_REGNUM;
27350 }
27351
27352 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27353 RTX_FRAME_RELATED_P (insn) = 1;
27354 }
27355 }
27356
27357 /* Load the pic register before setting the frame pointer,
27358 so we can use r7 as a temporary work register. */
27359 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27360 arm_load_pic_register (live_regs_mask, NULL_RTX);
27361
27362 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27363 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27364 stack_pointer_rtx);
27365
27366 size = offsets->outgoing_args - offsets->saved_args;
27367 if (flag_stack_usage_info)
27368 current_function_static_stack_size = size;
27369
27370 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27371 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27372 || flag_stack_clash_protection)
27373 && size)
27374 sorry ("%<-fstack-check=specific%> for Thumb-1");
27375
27376 amount = offsets->outgoing_args - offsets->saved_regs;
27377 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27378 if (amount)
27379 {
27380 if (amount < 512)
27381 {
27382 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27383 GEN_INT (- amount)));
27384 RTX_FRAME_RELATED_P (insn) = 1;
27385 }
27386 else
27387 {
27388 rtx reg, dwarf;
27389
27390 /* The stack decrement is too big for an immediate value in a single
27391 insn. In theory we could issue multiple subtracts, but after
27392 three of them it becomes more space efficient to place the full
27393 value in the constant pool and load into a register. (Also the
27394 ARM debugger really likes to see only one stack decrement per
27395 function). So instead we look for a scratch register into which
27396 we can load the decrement, and then we subtract this from the
27397 stack pointer. Unfortunately on the thumb the only available
27398 scratch registers are the argument registers, and we cannot use
27399 these as they may hold arguments to the function. Instead we
27400 attempt to locate a call preserved register which is used by this
27401 function. If we can find one, then we know that it will have
27402 been pushed at the start of the prologue and so we can corrupt
27403 it now. */
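	  /* For instance, if r4 is found in LIVE_REGS_MASK and AMOUNT is
	     1024, the code below ends up emitting something like
		ldr	r4, .Lpool	@ .Lpool: .word -1024 (hypothetical label)
		add	sp, sp, r4
	     with the exact register depending on which call-saved low
	     register is found.  */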
27404 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27405 if (live_regs_mask & (1 << regno))
27406 break;
27407
27408 gcc_assert(regno <= LAST_LO_REGNUM);
27409
27410 reg = gen_rtx_REG (SImode, regno);
27411
27412 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27413
27414 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27415 stack_pointer_rtx, reg));
27416
27417 dwarf = gen_rtx_SET (stack_pointer_rtx,
27418 plus_constant (Pmode, stack_pointer_rtx,
27419 -amount));
27420 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27421 RTX_FRAME_RELATED_P (insn) = 1;
27422 }
27423 }
27424
27425 if (frame_pointer_needed)
27426 thumb_set_frame_pointer (offsets);
27427
27428 /* If we are profiling, make sure no instructions are scheduled before
27429 the call to mcount. Similarly if the user has requested no
27430 scheduling in the prolog. Similarly if we want non-call exceptions
27431 using the EABI unwinder, to prevent faulting instructions from being
27432 swapped with a stack adjustment. */
27433 if (crtl->profile || !TARGET_SCHED_PROLOG
27434 || (arm_except_unwind_info (&global_options) == UI_TARGET
27435 && cfun->can_throw_non_call_exceptions))
27436 emit_insn (gen_blockage ());
27437
27438 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27439 if (live_regs_mask & 0xff)
27440 cfun->machine->lr_save_eliminated = 0;
27441 }
27442
27443 /* Clear caller saved registers not used to pass return values and leaked
27444 condition flags before exiting a cmse_nonsecure_entry function. */
27445
27446 void
27447 cmse_nonsecure_entry_clear_before_return (void)
27448 {
27449 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27450 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27451 uint32_t padding_bits_to_clear = 0;
27452 auto_sbitmap to_clear_bitmap (maxregno + 1);
27453 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27454 tree result_type;
27455
27456 bitmap_clear (to_clear_bitmap);
27457 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27458 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27459
27460 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27461 registers. */
27462 if (clear_vfpregs)
27463 {
27464 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27465
27466 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27467
27468 if (!TARGET_HAVE_FPCXT_CMSE)
27469 {
27470 /* Make sure we don't clear the two scratch registers used to clear
27471 the relevant FPSCR bits in output_return_instruction. */
27472 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27473 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27474 emit_use (gen_rtx_REG (SImode, 4));
27475 bitmap_clear_bit (to_clear_bitmap, 4);
27476 }
27477 }
27478
27479 /* If the user has defined registers to be caller saved, these are no longer
27480 restored by the function before returning and must thus be cleared for
27481 security purposes. */
27482 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27483 {
27484 /* We do not touch registers that can be used to pass arguments as per
27485 the AAPCS, since these should never be made callee-saved by user
27486 options. */
27487 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27488 continue;
27489 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27490 continue;
27491 if (!callee_saved_reg_p (regno)
27492 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27493 || TARGET_HARD_FLOAT))
27494 bitmap_set_bit (to_clear_bitmap, regno);
27495 }
27496
27497 /* Make sure we do not clear the registers used to return the result in. */
27498 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27499 if (!VOID_TYPE_P (result_type))
27500 {
27501 uint64_t to_clear_return_mask;
27502 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27503
27504 /* No need to check that we return in registers, because we don't
27505 support returning on stack yet. */
27506 gcc_assert (REG_P (result_rtl));
27507 to_clear_return_mask
27508 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27509 &padding_bits_to_clear);
27510 if (to_clear_return_mask)
27511 {
27512 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27513 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27514 {
27515 if (to_clear_return_mask & (1ULL << regno))
27516 bitmap_clear_bit (to_clear_bitmap, regno);
27517 }
27518 }
27519 }
27520
27521 if (padding_bits_to_clear != 0)
27522 {
27523 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27524 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27525
27526 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27527 returning a composite type, which only uses r0. Let's make sure that
27528 	 r1-r3 are cleared too. */
27529 bitmap_clear (to_clear_arg_regs_bitmap);
27530 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27531 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27532 }
27533
27534 /* Clear full registers that leak before returning. */
27535 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27536 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27537 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27538 clearing_reg);
27539 }
27540
27541 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27542    single POP instruction can be generated. LR should be replaced by PC.
27543    All the checks required are already done by USE_RETURN_INSN (). Hence,
27544    all we really need to check here is whether a single register or
27545    multiple registers are to be restored on return. */
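/* Illustrative note: in the single-register case below the saved return
   address is popped straight into the PC via a post-increment load from
   the stack, combined with the return in one PARALLEL pattern.  */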
27546 void
27547 thumb2_expand_return (bool simple_return)
27548 {
27549 int i, num_regs;
27550 unsigned long saved_regs_mask;
27551 arm_stack_offsets *offsets;
27552
27553 offsets = arm_get_frame_offsets ();
27554 saved_regs_mask = offsets->saved_regs_mask;
27555
27556 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27557 if (saved_regs_mask & (1 << i))
27558 num_regs++;
27559
27560 if (!simple_return && saved_regs_mask)
27561 {
27562 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27563 functions or adapt code to handle according to ACLE. This path should
27564 not be reachable for cmse_nonsecure_entry functions though we prefer
27565 to assert it for now to ensure that future code changes do not silently
27566 change this behavior. */
27567 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27568 if (arm_current_function_pac_enabled_p ())
27569 {
27570 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27571 arm_emit_multi_reg_pop (saved_regs_mask);
27572 emit_insn (gen_aut_nop ());
27573 emit_jump_insn (simple_return_rtx);
27574 }
27575 else if (num_regs == 1)
27576 {
27577 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27578 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27579 rtx addr = gen_rtx_MEM (SImode,
27580 gen_rtx_POST_INC (SImode,
27581 stack_pointer_rtx));
27582 set_mem_alias_set (addr, get_frame_alias_set ());
27583 XVECEXP (par, 0, 0) = ret_rtx;
27584 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27585 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27586 emit_jump_insn (par);
27587 }
27588 else
27589 {
27590 saved_regs_mask &= ~ (1 << LR_REGNUM);
27591 saved_regs_mask |= (1 << PC_REGNUM);
27592 arm_emit_multi_reg_pop (saved_regs_mask);
27593 }
27594 }
27595 else
27596 {
27597 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27598 cmse_nonsecure_entry_clear_before_return ();
27599 emit_jump_insn (simple_return_rtx);
27600 }
27601 }
27602
27603 void
27604 thumb1_expand_epilogue (void)
27605 {
27606 HOST_WIDE_INT amount;
27607 arm_stack_offsets *offsets;
27608 int regno;
27609
27610   /* Naked functions don't have epilogues. */
27611 if (IS_NAKED (arm_current_func_type ()))
27612 return;
27613
27614 offsets = arm_get_frame_offsets ();
27615 amount = offsets->outgoing_args - offsets->saved_regs;
27616
27617 if (frame_pointer_needed)
27618 {
27619 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27620 amount = offsets->locals_base - offsets->saved_regs;
27621 }
27622 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27623
27624 gcc_assert (amount >= 0);
27625 if (amount)
27626 {
27627 emit_insn (gen_blockage ());
27628
27629 if (amount < 512)
27630 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27631 GEN_INT (amount)));
27632 else
27633 {
27634 /* r3 is always free in the epilogue. */
27635 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27636
27637 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27638 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27639 }
27640 }
27641
27642 /* Emit a USE (stack_pointer_rtx), so that
27643 the stack adjustment will not be deleted. */
27644 emit_insn (gen_force_register_use (stack_pointer_rtx));
27645
27646 if (crtl->profile || !TARGET_SCHED_PROLOG)
27647 emit_insn (gen_blockage ());
27648
27649 /* Emit a clobber for each insn that will be restored in the epilogue,
27650 so that flow2 will get register lifetimes correct. */
27651 for (regno = 0; regno < 13; regno++)
27652 if (reg_needs_saving_p (regno))
27653 emit_clobber (gen_rtx_REG (SImode, regno));
27654
27655 if (! df_regs_ever_live_p (LR_REGNUM))
27656 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27657
27658 /* Clear all caller-saved regs that are not used to return. */
27659 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27660 cmse_nonsecure_entry_clear_before_return ();
27661 }
27662
27663 /* Epilogue code for APCS frame. */
27664 static void
27665 arm_expand_epilogue_apcs_frame (bool really_return)
27666 {
27667 unsigned long func_type;
27668 unsigned long saved_regs_mask;
27669 int num_regs = 0;
27670 int i;
27671 int floats_from_frame = 0;
27672 arm_stack_offsets *offsets;
27673
27674 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27675 func_type = arm_current_func_type ();
27676
27677 /* Get frame offsets for ARM. */
27678 offsets = arm_get_frame_offsets ();
27679 saved_regs_mask = offsets->saved_regs_mask;
27680
27681 /* Find the offset of the floating-point save area in the frame. */
27682 floats_from_frame
27683 = (offsets->saved_args
27684 + arm_compute_static_chain_stack_bytes ()
27685 - offsets->frame);
27686
27687 /* Compute how many core registers saved and how far away the floats are. */
27688 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27689 if (saved_regs_mask & (1 << i))
27690 {
27691 num_regs++;
27692 floats_from_frame += 4;
27693 }
27694
27695 if (TARGET_VFP_BASE)
27696 {
27697 int start_reg;
27698 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27699
27700 /* The offset is from IP_REGNUM. */
27701 int saved_size = arm_get_vfp_saved_size ();
27702 if (saved_size > 0)
27703 {
27704 rtx_insn *insn;
27705 floats_from_frame += saved_size;
27706 insn = emit_insn (gen_addsi3 (ip_rtx,
27707 hard_frame_pointer_rtx,
27708 GEN_INT (-floats_from_frame)));
27709 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27710 ip_rtx, hard_frame_pointer_rtx);
27711 }
27712
27713 /* Generate VFP register multi-pop. */
27714 start_reg = FIRST_VFP_REGNUM;
27715
27716 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27717 /* Look for a case where a reg does not need restoring. */
27718 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27719 {
27720 if (start_reg != i)
27721 arm_emit_vfp_multi_reg_pop (start_reg,
27722 (i - start_reg) / 2,
27723 gen_rtx_REG (SImode,
27724 IP_REGNUM));
27725 start_reg = i + 2;
27726 }
27727
27728 /* Restore the remaining regs that we have discovered (or possibly
27729 even all of them, if the conditional in the for loop never
27730 fired). */
27731 if (start_reg != i)
27732 arm_emit_vfp_multi_reg_pop (start_reg,
27733 (i - start_reg) / 2,
27734 gen_rtx_REG (SImode, IP_REGNUM));
27735 }
27736
27737 if (TARGET_IWMMXT)
27738 {
27739 /* The frame pointer is guaranteed to be non-double-word aligned, as
27740 it is set to double-word-aligned old_stack_pointer - 4. */
27741 rtx_insn *insn;
27742 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27743
27744 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27745 if (reg_needs_saving_p (i))
27746 {
27747 rtx addr = gen_frame_mem (V2SImode,
27748 plus_constant (Pmode, hard_frame_pointer_rtx,
27749 - lrm_count * 4));
27750 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27751 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27752 gen_rtx_REG (V2SImode, i),
27753 NULL_RTX);
27754 lrm_count += 2;
27755 }
27756 }
27757
27758   /* saved_regs_mask should contain IP, which holds the old stack pointer
27759      from the time the activation record was created. Since SP and IP are
27760      adjacent registers, we can pop that value directly into SP. */
27761 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27762 saved_regs_mask &= ~(1 << IP_REGNUM);
27763 saved_regs_mask |= (1 << SP_REGNUM);
27764
27765 /* There are two registers left in saved_regs_mask - LR and PC. We
27766 only need to restore LR (the return address), but to
27767 save time we can load it directly into PC, unless we need a
27768 special function exit sequence, or we are not really returning. */
27769 if (really_return
27770 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27771 && !crtl->calls_eh_return)
27772 /* Delete LR from the register mask, so that LR on
27773 the stack is loaded into the PC in the register mask. */
27774 saved_regs_mask &= ~(1 << LR_REGNUM);
27775 else
27776 saved_regs_mask &= ~(1 << PC_REGNUM);
27777
27778 num_regs = bit_count (saved_regs_mask);
27779 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27780 {
27781 rtx_insn *insn;
27782 emit_insn (gen_blockage ());
27783 /* Unwind the stack to just below the saved registers. */
27784 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27785 hard_frame_pointer_rtx,
27786 GEN_INT (- 4 * num_regs)));
27787
27788 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27789 stack_pointer_rtx, hard_frame_pointer_rtx);
27790 }
27791
27792 arm_emit_multi_reg_pop (saved_regs_mask);
27793
27794 if (IS_INTERRUPT (func_type))
27795 {
27796 /* Interrupt handlers will have pushed the
27797 IP onto the stack, so restore it now. */
27798 rtx_insn *insn;
27799 rtx addr = gen_rtx_MEM (SImode,
27800 gen_rtx_POST_INC (SImode,
27801 stack_pointer_rtx));
27802 set_mem_alias_set (addr, get_frame_alias_set ());
27803 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27804 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27805 gen_rtx_REG (SImode, IP_REGNUM),
27806 NULL_RTX);
27807 }
27808
27809 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27810 return;
27811
27812 if (crtl->calls_eh_return)
27813 emit_insn (gen_addsi3 (stack_pointer_rtx,
27814 stack_pointer_rtx,
27815 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27816
27817 if (IS_STACKALIGN (func_type))
27818 /* Restore the original stack pointer. Before prologue, the stack was
27819 realigned and the original stack pointer saved in r0. For details,
27820 see comment in arm_expand_prologue. */
27821 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27822
27823 emit_jump_insn (simple_return_rtx);
27824 }
27825
27826 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27827 function is not a sibcall. */
27828 void
27829 arm_expand_epilogue (bool really_return)
27830 {
27831 unsigned long func_type;
27832 unsigned long saved_regs_mask;
27833 int num_regs = 0;
27834 int i;
27835 int amount;
27836 arm_stack_offsets *offsets;
27837
27838 func_type = arm_current_func_type ();
27839
27840   /* Naked functions don't have an epilogue. Hence, generate the return pattern
27841      and let output_return_instruction take care of any instruction emission. */
27842 if (IS_NAKED (func_type)
27843 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27844 {
27845 if (really_return)
27846 emit_jump_insn (simple_return_rtx);
27847 return;
27848 }
27849
27850 /* If we are throwing an exception, then we really must be doing a
27851 return, so we can't tail-call. */
27852 gcc_assert (!crtl->calls_eh_return || really_return);
27853
27854 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27855 {
27856 arm_expand_epilogue_apcs_frame (really_return);
27857 return;
27858 }
27859
27860 /* Get frame offsets for ARM. */
27861 offsets = arm_get_frame_offsets ();
27862 saved_regs_mask = offsets->saved_regs_mask;
27863 num_regs = bit_count (saved_regs_mask);
27864
27865 if (frame_pointer_needed)
27866 {
27867 rtx_insn *insn;
27868 /* Restore stack pointer if necessary. */
27869 if (TARGET_ARM)
27870 {
27871 /* In ARM mode, frame pointer points to first saved register.
27872 Restore stack pointer to last saved register. */
27873 amount = offsets->frame - offsets->saved_regs;
27874
27875 /* Force out any pending memory operations that reference stacked data
27876 before stack de-allocation occurs. */
27877 emit_insn (gen_blockage ());
27878 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27879 hard_frame_pointer_rtx,
27880 GEN_INT (amount)));
27881 arm_add_cfa_adjust_cfa_note (insn, amount,
27882 stack_pointer_rtx,
27883 hard_frame_pointer_rtx);
27884
27885 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27886 deleted. */
27887 emit_insn (gen_force_register_use (stack_pointer_rtx));
27888 }
27889 else
27890 {
27891 /* In Thumb-2 mode, the frame pointer points to the last saved
27892 register. */
27893 amount = offsets->locals_base - offsets->saved_regs;
27894 if (amount)
27895 {
27896 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27897 hard_frame_pointer_rtx,
27898 GEN_INT (amount)));
27899 arm_add_cfa_adjust_cfa_note (insn, amount,
27900 hard_frame_pointer_rtx,
27901 hard_frame_pointer_rtx);
27902 }
27903
27904 /* Force out any pending memory operations that reference stacked data
27905 before stack de-allocation occurs. */
27906 emit_insn (gen_blockage ());
27907 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27908 hard_frame_pointer_rtx));
27909 arm_add_cfa_adjust_cfa_note (insn, 0,
27910 stack_pointer_rtx,
27911 hard_frame_pointer_rtx);
27912 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27913 deleted. */
27914 emit_insn (gen_force_register_use (stack_pointer_rtx));
27915 }
27916 }
27917 else
27918 {
27919 /* Pop off outgoing args and local frame to adjust stack pointer to
27920 last saved register. */
27921 amount = offsets->outgoing_args - offsets->saved_regs;
27922 if (amount)
27923 {
27924 rtx_insn *tmp;
27925 /* Force out any pending memory operations that reference stacked data
27926 before stack de-allocation occurs. */
27927 emit_insn (gen_blockage ());
27928 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27929 stack_pointer_rtx,
27930 GEN_INT (amount)));
27931 arm_add_cfa_adjust_cfa_note (tmp, amount,
27932 stack_pointer_rtx, stack_pointer_rtx);
27933 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27934 not deleted. */
27935 emit_insn (gen_force_register_use (stack_pointer_rtx));
27936 }
27937 }
27938
27939 if (TARGET_VFP_BASE)
27940 {
27941 /* Generate VFP register multi-pop. */
27942 int end_reg = LAST_VFP_REGNUM + 1;
27943
27944 /* Scan the registers in reverse order. We need to match
27945 any groupings made in the prologue and generate matching
27946 vldm operations. The need to match groups is because,
27947 unlike pop, vldm can only do consecutive regs. */
27948 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27949 /* Look for a case where a reg does not need restoring. */
27950 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27951 {
27952 /* Restore the regs discovered so far (from reg+2 to
27953 end_reg). */
27954 if (end_reg > i + 2)
27955 arm_emit_vfp_multi_reg_pop (i + 2,
27956 (end_reg - (i + 2)) / 2,
27957 stack_pointer_rtx);
27958 end_reg = i;
27959 }
27960
27961 /* Restore the remaining regs that we have discovered (or possibly
27962 even all of them, if the conditional in the for loop never
27963 fired). */
27964 if (end_reg > i + 2)
27965 arm_emit_vfp_multi_reg_pop (i + 2,
27966 (end_reg - (i + 2)) / 2,
27967 stack_pointer_rtx);
27968 }
27969
27970 if (TARGET_IWMMXT)
27971 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27972 if (reg_needs_saving_p (i))
27973 {
27974 rtx_insn *insn;
27975 rtx addr = gen_rtx_MEM (V2SImode,
27976 gen_rtx_POST_INC (SImode,
27977 stack_pointer_rtx));
27978 set_mem_alias_set (addr, get_frame_alias_set ());
27979 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27980 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27981 gen_rtx_REG (V2SImode, i),
27982 NULL_RTX);
27983 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27984 stack_pointer_rtx, stack_pointer_rtx);
27985 }
27986
27987 if (saved_regs_mask)
27988 {
27989 rtx insn;
27990 bool return_in_pc = false;
27991
27992 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27993 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27994 && !IS_CMSE_ENTRY (func_type)
27995 && !IS_STACKALIGN (func_type)
27996 && really_return
27997 && crtl->args.pretend_args_size == 0
27998 && saved_regs_mask & (1 << LR_REGNUM)
27999 && !crtl->calls_eh_return
28000 && !arm_current_function_pac_enabled_p ())
28001 {
28002 saved_regs_mask &= ~(1 << LR_REGNUM);
28003 saved_regs_mask |= (1 << PC_REGNUM);
28004 return_in_pc = true;
28005 }
28006
28007 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
28008 {
28009 for (i = 0; i <= LAST_ARM_REGNUM; i++)
28010 if (saved_regs_mask & (1 << i))
28011 {
28012 rtx addr = gen_rtx_MEM (SImode,
28013 gen_rtx_POST_INC (SImode,
28014 stack_pointer_rtx));
28015 set_mem_alias_set (addr, get_frame_alias_set ());
28016
28017 if (i == PC_REGNUM)
28018 {
28019 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
28020 XVECEXP (insn, 0, 0) = ret_rtx;
28021 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
28022 addr);
28023 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
28024 insn = emit_jump_insn (insn);
28025 }
28026 else
28027 {
28028 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
28029 addr));
28030 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
28031 gen_rtx_REG (SImode, i),
28032 NULL_RTX);
28033 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
28034 stack_pointer_rtx,
28035 stack_pointer_rtx);
28036 }
28037 }
28038 }
28039 else
28040 {
28041 if (TARGET_LDRD
28042 && current_tune->prefer_ldrd_strd
28043 && !optimize_function_for_size_p (cfun))
28044 {
28045 if (TARGET_THUMB2)
28046 thumb2_emit_ldrd_pop (saved_regs_mask);
28047 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28048 arm_emit_ldrd_pop (saved_regs_mask);
28049 else
28050 arm_emit_multi_reg_pop (saved_regs_mask);
28051 }
28052 else
28053 arm_emit_multi_reg_pop (saved_regs_mask);
28054 }
28055
28056 if (return_in_pc)
28057 return;
28058 }
28059
28060 amount
28061 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
28062 if (amount)
28063 {
28064 int i, j;
28065 rtx dwarf = NULL_RTX;
28066 rtx_insn *tmp =
28067 emit_insn (gen_addsi3 (stack_pointer_rtx,
28068 stack_pointer_rtx,
28069 GEN_INT (amount)));
28070
28071 RTX_FRAME_RELATED_P (tmp) = 1;
28072
28073 if (cfun->machine->uses_anonymous_args)
28074 {
28075 	  /* Restore pretend args. See arm_expand_prologue for how pretend
28076 	     args are saved on the stack. */
28077 int num_regs = crtl->args.pretend_args_size / 4;
28078 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
28079 for (j = 0, i = 0; j < num_regs; i++)
28080 if (saved_regs_mask & (1 << i))
28081 {
28082 rtx reg = gen_rtx_REG (SImode, i);
28083 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28084 j++;
28085 }
28086 REG_NOTES (tmp) = dwarf;
28087 }
28088 arm_add_cfa_adjust_cfa_note (tmp, amount,
28089 stack_pointer_rtx, stack_pointer_rtx);
28090 }
28091
28092 if (IS_CMSE_ENTRY (func_type))
28093 {
28094 /* CMSE_ENTRY always returns. */
28095 gcc_assert (really_return);
28096 /* Clear all caller-saved regs that are not used to return. */
28097 cmse_nonsecure_entry_clear_before_return ();
28098
28099 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28100 VLDR. */
28101 if (TARGET_HAVE_FPCXT_CMSE)
28102 {
28103 rtx_insn *insn;
28104
28105 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28106 GEN_INT (FPCXTNS_ENUM)));
28107 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28108 plus_constant (Pmode, stack_pointer_rtx, 4));
28109 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28110 RTX_FRAME_RELATED_P (insn) = 1;
28111 }
28112 }
28113
28114 if (arm_current_function_pac_enabled_p ())
28115 emit_insn (gen_aut_nop ());
28116
28117 if (!really_return)
28118 return;
28119
28120 if (crtl->calls_eh_return)
28121 emit_insn (gen_addsi3 (stack_pointer_rtx,
28122 stack_pointer_rtx,
28123 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28124
28125 if (IS_STACKALIGN (func_type))
28126 /* Restore the original stack pointer. Before prologue, the stack was
28127 realigned and the original stack pointer saved in r0. For details,
28128 see comment in arm_expand_prologue. */
28129 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28130
28131 emit_jump_insn (simple_return_rtx);
28132 }
28133
28134 /* Implementation of insn prologue_thumb1_interwork. This is the first
28135 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28136
28137 const char *
28138 thumb1_output_interwork (void)
28139 {
28140 const char * name;
28141 FILE *f = asm_out_file;
28142
28143 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28144 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28145 == SYMBOL_REF);
28146 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28147
28148 /* Generate code sequence to switch us into Thumb mode. */
28149 /* The .code 32 directive has already been emitted by
28150 ASM_DECLARE_FUNCTION_NAME. */
28151 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28152 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28153
28154 /* Generate a label, so that the debugger will notice the
28155 change in instruction sets. This label is also used by
28156 the assembler to bypass the ARM code when this function
28157 is called from a Thumb encoded function elsewhere in the
28158 same file. Hence the definition of STUB_NAME here must
28159 agree with the definition in gas/config/tc-arm.c. */
28160
28161 #define STUB_NAME ".real_start_of"
28162
28163 fprintf (f, "\t.code\t16\n");
28164 #ifdef ARM_PE
28165 if (arm_dllexport_name_p (name))
28166 name = arm_strip_name_encoding (name);
28167 #endif
28168 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28169 fprintf (f, "\t.thumb_func\n");
28170 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28171
28172 return "";
28173 }
28174
28175 /* Handle the case of a double word load into a low register from
28176 a computed memory address. The computed address may involve a
28177 register which is overwritten by the load. */
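/* For example, when loading a double word at [r0] into r0/r1 (so the base
   register is also the low destination register), the REG case below emits
	ldr	r1, [r0, #4]
	ldr	r0, [r0]
   loading the high word first so the base is not clobbered too early.  */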
28178 const char *
28179 thumb_load_double_from_address (rtx *operands)
28180 {
28181 rtx addr;
28182 rtx base;
28183 rtx offset;
28184 rtx arg1;
28185 rtx arg2;
28186
28187 gcc_assert (REG_P (operands[0]));
28188 gcc_assert (MEM_P (operands[1]));
28189
28190 /* Get the memory address. */
28191 addr = XEXP (operands[1], 0);
28192
28193 /* Work out how the memory address is computed. */
28194 switch (GET_CODE (addr))
28195 {
28196 case REG:
28197 operands[2] = adjust_address (operands[1], SImode, 4);
28198
28199 if (REGNO (operands[0]) == REGNO (addr))
28200 {
28201 output_asm_insn ("ldr\t%H0, %2", operands);
28202 output_asm_insn ("ldr\t%0, %1", operands);
28203 }
28204 else
28205 {
28206 output_asm_insn ("ldr\t%0, %1", operands);
28207 output_asm_insn ("ldr\t%H0, %2", operands);
28208 }
28209 break;
28210
28211 case CONST:
28212 /* Compute <address> + 4 for the high order load. */
28213 operands[2] = adjust_address (operands[1], SImode, 4);
28214
28215 output_asm_insn ("ldr\t%0, %1", operands);
28216 output_asm_insn ("ldr\t%H0, %2", operands);
28217 break;
28218
28219 case PLUS:
28220 arg1 = XEXP (addr, 0);
28221 arg2 = XEXP (addr, 1);
28222
28223 if (CONSTANT_P (arg1))
28224 base = arg2, offset = arg1;
28225 else
28226 base = arg1, offset = arg2;
28227
28228 gcc_assert (REG_P (base));
28229
28230 /* Catch the case of <address> = <reg> + <reg> */
28231 if (REG_P (offset))
28232 {
28233 int reg_offset = REGNO (offset);
28234 int reg_base = REGNO (base);
28235 int reg_dest = REGNO (operands[0]);
28236
28237 /* Add the base and offset registers together into the
28238 higher destination register. */
28239 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28240 reg_dest + 1, reg_base, reg_offset);
28241
28242 /* Load the lower destination register from the address in
28243 the higher destination register. */
28244 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28245 reg_dest, reg_dest + 1);
28246
28247 /* Load the higher destination register from its own address
28248 plus 4. */
28249 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28250 reg_dest + 1, reg_dest + 1);
28251 }
28252 else
28253 {
28254 /* Compute <address> + 4 for the high order load. */
28255 operands[2] = adjust_address (operands[1], SImode, 4);
28256
28257 /* If the computed address is held in the low order register
28258 then load the high order register first, otherwise always
28259 load the low order register first. */
28260 if (REGNO (operands[0]) == REGNO (base))
28261 {
28262 output_asm_insn ("ldr\t%H0, %2", operands);
28263 output_asm_insn ("ldr\t%0, %1", operands);
28264 }
28265 else
28266 {
28267 output_asm_insn ("ldr\t%0, %1", operands);
28268 output_asm_insn ("ldr\t%H0, %2", operands);
28269 }
28270 }
28271 break;
28272
28273 case LABEL_REF:
28274 /* With no registers to worry about we can just load the value
28275 directly. */
28276 operands[2] = adjust_address (operands[1], SImode, 4);
28277
28278 output_asm_insn ("ldr\t%H0, %2", operands);
28279 output_asm_insn ("ldr\t%0, %1", operands);
28280 break;
28281
28282 default:
28283 gcc_unreachable ();
28284 }
28285
28286 return "";
28287 }
28288
28289 const char *
28290 thumb_output_move_mem_multiple (int n, rtx *operands)
28291 {
28292 switch (n)
28293 {
28294 case 2:
28295 if (REGNO (operands[4]) > REGNO (operands[5]))
28296 std::swap (operands[4], operands[5]);
28297
28298 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28299 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28300 break;
28301
28302 case 3:
28303 if (REGNO (operands[4]) > REGNO (operands[5]))
28304 std::swap (operands[4], operands[5]);
28305 if (REGNO (operands[5]) > REGNO (operands[6]))
28306 std::swap (operands[5], operands[6]);
28307 if (REGNO (operands[4]) > REGNO (operands[5]))
28308 std::swap (operands[4], operands[5]);
28309
28310 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28311 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28312 break;
28313
28314 default:
28315 gcc_unreachable ();
28316 }
28317
28318 return "";
28319 }
28320
28321 /* Output a call-via instruction for thumb state. */
28322 const char *
28323 thumb_call_via_reg (rtx reg)
28324 {
28325 int regno = REGNO (reg);
28326 rtx *labelp;
28327
28328 gcc_assert (regno < LR_REGNUM);
28329
28330 /* If we are in the normal text section we can use a single instance
28331 per compilation unit. If we are doing function sections, then we need
28332 an entry per section, since we can't rely on reachability. */
28333 if (in_section == text_section)
28334 {
28335 thumb_call_reg_needed = 1;
28336
28337 if (thumb_call_via_label[regno] == NULL)
28338 thumb_call_via_label[regno] = gen_label_rtx ();
28339 labelp = thumb_call_via_label + regno;
28340 }
28341 else
28342 {
28343 if (cfun->machine->call_via[regno] == NULL)
28344 cfun->machine->call_via[regno] = gen_label_rtx ();
28345 labelp = cfun->machine->call_via + regno;
28346 }
28347
28348 output_asm_insn ("bl\t%a0", labelp);
28349 return "";
28350 }
28351
28352 /* Routines for generating rtl. */
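/* Expand a cpymemqi of LEN bytes: copy 12-byte blocks with ldmia/stmia
   while possible, then at most one 8-byte block, one word, one halfword
   and one byte.  E.g. LEN == 27 becomes two 12-byte block moves followed
   by a halfword and a byte copy.  */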
28353 void
28354 thumb_expand_cpymemqi (rtx *operands)
28355 {
28356 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28357 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28358 HOST_WIDE_INT len = INTVAL (operands[2]);
28359 HOST_WIDE_INT offset = 0;
28360
28361 while (len >= 12)
28362 {
28363 emit_insn (gen_cpymem12b (out, in, out, in));
28364 len -= 12;
28365 }
28366
28367 if (len >= 8)
28368 {
28369 emit_insn (gen_cpymem8b (out, in, out, in));
28370 len -= 8;
28371 }
28372
28373 if (len >= 4)
28374 {
28375 rtx reg = gen_reg_rtx (SImode);
28376 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28377 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28378 len -= 4;
28379 offset += 4;
28380 }
28381
28382 if (len >= 2)
28383 {
28384 rtx reg = gen_reg_rtx (HImode);
28385 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28386 plus_constant (Pmode, in,
28387 offset))));
28388 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28389 offset)),
28390 reg));
28391 len -= 2;
28392 offset += 2;
28393 }
28394
28395 if (len)
28396 {
28397 rtx reg = gen_reg_rtx (QImode);
28398 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28399 plus_constant (Pmode, in,
28400 offset))));
28401 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28402 offset)),
28403 reg));
28404 }
28405 }
28406
28407 void
28408 thumb_reload_out_hi (rtx *operands)
28409 {
28410 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28411 }
28412
28413 /* Return the length of a function name prefix
28414 that starts with the character 'c'. */
28415 static int
28416 arm_get_strip_length (int c)
28417 {
28418 switch (c)
28419 {
28420 ARM_NAME_ENCODING_LENGTHS
28421 default: return 0;
28422 }
28423 }
28424
28425 /* Return a pointer to a function's name with any
28426 and all prefix encodings stripped from it. */
28427 const char *
28428 arm_strip_name_encoding (const char *name)
28429 {
28430 int skip;
28431
28432 while ((skip = arm_get_strip_length (* name)))
28433 name += skip;
28434
28435 return name;
28436 }
28437
28438 /* If there is a '*' anywhere in the name's prefix, then
28439 emit the stripped name verbatim, otherwise prepend an
28440 underscore if leading underscores are being used. */
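/* For example (assuming '*' is one of the prefixes recognised by
   ARM_NAME_ENCODING_LENGTHS, as it normally is): "*foo" is emitted verbatim
   as "foo", whereas a plain encoded name is emitted through %U and so picks
   up the user label prefix (an underscore on targets that use one).  */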
28441 void
28442 arm_asm_output_labelref (FILE *stream, const char *name)
28443 {
28444 int skip;
28445 int verbatim = 0;
28446
28447 while ((skip = arm_get_strip_length (* name)))
28448 {
28449 verbatim |= (*name == '*');
28450 name += skip;
28451 }
28452
28453 if (verbatim)
28454 fputs (name, stream);
28455 else
28456 asm_fprintf (stream, "%U%s", name);
28457 }
28458
28459 /* This function is used to emit an EABI tag and its associated value.
28460 We emit the numerical value of the tag in case the assembler does not
28461 support textual tags. (E.g. gas prior to 2.20.) If requested we include
28462 the tag name in a comment so that anyone reading the assembler output
28463 will know which tag is being set.
28464
28465 This function is not static because arm-c.cc needs it too. */
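/* For example, arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) emits
   "\t.eabi_attribute 28, 1", followed under -fverbose-asm or -dA by
   "\t@ Tag_ABI_VFP_args" (with '@' being the usual ARM ASM_COMMENT_START).  */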
28466
28467 void
28468 arm_emit_eabi_attribute (const char *name, int num, int val)
28469 {
28470 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28471 if (flag_verbose_asm || flag_debug_asm)
28472 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28473 asm_fprintf (asm_out_file, "\n");
28474 }
28475
28476 /* This function is used to print CPU tuning information as comment
28477 in assembler file. Pointers are not printed for now. */
28478
28479 void
28480 arm_print_tune_info (void)
28481 {
28482 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28483 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28484 current_tune->constant_limit);
28485 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28486 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28487 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28488 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28489 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28490 "prefetch.l1_cache_size:\t%d\n",
28491 current_tune->prefetch.l1_cache_size);
28492 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28493 "prefetch.l1_cache_line_size:\t%d\n",
28494 current_tune->prefetch.l1_cache_line_size);
28495 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28496 "prefer_constant_pool:\t%d\n",
28497 (int) current_tune->prefer_constant_pool);
28498 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28499 "branch_cost:\t(s:speed, p:predictable)\n");
28500 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28501 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28502 current_tune->branch_cost (false, false));
28503 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28504 current_tune->branch_cost (false, true));
28505 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28506 current_tune->branch_cost (true, false));
28507 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28508 current_tune->branch_cost (true, true));
28509 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28510 "prefer_ldrd_strd:\t%d\n",
28511 (int) current_tune->prefer_ldrd_strd);
28512 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28513 "logical_op_non_short_circuit:\t[%d,%d]\n",
28514 (int) current_tune->logical_op_non_short_circuit_thumb,
28515 (int) current_tune->logical_op_non_short_circuit_arm);
28516 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28517 "disparage_flag_setting_t16_encodings:\t%d\n",
28518 (int) current_tune->disparage_flag_setting_t16_encodings);
28519 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28520 "string_ops_prefer_neon:\t%d\n",
28521 (int) current_tune->string_ops_prefer_neon);
28522 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28523 "max_insns_inline_memset:\t%d\n",
28524 current_tune->max_insns_inline_memset);
28525 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28526 current_tune->fusible_ops);
28527 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28528 (int) current_tune->sched_autopref);
28529 }
28530
28531 /* The last set of target options used to emit .arch directives, etc. This
28532 could be a function-local static if it were not required to expose it as a
28533 root to the garbage collector. */
28534 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28535
28536 /* Print .arch and .arch_extension directives corresponding to the
28537 current architecture configuration. */
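/* A sketch of the kind of output this produces; the exact names depend on
   the selected CPU, FPU and ISA bits, so the ones below are illustrative:

	.cpu cortex-a53
	.arch armv8-a
	.fpu crypto-neon-fp-armv8
	.arch_extension crc  */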
28538 static void
28539 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28540 {
28541 arm_build_target build_target;
28542 /* If the target options haven't changed since the last time we were called
28543 there is nothing to do. This should be sufficient to suppress the
28544 majority of redundant work. */
28545 if (last_asm_targ_options == targ_options)
28546 return;
28547
28548 last_asm_targ_options = targ_options;
28549
28550 build_target.isa = sbitmap_alloc (isa_num_bits);
28551 arm_configure_build_target (&build_target, targ_options, false);
28552
28553 if (build_target.core_name
28554 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28555 {
28556 const char* truncated_name
28557 = arm_rewrite_selected_cpu (build_target.core_name);
28558 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28559 }
28560
28561 const arch_option *arch
28562 = arm_parse_arch_option_name (all_architectures, "-march",
28563 build_target.arch_name);
28564 auto_sbitmap opt_bits (isa_num_bits);
28565
28566 gcc_assert (arch);
28567
28568 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28569 {
28570 /* Keep backward compatibility for assemblers which don't support
28571 armv7ve. Fortunately, none of the following extensions are reset
28572 by a .fpu directive. */
28573 asm_fprintf (stream, "\t.arch armv7-a\n");
28574 asm_fprintf (stream, "\t.arch_extension virt\n");
28575 asm_fprintf (stream, "\t.arch_extension idiv\n");
28576 asm_fprintf (stream, "\t.arch_extension sec\n");
28577 asm_fprintf (stream, "\t.arch_extension mp\n");
28578 }
28579 else
28580 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28581
28582 /* The .fpu directive will reset any architecture extensions from the
28583 assembler that relate to the fp/vector extensions. So put this out before
28584 any .arch_extension directives. */
28585 const char *fpu_name = (TARGET_SOFT_FLOAT
28586 ? "softvfp"
28587 : arm_identify_fpu_from_isa (build_target.isa));
28588 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28589
28590 if (!arch->common.extensions)
28591 return;
28592
28593 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28594 opt->name != NULL;
28595 opt++)
28596 {
28597 if (!opt->remove)
28598 {
28599 arm_initialize_isa (opt_bits, opt->isa_bits);
28600
28601 /* For "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28602 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28603 floating point instructions are disabled. The following check
28604 therefore restricts the printing of ".arch_extension mve" and
28605 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28606 this special behaviour because the feature bits "mve" and
28607 "mve_float" are not part of the "fpu bits", so they are not cleared
28608 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE
28609 and TARGET_HAVE_MVE_FLOAT are disabled. */
28610 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28611 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28612 && !TARGET_HAVE_MVE_FLOAT))
28613 continue;
28614
28615 /* If every feature bit of this option is set in the target ISA
28616 specification, print out the option name. However, don't print
28617 anything if all the bits are part of the FPU specification. */
28618 if (bitmap_subset_p (opt_bits, build_target.isa)
28619 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28620 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28621 }
28622 }
28623 }
28624
28625 static void
28626 arm_file_start (void)
28627 {
28628 int val;
28629 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28630 bool bti = (aarch_enable_bti == 1);
28631
28632 arm_print_asm_arch_directives
28633 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28634
28635 if (TARGET_BPABI)
28636 {
28637 /* If we have a named cpu, but the assembler does not support that
28638 name via .cpu, put out a cpu name attribute; but don't do this if the
28639 name starts with the fictitious prefix, 'generic'. */
28640 if (arm_active_target.core_name
28641 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28642 && !startswith (arm_active_target.core_name, "generic"))
28643 {
28644 const char* truncated_name
28645 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28646 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28647 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28648 truncated_name);
28649 }
28650
28651 if (print_tune_info)
28652 arm_print_tune_info ();
28653
28654 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28655 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28656
28657 if (TARGET_HARD_FLOAT_ABI)
28658 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28659
28660 /* Some of these attributes only apply when the corresponding features
28661 are used. However we don't have any easy way of figuring this out.
28662 Conservatively record the setting that would have been used. */
28663
28664 if (flag_rounding_math)
28665 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28666
28667 if (!flag_unsafe_math_optimizations)
28668 {
28669 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28670 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28671 }
28672 if (flag_signaling_nans)
28673 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28674
28675 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28676 flag_finite_math_only ? 1 : 3);
28677
28678 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28679 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28680 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28681 flag_short_enums ? 1 : 2);
28682
28683 /* Tag_ABI_optimization_goals. */
28684 if (optimize_size)
28685 val = 4;
28686 else if (optimize >= 2)
28687 val = 2;
28688 else if (optimize)
28689 val = 1;
28690 else
28691 val = 6;
28692 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28693
28694 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28695 unaligned_access);
28696
28697 if (arm_fp16_format)
28698 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28699 (int) arm_fp16_format);
28700
28701 if (TARGET_HAVE_PACBTI)
28702 {
28703 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28704 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28705 }
28706 else if (pac || bti)
28707 {
28708 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28709 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28710 }
28711
28712 if (bti)
28713 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28714 if (pac)
28715 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28716
28717 if (arm_lang_output_object_attributes_hook)
28718 arm_lang_output_object_attributes_hook();
28719 }
28720
28721 default_file_start ();
28722 }
28723
28724 static void
28725 arm_file_end (void)
28726 {
28727 int regno;
28728
28729 /* Just in case the last function output in the assembler had non-default
28730 architecture directives, we force the assembler state back to the default
28731 set, so that any 'calculated' build attributes are based on the default
28732 options rather than the special options for that function. */
28733 arm_print_asm_arch_directives
28734 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28735
28736 if (NEED_INDICATE_EXEC_STACK)
28737 /* Add .note.GNU-stack. */
28738 file_end_indicate_exec_stack ();
28739
28740 if (! thumb_call_reg_needed)
28741 return;
28742
28743 switch_to_section (text_section);
28744 asm_fprintf (asm_out_file, "\t.code 16\n");
28745 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28746
28747 for (regno = 0; regno < LR_REGNUM; regno++)
28748 {
28749 rtx label = thumb_call_via_label[regno];
28750
28751 if (label != 0)
28752 {
28753 targetm.asm_out.internal_label (asm_out_file, "L",
28754 CODE_LABEL_NUMBER (label));
28755 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28756 }
28757 }
28758 }
28759
28760 #ifndef ARM_PE
28761 /* Symbols in the text segment can be accessed without indirecting via the
28762 constant pool; it may take an extra binary operation, but this is still
28763 faster than indirecting via memory. Don't do this when not optimizing,
28764 since we won't be calculating all of the offsets necessary to do this
28765 simplification. */
28766
28767 static void
28768 arm_encode_section_info (tree decl, rtx rtl, int first)
28769 {
28770 if (optimize > 0 && TREE_CONSTANT (decl))
28771 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28772
28773 default_encode_section_info (decl, rtl, first);
28774 }
28775 #endif /* !ARM_PE */
28776
28777 static void
28778 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28779 {
28780 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28781 && !strcmp (prefix, "L"))
28782 {
28783 arm_ccfsm_state = 0;
28784 arm_target_insn = NULL;
28785 }
28786 default_internal_label (stream, prefix, labelno);
28787 }
28788
28789 /* Define classes to generate code as RTL or output asm to a file.
28790 Using templates then allows us to use the same code to output code
28791 sequences in the two formats. */
28792 class thumb1_const_rtl
28793 {
28794 public:
28795 thumb1_const_rtl (rtx dst) : dst (dst) {}
28796
28797 void mov (HOST_WIDE_INT val)
28798 {
28799 emit_set_insn (dst, GEN_INT (val));
28800 }
28801
28802 void add (HOST_WIDE_INT val)
28803 {
28804 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28805 }
28806
28807 void ashift (HOST_WIDE_INT shift)
28808 {
28809 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28810 }
28811
28812 void neg ()
28813 {
28814 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28815 }
28816
28817 private:
28818 rtx dst;
28819 };
28820
28821 class thumb1_const_print
28822 {
28823 public:
28824 thumb1_const_print (FILE *f, int regno)
28825 {
28826 t_file = f;
28827 dst_regname = reg_names[regno];
28828 }
28829
28830 void mov (HOST_WIDE_INT val)
28831 {
28832 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28833 dst_regname, val);
28834 }
28835
28836 void add (HOST_WIDE_INT val)
28837 {
28838 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28839 dst_regname, val);
28840 }
28841
28842 void ashift (HOST_WIDE_INT shift)
28843 {
28844 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28845 dst_regname, shift);
28846 }
28847
28848 void neg ()
28849 {
28850 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28851 }
28852
28853 private:
28854 FILE *t_file;
28855 const char *dst_regname;
28856 };
28857
28858 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28859 Avoid generating useless code when one of the bytes is zero. */
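/* A worked example of the general case handled below: for 0x12345678 the
   printed form of the sequence (destination register illustrative) is
	movs	r3, #18
	lsls	r3, #8
	adds	r3, #52
	lsls	r3, #8
	adds	r3, #86
	lsls	r3, #8
	adds	r3, #120
   where 18, 52, 86 and 120 are the bytes 0x12, 0x34, 0x56 and 0x78 in
   decimal; this is the worst case of 1 movs, 3 lsls and 3 adds.  */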
28860 template <class T>
28861 void
28862 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28863 {
28864 bool mov_done_p = false;
28865 unsigned HOST_WIDE_INT val = op1;
28866 int shift = 0;
28867 int i;
28868
28869 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28870
28871 if (val <= 255)
28872 {
28873 dst.mov (val);
28874 return;
28875 }
28876
28877 /* For negative numbers with the top nine bits set, build the
28878 negation of OP1 and then negate the result; this is generally
28879 no longer and often shorter. */
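  /* E.g. -10 is emitted as "movs rN, #10" followed by "rsbs rN, #0"
     (register name illustrative) instead of a byte-by-byte build.  */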
28880 if ((val & 0xFF800000) == 0xFF800000)
28881 {
28882 thumb1_gen_const_int_1 (dst, -op1);
28883 dst.neg ();
28884 return;
28885 }
28886
28887 /* In the general case, we need 7 instructions to build
28888 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28889 do better if VAL is small enough, or right-shiftable
28890 by a suitable amount. If the right shift lets us
28891 encode at least one byte less, it is worth it: we
28892 save an adds and an lsls at the expense of a final
28893 lsls. */
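  /* E.g. 0x44000 needs three bytes as-is but only one after shifting right
     by 14, so it is emitted as "movs rN, #17" then "lsls rN, #14"
     (register name illustrative).  */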
28894 int final_shift = number_of_first_bit_set (val);
28895
28896 int leading_zeroes = clz_hwi (val);
28897 int number_of_bytes_needed
28898 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28899 / BITS_PER_UNIT) + 1;
28900 int number_of_bytes_needed2
28901 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28902 / BITS_PER_UNIT) + 1;
28903
28904 if (number_of_bytes_needed2 < number_of_bytes_needed)
28905 val >>= final_shift;
28906 else
28907 final_shift = 0;
28908
28909 /* If we are in a very small range, we can use either a single movs
28910 or movs+adds. */
28911 if (val <= 510)
28912 {
28913 if (val > 255)
28914 {
28915 unsigned HOST_WIDE_INT high = val - 255;
28916
28917 dst.mov (high);
28918 dst.add (255);
28919 }
28920 else
28921 dst.mov (val);
28922
28923 if (final_shift > 0)
28924 dst.ashift (final_shift);
28925 }
28926 else
28927 {
28928 /* General case, emit upper 3 bytes as needed. */
28929 for (i = 0; i < 3; i++)
28930 {
28931 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28932
28933 if (byte)
28934 {
28935 /* We are about to emit new bits, stop accumulating a
28936 shift amount, and left-shift only if we have already
28937 emitted some upper bits. */
28938 if (mov_done_p)
28939 {
28940 dst.ashift (shift);
28941 dst.add (byte);
28942 }
28943 else
28944 dst.mov (byte);
28945
28946 /* Stop accumulating shift amount since we've just
28947 emitted some bits. */
28948 shift = 0;
28949
28950 mov_done_p = true;
28951 }
28952
28953 if (mov_done_p)
28954 shift += 8;
28955 }
28956
28957 /* Emit lower byte. */
28958 if (!mov_done_p)
28959 dst.mov (val & 0xff);
28960 else
28961 {
28962 dst.ashift (shift);
28963 if (val & 0xff)
28964 dst.add (val & 0xff);
28965 }
28966
28967 if (final_shift > 0)
28968 dst.ashift (final_shift);
28969 }
28970 }
28971
28972 /* Proxies for thumb1.md, since the thumb1_const_print and
28973 thumb1_const_rtl classes are not exported. */
28974 void
28975 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28976 {
28977 thumb1_const_rtl t (dst);
28978 thumb1_gen_const_int_1 (t, op1);
28979 }
28980
28981 void
28982 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28983 {
28984 thumb1_const_print t (asm_out_file, REGNO (dst));
28985 thumb1_gen_const_int_1 (t, op1);
28986 }
28987
28988 /* Output code to add DELTA to the first argument, and then jump
28989 to FUNCTION. Used for C++ multiple inheritance. */
28990
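/* A rough sketch of the sequence emitted below for a Thumb-1-only, non-PIC,
   non-pure-code thunk with a small DELTA; the label name and delta value
   are illustrative:

	push	{r3}
	ldr	r3, .LTHUMBFUNC0	@ target address from the literal pool
	mov	r12, r3
	adds	r0, r0, #8		@ apply DELTA to 'this'
	pop	{r3}
	bx	r12
	.align	2
   .LTHUMBFUNC0:
	.word	<target function>  */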
28991 static void
28992 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28993 HOST_WIDE_INT, tree function)
28994 {
28995 static int thunk_label = 0;
28996 char label[256];
28997 char labelpc[256];
28998 int mi_delta = delta;
28999 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
29000 int shift = 0;
29001 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
29002 ? 1 : 0);
29003 if (mi_delta < 0)
29004 mi_delta = - mi_delta;
29005
29006 final_start_function (emit_barrier (), file, 1);
29007
29008 if (TARGET_THUMB1)
29009 {
29010 int labelno = thunk_label++;
29011 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
29012 /* Thunks are entered in arm mode when available. */
29013 if (TARGET_THUMB1_ONLY)
29014 {
29015 /* push r3 so we can use it as a temporary. */
29016 /* TODO: Omit this save if r3 is not used. */
29017 fputs ("\tpush {r3}\n", file);
29018
29019 /* With -mpure-code, we cannot load the address from the
29020 constant pool: we build it explicitly. */
29021 if (target_pure_code)
29022 {
29023 fputs ("\tmovs\tr3, #:upper8_15:#", file);
29024 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29025 fputc ('\n', file);
29026 fputs ("\tlsls r3, #8\n", file);
29027 fputs ("\tadds\tr3, #:upper0_7:#", file);
29028 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29029 fputc ('\n', file);
29030 fputs ("\tlsls r3, #8\n", file);
29031 fputs ("\tadds\tr3, #:lower8_15:#", file);
29032 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29033 fputc ('\n', file);
29034 fputs ("\tlsls r3, #8\n", file);
29035 fputs ("\tadds\tr3, #:lower0_7:#", file);
29036 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29037 fputc ('\n', file);
29038 }
29039 else
29040 fputs ("\tldr\tr3, ", file);
29041 }
29042 else
29043 {
29044 fputs ("\tldr\tr12, ", file);
29045 }
29046
29047 if (!target_pure_code)
29048 {
29049 assemble_name (file, label);
29050 fputc ('\n', file);
29051 }
29052
29053 if (flag_pic)
29054 {
29055 /* If we are generating PIC, the ldr instruction below loads
29056 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29057 the address of the add + 8, so we have:
29058
29059 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29060 = target + 1.
29061
29062 Note that we have "+ 1" because some versions of GNU ld
29063 don't set the low bit of the result for R_ARM_REL32
29064 relocations against thumb function symbols.
29065 On ARMv6M this is +4, not +8. */
29066 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
29067 assemble_name (file, labelpc);
29068 fputs (":\n", file);
29069 if (TARGET_THUMB1_ONLY)
29070 {
29071 /* This is 2 insns after the start of the thunk, so we know it
29072 is 4-byte aligned. */
29073 fputs ("\tadd\tr3, pc, r3\n", file);
29074 fputs ("\tmov r12, r3\n", file);
29075 }
29076 else
29077 fputs ("\tadd\tr12, pc, r12\n", file);
29078 }
29079 else if (TARGET_THUMB1_ONLY)
29080 fputs ("\tmov r12, r3\n", file);
29081 }
29082 if (TARGET_THUMB1_ONLY)
29083 {
29084 if (mi_delta > 255)
29085 {
29086 /* With -mpure-code, we cannot load MI_DELTA from the
29087 constant pool: we build it explicitly. */
29088 if (target_pure_code)
29089 {
29090 thumb1_const_print r3 (file, 3);
29091 thumb1_gen_const_int_1 (r3, mi_delta);
29092 }
29093 else
29094 {
29095 fputs ("\tldr\tr3, ", file);
29096 assemble_name (file, label);
29097 fputs ("+4\n", file);
29098 }
29099 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
29100 mi_op, this_regno, this_regno);
29101 }
29102 else if (mi_delta != 0)
29103 {
29104 /* Thumb1 unified syntax requires an 's' suffix in the instruction
29105 name when one of the operands is an immediate. */
29106 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29107 mi_op, this_regno, this_regno,
29108 mi_delta);
29109 }
29110 }
29111 else
29112 {
29113 /* TODO: Use movw/movt for large constants when available. */
29114 while (mi_delta != 0)
29115 {
29116 if ((mi_delta & (3 << shift)) == 0)
29117 shift += 2;
29118 else
29119 {
29120 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29121 mi_op, this_regno, this_regno,
29122 mi_delta & (0xff << shift));
29123 mi_delta &= ~(0xff << shift);
29124 shift += 8;
29125 }
29126 }
29127 }
29128 if (TARGET_THUMB1)
29129 {
29130 if (TARGET_THUMB1_ONLY)
29131 fputs ("\tpop\t{r3}\n", file);
29132
29133 fprintf (file, "\tbx\tr12\n");
29134
29135 /* With -mpure-code, we don't need to emit literals for the
29136 function address and delta since we emitted code to build
29137 them. */
29138 if (!target_pure_code)
29139 {
29140 ASM_OUTPUT_ALIGN (file, 2);
29141 assemble_name (file, label);
29142 fputs (":\n", file);
29143 if (flag_pic)
29144 {
29145 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29146 rtx tem = XEXP (DECL_RTL (function), 0);
29147 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29148 pipeline offset is four rather than eight. Adjust the offset
29149 accordingly. */
29150 tem = plus_constant (GET_MODE (tem), tem,
29151 TARGET_THUMB1_ONLY ? -3 : -7);
29152 tem = gen_rtx_MINUS (GET_MODE (tem),
29153 tem,
29154 gen_rtx_SYMBOL_REF (Pmode,
29155 ggc_strdup (labelpc)));
29156 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29157 }
29158 else
29159 /* Output ".word .LTHUNKn". */
29160 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29161
29162 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29163 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29164 }
29165 }
29166 else
29167 {
29168 fputs ("\tb\t", file);
29169 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29170 if (NEED_PLT_RELOC)
29171 fputs ("(PLT)", file);
29172 fputc ('\n', file);
29173 }
29174
29175 final_end_function ();
29176 }
29177
29178 /* MI thunk handling for TARGET_32BIT. */
29179
29180 static void
29181 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29182 HOST_WIDE_INT vcall_offset, tree function)
29183 {
29184 const bool long_call_p = arm_is_long_call_p (function);
29185
29186 /* On ARM, this_regno is R0 or R1 depending on
29187 whether the function returns an aggregate or not.
29188 */
29189 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29190 function)
29191 ? R1_REGNUM : R0_REGNUM);
29192
29193 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29194 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29195 reload_completed = 1;
29196 emit_note (NOTE_INSN_PROLOGUE_END);
29197
29198 /* Add DELTA to THIS_RTX. */
29199 if (delta != 0)
29200 arm_split_constant (PLUS, Pmode, NULL_RTX,
29201 delta, this_rtx, this_rtx, false);
29202
29203 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29204 if (vcall_offset != 0)
29205 {
29206 /* Load *THIS_RTX. */
29207 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29208 /* Compute *THIS_RTX + VCALL_OFFSET. */
29209 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29210 false);
29211 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29212 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29213 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29214 }
29215
29216 /* Generate a tail call to the target function. */
29217 if (!TREE_USED (function))
29218 {
29219 assemble_external (function);
29220 TREE_USED (function) = 1;
29221 }
29222 rtx funexp = XEXP (DECL_RTL (function), 0);
29223 if (long_call_p)
29224 {
29225 emit_move_insn (temp, funexp);
29226 funexp = temp;
29227 }
29228 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29229 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29230 SIBLING_CALL_P (insn) = 1;
29231 emit_barrier ();
29232
29233 /* Indirect calls require a bit of fixup in PIC mode. */
29234 if (long_call_p)
29235 {
29236 split_all_insns_noflow ();
29237 arm_reorg ();
29238 }
29239
29240 insn = get_insns ();
29241 shorten_branches (insn);
29242 final_start_function (insn, file, 1);
29243 final (insn, file, 1);
29244 final_end_function ();
29245
29246 /* Stop pretending this is a post-reload pass. */
29247 reload_completed = 0;
29248 }
29249
29250 /* Output code to add DELTA to the first argument, and then jump
29251 to FUNCTION. Used for C++ multiple inheritance. */
29252
29253 static void
29254 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29255 HOST_WIDE_INT vcall_offset, tree function)
29256 {
29257 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29258
29259 assemble_start_function (thunk, fnname);
29260 if (TARGET_32BIT)
29261 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29262 else
29263 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29264 assemble_end_function (thunk, fnname);
29265 }
29266
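/* Output a vector constant as a single hexadecimal literal, printing the
   last element first.  For example, a V4HImode constant {1, 2, 3, 4} is
   emitted as 0x0004000300020001 (a worked example of the loop below).  */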
29267 int
29268 arm_emit_vector_const (FILE *file, rtx x)
29269 {
29270 int i;
29271 const char * pattern;
29272
29273 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29274
29275 switch (GET_MODE (x))
29276 {
29277 case E_V2SImode: pattern = "%08x"; break;
29278 case E_V4HImode: pattern = "%04x"; break;
29279 case E_V8QImode: pattern = "%02x"; break;
29280 default: gcc_unreachable ();
29281 }
29282
29283 fprintf (file, "0x");
29284 for (i = CONST_VECTOR_NUNITS (x); i--;)
29285 {
29286 rtx element;
29287
29288 element = CONST_VECTOR_ELT (x, i);
29289 fprintf (file, pattern, INTVAL (element));
29290 }
29291
29292 return 1;
29293 }
29294
29295 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29296 HFmode constant pool entries are actually loaded with ldr. */
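/* For example, HFmode 1.0 has the bit pattern 0x3c00; on a little-endian
   target the two data bytes of 0x3c00 are emitted first, followed by two
   bytes of zero padding (the exact directives depend on how the target
   implements assemble_integer and assemble_zeros).  */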
29297 void
29298 arm_emit_fp16_const (rtx c)
29299 {
29300 long bits;
29301
29302 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29303 if (WORDS_BIG_ENDIAN)
29304 assemble_zeros (2);
29305 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29306 if (!WORDS_BIG_ENDIAN)
29307 assemble_zeros (2);
29308 }
29309
29310 const char *
29311 arm_output_load_gr (rtx *operands)
29312 {
29313 rtx reg;
29314 rtx offset;
29315 rtx wcgr;
29316 rtx sum;
29317
29318 if (!MEM_P (operands [1])
29319 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29320 || !REG_P (reg = XEXP (sum, 0))
29321 || !CONST_INT_P (offset = XEXP (sum, 1))
29322 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29323 return "wldrw%?\t%0, %1";
29324
29325 /* Fix up an out-of-range load of a GR register. */
29326 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29327 wcgr = operands[0];
29328 operands[0] = reg;
29329 output_asm_insn ("ldr%?\t%0, %1", operands);
29330
29331 operands[0] = wcgr;
29332 operands[1] = reg;
29333 output_asm_insn ("tmcr%?\t%0, %1", operands);
29334 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29335
29336 return "";
29337 }
29338
29339 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29340
29341 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29342 named arg and all anonymous args onto the stack.
29343 XXX I know the prologue shouldn't be pushing registers, but it is faster
29344 that way. */
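/* For example, under AAPCS a variadic function whose only named argument
   is an "int" uses just r0 for the named part, so nregs is 1 and
   *pretend_size becomes (4 - 1) * 4 == 12, i.e. the prologue pushes r1-r3
   (assuming no doubleword-alignment adjustment applies).  */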
29345
29346 static void
29347 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29348 const function_arg_info &arg,
29349 int *pretend_size,
29350 int second_time ATTRIBUTE_UNUSED)
29351 {
29352 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29353 int nregs;
29354
29355 cfun->machine->uses_anonymous_args = 1;
29356 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29357 {
29358 nregs = pcum->aapcs_ncrn;
29359 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29360 && (nregs & 1))
29361 {
29362 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29363 if (res < 0 && warn_psabi)
29364 inform (input_location, "parameter passing for argument of "
29365 "type %qT changed in GCC 7.1", arg.type);
29366 else if (res > 0)
29367 {
29368 nregs++;
29369 if (res > 1 && warn_psabi)
29370 inform (input_location,
29371 "parameter passing for argument of type "
29372 "%qT changed in GCC 9.1", arg.type);
29373 }
29374 }
29375 }
29376 else
29377 nregs = pcum->nregs;
29378
29379 if (nregs < NUM_ARG_REGS)
29380 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29381 }
29382
29383 /* We can't rely on the caller doing the proper promotion when
29384 using APCS or ATPCS. */
29385
29386 static bool
29387 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29388 {
29389 return !TARGET_AAPCS_BASED;
29390 }
29391
29392 static machine_mode
29393 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29394 machine_mode mode,
29395 int *punsignedp ATTRIBUTE_UNUSED,
29396 const_tree fntype ATTRIBUTE_UNUSED,
29397 int for_return ATTRIBUTE_UNUSED)
29398 {
29399 if (GET_MODE_CLASS (mode) == MODE_INT
29400 && GET_MODE_SIZE (mode) < 4)
29401 return SImode;
29402
29403 return mode;
29404 }
29405
29406
29407 static bool
29408 arm_default_short_enums (void)
29409 {
29410 return ARM_DEFAULT_SHORT_ENUMS;
29411 }
29412
29413
29414 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29415
29416 static bool
29417 arm_align_anon_bitfield (void)
29418 {
29419 return TARGET_AAPCS_BASED;
29420 }
29421
29422
29423 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29424
29425 static tree
29426 arm_cxx_guard_type (void)
29427 {
29428 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29429 }
29430
29431
29432 /* The EABI says test the least significant bit of a guard variable. */
29433
29434 static bool
29435 arm_cxx_guard_mask_bit (void)
29436 {
29437 return TARGET_AAPCS_BASED;
29438 }
29439
29440
29441 /* The EABI specifies that all array cookies are 8 bytes long. */
29442
29443 static tree
29444 arm_get_cookie_size (tree type)
29445 {
29446 tree size;
29447
29448 if (!TARGET_AAPCS_BASED)
29449 return default_cxx_get_cookie_size (type);
29450
29451 size = build_int_cst (sizetype, 8);
29452 return size;
29453 }
29454
29455
29456 /* The EABI says that array cookies should also contain the element size. */
29457
29458 static bool
29459 arm_cookie_has_size (void)
29460 {
29461 return TARGET_AAPCS_BASED;
29462 }
29463
29464
29465 /* The EABI says constructors and destructors should return a pointer to
29466 the object constructed/destroyed. */
29467
29468 static bool
29469 arm_cxx_cdtor_returns_this (void)
29470 {
29471 return TARGET_AAPCS_BASED;
29472 }
29473
29474 /* The EABI says that an inline function may never be the key
29475 method. */
29476
29477 static bool
29478 arm_cxx_key_method_may_be_inline (void)
29479 {
29480 return !TARGET_AAPCS_BASED;
29481 }
29482
29483 static void
29484 arm_cxx_determine_class_data_visibility (tree decl)
29485 {
29486 if (!TARGET_AAPCS_BASED
29487 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29488 return;
29489
29490 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29491 is exported. However, on systems without dynamic vague linkage,
29492 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29493 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29494 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29495 else
29496 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29497 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29498 }
29499
29500 static bool
29501 arm_cxx_class_data_always_comdat (void)
29502 {
29503 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29504 vague linkage if the class has no key function. */
29505 return !TARGET_AAPCS_BASED;
29506 }
29507
29508
29509 /* The EABI says __aeabi_atexit should be used to register static
29510 destructors. */
29511
29512 static bool
29513 arm_cxx_use_aeabi_atexit (void)
29514 {
29515 return TARGET_AAPCS_BASED;
29516 }
29517
29518
29519 void
29520 arm_set_return_address (rtx source, rtx scratch)
29521 {
29522 arm_stack_offsets *offsets;
29523 HOST_WIDE_INT delta;
29524 rtx addr, mem;
29525 unsigned long saved_regs;
29526
29527 offsets = arm_get_frame_offsets ();
29528 saved_regs = offsets->saved_regs_mask;
29529
29530 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29531 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29532 else
29533 {
29534 if (frame_pointer_needed)
29535 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29536 else
29537 {
29538 /* LR will be the first saved register. */
29539 delta = offsets->outgoing_args - (offsets->frame + 4);
29540
29541
29542 if (delta >= 4096)
29543 {
29544 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29545 GEN_INT (delta & ~4095)));
29546 addr = scratch;
29547 delta &= 4095;
29548 }
29549 else
29550 addr = stack_pointer_rtx;
29551
29552 addr = plus_constant (Pmode, addr, delta);
29553 }
29554
29555 /* The store needs to be marked to prevent DSE from deleting
29556 it as dead if it is based on fp. */
29557 mem = gen_frame_mem (Pmode, addr);
29558 MEM_VOLATILE_P (mem) = true;
29559 emit_move_insn (mem, source);
29560 }
29561 }
29562
29563
29564 void
29565 thumb_set_return_address (rtx source, rtx scratch)
29566 {
29567 arm_stack_offsets *offsets;
29568 HOST_WIDE_INT delta;
29569 HOST_WIDE_INT limit;
29570 int reg;
29571 rtx addr, mem;
29572 unsigned long mask;
29573
29574 emit_use (source);
29575
29576 offsets = arm_get_frame_offsets ();
29577 mask = offsets->saved_regs_mask;
29578 if (mask & (1 << LR_REGNUM))
29579 {
29580 limit = 1024;
29581 /* Find the saved regs. */
29582 if (frame_pointer_needed)
29583 {
29584 delta = offsets->soft_frame - offsets->saved_args;
29585 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29586 if (TARGET_THUMB1)
29587 limit = 128;
29588 }
29589 else
29590 {
29591 delta = offsets->outgoing_args - offsets->saved_args;
29592 reg = SP_REGNUM;
29593 }
29594 /* Allow for the stack frame. */
29595 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29596 delta -= 16;
29597 /* The link register is always the first saved register. */
29598 delta -= 4;
29599
29600 /* Construct the address. */
29601 addr = gen_rtx_REG (SImode, reg);
29602 if (delta > limit)
29603 {
29604 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29605 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29606 addr = scratch;
29607 }
29608 else
29609 addr = plus_constant (Pmode, addr, delta);
29610
29611 /* The store needs to be marked to prevent DSE from deleting
29612 it as dead if it is based on fp. */
29613 mem = gen_frame_mem (Pmode, addr);
29614 MEM_VOLATILE_P (mem) = true;
29615 emit_move_insn (mem, source);
29616 }
29617 else
29618 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29619 }
29620
29621 /* Implements target hook vector_mode_supported_p. */
29622 bool
29623 arm_vector_mode_supported_p (machine_mode mode)
29624 {
29625 /* Neon also supports V2SImode, etc. listed in the clause below. */
29626 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29627 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29628 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29629 || mode == V8BFmode))
29630 return true;
29631
29632 if ((TARGET_NEON || TARGET_IWMMXT)
29633 && ((mode == V2SImode)
29634 || (mode == V4HImode)
29635 || (mode == V8QImode)))
29636 return true;
29637
29638 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29639 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29640 || mode == V2HAmode))
29641 return true;
29642
29643 if (TARGET_HAVE_MVE
29644 && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
29645 return true;
29646
29647 if (TARGET_HAVE_MVE_FLOAT
29648 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29649 return true;
29650
29651 return false;
29652 }
29653
29654 /* Implements target hook array_mode_supported_p. */
29655
29656 static bool
29657 arm_array_mode_supported_p (machine_mode mode,
29658 unsigned HOST_WIDE_INT nelems)
29659 {
29660 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29661 for now, as the lane-swapping logic needs to be extended in the expanders.
29662 See PR target/82518. */
29663 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29664 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29665 && (nelems >= 2 && nelems <= 4))
29666 return true;
29667
29668 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29669 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29670 return true;
29671
29672 return false;
29673 }
29674
29675 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29676 registers when autovectorizing for Neon, at least until multiple vector
29677 widths are supported properly by the middle-end. */
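/* For example, with Neon enabled SFmode data is vectorized as V4SFmode by
   default and as V2SFmode under -mvectorize-with-neon-double, while MVE
   always selects the 128-bit modes (V16QImode, V8HImode, V4SImode and,
   with MVE floating point, V8HFmode/V4SFmode).  */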
29678
29679 static machine_mode
29680 arm_preferred_simd_mode (scalar_mode mode)
29681 {
29682 if (TARGET_NEON)
29683 switch (mode)
29684 {
29685 case E_HFmode:
29686 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29687 case E_SFmode:
29688 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29689 case E_SImode:
29690 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29691 case E_HImode:
29692 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29693 case E_QImode:
29694 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29695 case E_DImode:
29696 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29697 return V2DImode;
29698 break;
29699
29700 default:;
29701 }
29702
29703 if (TARGET_REALLY_IWMMXT)
29704 switch (mode)
29705 {
29706 case E_SImode:
29707 return V2SImode;
29708 case E_HImode:
29709 return V4HImode;
29710 case E_QImode:
29711 return V8QImode;
29712
29713 default:;
29714 }
29715
29716 if (TARGET_HAVE_MVE)
29717 switch (mode)
29718 {
29719 case E_QImode:
29720 return V16QImode;
29721 case E_HImode:
29722 return V8HImode;
29723 case E_SImode:
29724 return V4SImode;
29725
29726 default:;
29727 }
29728
29729 if (TARGET_HAVE_MVE_FLOAT)
29730 switch (mode)
29731 {
29732 case E_HFmode:
29733 return V8HFmode;
29734 case E_SFmode:
29735 return V4SFmode;
29736
29737 default:;
29738 }
29739
29740 return word_mode;
29741 }
29742
29743 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29744
29745 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29746 using r0-r4 for function arguments and r7 for the stack frame, without
29747 enough registers left over to do doubleword arithmetic. For Thumb-2 all the
29748 potentially problematic instructions accept high registers so this is not
29749 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29750 that require many low registers. */
29751 static bool
29752 arm_class_likely_spilled_p (reg_class_t rclass)
29753 {
29754 if ((TARGET_THUMB1 && rclass == LO_REGS)
29755 || rclass == CC_REG)
29756 return true;
29757
29758 return default_class_likely_spilled_p (rclass);
29759 }
29760
29761 /* Implements target hook small_register_classes_for_mode_p. */
29762 bool
29763 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29764 {
29765 return TARGET_THUMB1;
29766 }
29767
29768 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29769 ARM insns and therefore guarantee that the shift count is modulo 256.
29770 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29771 guarantee no particular behavior for out-of-range counts. */
29772
29773 static unsigned HOST_WIDE_INT
29774 arm_shift_truncation_mask (machine_mode mode)
29775 {
29776 return mode == SImode ? 255 : 0;
29777 }
29778
29779
29780 /* Map internal gcc register numbers to DWARF2 register numbers. */
29781
29782 unsigned int
29783 arm_debugger_regno (unsigned int regno)
29784 {
29785 if (regno < 16)
29786 return regno;
29787
29788 if (IS_VFP_REGNUM (regno))
29789 {
29790 /* See comment in arm_dwarf_register_span. */
29791 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29792 return 64 + regno - FIRST_VFP_REGNUM;
29793 else
29794 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29795 }
29796
29797 if (IS_IWMMXT_GR_REGNUM (regno))
29798 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29799
29800 if (IS_IWMMXT_REGNUM (regno))
29801 return 112 + regno - FIRST_IWMMXT_REGNUM;
29802
29803 if (IS_PAC_REGNUM (regno))
29804 return DWARF_PAC_REGNUM;
29805
29806 return DWARF_FRAME_REGISTERS;
29807 }
29808
29809 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29810 GCC models them as 64 32-bit registers, so we need to describe this to
29811 the DWARF generation code. Other registers can use the default. */
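/* For example, a DFmode value held in d5 (which overlaps s10/s11) is
   described as a PARALLEL of the two SImode registers s10 and s11 (in the
   opposite order for big-endian), while a value in d16-d31 is described
   with DImode pieces.  */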
29812 static rtx
29813 arm_dwarf_register_span (rtx rtl)
29814 {
29815 machine_mode mode;
29816 unsigned regno;
29817 rtx parts[16];
29818 int nregs;
29819 int i;
29820
29821 regno = REGNO (rtl);
29822 if (!IS_VFP_REGNUM (regno))
29823 return NULL_RTX;
29824
29825 /* XXX FIXME: The EABI defines two VFP register ranges:
29826 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29827 256-287: D0-D31
29828 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29829 corresponding D register. Until GDB supports this, we shall use the
29830 legacy encodings. We also use these encodings for D0-D15 for
29831 compatibility with older debuggers. */
29832 mode = GET_MODE (rtl);
29833 if (GET_MODE_SIZE (mode) < 8)
29834 return NULL_RTX;
29835
29836 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29837 {
29838 nregs = GET_MODE_SIZE (mode) / 4;
29839 for (i = 0; i < nregs; i += 2)
29840 if (TARGET_BIG_END)
29841 {
29842 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29843 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29844 }
29845 else
29846 {
29847 parts[i] = gen_rtx_REG (SImode, regno + i);
29848 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29849 }
29850 }
29851 else
29852 {
29853 nregs = GET_MODE_SIZE (mode) / 8;
29854 for (i = 0; i < nregs; i++)
29855 parts[i] = gen_rtx_REG (DImode, regno + i);
29856 }
29857
29858 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29859 }
29860
29861 #if ARM_UNWIND_INFO
29862 /* Emit unwind directives for a store-multiple instruction or stack pointer
29863 push during alignment.
29864 These should only ever be generated by the function prologue code, so
29865 expect them to have a particular form.
29866 The store-multiple instruction sometimes pushes pc as the last register,
29867 although it should not be tracked into unwind information, or for -Os
29868 sometimes pushes some dummy registers before the first register that needs
29869 to be tracked in unwind information; such dummy registers are there just
29870 to avoid separate stack adjustment, and will not be restored in the
29871 epilogue. */
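/* For example, a prologue "push {r4, r5, lr}" (a PARALLEL that first drops
   sp by 12 and then stores the three registers) is annotated as
   ".save {r4, r5, lr}"; a VFP store-multiple becomes ".vsave {d8, d9}",
   and a dummy pc/padding slot turns into ".pad #4".  */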
29872
29873 static void
29874 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29875 {
29876 int i;
29877 HOST_WIDE_INT offset;
29878 HOST_WIDE_INT nregs;
29879 int reg_size;
29880 unsigned reg;
29881 unsigned lastreg;
29882 unsigned padfirst = 0, padlast = 0;
29883 rtx e;
29884
29885 e = XVECEXP (p, 0, 0);
29886 gcc_assert (GET_CODE (e) == SET);
29887
29888 /* First insn will adjust the stack pointer. */
29889 gcc_assert (GET_CODE (e) == SET
29890 && REG_P (SET_DEST (e))
29891 && REGNO (SET_DEST (e)) == SP_REGNUM
29892 && GET_CODE (SET_SRC (e)) == PLUS);
29893
29894 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29895 nregs = XVECLEN (p, 0) - 1;
29896 gcc_assert (nregs);
29897
29898 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29899 if (reg < 16 || IS_PAC_REGNUM (reg))
29900 {
29901 /* For -Os dummy registers can be pushed at the beginning to
29902 avoid separate stack pointer adjustment. */
29903 e = XVECEXP (p, 0, 1);
29904 e = XEXP (SET_DEST (e), 0);
29905 if (GET_CODE (e) == PLUS)
29906 padfirst = INTVAL (XEXP (e, 1));
29907 gcc_assert (padfirst == 0 || optimize_size);
29908 /* The function prologue may also push pc, but not annotate it as it is
29909 never restored. We turn this into a stack pointer adjustment. */
29910 e = XVECEXP (p, 0, nregs);
29911 e = XEXP (SET_DEST (e), 0);
29912 if (GET_CODE (e) == PLUS)
29913 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29914 else
29915 padlast = offset - 4;
29916 gcc_assert (padlast == 0 || padlast == 4);
29917 if (padlast == 4)
29918 fprintf (out_file, "\t.pad #4\n");
29919 reg_size = 4;
29920 fprintf (out_file, "\t.save {");
29921 }
29922 else if (IS_VFP_REGNUM (reg))
29923 {
29924 reg_size = 8;
29925 fprintf (out_file, "\t.vsave {");
29926 }
29927 else
29928 /* Unknown register type. */
29929 gcc_unreachable ();
29930
29931 /* If the stack increment doesn't match the size of the saved registers,
29932 something has gone horribly wrong. */
29933 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29934
29935 offset = padfirst;
29936 lastreg = 0;
29937 /* The remaining insns will describe the stores. */
29938 for (i = 1; i <= nregs; i++)
29939 {
29940 /* Expect (set (mem <addr>) (reg)).
29941 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29942 e = XVECEXP (p, 0, i);
29943 gcc_assert (GET_CODE (e) == SET
29944 && MEM_P (SET_DEST (e))
29945 && REG_P (SET_SRC (e)));
29946
29947 reg = REGNO (SET_SRC (e));
29948 gcc_assert (reg >= lastreg);
29949
29950 if (i != 1)
29951 fprintf (out_file, ", ");
29952 /* We can't use %r for vfp because we need to use the
29953 double precision register names. */
29954 if (IS_VFP_REGNUM (reg))
29955 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29956 else if (IS_PAC_REGNUM (reg))
29957 asm_fprintf (asm_out_file, "ra_auth_code");
29958 else
29959 asm_fprintf (out_file, "%r", reg);
29960
29961 if (flag_checking)
29962 {
29963 /* Check that the addresses are consecutive. */
29964 e = XEXP (SET_DEST (e), 0);
29965 if (GET_CODE (e) == PLUS)
29966 gcc_assert (REG_P (XEXP (e, 0))
29967 && REGNO (XEXP (e, 0)) == SP_REGNUM
29968 && CONST_INT_P (XEXP (e, 1))
29969 && offset == INTVAL (XEXP (e, 1)));
29970 else
29971 gcc_assert (i == 1
29972 && REG_P (e)
29973 && REGNO (e) == SP_REGNUM);
29974 offset += reg_size;
29975 }
29976 }
29977 fprintf (out_file, "}\n");
29978 if (padfirst)
29979 fprintf (out_file, "\t.pad #%d\n", padfirst);
29980 }
29981
29982 /* Emit unwind directives for a SET. */
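/* For example, "sp = sp - 16" becomes ".pad #16"; setting up the frame
   pointer as "fp = sp + 8" becomes ".setfp r7, sp, #8" in Thumb state (the
   frame pointer register name differs in ARM state); and a single-register
   push via a pre-decrement store becomes ".save {rN}" (".save {dN}" for a
   VFP register).  */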
29983
29984 static void
29985 arm_unwind_emit_set (FILE * out_file, rtx p)
29986 {
29987 rtx e0;
29988 rtx e1;
29989 unsigned reg;
29990
29991 e0 = XEXP (p, 0);
29992 e1 = XEXP (p, 1);
29993 switch (GET_CODE (e0))
29994 {
29995 case MEM:
29996 /* Pushing a single register. */
29997 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29998 || !REG_P (XEXP (XEXP (e0, 0), 0))
29999 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
30000 abort ();
30001
30002 asm_fprintf (out_file, "\t.save ");
30003 if (IS_VFP_REGNUM (REGNO (e1)))
30004 asm_fprintf(out_file, "{d%d}\n",
30005 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
30006 else
30007 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
30008 break;
30009
30010 case REG:
30011 if (REGNO (e0) == SP_REGNUM)
30012 {
30013 /* A stack increment. */
30014 if (GET_CODE (e1) != PLUS
30015 || !REG_P (XEXP (e1, 0))
30016 || REGNO (XEXP (e1, 0)) != SP_REGNUM
30017 || !CONST_INT_P (XEXP (e1, 1)))
30018 abort ();
30019
30020 asm_fprintf (out_file, "\t.pad #%wd\n",
30021 -INTVAL (XEXP (e1, 1)));
30022 }
30023 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
30024 {
30025 HOST_WIDE_INT offset;
30026
30027 if (GET_CODE (e1) == PLUS)
30028 {
30029 if (!REG_P (XEXP (e1, 0))
30030 || !CONST_INT_P (XEXP (e1, 1)))
30031 abort ();
30032 reg = REGNO (XEXP (e1, 0));
30033 offset = INTVAL (XEXP (e1, 1));
30034 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
30035 HARD_FRAME_POINTER_REGNUM, reg,
30036 offset);
30037 }
30038 else if (REG_P (e1))
30039 {
30040 reg = REGNO (e1);
30041 asm_fprintf (out_file, "\t.setfp %r, %r\n",
30042 HARD_FRAME_POINTER_REGNUM, reg);
30043 }
30044 else
30045 abort ();
30046 }
30047 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
30048 {
30049 /* Move from sp to reg. */
30050 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
30051 }
30052 else if (GET_CODE (e1) == PLUS
30053 && REG_P (XEXP (e1, 0))
30054 && REGNO (XEXP (e1, 0)) == SP_REGNUM
30055 && CONST_INT_P (XEXP (e1, 1)))
30056 {
30057 /* Set reg to offset from sp. */
30058 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
30059 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
30060 }
30061 else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
30062 {
30063 if (cfun->machine->pacspval_needed)
30064 asm_fprintf (out_file, "\t.pacspval\n");
30065 }
30066 else
30067 abort ();
30068 break;
30069
30070 default:
30071 abort ();
30072 }
30073 }
30074
30075
30076 /* Emit unwind directives for the given insn. */
30077
30078 static void
30079 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
30080 {
30081 rtx note, pat;
30082 bool handled_one = false;
30083
30084 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30085 return;
30086
30087 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30088 && (TREE_NOTHROW (current_function_decl)
30089 || crtl->all_throwers_are_sibcalls))
30090 return;
30091
30092 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
30093 return;
30094
30095 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
30096 {
30097 switch (REG_NOTE_KIND (note))
30098 {
30099 case REG_FRAME_RELATED_EXPR:
30100 pat = XEXP (note, 0);
30101 goto found;
30102
30103 case REG_CFA_REGISTER:
30104 pat = XEXP (note, 0);
30105 if (pat == NULL)
30106 {
30107 pat = PATTERN (insn);
30108 if (GET_CODE (pat) == PARALLEL)
30109 pat = XVECEXP (pat, 0, 0);
30110 }
30111
30112 /* Only emitted for IS_STACKALIGN re-alignment. */
30113 {
30114 rtx dest, src;
30115 unsigned reg;
30116
30117 src = SET_SRC (pat);
30118 dest = SET_DEST (pat);
30119
30120 gcc_assert (src == stack_pointer_rtx
30121 || IS_PAC_REGNUM (REGNO (src)));
30122 reg = REGNO (dest);
30123
30124 if (IS_PAC_REGNUM (REGNO (src)))
30125 arm_unwind_emit_set (out_file, PATTERN (insn));
30126 else
30127 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30128 reg + 0x90, reg);
30129 }
30130 handled_one = true;
30131 break;
30132
30133 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
30134 to get correct dwarf information for shrink-wrap. We should not
30135 emit unwind information for it because these notes are used either for
30136 pretend arguments or to adjust sp and restore registers from the
30137 stack. */
30138 case REG_CFA_DEF_CFA:
30139 case REG_CFA_ADJUST_CFA:
30140 case REG_CFA_RESTORE:
30141 return;
30142
30143 case REG_CFA_EXPRESSION:
30144 case REG_CFA_OFFSET:
30145 /* ??? Only handling here what we actually emit. */
30146 gcc_unreachable ();
30147
30148 default:
30149 break;
30150 }
30151 }
30152 if (handled_one)
30153 return;
30154 pat = PATTERN (insn);
30155 found:
30156
30157 switch (GET_CODE (pat))
30158 {
30159 case SET:
30160 arm_unwind_emit_set (out_file, pat);
30161 break;
30162
30163 case SEQUENCE:
30164 /* Store multiple. */
30165 arm_unwind_emit_sequence (out_file, pat);
30166 break;
30167
30168 default:
30169 abort();
30170 }
30171 }
30172
30173
30174 /* Output a reference from a function exception table to the type_info
30175 object X. The EABI specifies that the symbol should be relocated by
30176 an R_ARM_TARGET2 relocation. */
30177
30178 static bool
30179 arm_output_ttype (rtx x)
30180 {
30181 fputs ("\t.word\t", asm_out_file);
30182 output_addr_const (asm_out_file, x);
30183 /* Use special relocations for symbol references. */
30184 if (!CONST_INT_P (x))
30185 fputs ("(TARGET2)", asm_out_file);
30186 fputc ('\n', asm_out_file);
30187
30188 return TRUE;
30189 }
30190
30191 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30192
30193 static void
30194 arm_asm_emit_except_personality (rtx personality)
30195 {
30196 fputs ("\t.personality\t", asm_out_file);
30197 output_addr_const (asm_out_file, personality);
30198 fputc ('\n', asm_out_file);
30199 }
30200 #endif /* ARM_UNWIND_INFO */
30201
30202 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30203
30204 static void
30205 arm_asm_init_sections (void)
30206 {
30207 #if ARM_UNWIND_INFO
30208 exception_section = get_unnamed_section (0, output_section_asm_op,
30209 "\t.handlerdata");
30210 #endif /* ARM_UNWIND_INFO */
30211
30212 #ifdef OBJECT_FORMAT_ELF
30213 if (target_pure_code)
30214 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30215 #endif
30216 }
30217
30218 /* Output unwind directives for the start/end of a function. */
30219
30220 void
30221 arm_output_fn_unwind (FILE * f, bool prologue)
30222 {
30223 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30224 return;
30225
30226 if (prologue)
30227 fputs ("\t.fnstart\n", f);
30228 else
30229 {
30230 /* If this function will never be unwound, then mark it as such.
30231 The same condition is used in arm_unwind_emit to suppress
30232 the frame annotations. */
30233 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30234 && (TREE_NOTHROW (current_function_decl)
30235 || crtl->all_throwers_are_sibcalls))
30236 fputs("\t.cantunwind\n", f);
30237
30238 fputs ("\t.fnend\n", f);
30239 }
30240 }
30241
30242 static bool
30243 arm_emit_tls_decoration (FILE *fp, rtx x)
30244 {
30245 enum tls_reloc reloc;
30246 rtx val;
30247
30248 val = XVECEXP (x, 0, 0);
30249 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30250
30251 output_addr_const (fp, val);
30252
30253 switch (reloc)
30254 {
30255 case TLS_GD32:
30256 fputs ("(tlsgd)", fp);
30257 break;
30258 case TLS_GD32_FDPIC:
30259 fputs ("(tlsgd_fdpic)", fp);
30260 break;
30261 case TLS_LDM32:
30262 fputs ("(tlsldm)", fp);
30263 break;
30264 case TLS_LDM32_FDPIC:
30265 fputs ("(tlsldm_fdpic)", fp);
30266 break;
30267 case TLS_LDO32:
30268 fputs ("(tlsldo)", fp);
30269 break;
30270 case TLS_IE32:
30271 fputs ("(gottpoff)", fp);
30272 break;
30273 case TLS_IE32_FDPIC:
30274 fputs ("(gottpoff_fdpic)", fp);
30275 break;
30276 case TLS_LE32:
30277 fputs ("(tpoff)", fp);
30278 break;
30279 case TLS_DESCSEQ:
30280 fputs ("(tlsdesc)", fp);
30281 break;
30282 default:
30283 gcc_unreachable ();
30284 }
30285
30286 switch (reloc)
30287 {
30288 case TLS_GD32:
30289 case TLS_LDM32:
30290 case TLS_IE32:
30291 case TLS_DESCSEQ:
30292 fputs (" + (. - ", fp);
30293 output_addr_const (fp, XVECEXP (x, 0, 2));
30294 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
30295 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30296 output_addr_const (fp, XVECEXP (x, 0, 3));
30297 fputc (')', fp);
30298 break;
30299 default:
30300 break;
30301 }
30302
30303 return TRUE;
30304 }
30305
30306 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30307
30308 static void
30309 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30310 {
30311 gcc_assert (size == 4);
30312 fputs ("\t.word\t", file);
30313 output_addr_const (file, x);
30314 fputs ("(tlsldo)", file);
30315 }
30316
30317 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30318
30319 static bool
30320 arm_output_addr_const_extra (FILE *fp, rtx x)
30321 {
30322 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30323 return arm_emit_tls_decoration (fp, x);
30324 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30325 {
30326 char label[256];
30327 int labelno = INTVAL (XVECEXP (x, 0, 0));
30328
30329 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30330 assemble_name_raw (fp, label);
30331
30332 return TRUE;
30333 }
30334 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30335 {
30336 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30337 if (GOT_PCREL)
30338 fputs ("+.", fp);
30339 fputs ("-(", fp);
30340 output_addr_const (fp, XVECEXP (x, 0, 0));
30341 fputc (')', fp);
30342 return TRUE;
30343 }
30344 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30345 {
30346 output_addr_const (fp, XVECEXP (x, 0, 0));
30347 if (GOT_PCREL)
30348 fputs ("+.", fp);
30349 fputs ("-(", fp);
30350 output_addr_const (fp, XVECEXP (x, 0, 1));
30351 fputc (')', fp);
30352 return TRUE;
30353 }
30354 else if (GET_CODE (x) == CONST_VECTOR)
30355 return arm_emit_vector_const (fp, x);
30356
30357 return FALSE;
30358 }
30359
30360 /* Output assembly for a shift instruction.
30361 SET_FLAGS determines how the instruction modifies the condition codes.
30362 0 - Do not set condition codes.
30363 1 - Set condition codes.
30364 2 - Use smallest instruction. */
30365 const char *
30366 arm_output_shift(rtx * operands, int set_flags)
30367 {
30368 char pattern[100];
30369 static const char flag_chars[3] = {'?', '.', '!'};
30370 const char *shift;
30371 HOST_WIDE_INT val;
30372 char c;
30373
30374 c = flag_chars[set_flags];
30375 shift = shift_op(operands[3], &val);
30376 if (shift)
30377 {
30378 if (val != -1)
30379 operands[2] = GEN_INT(val);
30380 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30381 }
30382 else
30383 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30384
30385 output_asm_insn (pattern, operands);
30386 return "";
30387 }
30388
30389 /* Output assembly for a WMMX immediate shift instruction. */
30390 const char *
30391 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30392 {
30393 int shift = INTVAL (operands[2]);
30394 char templ[50];
30395 machine_mode opmode = GET_MODE (operands[0]);
30396
30397 gcc_assert (shift >= 0);
30398
30399 /* If the shift value is larger than the register versions would allow
30400 (> 63 for the D qualifier, > 31 for W or > 15 for H), handle it specially here. */
30401 if (((opmode == V4HImode) && (shift > 15))
30402 || ((opmode == V2SImode) && (shift > 31))
30403 || ((opmode == DImode) && (shift > 63)))
30404 {
30405 if (wror_or_wsra)
30406 {
30407 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30408 output_asm_insn (templ, operands);
30409 if (opmode == DImode)
30410 {
30411 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30412 output_asm_insn (templ, operands);
30413 }
30414 }
30415 else
30416 {
30417 /* The destination register will contain all zeros. */
30418 sprintf (templ, "wzero\t%%0");
30419 output_asm_insn (templ, operands);
30420 }
30421 return "";
30422 }
30423
30424 if ((opmode == DImode) && (shift > 32))
30425 {
30426 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30427 output_asm_insn (templ, operands);
30428 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30429 output_asm_insn (templ, operands);
30430 }
30431 else
30432 {
30433 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30434 output_asm_insn (templ, operands);
30435 }
30436 return "";
30437 }
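/* For instance, a DImode immediate shift of 40 is emitted by the code above
as two instructions, a shift by #32 followed by a shift by #8, since a
single step here only goes up to #32.  A shift amount beyond the lane width
(for example 20 on V4HImode) is emitted as a #32 shift for WROR/WSRA, or as
a wzero of the destination otherwise.  */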
30438
30439 /* Output assembly for a WMMX tinsr instruction. */
30440 const char *
30441 arm_output_iwmmxt_tinsr (rtx *operands)
30442 {
30443 int mask = INTVAL (operands[3]);
30444 int i;
30445 char templ[50];
30446 int units = mode_nunits[GET_MODE (operands[0])];
30447 gcc_assert ((mask & (mask - 1)) == 0);
30448 for (i = 0; i < units; ++i)
30449 {
30450 if ((mask & 0x01) == 1)
30451 {
30452 break;
30453 }
30454 mask >>= 1;
30455 }
30456 gcc_assert (i < units);
30457 {
30458 switch (GET_MODE (operands[0]))
30459 {
30460 case E_V8QImode:
30461 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30462 break;
30463 case E_V4HImode:
30464 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30465 break;
30466 case E_V2SImode:
30467 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30468 break;
30469 default:
30470 gcc_unreachable ();
30471 break;
30472 }
30473 output_asm_insn (templ, operands);
30474 }
30475 return "";
30476 }
30477
30478 /* Output an ARM casesi dispatch sequence. Used by the arm_casesi_internal
30479 insn. Responsible for the handling of switch statements in ARM state. */
30480 const char *
30481 arm_output_casesi (rtx *operands)
30482 {
30483 char label[100];
30484 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30485 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30486 output_asm_insn ("cmp\t%0, %1", operands);
30487 output_asm_insn ("bhi\t%l3", operands);
30488 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
30489 switch (GET_MODE (diff_vec))
30490 {
30491 case E_QImode:
30492 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30493 output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
30494 else
30495 output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
30496 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30497 break;
30498 case E_HImode:
30499 if (REGNO (operands[4]) != REGNO (operands[5]))
30500 {
30501 output_asm_insn ("add\t%4, %0, %0", operands);
30502 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30503 output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
30504 else
30505 output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
30506 }
30507 else
30508 {
30509 output_asm_insn ("add\t%4, %5, %0", operands);
30510 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30511 output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
30512 else
30513 output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
30514 }
30515 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30516 break;
30517 case E_SImode:
30518 if (flag_pic)
30519 {
30520 output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
30521 output_asm_insn ("add\t%|pc, %|pc, %4", operands);
30522 }
30523 else
30524 output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
30525 break;
30526 default:
30527 gcc_unreachable ();
30528 }
30529 assemble_label (asm_out_file, label);
30530 output_asm_insn ("nop", operands);
30531 return "";
30532 }
30533
30534 /* Output a Thumb-1 casesi dispatch sequence. */
30535 const char *
30536 thumb1_output_casesi (rtx *operands)
30537 {
30538 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30539
30540 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30541
30542 switch (GET_MODE(diff_vec))
30543 {
30544 case E_QImode:
30545 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30546 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30547 case E_HImode:
30548 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30549 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30550 case E_SImode:
30551 return "bl\t%___gnu_thumb1_case_si";
30552 default:
30553 gcc_unreachable ();
30554 }
30555 }
30556
30557 /* Output a Thumb-2 casesi instruction. */
30558 const char *
30559 thumb2_output_casesi (rtx *operands)
30560 {
30561 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30562
30563 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30564
30565 output_asm_insn ("cmp\t%0, %1", operands);
30566 output_asm_insn ("bhi\t%l3", operands);
30567 switch (GET_MODE(diff_vec))
30568 {
30569 case E_QImode:
30570 return "tbb\t[%|pc, %0]";
30571 case E_HImode:
30572 return "tbh\t[%|pc, %0, lsl #1]";
30573 case E_SImode:
30574 if (flag_pic)
30575 {
30576 output_asm_insn ("adr\t%4, %l2", operands);
30577 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30578 output_asm_insn ("add\t%4, %4, %5", operands);
30579 return "bx\t%4";
30580 }
30581 else
30582 {
30583 output_asm_insn ("adr\t%4, %l2", operands);
30584 return "ldr\t%|pc, [%4, %0, lsl #2]";
30585 }
30586 default:
30587 gcc_unreachable ();
30588 }
30589 }
30590
30591 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30592 per-core tuning structs. */
30593 static int
30594 arm_issue_rate (void)
30595 {
30596 return current_tune->issue_rate;
30597 }
30598
30599 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30600 static int
30601 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30602 {
30603 if (DEBUG_INSN_P (insn))
30604 return more;
30605
30606 rtx_code code = GET_CODE (PATTERN (insn));
30607 if (code == USE || code == CLOBBER)
30608 return more;
30609
30610 if (get_attr_type (insn) == TYPE_NO_INSN)
30611 return more;
30612
30613 return more - 1;
30614 }
30615
30616 /* Return how many instructions the scheduler should look ahead to choose
30617 the best one. */
30618 static int
30619 arm_first_cycle_multipass_dfa_lookahead (void)
30620 {
30621 int issue_rate = arm_issue_rate ();
30622
30623 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30624 }
30625
30626 /* Enable modeling of L2 auto-prefetcher. */
30627 static int
30628 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30629 {
30630 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30631 }
30632
30633 const char *
30634 arm_mangle_type (const_tree type)
30635 {
30636 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30637 has to be mangled as if it is in the "std" namespace. */
30638 if (TARGET_AAPCS_BASED
30639 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30640 return "St9__va_list";
30641
30642 /* Half-precision floating point types. */
30643 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
30644 {
30645 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30646 return NULL;
30647 if (TYPE_MODE (type) == BFmode)
30648 return "u6__bf16";
30649 else
30650 return "Dh";
30651 }
30652
30653 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30654 builtin type. */
30655 if (TYPE_NAME (type) != NULL)
30656 return arm_mangle_builtin_type (type);
30657
30658 /* Use the default mangling. */
30659 return NULL;
30660 }
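/* Mangling examples implied by the checks above: on an AAPCS target,
"__va_list" is mangled as "St9__va_list", __fp16 as "Dh" and __bf16 as
"u6__bf16", while _Float16 (float16_type_node) falls back to the default
mangling.  */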
30661
30662 /* Order of allocation of core registers for Thumb: this allocation is
30663 written over the corresponding initial entries of the array
30664 initialized with REG_ALLOC_ORDER. We allocate all low registers
30665 first. Saving and restoring a low register is usually cheaper than
30666 using a call-clobbered high register. */
30667
30668 static const int thumb_core_reg_alloc_order[] =
30669 {
30670 3, 2, 1, 0, 4, 5, 6, 7,
30671 12, 14, 8, 9, 10, 11
30672 };
30673
30674 /* Adjust register allocation order when compiling for Thumb. */
30675
30676 void
30677 arm_order_regs_for_local_alloc (void)
30678 {
30679 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30680 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30681 if (TARGET_THUMB)
30682 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30683 sizeof (thumb_core_reg_alloc_order));
30684 }
30685
30686 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30687
30688 bool
30689 arm_frame_pointer_required (void)
30690 {
30691 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30692 return true;
30693
30694 /* If the function receives nonlocal gotos, it needs to save the frame
30695 pointer in the nonlocal_goto_save_area object. */
30696 if (cfun->has_nonlocal_label)
30697 return true;
30698
30699 /* The frame pointer is required for non-leaf APCS frames. */
30700 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30701 return true;
30702
30703 /* If we are probing the stack in the prologue, we will have a faulting
30704 instruction prior to the stack adjustment and this requires a frame
30705 pointer if we want to catch the exception using the EABI unwinder. */
30706 if (!IS_INTERRUPT (arm_current_func_type ())
30707 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30708 || flag_stack_clash_protection)
30709 && arm_except_unwind_info (&global_options) == UI_TARGET
30710 && cfun->can_throw_non_call_exceptions)
30711 {
30712 HOST_WIDE_INT size = get_frame_size ();
30713
30714 /* That's irrelevant if there is no stack adjustment. */
30715 if (size <= 0)
30716 return false;
30717
30718 /* That's relevant only if there is a stack probe. */
30719 if (crtl->is_leaf && !cfun->calls_alloca)
30720 {
30721 /* We don't have the final size of the frame so adjust. */
30722 size += 32 * UNITS_PER_WORD;
30723 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30724 return true;
30725 }
30726 else
30727 return true;
30728 }
30729
30730 return false;
30731 }
30732
30733 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30734 All modes except THUMB1 have conditional execution.
30735 If we have conditional arithmetic, return false before reload to
30736 enable some ifcvt transformations. */
30737 static bool
30738 arm_have_conditional_execution (void)
30739 {
30740 bool has_cond_exec, enable_ifcvt_trans;
30741
30742 /* Only THUMB1 cannot support conditional execution. */
30743 has_cond_exec = !TARGET_THUMB1;
30744
30745 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30746 before reload. */
30747 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30748
30749 return has_cond_exec && !enable_ifcvt_trans;
30750 }
30751
30752 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30753 static HOST_WIDE_INT
30754 arm_vector_alignment (const_tree type)
30755 {
30756 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30757
30758 if (TARGET_AAPCS_BASED)
30759 align = MIN (align, 64);
30760
30761 return align;
30762 }
30763
30764 static unsigned int
30765 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30766 {
30767 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30768 {
30769 modes->safe_push (V16QImode);
30770 modes->safe_push (V8QImode);
30771 }
30772 return 0;
30773 }
30774
30775 static bool
30776 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30777 {
30778 /* Vectors which aren't in packed structures will not be less aligned than
30779 the natural alignment of their element type, so this is safe. */
30780 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30781 return !is_packed;
30782
30783 return default_builtin_vector_alignment_reachable (type, is_packed);
30784 }
30785
30786 static bool
30787 arm_builtin_support_vector_misalignment (machine_mode mode,
30788 const_tree type, int misalignment,
30789 bool is_packed)
30790 {
30791 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30792 {
30793 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30794
30795 if (is_packed)
30796 return align == 1;
30797
30798 /* If the misalignment is unknown, we should be able to handle the access
30799 so long as it is not to a member of a packed data structure. */
30800 if (misalignment == -1)
30801 return true;
30802
30803 /* Return true if the misalignment is a multiple of the natural alignment
30804 of the vector's element type. This is probably always going to be
30805 true in practice, since we've already established that this isn't a
30806 packed access. */
30807 return ((misalignment % align) == 0);
30808 }
30809
30810 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30811 is_packed);
30812 }
30813
30814 static void
30815 arm_conditional_register_usage (void)
30816 {
30817 int regno;
30818
30819 if (TARGET_THUMB1 && optimize_size)
30820 {
30821 /* When optimizing for size on Thumb-1, it's better not
30822 to use the HI regs, because of the overhead of
30823 stacking them. */
30824 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30825 fixed_regs[regno] = call_used_regs[regno] = 1;
30826 }
30827
30828 /* The link register can be clobbered by any branch insn,
30829 but we have no way to track that at present, so mark
30830 it as unavailable. */
30831 if (TARGET_THUMB1)
30832 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30833
30834 if (TARGET_32BIT && TARGET_VFP_BASE)
30835 {
30836 /* VFPv3 registers are disabled when earlier VFP
30837 versions are selected due to the definition of
30838 LAST_VFP_REGNUM. */
30839 for (regno = FIRST_VFP_REGNUM;
30840 regno <= LAST_VFP_REGNUM; ++ regno)
30841 {
30842 fixed_regs[regno] = 0;
30843 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30844 || regno >= FIRST_VFP_REGNUM + 32;
30845 }
30846 if (TARGET_HAVE_MVE)
30847 fixed_regs[VPR_REGNUM] = 0;
30848 }
30849
30850 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30851 {
30852 regno = FIRST_IWMMXT_GR_REGNUM;
30853 /* The 2002/10/09 revision of the XScale ABI has wCG0
30854 and wCG1 as call-preserved registers. The 2002/11/21
30855 revision changed this so that all wCG registers are
30856 scratch registers. */
30857 for (regno = FIRST_IWMMXT_GR_REGNUM;
30858 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30859 fixed_regs[regno] = 0;
30860 /* The XScale ABI has wR0 - wR9 as scratch registers,
30861 the rest as call-preserved registers. */
30862 for (regno = FIRST_IWMMXT_REGNUM;
30863 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30864 {
30865 fixed_regs[regno] = 0;
30866 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30867 }
30868 }
30869
30870 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30871 {
30872 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30873 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30874 }
30875 else if (TARGET_APCS_STACK)
30876 {
30877 fixed_regs[10] = 1;
30878 call_used_regs[10] = 1;
30879 }
30880 /* -mcaller-super-interworking reserves r11 for calls to
30881 _interwork_r11_call_via_rN(). Making the register global
30882 is an easy way of ensuring that it remains valid for all
30883 calls. */
30884 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30885 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30886 {
30887 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30888 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30889 if (TARGET_CALLER_INTERWORKING)
30890 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30891 }
30892
30893 /* The Q and GE bits are only accessed via special ACLE patterns. */
30894 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30895 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30896
30897 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30898 }
30899
30900 static reg_class_t
30901 arm_preferred_rename_class (reg_class_t rclass)
30902 {
30903 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30904 using GENERAL_REGS. During the register rename pass we therefore prefer
30905 LO_REGS, which can reduce code size. */
30906 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30907 return LO_REGS;
30908 else
30909 return NO_REGS;
30910 }
30911
30912 /* Compute the attribute "length" of insn "*push_multi".
30913 So this function MUST be kept in sync with that insn pattern. */
30914 int
30915 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30916 {
30917 int i, regno, hi_reg;
30918 int num_saves = XVECLEN (parallel_op, 0);
30919
30920 /* ARM mode. */
30921 if (TARGET_ARM)
30922 return 4;
30923 /* Thumb1 mode. */
30924 if (TARGET_THUMB1)
30925 return 2;
30926
30927 /* Thumb2 mode. */
30928 regno = REGNO (first_op);
30929 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
30930 list fits in the 8-bit register list field. Normally this means all registers
30931 in the list must be LO_REGS, that is R0-R7. If any HI_REGS are used, then we
30932 must use the 32-bit encoding. The one exception is PUSH, where LR (a HI_REG)
30933 can also be used with the 16-bit encoding. */
30934 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30935 for (i = 1; i < num_saves && !hi_reg; i++)
30936 {
30937 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30938 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30939 }
30940
30941 if (!hi_reg)
30942 return 2;
30943 return 4;
30944 }
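/* As a rough worked example of the rule above: under Thumb-2,
"push {r0-r7, lr}" names only low registers plus LR, so the 16-bit encoding
applies and the length is 2, whereas "push {r0, r8}" names a high register
other than LR and therefore needs the 32-bit encoding, length 4.  In ARM
state every such store multiple is a single 4-byte instruction.  */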
30945
30946 /* Compute the attribute "length" of insn. Currently, this function is used
30947 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30948 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30949 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30950 true if OPERANDS contains an insn which explicitly updates the base register. */
30951
30952 int
30953 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30954 {
30955 /* ARM mode. */
30956 if (TARGET_ARM)
30957 return 4;
30958 /* Thumb1 mode. */
30959 if (TARGET_THUMB1)
30960 return 2;
30961
30962 rtx parallel_op = operands[0];
30963 /* Initialize to the number of elements in the PARALLEL. */
30964 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30965 /* Initialize to the base register. */
30966 unsigned regno = REGNO (operands[1]);
30967 /* Skip the return and write-back patterns; we only need the register
30968 pop patterns for the analysis below. */
30969 unsigned first_indx = 0;
30970 first_indx += return_pc ? 1 : 0;
30971 first_indx += write_back_p ? 1 : 0;
30972
30973 /* A pop operation can be done through LDM or POP. If the base register is SP
30974 and write back is used, then LDM is an alias of POP. */
30975 bool pop_p = (regno == SP_REGNUM && write_back_p);
30976 bool ldm_p = !pop_p;
30977
30978 /* Check base register for LDM. */
30979 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30980 return 4;
30981
30982 /* Check each register in the list. */
30983 for (; indx >= first_indx; indx--)
30984 {
30985 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30986 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30987 comment in arm_attr_length_push_multi. */
30988 if (REGNO_REG_CLASS (regno) == HI_REGS
30989 && (regno != PC_REGNUM || ldm_p))
30990 return 4;
30991 }
30992
30993 return 2;
30994 }
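/* Worked examples of the length computation above, under Thumb-2: a pop
through SP with write back such as "pop {r0-r3, pc}" may keep PC in the
16-bit POP encoding, so its length is 2, while "ldmia r8!, {r0, r1}" uses a
high base register and therefore needs the 32-bit encoding, length 4.  */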
30995
30996 /* Compute the number of instructions emitted by output_move_double. */
30997 int
30998 arm_count_output_move_double_insns (rtx *operands)
30999 {
31000 int count;
31001 rtx ops[2];
31002 /* output_move_double may modify the operands array, so call it
31003 here on a copy of the array. */
31004 ops[0] = operands[0];
31005 ops[1] = operands[1];
31006 output_move_double (ops, false, &count);
31007 return count;
31008 }
31009
31010 /* Same as above, but operands are a register/memory pair in SImode.
31011 Assumes operands has the base register in position 0 and memory in position
31012 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
31013 int
31014 arm_count_ldrdstrd_insns (rtx *operands, bool load)
31015 {
31016 int count;
31017 rtx ops[2];
31018 int regnum, memnum;
31019 if (load)
31020 regnum = 0, memnum = 1;
31021 else
31022 regnum = 1, memnum = 0;
31023 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
31024 ops[memnum] = adjust_address (operands[2], DImode, 0);
31025 output_move_double (ops, false, &count);
31026 return count;
31027 }
31028
31029
31030 int
31031 vfp3_const_double_for_fract_bits (rtx operand)
31032 {
31033 REAL_VALUE_TYPE r0;
31034
31035 if (!CONST_DOUBLE_P (operand))
31036 return 0;
31037
31038 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
31039 if (exact_real_inverse (DFmode, &r0)
31040 && !REAL_VALUE_NEGATIVE (r0))
31041 {
31042 if (exact_real_truncate (DFmode, &r0))
31043 {
31044 HOST_WIDE_INT value = real_to_integer (&r0);
31045 value = value & 0xffffffff;
31046 if ((value != 0) && ( (value & (value - 1)) == 0))
31047 {
31048 int ret = exact_log2 (value);
31049 gcc_assert (IN_RANGE (ret, 0, 31));
31050 return ret;
31051 }
31052 }
31053 }
31054 return 0;
31055 }
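/* For example, a CONST_DOUBLE of 0.125 has the exact reciprocal 8.0, which
is 2^3, so the function above returns 3, the number of fraction bits for a
fixed-point vcvt.  A value such as 0.3, whose reciprocal is not an exact
power of two, returns 0.  */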
31056
31057 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
31058 log2 is in [1, 32], return that log2. Otherwise return -1.
31059 This is used in the patterns for vcvt.s32.f32 floating-point to
31060 fixed-point conversions. */
31061
31062 int
31063 vfp3_const_double_for_bits (rtx x)
31064 {
31065 const REAL_VALUE_TYPE *r;
31066
31067 if (!CONST_DOUBLE_P (x))
31068 return -1;
31069
31070 r = CONST_DOUBLE_REAL_VALUE (x);
31071
31072 if (REAL_VALUE_NEGATIVE (*r)
31073 || REAL_VALUE_ISNAN (*r)
31074 || REAL_VALUE_ISINF (*r)
31075 || !real_isinteger (r, SFmode))
31076 return -1;
31077
31078 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
31079
31080 /* The exact_log2 above will have returned -1 if this is
31081 not an exact log2. */
31082 if (!IN_RANGE (hwint, 1, 32))
31083 return -1;
31084
31085 return hwint;
31086 }
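/* For example, 65536.0 is 2^16, so 16 is returned here, whereas 1.0 gives a
log2 of 0, which is outside the [1, 32] range and so yields -1, as does any
value that is negative, NaN, infinite or not an exact power of two.  */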
31087
31088 \f
31089 /* Emit a memory barrier around an atomic sequence according to MODEL. */
31090
31091 static void
31092 arm_pre_atomic_barrier (enum memmodel model)
31093 {
31094 if (need_atomic_barrier_p (model, true))
31095 emit_insn (gen_memory_barrier ());
31096 }
31097
31098 static void
31099 arm_post_atomic_barrier (enum memmodel model)
31100 {
31101 if (need_atomic_barrier_p (model, false))
31102 emit_insn (gen_memory_barrier ());
31103 }
31104
31105 /* Emit the load-exclusive and store-exclusive instructions.
31106 Use acquire and release versions if necessary. */
31107
31108 static void
31109 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
31110 {
31111 rtx (*gen) (rtx, rtx);
31112
31113 if (acq)
31114 {
31115 switch (mode)
31116 {
31117 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
31118 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
31119 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
31120 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
31121 default:
31122 gcc_unreachable ();
31123 }
31124 }
31125 else
31126 {
31127 switch (mode)
31128 {
31129 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
31130 case E_HImode: gen = gen_arm_load_exclusivehi; break;
31131 case E_SImode: gen = gen_arm_load_exclusivesi; break;
31132 case E_DImode: gen = gen_arm_load_exclusivedi; break;
31133 default:
31134 gcc_unreachable ();
31135 }
31136 }
31137
31138 emit_insn (gen (rval, mem));
31139 }
31140
31141 static void
31142 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
31143 rtx mem, bool rel)
31144 {
31145 rtx (*gen) (rtx, rtx, rtx);
31146
31147 if (rel)
31148 {
31149 switch (mode)
31150 {
31151 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
31152 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
31153 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
31154 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
31155 default:
31156 gcc_unreachable ();
31157 }
31158 }
31159 else
31160 {
31161 switch (mode)
31162 {
31163 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
31164 case E_HImode: gen = gen_arm_store_exclusivehi; break;
31165 case E_SImode: gen = gen_arm_store_exclusivesi; break;
31166 case E_DImode: gen = gen_arm_store_exclusivedi; break;
31167 default:
31168 gcc_unreachable ();
31169 }
31170 }
31171
31172 emit_insn (gen (bval, rval, mem));
31173 }
31174
31175 /* Mark the previous jump instruction as unlikely. */
31176
31177 static void
31178 emit_unlikely_jump (rtx insn)
31179 {
31180 rtx_insn *jump = emit_jump_insn (insn);
31181 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31182 }
31183
31184 /* Expand a compare and swap pattern. */
31185
31186 void
31187 arm_expand_compare_and_swap (rtx operands[])
31188 {
31189 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31190 machine_mode mode, cmp_mode;
31191
31192 bval = operands[0];
31193 rval = operands[1];
31194 mem = operands[2];
31195 oldval = operands[3];
31196 newval = operands[4];
31197 is_weak = operands[5];
31198 mod_s = operands[6];
31199 mod_f = operands[7];
31200 mode = GET_MODE (mem);
31201
31202 /* Normally the succ memory model must be stronger than fail, but in the
31203 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31204 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31205
31206 if (TARGET_HAVE_LDACQ
31207 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31208 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31209 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31210
31211 switch (mode)
31212 {
31213 case E_QImode:
31214 case E_HImode:
31215 /* For narrow modes, we're going to perform the comparison in SImode,
31216 so do the zero-extension now. */
31217 rval = gen_reg_rtx (SImode);
31218 oldval = convert_modes (SImode, mode, oldval, true);
31219 /* FALLTHRU */
31220
31221 case E_SImode:
31222 /* Force the value into a register if needed. We waited until after
31223 the zero-extension above to do this properly. */
31224 if (!arm_add_operand (oldval, SImode))
31225 oldval = force_reg (SImode, oldval);
31226 break;
31227
31228 case E_DImode:
31229 if (!cmpdi_operand (oldval, mode))
31230 oldval = force_reg (mode, oldval);
31231 break;
31232
31233 default:
31234 gcc_unreachable ();
31235 }
31236
31237 if (TARGET_THUMB1)
31238 cmp_mode = E_SImode;
31239 else
31240 cmp_mode = CC_Zmode;
31241
31242 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31243 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31244 oldval, newval, is_weak, mod_s, mod_f));
31245
31246 if (mode == QImode || mode == HImode)
31247 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31248
31249 /* In all cases, we arrange for success to be signaled by Z set.
31250 This arrangement allows for the boolean result to be used directly
31251 in a subsequent branch, post optimization. For Thumb-1 targets, the
31252 boolean negation of the result is also stored in bval, because the Thumb-1
31253 backend lacks dependency tracking for the CC flag: flag setting is not
31254 represented at the RTL level. */
31255 if (TARGET_THUMB1)
31256 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31257 else
31258 {
31259 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31260 emit_insn (gen_rtx_SET (bval, x));
31261 }
31262 }
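/* Concretely, the promotion above means that a call along the lines of
__atomic_compare_exchange_n (p, &expected, desired, 0, __ATOMIC_RELEASE,
__ATOMIC_ACQUIRE) is expanded on a target with LDA/STL as if the success
order were ACQ_REL, so the acquire semantics of the failure path are not
lost.  */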
31263
31264 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31265 another memory store between the load-exclusive and store-exclusive can
31266 reset the monitor from Exclusive to Open state. This means we must wait
31267 until after reload to split the pattern, lest we get a register spill in
31268 the middle of the atomic sequence. Success of the compare and swap is
31269 indicated by the Z flag being set for 32-bit targets and by neg_bval being
31270 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by
31271 the atomic_compare_and_swapmode standard pattern in operand 0). */
31272
31273 void
31274 arm_split_compare_and_swap (rtx operands[])
31275 {
31276 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31277 machine_mode mode;
31278 enum memmodel mod_s, mod_f;
31279 bool is_weak;
31280 rtx_code_label *label1, *label2;
31281 rtx x, cond;
31282
31283 rval = operands[1];
31284 mem = operands[2];
31285 oldval = operands[3];
31286 newval = operands[4];
31287 is_weak = (operands[5] != const0_rtx);
31288 mod_s_rtx = operands[6];
31289 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31290 mod_f = memmodel_from_int (INTVAL (operands[7]));
31291 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31292 mode = GET_MODE (mem);
31293
31294 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31295
31296 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31297 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31298
31299 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31300 a full barrier is emitted after the store-release. */
31301 if (is_armv8_sync)
31302 use_acquire = false;
31303
31304 /* Checks whether a barrier is needed and emits one accordingly. */
31305 if (!(use_acquire || use_release))
31306 arm_pre_atomic_barrier (mod_s);
31307
31308 label1 = NULL;
31309 if (!is_weak)
31310 {
31311 label1 = gen_label_rtx ();
31312 emit_label (label1);
31313 }
31314 label2 = gen_label_rtx ();
31315
31316 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31317
31318 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31319 as required to communicate with arm_expand_compare_and_swap. */
31320 if (TARGET_32BIT)
31321 {
31322 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31323 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31324 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31325 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31326 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31327 }
31328 else
31329 {
31330 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31331 if (thumb1_cmpneg_operand (oldval, SImode))
31332 {
31333 rtx src = rval;
31334 if (!satisfies_constraint_L (oldval))
31335 {
31336 gcc_assert (satisfies_constraint_J (oldval));
31337
31338 /* For such immediates, ADDS needs the source and destination regs
31339 to be the same.
31340
31341 Normally this would be handled by RA, but this is all happening
31342 after RA. */
31343 emit_move_insn (neg_bval, rval);
31344 src = neg_bval;
31345 }
31346
31347 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31348 label2, cond));
31349 }
31350 else
31351 {
31352 emit_move_insn (neg_bval, const1_rtx);
31353 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31354 }
31355 }
31356
31357 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31358
31359 /* Weak or strong, we want EQ to be true for success, so that we
31360 match the flags that we got from the compare above. */
31361 if (TARGET_32BIT)
31362 {
31363 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31364 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31365 emit_insn (gen_rtx_SET (cond, x));
31366 }
31367
31368 if (!is_weak)
31369 {
31370 /* Z is set to boolean value of !neg_bval, as required to communicate
31371 with arm_expand_compare_and_swap. */
31372 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31373 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31374 }
31375
31376 if (!is_mm_relaxed (mod_f))
31377 emit_label (label2);
31378
31379 /* Checks whether a barrier is needed and emits one accordingly. */
31380 if (is_armv8_sync
31381 || !(use_acquire || use_release))
31382 arm_post_atomic_barrier (mod_s);
31383
31384 if (is_mm_relaxed (mod_f))
31385 emit_label (label2);
31386 }
31387
31388 /* Split an atomic operation pattern. Operation is given by CODE and is one
31389 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31390 operation). Operation is performed on the content at MEM and on VALUE
31391 following the memory model MODEL_RTX. The content at MEM before and after
31392 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31393 success of the operation is returned in COND. Using a scratch register or
31394 an operand register for these determines what result is returned for that
31395 pattern. */
31396
31397 void
31398 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31399 rtx value, rtx model_rtx, rtx cond)
31400 {
31401 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31402 machine_mode mode = GET_MODE (mem);
31403 machine_mode wmode = (mode == DImode ? DImode : SImode);
31404 rtx_code_label *label;
31405 bool all_low_regs, bind_old_new;
31406 rtx x;
31407
31408 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31409
31410 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31411 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31412
31413 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31414 a full barrier is emitted after the store-release. */
31415 if (is_armv8_sync)
31416 use_acquire = false;
31417
31418 /* Checks whether a barrier is needed and emits one accordingly. */
31419 if (!(use_acquire || use_release))
31420 arm_pre_atomic_barrier (model);
31421
31422 label = gen_label_rtx ();
31423 emit_label (label);
31424
31425 if (new_out)
31426 new_out = gen_lowpart (wmode, new_out);
31427 if (old_out)
31428 old_out = gen_lowpart (wmode, old_out);
31429 else
31430 old_out = new_out;
31431 value = simplify_gen_subreg (wmode, value, mode, 0);
31432
31433 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31434
31435 /* Does the operation require the destination and first operand to use the
31436 same register? This is decided by the register constraints of the relevant
31437 insn patterns in thumb1.md. */
31438 gcc_assert (!new_out || REG_P (new_out));
31439 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31440 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31441 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31442 bind_old_new =
31443 (TARGET_THUMB1
31444 && code != SET
31445 && code != MINUS
31446 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31447
31448 /* We want to return the old value while putting the result of the operation
31449 in the same register as the old value so copy the old value over to the
31450 destination register and use that register for the operation. */
31451 if (old_out && bind_old_new)
31452 {
31453 emit_move_insn (new_out, old_out);
31454 old_out = new_out;
31455 }
31456
31457 switch (code)
31458 {
31459 case SET:
31460 new_out = value;
31461 break;
31462
31463 case NOT:
31464 x = gen_rtx_AND (wmode, old_out, value);
31465 emit_insn (gen_rtx_SET (new_out, x));
31466 x = gen_rtx_NOT (wmode, new_out);
31467 emit_insn (gen_rtx_SET (new_out, x));
31468 break;
31469
31470 case MINUS:
31471 if (CONST_INT_P (value))
31472 {
31473 value = gen_int_mode (-INTVAL (value), wmode);
31474 code = PLUS;
31475 }
31476 /* FALLTHRU */
31477
31478 case PLUS:
31479 if (mode == DImode)
31480 {
31481 /* DImode plus/minus need to clobber flags. */
31482 /* The adddi3 and subdi3 patterns are incorrectly written so that
31483 they require matching operands, even when we could easily support
31484 three operands. Thankfully, this can be fixed up post-splitting,
31485 as the individual add+adc patterns do accept three operands and
31486 post-reload cprop can make these moves go away. */
31487 emit_move_insn (new_out, old_out);
31488 if (code == PLUS)
31489 x = gen_adddi3 (new_out, new_out, value);
31490 else
31491 x = gen_subdi3 (new_out, new_out, value);
31492 emit_insn (x);
31493 break;
31494 }
31495 /* FALLTHRU */
31496
31497 default:
31498 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31499 emit_insn (gen_rtx_SET (new_out, x));
31500 break;
31501 }
31502
31503 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31504 use_release);
31505
31506 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31507 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31508
31509 /* Checks whether a barrier is needed and emits one accordingly. */
31510 if (is_armv8_sync
31511 || !(use_acquire || use_release))
31512 arm_post_atomic_barrier (model);
31513 }
31514 \f
31515 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31516 opt_machine_mode
31517 arm_mode_to_pred_mode (machine_mode mode)
31518 {
31519 switch (GET_MODE_NUNITS (mode))
31520 {
31521 case 16: return V16BImode;
31522 case 8: return V8BImode;
31523 case 4: return V4BImode;
31524 case 2: return V2QImode;
31525 }
31526 return opt_machine_mode ();
31527 }
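/* For example, V16QImode (16 lanes) maps to V16BImode, V8HImode to
V8BImode, V4SImode or V4SFmode to V4BImode and the 2-lane modes to
V2QImode; any other lane count yields an empty opt_machine_mode.  */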
31528
31529 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31530 If CAN_INVERT, store either the result or its inverse in TARGET
31531 and return true if TARGET contains the inverse. If !CAN_INVERT,
31532 always store the result in TARGET, never its inverse.
31533
31534 Note that the handling of floating-point comparisons is not
31535 IEEE compliant. */
31536
31537 bool
31538 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31539 bool can_invert)
31540 {
31541 machine_mode cmp_result_mode = GET_MODE (target);
31542 machine_mode cmp_mode = GET_MODE (op0);
31543
31544 bool inverted;
31545
31546 /* MVE supports more comparisons than Neon. */
31547 if (TARGET_HAVE_MVE)
31548 inverted = false;
31549 else
31550 switch (code)
31551 {
31552 /* For these we need to compute the inverse of the requested
31553 comparison. */
31554 case UNORDERED:
31555 case UNLT:
31556 case UNLE:
31557 case UNGT:
31558 case UNGE:
31559 case UNEQ:
31560 case NE:
31561 code = reverse_condition_maybe_unordered (code);
31562 if (!can_invert)
31563 {
31564 /* Recursively emit the inverted comparison into a temporary
31565 and then store its inverse in TARGET. This avoids reusing
31566 TARGET (which for integer NE could be one of the inputs). */
31567 rtx tmp = gen_reg_rtx (cmp_result_mode);
31568 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31569 gcc_unreachable ();
31570 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31571 return false;
31572 }
31573 inverted = true;
31574 break;
31575
31576 default:
31577 inverted = false;
31578 break;
31579 }
31580
31581 switch (code)
31582 {
31583 /* These are natively supported by Neon for zero comparisons, but otherwise
31584 require the operands to be swapped. For MVE, we can only compare
31585 registers. */
31586 case LE:
31587 case LT:
31588 if (!TARGET_HAVE_MVE)
31589 if (op1 != CONST0_RTX (cmp_mode))
31590 {
31591 code = swap_condition (code);
31592 std::swap (op0, op1);
31593 }
31594 /* Fall through. */
31595
31596 /* These are natively supported by Neon for both register and zero
31597 operands. MVE supports registers only. */
31598 case EQ:
31599 case GE:
31600 case GT:
31601 case NE:
31602 if (TARGET_HAVE_MVE)
31603 {
31604 switch (GET_MODE_CLASS (cmp_mode))
31605 {
31606 case MODE_VECTOR_INT:
31607 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31608 op0, force_reg (cmp_mode, op1)));
31609 break;
31610 case MODE_VECTOR_FLOAT:
31611 if (TARGET_HAVE_MVE_FLOAT)
31612 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31613 op0, force_reg (cmp_mode, op1)));
31614 else
31615 gcc_unreachable ();
31616 break;
31617 default:
31618 gcc_unreachable ();
31619 }
31620 }
31621 else
31622 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31623 return inverted;
31624
31625 /* These are natively supported for register operands only.
31626 Comparisons with zero aren't useful and should be folded
31627 or canonicalized by target-independent code. */
31628 case GEU:
31629 case GTU:
31630 if (TARGET_HAVE_MVE)
31631 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31632 op0, force_reg (cmp_mode, op1)));
31633 else
31634 emit_insn (gen_neon_vc (code, cmp_mode, target,
31635 op0, force_reg (cmp_mode, op1)));
31636 return inverted;
31637
31638 /* These require the operands to be swapped and likewise do not
31639 support comparisons with zero. */
31640 case LEU:
31641 case LTU:
31642 if (TARGET_HAVE_MVE)
31643 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31644 force_reg (cmp_mode, op1), op0));
31645 else
31646 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31647 target, force_reg (cmp_mode, op1), op0));
31648 return inverted;
31649
31650 /* These need a combination of two comparisons. */
31651 case LTGT:
31652 case ORDERED:
31653 {
31654 /* Operands are LTGT iff (a > b || a < b).
31655 Operands are ORDERED iff (a > b || a <= b). */
31656 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31657 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31658 rtx_code alt_code = (code == LTGT ? LT : LE);
31659 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31660 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31661 gcc_unreachable ();
31662 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31663 gt_res, alt_res)));
31664 return inverted;
31665 }
31666
31667 default:
31668 gcc_unreachable ();
31669 }
31670 }
31671
31672 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31673 CMP_RESULT_MODE is the mode of the comparison result. */
31674
31675 void
31676 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31677 {
31678 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31679 arm_expand_vector_compare, and another one here. */
31680 rtx mask;
31681
31682 if (TARGET_HAVE_MVE)
31683 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31684 else
31685 mask = gen_reg_rtx (cmp_result_mode);
31686
31687 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31688 operands[4], operands[5], true);
31689 if (inverted)
31690 std::swap (operands[1], operands[2]);
31691 if (TARGET_NEON)
31692 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31693 mask, operands[1], operands[2]));
31694 else
31695 {
31696 machine_mode cmp_mode = GET_MODE (operands[0]);
31697
31698 switch (GET_MODE_CLASS (cmp_mode))
31699 {
31700 case MODE_VECTOR_INT:
31701 emit_insn (gen_mve_q (VPSELQ_S, VPSELQ_S, cmp_mode, operands[0],
31702 operands[1], operands[2], mask));
31703 break;
31704 case MODE_VECTOR_FLOAT:
31705 if (TARGET_HAVE_MVE_FLOAT)
31706 emit_insn (gen_mve_q_f (VPSELQ_F, cmp_mode, operands[0],
31707 operands[1], operands[2], mask));
31708 else
31709 gcc_unreachable ();
31710 break;
31711 default:
31712 gcc_unreachable ();
31713 }
31714 }
31715 }
31716 \f
31717 #define MAX_VECT_LEN 16
31718
31719 struct expand_vec_perm_d
31720 {
31721 rtx target, op0, op1;
31722 vec_perm_indices perm;
31723 machine_mode vmode;
31724 bool one_vector_p;
31725 bool testing_p;
31726 };
31727
31728 /* Generate a variable permutation. */
31729
31730 static void
31731 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31732 {
31733 machine_mode vmode = GET_MODE (target);
31734 bool one_vector_p = rtx_equal_p (op0, op1);
31735
31736 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31737 gcc_checking_assert (GET_MODE (op0) == vmode);
31738 gcc_checking_assert (GET_MODE (op1) == vmode);
31739 gcc_checking_assert (GET_MODE (sel) == vmode);
31740 gcc_checking_assert (TARGET_NEON);
31741
31742 if (one_vector_p)
31743 {
31744 if (vmode == V8QImode)
31745 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31746 else
31747 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31748 }
31749 else
31750 {
31751 rtx pair;
31752
31753 if (vmode == V8QImode)
31754 {
31755 pair = gen_reg_rtx (V16QImode);
31756 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31757 pair = gen_lowpart (TImode, pair);
31758 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31759 }
31760 else
31761 {
31762 pair = gen_reg_rtx (OImode);
31763 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31764 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31765 }
31766 }
31767 }
31768
31769 void
31770 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31771 {
31772 machine_mode vmode = GET_MODE (target);
31773 unsigned int nelt = GET_MODE_NUNITS (vmode);
31774 bool one_vector_p = rtx_equal_p (op0, op1);
31775 rtx mask;
31776
31777 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31778 numbering of elements for big-endian, we must reverse the order. */
31779 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31780
31781 /* The VTBL instruction does not use a modulo index, so we must take care
31782 of that ourselves. */
31783 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31784 mask = gen_const_vec_duplicate (vmode, mask);
31785 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31786
31787 arm_expand_vec_perm_1 (target, op0, op1, sel);
31788 }
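/* For example, when permuting a single V8QImode vector (nelt == 8) the
selector is first ANDed with a vector of 7s, so an index of 9 behaves like
index 1.  This gives the modulo semantics VEC_PERM_EXPR requires, which
VTBL itself does not provide (out-of-range VTBL indexes read as zero).  */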
31789
31790 /* Map lane ordering between architectural lane order, and GCC lane order,
31791 taking into account ABI. See comment above output_move_neon for details. */
31792
31793 static int
31794 neon_endian_lane_map (machine_mode mode, int lane)
31795 {
31796 if (BYTES_BIG_ENDIAN)
31797 {
31798 int nelems = GET_MODE_NUNITS (mode);
31799 /* Reverse lane order. */
31800 lane = (nelems - 1 - lane);
31801 /* Reverse D register order, to match ABI. */
31802 if (GET_MODE_SIZE (mode) == 16)
31803 lane = lane ^ (nelems / 2);
31804 }
31805 return lane;
31806 }
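/* A quick sanity check of the mapping above: for big-endian V4SImode
(4 lanes, 16 bytes), architectural lane 0 maps to GCC lane (3 - 0) ^ 2 = 1,
lane 1 to 0, lane 2 to 3 and lane 3 to 2, i.e. the lanes are reversed within
each D register while the D registers themselves stay in ABI order.  On
little-endian targets the lane number is returned unchanged.  */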
31807
31808 /* Some permutations index into pairs of vectors, this is a helper function
31809 to map indexes into those pairs of vectors. */
31810
31811 static int
31812 neon_pair_endian_lane_map (machine_mode mode, int lane)
31813 {
31814 int nelem = GET_MODE_NUNITS (mode);
31815 if (BYTES_BIG_ENDIAN)
31816 lane =
31817 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31818 return lane;
31819 }
31820
31821 /* Generate or test for an insn that supports a constant permutation. */
31822
31823 /* Recognize patterns for the VUZP insns. */
31824
31825 static bool
31826 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31827 {
31828 unsigned int i, odd, mask, nelt = d->perm.length ();
31829 rtx out0, out1, in0, in1;
31830 int first_elem;
31831 int swap_nelt;
31832
31833 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31834 return false;
31835
31836 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31837 big endian pattern on 64 bit vectors, so we correct for that. */
31838 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31839 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31840
31841 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31842
31843 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31844 odd = 0;
31845 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31846 odd = 1;
31847 else
31848 return false;
31849 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31850
31851 for (i = 0; i < nelt; i++)
31852 {
31853 unsigned elt =
31854 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31855 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31856 return false;
31857 }
31858
31859 /* Success! */
31860 if (d->testing_p)
31861 return true;
31862
31863 in0 = d->op0;
31864 in1 = d->op1;
31865 if (swap_nelt != 0)
31866 std::swap (in0, in1);
31867
31868 out0 = d->target;
31869 out1 = gen_reg_rtx (d->vmode);
31870 if (odd)
31871 std::swap (out0, out1);
31872
31873 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31874 return true;
31875 }
31876
31877 /* Recognize patterns for the VZIP insns. */
31878
31879 static bool
31880 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31881 {
31882 unsigned int i, high, mask, nelt = d->perm.length ();
31883 rtx out0, out1, in0, in1;
31884 int first_elem;
31885 bool is_swapped;
31886
31887 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31888 return false;
31889
31890 is_swapped = BYTES_BIG_ENDIAN;
31891
31892 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31893
31894 high = nelt / 2;
31895 if (first_elem == neon_endian_lane_map (d->vmode, high))
31896 ;
31897 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31898 high = 0;
31899 else
31900 return false;
31901 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31902
31903 for (i = 0; i < nelt / 2; i++)
31904 {
31905 unsigned elt =
31906 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31907 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31908 != elt)
31909 return false;
31910 elt =
31911 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31912 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31913 != elt)
31914 return false;
31915 }
31916
31917 /* Success! */
31918 if (d->testing_p)
31919 return true;
31920
31921 in0 = d->op0;
31922 in1 = d->op1;
31923 if (is_swapped)
31924 std::swap (in0, in1);
31925
31926 out0 = d->target;
31927 out1 = gen_reg_rtx (d->vmode);
31928 if (high)
31929 std::swap (out0, out1);
31930
31931 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31932 return true;
31933 }
31934
31935 /* Recognize patterns for the VREV insns. */
31936 static bool
31937 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31938 {
31939 unsigned int i, j, diff, nelt = d->perm.length ();
31940 rtx (*gen) (machine_mode, rtx, rtx);
31941
31942 if (!d->one_vector_p)
31943 return false;
31944
31945 diff = d->perm[0];
31946 switch (diff)
31947 {
31948 case 7:
31949 switch (d->vmode)
31950 {
31951 case E_V16QImode:
31952 case E_V8QImode:
31953 gen = gen_neon_vrev64;
31954 break;
31955 default:
31956 return false;
31957 }
31958 break;
31959 case 3:
31960 switch (d->vmode)
31961 {
31962 case E_V16QImode:
31963 case E_V8QImode:
31964 gen = gen_neon_vrev32;
31965 break;
31966 case E_V8HImode:
31967 case E_V4HImode:
31968 case E_V8HFmode:
31969 case E_V4HFmode:
31970 gen = gen_neon_vrev64;
31971 break;
31972 default:
31973 return false;
31974 }
31975 break;
31976 case 1:
31977 switch (d->vmode)
31978 {
31979 case E_V16QImode:
31980 case E_V8QImode:
31981 gen = gen_neon_vrev16;
31982 break;
31983 case E_V8HImode:
31984 case E_V4HImode:
31985 gen = gen_neon_vrev32;
31986 break;
31987 case E_V4SImode:
31988 case E_V2SImode:
31989 case E_V4SFmode:
31990 case E_V2SFmode:
31991 gen = gen_neon_vrev64;
31992 break;
31993 default:
31994 return false;
31995 }
31996 break;
31997 default:
31998 return false;
31999 }
32000
32001 for (i = 0; i < nelt ; i += diff + 1)
32002 for (j = 0; j <= diff; j += 1)
32003 {
32004 /* This is guaranteed to be true as the value of diff
32005 is 7, 3, 1 and we should have enough elements in the
32006 queue to generate this. Getting a vector mask with a
32007 value of diff other than these values implies that
32008 something is wrong by the time we get here. */
32009 gcc_assert (i + j < nelt);
32010 if (d->perm[i + j] != i + diff - j)
32011 return false;
32012 }
32013
32014 /* Success! */
32015 if (d->testing_p)
32016 return true;
32017
32018 emit_insn (gen (d->vmode, d->target, d->op0));
32019 return true;
32020 }
32021
32022 /* Recognize patterns for the VTRN insns. */
32023
32024 static bool
32025 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
32026 {
32027 unsigned int i, odd, mask, nelt = d->perm.length ();
32028 rtx out0, out1, in0, in1;
32029
32030 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
32031 return false;
32032
32033 /* Note that these are little-endian tests. Adjust for big-endian later. */
32034 if (d->perm[0] == 0)
32035 odd = 0;
32036 else if (d->perm[0] == 1)
32037 odd = 1;
32038 else
32039 return false;
32040 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
32041
32042 for (i = 0; i < nelt; i += 2)
32043 {
32044 if (d->perm[i] != i + odd)
32045 return false;
32046 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
32047 return false;
32048 }
32049
32050 /* Success! */
32051 if (d->testing_p)
32052 return true;
32053
32054 in0 = d->op0;
32055 in1 = d->op1;
32056 if (BYTES_BIG_ENDIAN)
32057 {
32058 std::swap (in0, in1);
32059 odd = !odd;
32060 }
32061
32062 out0 = d->target;
32063 out1 = gen_reg_rtx (d->vmode);
32064 if (odd)
32065 std::swap (out0, out1);
32066
32067 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
32068 return true;
32069 }
32070
32071 /* Recognize patterns for the VEXT insns. */
32072
32073 static bool
32074 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
32075 {
32076 unsigned int i, nelt = d->perm.length ();
32077 rtx offset;
32078
32079 unsigned int location;
32080
32081 unsigned int next = d->perm[0] + 1;
32082
32083 /* TODO: Handle GCC's numbering of elements for big-endian. */
32084 if (BYTES_BIG_ENDIAN)
32085 return false;
32086
32087 /* Check if the extracted indexes are increasing by one. */
32088 for (i = 1; i < nelt; next++, i++)
32089 {
32090 /* If we hit the most significant element of the 2nd vector in
32091 the previous iteration, no need to test further. */
32092 if (next == 2 * nelt)
32093 return false;
32094
32095 /* If we are operating on only one vector: it could be a
32096 rotation. If there are only two elements of size < 64, let
32097 arm_evpc_neon_vrev catch it. */
32098 if (d->one_vector_p && (next == nelt))
32099 {
32100 if ((nelt == 2) && (d->vmode != V2DImode))
32101 return false;
32102 else
32103 next = 0;
32104 }
32105
32106 if (d->perm[i] != next)
32107 return false;
32108 }
32109
32110 location = d->perm[0];
32111
32112 /* Success! */
32113 if (d->testing_p)
32114 return true;
32115
32116 offset = GEN_INT (location);
32117
32118 if(d->vmode == E_DImode)
32119 return false;
32120
32121 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
32122 return true;
32123 }
32124
32125 /* The NEON VTBL instruction is a fully variable permutation that's even
32126 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32127 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32128 can do slightly better by expanding this as a constant where we don't
32129 have to apply a mask. */
32130
32131 static bool
32132 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32133 {
32134 rtx rperm[MAX_VECT_LEN], sel;
32135 machine_mode vmode = d->vmode;
32136 unsigned int i, nelt = d->perm.length ();
32137
32138 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32139 numbering of elements for big-endian, we must reverse the order. */
32140 if (BYTES_BIG_ENDIAN)
32141 return false;
32142
32143 if (d->testing_p)
32144 return true;
32145
32146 /* Generic code will try constant permutation twice. Once with the
32147 original mode and again with the elements lowered to QImode.
32148 So wait and don't do the selector expansion ourselves. */
32149 if (vmode != V8QImode && vmode != V16QImode)
32150 return false;
32151
32152 for (i = 0; i < nelt; ++i)
32153 rperm[i] = GEN_INT (d->perm[i]);
32154 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32155 sel = force_reg (vmode, sel);
32156
32157 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32158 return true;
32159 }
32160
32161 static bool
32162 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32163 {
32164 /* Check if the input mask matches vext before reordering the
32165 operands. */
32166 if (TARGET_NEON)
32167 if (arm_evpc_neon_vext (d))
32168 return true;
32169
32170 /* The pattern matching functions above are written to look for a small
32171 number to begin the sequence (0, 1, N/2). If we begin with an index
32172 from the second operand, we can swap the operands. */
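  /* E.g. a V4SI selector {4,0,5,1} becomes {0,4,1,5} after rotating the
     inputs and swapping the operands, which arm_evpc_neon_vzip can then
     recognize.  */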
32173 unsigned int nelt = d->perm.length ();
32174 if (d->perm[0] >= nelt)
32175 {
32176 d->perm.rotate_inputs (1);
32177 std::swap (d->op0, d->op1);
32178 }
32179
32180 if (TARGET_NEON)
32181 {
32182 if (arm_evpc_neon_vuzp (d))
32183 return true;
32184 if (arm_evpc_neon_vzip (d))
32185 return true;
32186 if (arm_evpc_neon_vrev (d))
32187 return true;
32188 if (arm_evpc_neon_vtrn (d))
32189 return true;
32190 return arm_evpc_neon_vtbl (d);
32191 }
32192 return false;
32193 }
32194
32195 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32196
32197 static bool
32198 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32199 rtx target, rtx op0, rtx op1,
32200 const vec_perm_indices &sel)
32201 {
32202 if (vmode != op_mode)
32203 return false;
32204
32205 struct expand_vec_perm_d d;
32206 int i, nelt, which;
32207
32208 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32209 return false;
32210
32211 d.target = target;
32212 if (op0)
32213 {
32214 rtx nop0 = force_reg (vmode, op0);
32215 if (op0 == op1)
32216 op1 = nop0;
32217 op0 = nop0;
32218 }
32219 if (op1)
32220 op1 = force_reg (vmode, op1);
32221 d.op0 = op0;
32222 d.op1 = op1;
32223
32224 d.vmode = vmode;
32225 gcc_assert (VECTOR_MODE_P (d.vmode));
32226 d.testing_p = !target;
32227
32228 nelt = GET_MODE_NUNITS (d.vmode);
32229 for (i = which = 0; i < nelt; ++i)
32230 {
32231 int ei = sel[i] & (2 * nelt - 1);
32232 which |= (ei < nelt ? 1 : 2);
32233 }
32234
32235 switch (which)
32236 {
32237 default:
32238 gcc_unreachable();
32239
32240 case 3:
32241 d.one_vector_p = false;
32242 if (d.testing_p || !rtx_equal_p (op0, op1))
32243 break;
32244
32245 /* The elements of PERM do not suggest that only the first operand
32246 is used, but both operands are identical. Allow easier matching
32247 of the permutation by folding the permutation into the single
32248 input vector. */
32249 /* FALLTHRU */
32250 case 2:
32251 d.op0 = op1;
32252 d.one_vector_p = true;
32253 break;
32254
32255 case 1:
32256 d.op1 = op0;
32257 d.one_vector_p = true;
32258 break;
32259 }
32260
32261 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32262
32263 if (!d.testing_p)
32264 return arm_expand_vec_perm_const_1 (&d);
32265
32266 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32267 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32268 if (!d.one_vector_p)
32269 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32270
32271 start_sequence ();
32272 bool ret = arm_expand_vec_perm_const_1 (&d);
32273 end_sequence ();
32274
32275 return ret;
32276 }
32277
32278 bool
32279 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32280 {
32281 /* If we are soft float and either have ldrd or the mode is no wider
32282 than a word, then all auto increment forms are ok. */
32283 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32284 return true;
32285
32286 switch (code)
32287 {
32288 /* Post-increment and pre-decrement are supported for all
32289 instruction forms except for vector forms. */
32290 case ARM_POST_INC:
32291 case ARM_PRE_DEC:
32292 if (VECTOR_MODE_P (mode))
32293 {
32294 if (code != ARM_PRE_DEC)
32295 return true;
32296 else
32297 return false;
32298 }
32299
32300 return true;
32301
32302 case ARM_POST_DEC:
32303 case ARM_PRE_INC:
32304 /* Without LDRD and mode size greater than
32305 word size, there is no point in auto-incrementing
32306 because ldm and stm will not have these forms. */
32307 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32308 return false;
32309
32310 /* Vector and floating point modes do not support
32311 these auto increment forms. */
32312 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32313 return false;
32314
32315 return true;
32316
32317 default:
32318 return false;
32319
32320 }
32321
32322 return false;
32323 }
32324
32325 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32326 on ARM, since we know that shifts by negative amounts are no-ops.
32327 Additionally, the default expansion code is not available or suitable
32328 for post-reload insn splits (this can occur when the register allocator
32329 chooses not to do a shift in NEON).
32330
32331 This function is used in both initial expand and post-reload splits, and
32332 handles all kinds of 64-bit shifts.
32333
32334 Input requirements:
32335 - It is safe for the input and output to be the same register, but
32336 early-clobber rules apply for the shift amount and scratch registers.
32337 - Shift by register requires both scratch registers. In all other cases
32338 the scratch registers may be NULL.
32339 - Ashiftrt by a register also clobbers the CC register. */
32340 void
32341 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32342 rtx amount, rtx scratch1, rtx scratch2)
32343 {
32344 rtx out_high = gen_highpart (SImode, out);
32345 rtx out_low = gen_lowpart (SImode, out);
32346 rtx in_high = gen_highpart (SImode, in);
32347 rtx in_low = gen_lowpart (SImode, in);
32348
32349 /* Terminology:
32350 in = the register pair containing the input value.
32351 out = the destination register pair.
32352 up = the high- or low-part of each pair.
32353 down = the opposite part to "up".
32354 In a shift, we can consider bits to shift from "up"-stream to
32355 "down"-stream, so in a left-shift "up" is the low-part and "down"
32356 is the high-part of each register pair. */
32357
32358 rtx out_up = code == ASHIFT ? out_low : out_high;
32359 rtx out_down = code == ASHIFT ? out_high : out_low;
32360 rtx in_up = code == ASHIFT ? in_low : in_high;
32361 rtx in_down = code == ASHIFT ? in_high : in_low;
32362
32363 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32364 gcc_assert (out
32365 && (REG_P (out) || SUBREG_P (out))
32366 && GET_MODE (out) == DImode);
32367 gcc_assert (in
32368 && (REG_P (in) || SUBREG_P (in))
32369 && GET_MODE (in) == DImode);
32370 gcc_assert (amount
32371 && (((REG_P (amount) || SUBREG_P (amount))
32372 && GET_MODE (amount) == SImode)
32373 || CONST_INT_P (amount)));
32374 gcc_assert (scratch1 == NULL
32375 || (GET_CODE (scratch1) == SCRATCH)
32376 || (GET_MODE (scratch1) == SImode
32377 && REG_P (scratch1)));
32378 gcc_assert (scratch2 == NULL
32379 || (GET_CODE (scratch2) == SCRATCH)
32380 || (GET_MODE (scratch2) == SImode
32381 && REG_P (scratch2)));
32382 gcc_assert (!REG_P (out) || !REG_P (amount)
32383 || !HARD_REGISTER_P (out)
32384 || (REGNO (out) != REGNO (amount)
32385 && REGNO (out) + 1 != REGNO (amount)));
32386
32387 /* Macros to make following code more readable. */
32388 #define SUB_32(DEST,SRC) \
32389 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32390 #define RSB_32(DEST,SRC) \
32391 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32392 #define SUB_S_32(DEST,SRC) \
32393 gen_addsi3_compare0 ((DEST), (SRC), \
32394 GEN_INT (-32))
32395 #define SET(DEST,SRC) \
32396 gen_rtx_SET ((DEST), (SRC))
32397 #define SHIFT(CODE,SRC,AMOUNT) \
32398 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32399 #define LSHIFT(CODE,SRC,AMOUNT) \
32400 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32401 SImode, (SRC), (AMOUNT))
32402 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32403 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32404 SImode, (SRC), (AMOUNT))
32405 #define ORR(A,B) \
32406 gen_rtx_IOR (SImode, (A), (B))
32407 #define BRANCH(COND,LABEL) \
32408 gen_arm_cond_branch ((LABEL), \
32409 gen_rtx_ ## COND (CCmode, cc_reg, \
32410 const0_rtx), \
32411 cc_reg)
32412
32413 /* Shifts by register and shifts by constant are handled separately. */
32414 if (CONST_INT_P (amount))
32415 {
32416 /* We have a shift-by-constant. */
32417
32418 /* First, handle out-of-range shift amounts.
32419 In both cases we try to match the result that an ARM shift-by-register
32420 instruction would give. This helps reduce execution differences
32421 between optimization levels, but it won't stop other parts of the
32422 compiler doing different things. This is "undefined behavior" in
32423 any case. */
32424 if (INTVAL (amount) <= 0)
32425 emit_insn (gen_movdi (out, in));
32426 else if (INTVAL (amount) >= 64)
32427 {
32428 if (code == ASHIFTRT)
32429 {
32430 rtx const31_rtx = GEN_INT (31);
32431 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32432 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32433 }
32434 else
32435 emit_insn (gen_movdi (out, const0_rtx));
32436 }
32437
32438 /* Now handle valid shifts. */
32439 else if (INTVAL (amount) < 32)
32440 {
32441 /* Shifts by a constant less than 32. */
32442 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32443
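	  /* Illustrative example: a 64-bit LSHIFTRT by 10 emits, in effect,
	       out_low  = in_low >> 10;
	       out_low |= in_high << 22;
	       out_high = in_high >> 10;
	     plus the optional clearing insn below.  */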
32444 /* Clearing the out register in DImode first avoids lots
32445 of spilling and results in less stack usage.
32446 Later this redundant insn is completely removed.
32447 Do that only if "in" and "out" are different registers. */
32448 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32449 emit_insn (SET (out, const0_rtx));
32450 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32451 emit_insn (SET (out_down,
32452 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32453 out_down)));
32454 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32455 }
32456 else
32457 {
32458 /* Shifts by a constant greater than 31. */
32459 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32460
32461 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32462 emit_insn (SET (out, const0_rtx));
32463 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32464 if (code == ASHIFTRT)
32465 emit_insn (gen_ashrsi3 (out_up, in_up,
32466 GEN_INT (31)));
32467 else
32468 emit_insn (SET (out_up, const0_rtx));
32469 }
32470 }
32471 else
32472 {
32473 /* We have a shift-by-register. */
32474 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32475
32476 /* This alternative requires the scratch registers. */
32477 gcc_assert (scratch1 && REG_P (scratch1));
32478 gcc_assert (scratch2 && REG_P (scratch2));
32479
32480 /* We will need the values "amount-32" and "32-amount" later.
32481 Swapping them around now allows the later code to be more general. */
32482 switch (code)
32483 {
32484 case ASHIFT:
32485 emit_insn (SUB_32 (scratch1, amount));
32486 emit_insn (RSB_32 (scratch2, amount));
32487 break;
32488 case ASHIFTRT:
32489 emit_insn (RSB_32 (scratch1, amount));
32490 /* Also set CC = amount > 32. */
32491 emit_insn (SUB_S_32 (scratch2, amount));
32492 break;
32493 case LSHIFTRT:
32494 emit_insn (RSB_32 (scratch1, amount));
32495 emit_insn (SUB_32 (scratch2, amount));
32496 break;
32497 default:
32498 gcc_unreachable ();
32499 }
32500
32501 /* Emit code like this:
32502
32503 arithmetic-left:
32504 out_down = in_down << amount;
32505 out_down = (in_up << (amount - 32)) | out_down;
32506 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32507 out_up = in_up << amount;
32508
32509 arithmetic-right:
32510 out_down = in_down >> amount;
32511 out_down = (in_up << (32 - amount)) | out_down;
32512 if (amount >= 32)
32513 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32514 out_up = (signed)in_up >> amount;
32515
32516 logical-right:
32517 out_down = in_down >> amount;
32518 out_down = (in_up << (32 - amount)) | out_down;
32519 if (amount >= 32)
32520 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32521 out_up = (unsigned)in_up >> amount;
32522
32523 The ARM and Thumb2 variants are the same but implemented slightly
32524 differently. If this were only called during expand we could just
32525 use the Thumb2 case and let combine do the right thing, but this
32526 can also be called from post-reload splitters. */
32527
32528 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32529
32530 if (!TARGET_THUMB2)
32531 {
32532 /* Emit code for ARM mode. */
32533 emit_insn (SET (out_down,
32534 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32535 if (code == ASHIFTRT)
32536 {
32537 rtx_code_label *done_label = gen_label_rtx ();
32538 emit_jump_insn (BRANCH (LT, done_label));
32539 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32540 out_down)));
32541 emit_label (done_label);
32542 }
32543 else
32544 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32545 out_down)));
32546 }
32547 else
32548 {
32549 /* Emit code for Thumb2 mode.
32550 Thumb2 can't do shift and or in one insn. */
32551 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32552 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32553
32554 if (code == ASHIFTRT)
32555 {
32556 rtx_code_label *done_label = gen_label_rtx ();
32557 emit_jump_insn (BRANCH (LT, done_label));
32558 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32559 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32560 emit_label (done_label);
32561 }
32562 else
32563 {
32564 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32565 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32566 }
32567 }
32568
32569 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32570 }
32571
32572 #undef SUB_32
32573 #undef RSB_32
32574 #undef SUB_S_32
32575 #undef SET
32576 #undef SHIFT
32577 #undef LSHIFT
32578 #undef REV_LSHIFT
32579 #undef ORR
32580 #undef BRANCH
32581 }
32582
32583 /* Returns true if the pattern is a valid symbolic address, which is either a
32584 symbol_ref or (symbol_ref + addend).
32585
32586 According to the ARM ELF ABI, the initial addend of REL-type relocations
32587 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32588 literal field of the instruction as a 16-bit signed value in the range
32589 -32768 <= A < 32768.
32590
32591 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32592 unsigned range of 0 <= A < 256 as described in the AAELF32
32593 relocation handling documentation: REL-type relocations are encoded
32594 as unsigned in this case. */
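/* For example, (const (plus (symbol_ref "foo") (const_int 4))) is accepted,
   whereas an addend of 0x10000 is rejected because it cannot be represented
   in the 16-bit signed addend described above.  */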
32595
32596 bool
32597 arm_valid_symbolic_address_p (rtx addr)
32598 {
32599 rtx xop0, xop1 = NULL_RTX;
32600 rtx tmp = addr;
32601
32602 if (target_word_relocations)
32603 return false;
32604
32605 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32606 return true;
32607
32608 /* (const (plus: symbol_ref const_int)) */
32609 if (GET_CODE (addr) == CONST)
32610 tmp = XEXP (addr, 0);
32611
32612 if (GET_CODE (tmp) == PLUS)
32613 {
32614 xop0 = XEXP (tmp, 0);
32615 xop1 = XEXP (tmp, 1);
32616
32617 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32618 {
32619 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32620 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32621 else
32622 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32623 }
32624 }
32625
32626 return false;
32627 }
32628
32629 /* Return true if *COMPARISON is a valid comparison operation, putting
32630 the operands into a form that is valid for it. */
32631 bool
32632 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32633 {
32634 enum rtx_code code = GET_CODE (*comparison);
32635 int code_int;
32636 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32637 ? GET_MODE (*op2) : GET_MODE (*op1);
32638
32639 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32640
32641 if (code == UNEQ || code == LTGT)
32642 return false;
32643
32644 code_int = (int)code;
32645 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32646 PUT_CODE (*comparison, (enum rtx_code)code_int);
32647
32648 switch (mode)
32649 {
32650 case E_SImode:
32651 if (!arm_add_operand (*op1, mode))
32652 *op1 = force_reg (mode, *op1);
32653 if (!arm_add_operand (*op2, mode))
32654 *op2 = force_reg (mode, *op2);
32655 return true;
32656
32657 case E_DImode:
32658 /* gen_compare_reg() will sort out any invalid operands. */
32659 return true;
32660
32661 case E_HFmode:
32662 if (!TARGET_VFP_FP16INST)
32663 break;
32664 /* FP16 comparisons are done in SF mode. */
32665 mode = SFmode;
32666 *op1 = convert_to_mode (mode, *op1, 1);
32667 *op2 = convert_to_mode (mode, *op2, 1);
32668 /* Fall through. */
32669 case E_SFmode:
32670 case E_DFmode:
32671 if (!vfp_compare_operand (*op1, mode))
32672 *op1 = force_reg (mode, *op1);
32673 if (!vfp_compare_operand (*op2, mode))
32674 *op2 = force_reg (mode, *op2);
32675 return true;
32676 default:
32677 break;
32678 }
32679
32680 return false;
32681
32682 }
32683
32684 /* Maximum number of instructions to set block of memory. */
32685 static int
32686 arm_block_set_max_insns (void)
32687 {
32688 if (optimize_function_for_size_p (cfun))
32689 return 4;
32690 else
32691 return current_tune->max_insns_inline_memset;
32692 }
32693
32694 /* Return TRUE if it's profitable to set a block of memory in the
32695 non-vectorized case. VAL is the value to set the memory with.
32696 LENGTH is the number of bytes to set. ALIGN is the alignment of
32697 the destination memory in bytes. UNALIGNED_P is TRUE if we can
32698 only set the memory with instructions that meet the alignment
32699 requirements. USE_STRD_P is TRUE if we can use strd to set the
32700 memory. */
32701 static bool
32702 arm_block_set_non_vect_profit_p (rtx val,
32703 unsigned HOST_WIDE_INT length,
32704 unsigned HOST_WIDE_INT align,
32705 bool unaligned_p, bool use_strd_p)
32706 {
32707 int num = 0;
32708 /* For a leftover of 0-7 bytes, this table gives the minimum number of
32709 strb/strh/str instructions needed to store it. */
32710 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
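  /* E.g. 3 leftover bytes take an strh plus an strb (2 insns), while 7
     take an str, an strh and an strb (3 insns).  */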
32711
32712 if (unaligned_p)
32713 {
32714 num = arm_const_inline_cost (SET, val);
32715 num += length / align + length % align;
32716 }
32717 else if (use_strd_p)
32718 {
32719 num = arm_const_double_inline_cost (val);
32720 num += (length >> 3) + leftover[length & 7];
32721 }
32722 else
32723 {
32724 num = arm_const_inline_cost (SET, val);
32725 num += (length >> 2) + leftover[length & 3];
32726 }
32727
32728 /* We may be able to combine last pair STRH/STRB into a single STR
32729 by shifting one byte back. */
32730 if (unaligned_access && length > 3 && (length & 3) == 3)
32731 num--;
32732
32733 return (num <= arm_block_set_max_insns ());
32734 }
32735
32736 /* Return TRUE if it's profitable to set block of memory for
32737 vectorized case. LENGTH is the number of bytes to set.
32738 ALIGN is the alignment of destination memory in bytes.
32739 MODE is the vector mode used to set the memory. */
32740 static bool
32741 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32742 unsigned HOST_WIDE_INT align,
32743 machine_mode mode)
32744 {
32745 int num;
32746 bool unaligned_p = ((align & 3) != 0);
32747 unsigned int nelt = GET_MODE_NUNITS (mode);
32748
32749 /* Instruction loading constant value. */
32750 num = 1;
32751 /* Instructions storing the memory. */
32752 num += (length + nelt - 1) / nelt;
32753 /* Instructions adjusting the address expression. We only need to
32754 adjust it when the destination is word-aligned and the leftover
32755 bytes can only be stored with a misaligned store instruction. */
32756 if (!unaligned_p && (length & 3) != 0)
32757 num++;
32758
32759 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32760 if (!unaligned_p && mode == V16QImode)
32761 num--;
32762
32763 return (num <= arm_block_set_max_insns ());
32764 }
32765
32766 /* Set a block of memory using vectorization instructions for the
32767 unaligned case. We fill the first LENGTH bytes of the memory
32768 area starting from DSTBASE with byte constant VALUE. ALIGN is
32769 the alignment requirement of memory. Return TRUE if succeeded. */
32770 static bool
32771 arm_block_set_unaligned_vect (rtx dstbase,
32772 unsigned HOST_WIDE_INT length,
32773 unsigned HOST_WIDE_INT value,
32774 unsigned HOST_WIDE_INT align)
32775 {
32776 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32777 rtx dst, mem;
32778 rtx val_vec, reg;
32779 rtx (*gen_func) (rtx, rtx);
32780 machine_mode mode;
32781 unsigned HOST_WIDE_INT v = value;
32782 unsigned int offset = 0;
32783 gcc_assert ((align & 0x3) != 0);
32784 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32785 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32786 if (length >= nelt_v16)
32787 {
32788 mode = V16QImode;
32789 gen_func = gen_movmisalignv16qi;
32790 }
32791 else
32792 {
32793 mode = V8QImode;
32794 gen_func = gen_movmisalignv8qi;
32795 }
32796 nelt_mode = GET_MODE_NUNITS (mode);
32797 gcc_assert (length >= nelt_mode);
32798 /* Skip if it isn't profitable. */
32799 if (!arm_block_set_vect_profit_p (length, align, mode))
32800 return false;
32801
32802 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32803 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32804
32805 v = sext_hwi (v, BITS_PER_WORD);
32806
32807 reg = gen_reg_rtx (mode);
32808 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32809 /* Emit instruction loading the constant value. */
32810 emit_move_insn (reg, val_vec);
32811
32812 /* Handle nelt_mode bytes in a vector. */
32813 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32814 {
32815 emit_insn ((*gen_func) (mem, reg));
32816 if (i + 2 * nelt_mode <= length)
32817 {
32818 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32819 offset += nelt_mode;
32820 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32821 }
32822 }
32823
32824 /* If at least nelt_v8 bytes are left over, we must be in
32825 V16QImode. */
32826 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32827
32828 /* Handle (8, 16) bytes leftover. */
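  /* E.g. with LENGTH == 28 in V16QImode the loop above stored bytes 0-15;
     we then advance DST by 12 and issue one more (overlapping) misaligned
     V16QI store covering bytes 12-27.  */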
32829 if (i + nelt_v8 < length)
32830 {
32831 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32832 offset += length - i;
32833 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32834
32835 /* We are shifting bytes back, set the alignment accordingly. */
32836 if ((length & 1) != 0 && align >= 2)
32837 set_mem_align (mem, BITS_PER_UNIT);
32838
32839 emit_insn (gen_movmisalignv16qi (mem, reg));
32840 }
32841 /* Handle (0, 8] bytes leftover. */
32842 else if (i < length && i + nelt_v8 >= length)
32843 {
32844 if (mode == V16QImode)
32845 reg = gen_lowpart (V8QImode, reg);
32846
32847 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32848 + (nelt_mode - nelt_v8))));
32849 offset += (length - i) + (nelt_mode - nelt_v8);
32850 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32851
32852 /* We are shifting bytes back, set the alignment accordingly. */
32853 if ((length & 1) != 0 && align >= 2)
32854 set_mem_align (mem, BITS_PER_UNIT);
32855
32856 emit_insn (gen_movmisalignv8qi (mem, reg));
32857 }
32858
32859 return true;
32860 }
32861
32862 /* Set a block of memory using vectorization instructions for the
32863 aligned case. We fill the first LENGTH bytes of the memory area
32864 starting from DSTBASE with byte constant VALUE. ALIGN is the
32865 alignment requirement of memory. Return TRUE if succeeded. */
32866 static bool
32867 arm_block_set_aligned_vect (rtx dstbase,
32868 unsigned HOST_WIDE_INT length,
32869 unsigned HOST_WIDE_INT value,
32870 unsigned HOST_WIDE_INT align)
32871 {
32872 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32873 rtx dst, addr, mem;
32874 rtx val_vec, reg;
32875 machine_mode mode;
32876 unsigned int offset = 0;
32877
32878 gcc_assert ((align & 0x3) == 0);
32879 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32880 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32881 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32882 mode = V16QImode;
32883 else
32884 mode = V8QImode;
32885
32886 nelt_mode = GET_MODE_NUNITS (mode);
32887 gcc_assert (length >= nelt_mode);
32888 /* Skip if it isn't profitable. */
32889 if (!arm_block_set_vect_profit_p (length, align, mode))
32890 return false;
32891
32892 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32893
32894 reg = gen_reg_rtx (mode);
32895 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32896 /* Emit instruction loading the constant value. */
32897 emit_move_insn (reg, val_vec);
32898
32899 i = 0;
32900 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32901 if (mode == V16QImode)
32902 {
32903 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32904 emit_insn (gen_movmisalignv16qi (mem, reg));
32905 i += nelt_mode;
32906 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32907 if (i + nelt_v8 < length && i + nelt_v16 > length)
32908 {
32909 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32910 offset += length - nelt_mode;
32911 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32912 /* We are shifting bytes back, set the alignment accordingly. */
32913 if ((length & 0x3) == 0)
32914 set_mem_align (mem, BITS_PER_UNIT * 4);
32915 else if ((length & 0x1) == 0)
32916 set_mem_align (mem, BITS_PER_UNIT * 2);
32917 else
32918 set_mem_align (mem, BITS_PER_UNIT);
32919
32920 emit_insn (gen_movmisalignv16qi (mem, reg));
32921 return true;
32922 }
32923 /* Fall through for bytes leftover. */
32924 mode = V8QImode;
32925 nelt_mode = GET_MODE_NUNITS (mode);
32926 reg = gen_lowpart (V8QImode, reg);
32927 }
32928
32929 /* Handle 8 bytes in a vector. */
32930 for (; (i + nelt_mode <= length); i += nelt_mode)
32931 {
32932 addr = plus_constant (Pmode, dst, i);
32933 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32934 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32935 emit_move_insn (mem, reg);
32936 else
32937 emit_insn (gen_unaligned_storev8qi (mem, reg));
32938 }
32939
32940 /* Handle single word leftover by shifting 4 bytes back. We can
32941 use aligned access for this case. */
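  /* E.g. with LENGTH == 12 in V8QImode, bytes 0-7 were stored above; we
     emit one more V8QI store at offset 4, overlapping bytes 4-7, instead
     of a separate word store.  */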
32942 if (i + UNITS_PER_WORD == length)
32943 {
32944 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32945 offset += i - UNITS_PER_WORD;
32946 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32947 /* We are shifting 4 bytes back, set the alignment accordingly. */
32948 if (align > UNITS_PER_WORD)
32949 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32950
32951 emit_insn (gen_unaligned_storev8qi (mem, reg));
32952 }
32953 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32954 We have to use unaligned access for this case. */
32955 else if (i < length)
32956 {
32957 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32958 offset += length - nelt_mode;
32959 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32960 /* We are shifting bytes back, set the alignment accordingly. */
32961 if ((length & 1) == 0)
32962 set_mem_align (mem, BITS_PER_UNIT * 2);
32963 else
32964 set_mem_align (mem, BITS_PER_UNIT);
32965
32966 emit_insn (gen_movmisalignv8qi (mem, reg));
32967 }
32968
32969 return true;
32970 }
32971
32972 /* Set a block of memory using plain strh/strb instructions, using only
32973 the instructions permitted by ALIGN on the processor. We fill the
32974 first LENGTH bytes of the memory area starting from DSTBASE
32975 with byte constant VALUE. ALIGN is the alignment requirement
32976 of memory. */
32977 static bool
32978 arm_block_set_unaligned_non_vect (rtx dstbase,
32979 unsigned HOST_WIDE_INT length,
32980 unsigned HOST_WIDE_INT value,
32981 unsigned HOST_WIDE_INT align)
32982 {
32983 unsigned int i;
32984 rtx dst, addr, mem;
32985 rtx val_exp, val_reg, reg;
32986 machine_mode mode;
32987 HOST_WIDE_INT v = value;
32988
32989 gcc_assert (align == 1 || align == 2);
32990
32991 if (align == 2)
32992 v |= (value << BITS_PER_UNIT);
32993
32994 v = sext_hwi (v, BITS_PER_WORD);
32995 val_exp = GEN_INT (v);
32996 /* Skip if it isn't profitable. */
32997 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32998 align, true, false))
32999 return false;
33000
33001 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33002 mode = (align == 2 ? HImode : QImode);
33003 val_reg = force_reg (SImode, val_exp);
33004 reg = gen_lowpart (mode, val_reg);
33005
33006 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
33007 {
33008 addr = plus_constant (Pmode, dst, i);
33009 mem = adjust_automodify_address (dstbase, mode, addr, i);
33010 emit_move_insn (mem, reg);
33011 }
33012
33013 /* Handle single byte leftover. */
33014 if (i + 1 == length)
33015 {
33016 reg = gen_lowpart (QImode, val_reg);
33017 addr = plus_constant (Pmode, dst, i);
33018 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33019 emit_move_insn (mem, reg);
33020 i++;
33021 }
33022
33023 gcc_assert (i == length);
33024 return true;
33025 }
33026
33027 /* Set a block of memory using plain strd/str/strh/strb instructions,
33028 to permit unaligned stores on processors which support unaligned
33029 semantics for those instructions. We fill the first LENGTH bytes
33030 of the memory area starting from DSTBASE with byte constant VALUE.
33031 ALIGN is the alignment requirement of memory. */
33032 static bool
33033 arm_block_set_aligned_non_vect (rtx dstbase,
33034 unsigned HOST_WIDE_INT length,
33035 unsigned HOST_WIDE_INT value,
33036 unsigned HOST_WIDE_INT align)
33037 {
33038 unsigned int i;
33039 rtx dst, addr, mem;
33040 rtx val_exp, val_reg, reg;
33041 unsigned HOST_WIDE_INT v;
33042 bool use_strd_p;
33043
33044 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
33045 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
33046
33047 v = (value | (value << 8) | (value << 16) | (value << 24));
33048 if (length < UNITS_PER_WORD)
33049 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
33050
33051 if (use_strd_p)
33052 v |= (v << BITS_PER_WORD);
33053 else
33054 v = sext_hwi (v, BITS_PER_WORD);
33055
33056 val_exp = GEN_INT (v);
33057 /* Skip if it isn't profitable. */
33058 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33059 align, false, use_strd_p))
33060 {
33061 if (!use_strd_p)
33062 return false;
33063
33064 /* Try without strd. */
33065 v = (v >> BITS_PER_WORD);
33066 v = sext_hwi (v, BITS_PER_WORD);
33067 val_exp = GEN_INT (v);
33068 use_strd_p = false;
33069 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33070 align, false, use_strd_p))
33071 return false;
33072 }
33073
33074 i = 0;
33075 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33076 /* Handle double words using strd if possible. */
33077 if (use_strd_p)
33078 {
33079 val_reg = force_reg (DImode, val_exp);
33080 reg = val_reg;
33081 for (; (i + 8 <= length); i += 8)
33082 {
33083 addr = plus_constant (Pmode, dst, i);
33084 mem = adjust_automodify_address (dstbase, DImode, addr, i);
33085 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
33086 emit_move_insn (mem, reg);
33087 else
33088 emit_insn (gen_unaligned_storedi (mem, reg));
33089 }
33090 }
33091 else
33092 val_reg = force_reg (SImode, val_exp);
33093
33094 /* Handle words. */
33095 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
33096 for (; (i + 4 <= length); i += 4)
33097 {
33098 addr = plus_constant (Pmode, dst, i);
33099 mem = adjust_automodify_address (dstbase, SImode, addr, i);
33100 if ((align & 3) == 0)
33101 emit_move_insn (mem, reg);
33102 else
33103 emit_insn (gen_unaligned_storesi (mem, reg));
33104 }
33105
33106 /* Merge last pair of STRH and STRB into a STR if possible. */
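  /* E.g. with LENGTH == 7 and a word-aligned destination, the loop above
     stored bytes 0-3; when unaligned access is available we emit a single
     str at offset 3 covering bytes 3-6 instead of an strh plus an strb.  */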
33107 if (unaligned_access && i > 0 && (i + 3) == length)
33108 {
33109 addr = plus_constant (Pmode, dst, i - 1);
33110 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33111 /* We are shifting one byte back, set the alignment accordingly. */
33112 if ((align & 1) == 0)
33113 set_mem_align (mem, BITS_PER_UNIT);
33114
33115 /* Most likely this is an unaligned access, and we can't tell at
33116 compilation time. */
33117 emit_insn (gen_unaligned_storesi (mem, reg));
33118 return true;
33119 }
33120
33121 /* Handle half word leftover. */
33122 if (i + 2 <= length)
33123 {
33124 reg = gen_lowpart (HImode, val_reg);
33125 addr = plus_constant (Pmode, dst, i);
33126 mem = adjust_automodify_address (dstbase, HImode, addr, i);
33127 if ((align & 1) == 0)
33128 emit_move_insn (mem, reg);
33129 else
33130 emit_insn (gen_unaligned_storehi (mem, reg));
33131
33132 i += 2;
33133 }
33134
33135 /* Handle single byte leftover. */
33136 if (i + 1 == length)
33137 {
33138 reg = gen_lowpart (QImode, val_reg);
33139 addr = plus_constant (Pmode, dst, i);
33140 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33141 emit_move_insn (mem, reg);
33142 }
33143
33144 return true;
33145 }
33146
33147 /* Set a block of memory using vectorization instructions for both
33148 aligned and unaligned cases. We fill the first LENGTH bytes of
33149 the memory area starting from DSTBASE with byte constant VALUE.
33150 ALIGN is the alignment requirement of memory. */
33151 static bool
33152 arm_block_set_vect (rtx dstbase,
33153 unsigned HOST_WIDE_INT length,
33154 unsigned HOST_WIDE_INT value,
33155 unsigned HOST_WIDE_INT align)
33156 {
33157 /* Check whether we need to use unaligned store instruction. */
33158 if (((align & 3) != 0 || (length & 3) != 0)
33159 /* Check whether unaligned store instruction is available. */
33160 && (!unaligned_access || BYTES_BIG_ENDIAN))
33161 return false;
33162
33163 if ((align & 3) == 0)
33164 return arm_block_set_aligned_vect (dstbase, length, value, align);
33165 else
33166 return arm_block_set_unaligned_vect (dstbase, length, value, align);
33167 }
33168
33169 /* Expand a string store (memset) operation. First we try to do it
33170 using vectorization instructions, then try ARM unaligned access and
33171 double-word stores if profitable. OPERANDS[0] is the destination,
33172 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33173 initialize the memory with, OPERANDS[3] is the known alignment of
33174 the destination. */
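/* For instance, a 16-byte, word-aligned memset is typically expanded, when
   the current tuning prefers NEON for string operations, into one vector
   constant load plus a single 16-byte store; otherwise it falls back to the
   scalar str/strd paths below.  */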
33175 bool
33176 arm_gen_setmem (rtx *operands)
33177 {
33178 rtx dstbase = operands[0];
33179 unsigned HOST_WIDE_INT length;
33180 unsigned HOST_WIDE_INT value;
33181 unsigned HOST_WIDE_INT align;
33182
33183 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33184 return false;
33185
33186 length = UINTVAL (operands[1]);
33187 if (length > 64)
33188 return false;
33189
33190 value = (UINTVAL (operands[2]) & 0xFF);
33191 align = UINTVAL (operands[3]);
33192 if (TARGET_NEON && length >= 8
33193 && current_tune->string_ops_prefer_neon
33194 && arm_block_set_vect (dstbase, length, value, align))
33195 return true;
33196
33197 if (!unaligned_access && (align & 3) != 0)
33198 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33199
33200 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33201 }
33202
33203
33204 static bool
33205 arm_macro_fusion_p (void)
33206 {
33207 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33208 }
33209
33210 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33211 for MOVW / MOVT macro fusion. */
33212
33213 static bool
33214 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33215 {
33216 /* We are trying to fuse
33217 movw imm / movt imm
33218 instructions as a group that gets scheduled together. */
33219
33220 rtx set_dest = SET_DEST (curr_set);
33221
33222 if (GET_MODE (set_dest) != SImode)
33223 return false;
33224
33225 /* We are trying to match:
33226 prev (movw) == (set (reg r0) (const_int imm16))
33227 curr (movt) == (set (zero_extract (reg r0)
33228 (const_int 16)
33229 (const_int 16))
33230 (const_int imm16_1))
33231 or
33232 prev (movw) == (set (reg r1)
33233 (high (symbol_ref ("SYM"))))
33234 curr (movt) == (set (reg r0)
33235 (lo_sum (reg r1)
33236 (symbol_ref ("SYM")))) */
33237
33238 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33239 {
33240 if (CONST_INT_P (SET_SRC (curr_set))
33241 && CONST_INT_P (SET_SRC (prev_set))
33242 && REG_P (XEXP (set_dest, 0))
33243 && REG_P (SET_DEST (prev_set))
33244 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33245 return true;
33246
33247 }
33248 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33249 && REG_P (SET_DEST (curr_set))
33250 && REG_P (SET_DEST (prev_set))
33251 && GET_CODE (SET_SRC (prev_set)) == HIGH
33252 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33253 return true;
33254
33255 return false;
33256 }
33257
33258 static bool
33259 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33260 {
33261 rtx prev_set = single_set (prev);
33262 rtx curr_set = single_set (curr);
33263
33264 if (!prev_set
33265 || !curr_set)
33266 return false;
33267
33268 if (any_condjump_p (curr))
33269 return false;
33270
33271 if (!arm_macro_fusion_p ())
33272 return false;
33273
33274 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33275 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33276 return true;
33277
33278 return false;
33279 }
33280
33281 /* Return true iff the instruction fusion described by OP is enabled. */
33282 bool
33283 arm_fusion_enabled_p (tune_params::fuse_ops op)
33284 {
33285 return current_tune->fusible_ops & op;
33286 }
33287
33288 /* Return TRUE if return address signing mechanism is enabled. */
33289 bool
33290 arm_current_function_pac_enabled_p (void)
33291 {
33292 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33293 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33294 && !crtl->is_leaf));
33295 }
33296
33297 /* Raise an error if the current target arch is not bti compatible. */
33298 void aarch_bti_arch_check (void)
33299 {
33300 if (!arm_arch8m_main)
33301 error ("This architecture does not support branch protection instructions");
33302 }
33303
33304 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33305 bool
33306 aarch_bti_enabled (void)
33307 {
33308 return aarch_enable_bti != 0;
33309 }
33310
33311 /* Check if INSN is a BTI J insn. */
33312 bool
33313 aarch_bti_j_insn_p (rtx_insn *insn)
33314 {
33315 if (!insn || !INSN_P (insn))
33316 return false;
33317
33318 rtx pat = PATTERN (insn);
33319 return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33320 }
33321
33322 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33323 bool
33324 aarch_pac_insn_p (rtx x)
33325 {
33326 if (!x || !INSN_P (x))
33327 return false;
33328
33329 rtx pat = PATTERN (x);
33330
33331 if (GET_CODE (pat) == SET)
33332 {
33333 rtx tmp = XEXP (pat, 1);
33334 if (tmp
33335 && ((GET_CODE (tmp) == UNSPEC
33336 && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33337 || (GET_CODE (tmp) == UNSPEC_VOLATILE
33338 && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33339 return true;
33340 }
33341
33342 return false;
33343 }
33344
33345 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33346 For Arm, both of these map to a simple BTI instruction. */
33347
33348 rtx
33349 aarch_gen_bti_c (void)
33350 {
33351 return gen_bti_nop ();
33352 }
33353
33354 rtx
33355 aarch_gen_bti_j (void)
33356 {
33357 return gen_bti_nop ();
33358 }
33359
33360 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33361 scheduled for speculative execution. Reject the long-running division
33362 and square-root instructions. */
33363
33364 static bool
33365 arm_sched_can_speculate_insn (rtx_insn *insn)
33366 {
33367 switch (get_attr_type (insn))
33368 {
33369 case TYPE_SDIV:
33370 case TYPE_UDIV:
33371 case TYPE_FDIVS:
33372 case TYPE_FDIVD:
33373 case TYPE_FSQRTS:
33374 case TYPE_FSQRTD:
33375 case TYPE_NEON_FP_SQRT_S:
33376 case TYPE_NEON_FP_SQRT_D:
33377 case TYPE_NEON_FP_SQRT_S_Q:
33378 case TYPE_NEON_FP_SQRT_D_Q:
33379 case TYPE_NEON_FP_DIV_S:
33380 case TYPE_NEON_FP_DIV_D:
33381 case TYPE_NEON_FP_DIV_S_Q:
33382 case TYPE_NEON_FP_DIV_D_Q:
33383 return false;
33384 default:
33385 return true;
33386 }
33387 }
33388
33389 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33390
33391 static unsigned HOST_WIDE_INT
33392 arm_asan_shadow_offset (void)
33393 {
33394 return HOST_WIDE_INT_1U << 29;
33395 }
33396
33397
33398 /* This is a temporary fix for PR60655. Ideally we need
33399 to handle most of these cases in the generic part but
33400 currently we reject minus (..) (sym_ref). We try to
33401 ameliorate the case with minus (sym_ref1) (sym_ref2)
33402 where they are in the same section. */
33403
33404 static bool
33405 arm_const_not_ok_for_debug_p (rtx p)
33406 {
33407 tree decl_op0 = NULL;
33408 tree decl_op1 = NULL;
33409
33410 if (GET_CODE (p) == UNSPEC)
33411 return true;
33412 if (GET_CODE (p) == MINUS)
33413 {
33414 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33415 {
33416 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33417 if (decl_op1
33418 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33419 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33420 {
33421 if ((VAR_P (decl_op1)
33422 || TREE_CODE (decl_op1) == CONST_DECL)
33423 && (VAR_P (decl_op0)
33424 || TREE_CODE (decl_op0) == CONST_DECL))
33425 return (get_variable_section (decl_op1, false)
33426 != get_variable_section (decl_op0, false));
33427
33428 if (TREE_CODE (decl_op1) == LABEL_DECL
33429 && TREE_CODE (decl_op0) == LABEL_DECL)
33430 return (DECL_CONTEXT (decl_op1)
33431 != DECL_CONTEXT (decl_op0));
33432 }
33433
33434 return true;
33435 }
33436 }
33437
33438 return false;
33439 }
33440
33441 /* Return TRUE if X is a reference to a value in a constant pool. */
33442 extern bool
33443 arm_is_constant_pool_ref (rtx x)
33444 {
33445 return (MEM_P (x)
33446 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33447 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33448 }
33449
33450 /* Remember the last target of arm_set_current_function. */
33451 static GTY(()) tree arm_previous_fndecl;
33452
33453 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33454
33455 void
33456 save_restore_target_globals (tree new_tree)
33457 {
33458 /* If we have a previous state, use it. */
33459 if (TREE_TARGET_GLOBALS (new_tree))
33460 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33461 else if (new_tree == target_option_default_node)
33462 restore_target_globals (&default_target_globals);
33463 else
33464 {
33465 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33466 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33467 }
33468
33469 arm_option_params_internal ();
33470 }
33471
33472 /* Invalidate arm_previous_fndecl. */
33473
33474 void
33475 arm_reset_previous_fndecl (void)
33476 {
33477 arm_previous_fndecl = NULL_TREE;
33478 }
33479
33480 /* Establish appropriate back-end context for processing the function
33481 FNDECL. The argument might be NULL to indicate processing at top
33482 level, outside of any function scope. */
33483
33484 static void
33485 arm_set_current_function (tree fndecl)
33486 {
33487 if (!fndecl || fndecl == arm_previous_fndecl)
33488 return;
33489
33490 tree old_tree = (arm_previous_fndecl
33491 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33492 : NULL_TREE);
33493
33494 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33495
33496 /* If current function has no attributes but previous one did,
33497 use the default node. */
33498 if (! new_tree && old_tree)
33499 new_tree = target_option_default_node;
33500
33501 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC
33502 pop to the default have been handled by save_restore_target_globals
33503 from arm_pragma_target_parse. */
33504 if (old_tree == new_tree)
33505 return;
33506
33507 arm_previous_fndecl = fndecl;
33508
33509 /* First set the target options. */
33510 cl_target_option_restore (&global_options, &global_options_set,
33511 TREE_TARGET_OPTION (new_tree));
33512
33513 save_restore_target_globals (new_tree);
33514
33515 arm_override_options_after_change_1 (&global_options, &global_options_set);
33516 }
33517
33518 /* Implement TARGET_OPTION_PRINT. */
33519
33520 static void
33521 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33522 {
33523 int flags = ptr->x_target_flags;
33524 const char *fpu_name;
33525
33526 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33527 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33528
33529 fprintf (file, "%*sselected isa %s\n", indent, "",
33530 TARGET_THUMB2_P (flags) ? "thumb2" :
33531 TARGET_THUMB_P (flags) ? "thumb1" :
33532 "arm");
33533
33534 if (ptr->x_arm_arch_string)
33535 fprintf (file, "%*sselected architecture %s\n", indent, "",
33536 ptr->x_arm_arch_string);
33537
33538 if (ptr->x_arm_cpu_string)
33539 fprintf (file, "%*sselected CPU %s\n", indent, "",
33540 ptr->x_arm_cpu_string);
33541
33542 if (ptr->x_arm_tune_string)
33543 fprintf (file, "%*sselected tune %s\n", indent, "",
33544 ptr->x_arm_tune_string);
33545
33546 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33547 }
33548
33549 /* Hook to determine if one function can safely inline another. */
33550
33551 static bool
33552 arm_can_inline_p (tree caller, tree callee)
33553 {
33554 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33555 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33556 bool can_inline = true;
33557
33558 struct cl_target_option *caller_opts
33559 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33560 : target_option_default_node);
33561
33562 struct cl_target_option *callee_opts
33563 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33564 : target_option_default_node);
33565
33566 if (callee_opts == caller_opts)
33567 return true;
33568
33569 /* Callee's ISA features should be a subset of the caller's. */
33570 struct arm_build_target caller_target;
33571 struct arm_build_target callee_target;
33572 caller_target.isa = sbitmap_alloc (isa_num_bits);
33573 callee_target.isa = sbitmap_alloc (isa_num_bits);
33574
33575 arm_configure_build_target (&caller_target, caller_opts, false);
33576 arm_configure_build_target (&callee_target, callee_opts, false);
33577 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33578 can_inline = false;
33579
33580 sbitmap_free (caller_target.isa);
33581 sbitmap_free (callee_target.isa);
33582
33583 /* OK to inline between different modes.
33584 A function with mode-specific instructions, e.g. using asm,
33585 must be explicitly protected with noinline. */
33586 return can_inline;
33587 }
33588
33589 /* Hook to fix function's alignment affected by target attribute. */
33590
33591 static void
33592 arm_relayout_function (tree fndecl)
33593 {
33594 if (DECL_USER_ALIGN (fndecl))
33595 return;
33596
33597 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33598
33599 if (!callee_tree)
33600 callee_tree = target_option_default_node;
33601
33602 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33603 SET_DECL_ALIGN
33604 (fndecl,
33605 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33606 }
33607
33608 /* Inner function to process the attribute((target(...))): take an
33609 argument and set the current options from it. If the argument is a
33610 list, recursively go over the list. */
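/* E.g. __attribute__((target("thumb,fpu=neon-vfpv4"))) is handled one
   comma-separated token at a time by the strtok loop below.  */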
33611
33612 static bool
33613 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33614 {
33615 if (TREE_CODE (args) == TREE_LIST)
33616 {
33617 bool ret = true;
33618
33619 for (; args; args = TREE_CHAIN (args))
33620 if (TREE_VALUE (args)
33621 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33622 ret = false;
33623 return ret;
33624 }
33625
33626 else if (TREE_CODE (args) != STRING_CST)
33627 {
33628 error ("attribute %<target%> argument not a string");
33629 return false;
33630 }
33631
33632 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33633 char *q;
33634
33635 while ((q = strtok (argstr, ",")) != NULL)
33636 {
33637 argstr = NULL;
33638 if (!strcmp (q, "thumb"))
33639 {
33640 opts->x_target_flags |= MASK_THUMB;
33641 if (TARGET_FDPIC && !arm_arch_thumb2)
33642 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33643 }
33644
33645 else if (!strcmp (q, "arm"))
33646 opts->x_target_flags &= ~MASK_THUMB;
33647
33648 else if (!strcmp (q, "general-regs-only"))
33649 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33650
33651 else if (startswith (q, "fpu="))
33652 {
33653 int fpu_index;
33654 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33655 &fpu_index, CL_TARGET))
33656 {
33657 error ("invalid fpu for target attribute or pragma %qs", q);
33658 return false;
33659 }
33660 if (fpu_index == TARGET_FPU_auto)
33661 {
33662 /* This doesn't really make sense until we support
33663 general dynamic selection of the architecture and all
33664 sub-features. */
33665 sorry ("auto fpu selection not currently permitted here");
33666 return false;
33667 }
33668 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33669 }
33670 else if (startswith (q, "arch="))
33671 {
33672 char *arch = q + 5;
33673 const arch_option *arm_selected_arch
33674 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33675
33676 if (!arm_selected_arch)
33677 {
33678 error ("invalid architecture for target attribute or pragma %qs",
33679 q);
33680 return false;
33681 }
33682
33683 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33684 }
33685 else if (q[0] == '+')
33686 {
33687 opts->x_arm_arch_string
33688 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33689 }
33690 else
33691 {
33692 error ("unknown target attribute or pragma %qs", q);
33693 return false;
33694 }
33695 }
33696
33697 return true;
33698 }
33699
33700 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33701
33702 tree
33703 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33704 struct gcc_options *opts_set)
33705 {
33706 struct cl_target_option cl_opts;
33707
33708 if (!arm_valid_target_attribute_rec (args, opts))
33709 return NULL_TREE;
33710
33711 cl_target_option_save (&cl_opts, opts, opts_set);
33712 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33713 arm_option_check_internal (opts);
33714 /* Do any overrides, such as global options arch=xxx.
33715 We do this since arm_active_target was overridden. */
33716 arm_option_reconfigure_globals ();
33717 arm_options_perform_arch_sanity_checks ();
33718 arm_option_override_internal (opts, opts_set);
33719
33720 return build_target_option_node (opts, opts_set);
33721 }
33722
33723 static void
33724 add_attribute (const char * mode, tree *attributes)
33725 {
33726 size_t len = strlen (mode);
33727 tree value = build_string (len, mode);
33728
33729 TREE_TYPE (value) = build_array_type (char_type_node,
33730 build_index_type (size_int (len)));
33731
33732 *attributes = tree_cons (get_identifier ("target"),
33733 build_tree_list (NULL_TREE, value),
33734 *attributes);
33735 }
33736
33737 /* For testing. Alternately insert thumb and arm modes on functions. */
33738
33739 static void
33740 arm_insert_attributes (tree fndecl, tree * attributes)
33741 {
33742 const char *mode;
33743
33744 if (! TARGET_FLIP_THUMB)
33745 return;
33746
33747 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33748 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33749 return;
33750
33751 /* Nested definitions must inherit mode. */
33752 if (current_function_decl)
33753 {
33754 mode = TARGET_THUMB ? "thumb" : "arm";
33755 add_attribute (mode, attributes);
33756 return;
33757 }
33758
33759 /* If there is already a setting don't change it. */
33760 if (lookup_attribute ("target", *attributes) != NULL)
33761 return;
33762
33763 mode = thumb_flipper ? "thumb" : "arm";
33764 add_attribute (mode, attributes);
33765
33766 thumb_flipper = !thumb_flipper;
33767 }
33768
33769 /* Hook to validate attribute((target("string"))). */
33770
33771 static bool
33772 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33773 tree args, int ARG_UNUSED (flags))
33774 {
33775 bool ret = true;
33776 struct gcc_options func_options, func_options_set;
33777 tree cur_tree, new_optimize;
33778 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33779
33780 /* Get the optimization options of the current function. */
33781 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33782
33783 /* If the function changed the optimization levels as well as setting target
33784 options, start with the optimizations specified. */
33785 if (!func_optimize)
33786 func_optimize = optimization_default_node;
33787
33788 /* Init func_options. */
33789 memset (&func_options, 0, sizeof (func_options));
33790 init_options_struct (&func_options, NULL);
33791 lang_hooks.init_options_struct (&func_options);
33792 memset (&func_options_set, 0, sizeof (func_options_set));
33793
33794 /* Initialize func_options to the defaults. */
33795 cl_optimization_restore (&func_options, &func_options_set,
33796 TREE_OPTIMIZATION (func_optimize));
33797
33798 cl_target_option_restore (&func_options, &func_options_set,
33799 TREE_TARGET_OPTION (target_option_default_node));
33800
33801 /* Set func_options flags with new target mode. */
33802 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33803 &func_options_set);
33804
33805 if (cur_tree == NULL_TREE)
33806 ret = false;
33807
33808 new_optimize = build_optimization_node (&func_options, &func_options_set);
33809
33810 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33811
33812 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33813
33814 return ret;
33815 }
33816
33817 /* Match an ISA feature bitmap to a named FPU. We always use the
33818 first entry that exactly matches the feature set, so that we
33819 effectively canonicalize the FPU name for the assembler. */
33820 static const char*
33821 arm_identify_fpu_from_isa (sbitmap isa)
33822 {
33823 auto_sbitmap fpubits (isa_num_bits);
33824 auto_sbitmap cand_fpubits (isa_num_bits);
33825
33826 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33827
33828 /* If there are no ISA feature bits relating to the FPU, we must be
33829 doing soft-float. */
33830 if (bitmap_empty_p (fpubits))
33831 return "softvfp";
33832
33833 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33834 {
33835 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33836 if (bitmap_equal_p (fpubits, cand_fpubits))
33837 return all_fpus[i].name;
33838 }
33839 /* We must find an entry, or things have gone wrong. */
33840 gcc_unreachable ();
33841 }
33842
33843 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33844 by the function fndecl. */
33845 void
33846 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33847 {
33848 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33849
33850 struct cl_target_option *targ_options;
33851 if (target_parts)
33852 targ_options = TREE_TARGET_OPTION (target_parts);
33853 else
33854 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33855 gcc_assert (targ_options);
33856
33857 arm_print_asm_arch_directives (stream, targ_options);
33858
33859 fprintf (stream, "\t.syntax unified\n");
33860
33861 if (TARGET_THUMB)
33862 {
33863 if (is_called_in_ARM_mode (decl)
33864 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33865 && cfun->is_thunk))
33866 fprintf (stream, "\t.code 32\n");
33867 else if (TARGET_THUMB1)
33868 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33869 else
33870 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33871 }
33872 else
33873 fprintf (stream, "\t.arm\n");
33874
33875 if (TARGET_POKE_FUNCTION_NAME)
33876 arm_poke_function_name (stream, (const char *) name);
33877 }
33878
33879 /* If MEM is in the form of [base+offset], extract the two parts
33880 of address and set to BASE and OFFSET, otherwise return false
33881 after clearing BASE and OFFSET. */
33882
33883 static bool
33884 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33885 {
33886 rtx addr;
33887
33888 gcc_assert (MEM_P (mem));
33889
33890 addr = XEXP (mem, 0);
33891
33892 /* Strip off const from addresses like (const (addr)). */
33893 if (GET_CODE (addr) == CONST)
33894 addr = XEXP (addr, 0);
33895
33896 if (REG_P (addr))
33897 {
33898 *base = addr;
33899 *offset = const0_rtx;
33900 return true;
33901 }
33902
33903 if (GET_CODE (addr) == PLUS
33904 && GET_CODE (XEXP (addr, 0)) == REG
33905 && CONST_INT_P (XEXP (addr, 1)))
33906 {
33907 *base = XEXP (addr, 0);
33908 *offset = XEXP (addr, 1);
33909 return true;
33910 }
33911
33912 *base = NULL_RTX;
33913 *offset = NULL_RTX;
33914
33915 return false;
33916 }
33917
33918 /* If INSN is a load or store whose address has the form [base+offset],
33919 extract the two parts into BASE and OFFSET. IS_LOAD is set to TRUE
33920 if it's a load. Return TRUE if INSN is such an instruction,
33921 otherwise return FALSE. */
33922
33923 static bool
33924 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33925 {
33926 rtx x, dest, src;
33927
33928 gcc_assert (INSN_P (insn));
33929 x = PATTERN (insn);
33930 if (GET_CODE (x) != SET)
33931 return false;
33932
33933 src = SET_SRC (x);
33934 dest = SET_DEST (x);
33935 if (REG_P (src) && MEM_P (dest))
33936 {
33937 *is_load = false;
33938 extract_base_offset_in_addr (dest, base, offset);
33939 }
33940 else if (MEM_P (src) && REG_P (dest))
33941 {
33942 *is_load = true;
33943 extract_base_offset_in_addr (src, base, offset);
33944 }
33945 else
33946 return false;
33947
33948 return (*base != NULL_RTX && *offset != NULL_RTX);
33949 }
33950
33951 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33952
33953 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33954 and PRI are only calculated for these instructions.  For other instructions,
33955 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
33956 instruction fusion can be supported by returning different priorities.
33957
33958 It's important that irrelevant instructions get the largest FUSION_PRI. */
33959
33960 static void
33961 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33962 int *fusion_pri, int *pri)
33963 {
33964 int tmp, off_val;
33965 bool is_load;
33966 rtx base, offset;
33967
33968 gcc_assert (INSN_P (insn));
33969
33970 tmp = max_pri - 1;
33971 if (!fusion_load_store (insn, &base, &offset, &is_load))
33972 {
33973 *pri = tmp;
33974 *fusion_pri = tmp;
33975 return;
33976 }
33977
33978 /* Load goes first. */
33979 if (is_load)
33980 *fusion_pri = tmp - 1;
33981 else
33982 *fusion_pri = tmp - 2;
33983
33984 tmp /= 2;
33985
33986 /* INSN with smaller base register goes first. */
33987 tmp -= ((REGNO (base) & 0xff) << 20);
33988
33989 /* INSN with smaller offset goes first. */
33990 off_val = (int)(INTVAL (offset));
33991 if (off_val >= 0)
33992 tmp -= (off_val & 0xfffff);
33993 else
33994 tmp += ((- off_val) & 0xfffff);
33995
33996 *pri = tmp;
33997 return;
33998 }
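
/* Worked example (a sketch): two loads ldr r0, [r1, #4] and ldr r2, [r1, #8]
   both get FUSION_PRI == MAX_PRI - 2, while their PRI values differ only in
   the offset term, so the scheduler keeps the pair together and places the
   smaller offset first.  */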
33999
34000
34001 /* Construct and return a PARALLEL RTX vector with elements numbering the
34002 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
34003 the vector - from the perspective of the architecture. This does not
34004 line up with GCC's perspective on lane numbers, so we end up with
34005 different masks depending on our target endian-ness. The diagram
34006 below may help. We must draw the distinction when building masks
34007 which select one half of the vector.  An instruction selecting
34008 architectural low-lanes for a big-endian target must be described using
34009 a mask selecting GCC high-lanes.
34010
34011 Big-Endian Little-Endian
34012
34013 GCC 0 1 2 3 3 2 1 0
34014 | x | x | x | x | | x | x | x | x |
34015 Architecture 3 2 1 0 3 2 1 0
34016
34017 Low Mask: { 2, 3 } { 0, 1 }
34018 High Mask: { 0, 1 } { 2, 3 }
34019 */
34020
34021 rtx
34022 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
34023 {
34024 int nunits = GET_MODE_NUNITS (mode);
34025 rtvec v = rtvec_alloc (nunits / 2);
34026 int high_base = nunits / 2;
34027 int low_base = 0;
34028 int base;
34029 rtx t1;
34030 int i;
34031
34032 if (BYTES_BIG_ENDIAN)
34033 base = high ? low_base : high_base;
34034 else
34035 base = high ? high_base : low_base;
34036
34037 for (i = 0; i < nunits / 2; i++)
34038 RTVEC_ELT (v, i) = GEN_INT (base + i);
34039
34040 t1 = gen_rtx_PARALLEL (mode, v);
34041 return t1;
34042 }
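
/* For example (illustrative), for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   High Mask row in the diagram above.  */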
34043
34044 /* Check OP for validity as a PARALLEL RTX vector with elements
34045 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
34046 from the perspective of the architecture. See the diagram above
34047 arm_simd_vect_par_cnst_half for more details. */
34048
34049 bool
34050 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
34051 bool high)
34052 {
34053 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
34054 HOST_WIDE_INT count_op = XVECLEN (op, 0);
34055 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
34056 int i = 0;
34057
34058 if (!VECTOR_MODE_P (mode))
34059 return false;
34060
34061 if (count_op != count_ideal)
34062 return false;
34063
34064 for (i = 0; i < count_ideal; i++)
34065 {
34066 rtx elt_op = XVECEXP (op, 0, i);
34067 rtx elt_ideal = XVECEXP (ideal, 0, i);
34068
34069 if (!CONST_INT_P (elt_op)
34070 || INTVAL (elt_ideal) != INTVAL (elt_op))
34071 return false;
34072 }
34073 return true;
34074 }
34075
34076 /* Can output mi_thunk for all cases except for non-zero vcall_offset
34077 in Thumb1. */
34078 static bool
34079 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
34080 const_tree)
34081 {
34082 /* For now, we punt and do not handle this for TARGET_THUMB1. */
34083 if (vcall_offset && TARGET_THUMB1)
34084 return false;
34085
34086 /* Otherwise ok. */
34087 return true;
34088 }
34089
34090 /* Generate RTL for a conditional branch with rtx comparison CODE in
34091 mode CC_MODE. The destination of the unlikely conditional branch
34092 is LABEL_REF. */
34093
34094 void
34095 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
34096 rtx label_ref)
34097 {
34098 rtx x;
34099 x = gen_rtx_fmt_ee (code, VOIDmode,
34100 gen_rtx_REG (cc_mode, CC_REGNUM),
34101 const0_rtx);
34102
34103 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
34104 gen_rtx_LABEL_REF (VOIDmode, label_ref),
34105 pc_rtx);
34106 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
34107 }
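
/* The jump emitted above has the usual conditional-branch shape (sketch):
     (set (pc) (if_then_else (CODE (reg:CC_MODE CC_REGNUM) (const_int 0))
                             (label_ref LABEL_REF)
                             (pc)))
   and goes through emit_unlikely_jump, so the branch is marked as unlikely
   to be taken.  */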
34108
34109 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34110
34111 For pure-code sections there is no letter code for this attribute, so
34112 output all the section flags numerically when this is needed. */
34113
34114 static bool
34115 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
34116 {
34117
34118 if (flags & SECTION_ARM_PURECODE)
34119 {
34120 *num = 0x20000000;
34121
34122 if (!(flags & SECTION_DEBUG))
34123 *num |= 0x2;
34124 if (flags & SECTION_EXCLUDE)
34125 *num |= 0x80000000;
34126 if (flags & SECTION_WRITE)
34127 *num |= 0x1;
34128 if (flags & SECTION_CODE)
34129 *num |= 0x4;
34130 if (flags & SECTION_MERGE)
34131 *num |= 0x10;
34132 if (flags & SECTION_STRINGS)
34133 *num |= 0x20;
34134 if (flags & SECTION_TLS)
34135 *num |= 0x400;
34136 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
34137 *num |= 0x200;
34138
34139 return true;
34140 }
34141
34142 return false;
34143 }
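
/* For instance (a sketch using the bit values above), a pure-code executable
   section with SECTION_CODE set and SECTION_DEBUG clear is given the numeric
   flags 0x20000000 | 0x4 | 0x2 = 0x20000006, which is emitted in place of the
   usual letter string.  */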
34144
34145 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34146
34147 If pure-code is passed as an option, make sure all functions are in
34148 sections that have the SHF_ARM_PURECODE attribute. */
34149
34150 static section *
34151 arm_function_section (tree decl, enum node_frequency freq,
34152 bool startup, bool exit)
34153 {
34154 const char * section_name;
34155 section * sec;
34156
34157 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
34158 return default_function_section (decl, freq, startup, exit);
34159
34160 if (!target_pure_code)
34161 return default_function_section (decl, freq, startup, exit);
34162
34163
34164 section_name = DECL_SECTION_NAME (decl);
34165
34166 /* If a function is not in a named section then it falls under the 'default'
34167 text section, also known as '.text'. We can preserve previous behavior as
34168 the default text section already has the SHF_ARM_PURECODE section
34169 attribute. */
34170 if (!section_name)
34171 {
34172 section *default_sec = default_function_section (decl, freq, startup,
34173 exit);
34174
34175 /* If default_sec is not null, then it must be a special section like for
34176 example .text.startup. We set the pure-code attribute and return the
34177 same section to preserve existing behavior. */
34178 if (default_sec)
34179 default_sec->common.flags |= SECTION_ARM_PURECODE;
34180 return default_sec;
34181 }
34182
34183 /* Otherwise look whether a section has already been created with
34184 'section_name'. */
34185 sec = get_named_section (decl, section_name, 0);
34186 if (!sec)
34187 /* If that is not the case, passing NULL as the section's name to
34188 'get_named_section' will create a section with the declaration's
34189 section name. */
34190 sec = get_named_section (decl, NULL, 0);
34191
34192 /* Set the SHF_ARM_PURECODE attribute. */
34193 sec->common.flags |= SECTION_ARM_PURECODE;
34194
34195 return sec;
34196 }
34197
34198 /* Implements the TARGET_SECTION_FLAGS hook.
34199
34200 If DECL is a function declaration and pure-code is passed as an option
34201 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
34202 section's name and RELOC indicates whether the declaration's initializer may
34203 contain runtime relocations. */
34204
34205 static unsigned int
34206 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
34207 {
34208 unsigned int flags = default_section_type_flags (decl, name, reloc);
34209
34210 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
34211 flags |= SECTION_ARM_PURECODE;
34212
34213 return flags;
34214 }
34215
34216 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34217
34218 static void
34219 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
34220 rtx op0, rtx op1,
34221 rtx *quot_p, rtx *rem_p)
34222 {
34223 if (mode == SImode)
34224 gcc_assert (!TARGET_IDIV);
34225
34226 scalar_int_mode libval_mode
34227 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
34228
34229 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34230 libval_mode, op0, mode, op1, mode);
34231
34232 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34233 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34234 GET_MODE_SIZE (mode));
34235
34236 gcc_assert (quotient);
34237 gcc_assert (remainder);
34238
34239 *quot_p = quotient;
34240 *rem_p = remainder;
34241 }
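
/* Illustrative sketch for SImode: the call (e.g. to __aeabi_idivmod) is
   modelled as returning a single DImode value; the subreg at byte offset 0
   extracts the quotient and the subreg at offset GET_MODE_SIZE (SImode) the
   remainder, matching the AEABI convention of returning the pair in
   consecutive registers.  */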
34242
34243 /* This function checks for the availability of the coprocessor builtin passed
34244 in BUILTIN for the current target. Returns true if it is available and
34245 false otherwise.  If a BUILTIN is passed for which this function has not
34246 been implemented, it will abort via gcc_unreachable. */
34247
34248 bool
34249 arm_coproc_builtin_available (enum unspecv builtin)
34250 {
34251 /* None of these builtins are available in Thumb mode if the target only
34252 supports Thumb-1. */
34253 if (TARGET_THUMB1)
34254 return false;
34255
34256 switch (builtin)
34257 {
34258 case VUNSPEC_CDP:
34259 case VUNSPEC_LDC:
34260 case VUNSPEC_LDCL:
34261 case VUNSPEC_STC:
34262 case VUNSPEC_STCL:
34263 case VUNSPEC_MCR:
34264 case VUNSPEC_MRC:
34265 if (arm_arch4)
34266 return true;
34267 break;
34268 case VUNSPEC_CDP2:
34269 case VUNSPEC_LDC2:
34270 case VUNSPEC_LDC2L:
34271 case VUNSPEC_STC2:
34272 case VUNSPEC_STC2L:
34273 case VUNSPEC_MCR2:
34274 case VUNSPEC_MRC2:
34275 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34276 ARMv8-{A,M}. */
34277 if (arm_arch5t)
34278 return true;
34279 break;
34280 case VUNSPEC_MCRR:
34281 case VUNSPEC_MRRC:
34282 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34283 ARMv8-{A,M}. */
34284 if (arm_arch6 || arm_arch5te)
34285 return true;
34286 break;
34287 case VUNSPEC_MCRR2:
34288 case VUNSPEC_MRRC2:
34289 if (arm_arch6)
34290 return true;
34291 break;
34292 default:
34293 gcc_unreachable ();
34294 }
34295 return false;
34296 }
34297
34298 /* This function returns true if OP is a valid memory operand for the ldc and
34299 stc coprocessor instructions and false otherwise. */
34300
34301 bool
34302 arm_coproc_ldc_stc_legitimate_address (rtx op)
34303 {
34304 HOST_WIDE_INT range;
34305 /* Has to be a memory operand. */
34306 if (!MEM_P (op))
34307 return false;
34308
34309 op = XEXP (op, 0);
34310
34311 /* We accept registers. */
34312 if (REG_P (op))
34313 return true;
34314
34315 switch (GET_CODE (op))
34316 {
34317 case PLUS:
34318 {
34319 /* Or registers with an offset. */
34320 if (!REG_P (XEXP (op, 0)))
34321 return false;
34322
34323 op = XEXP (op, 1);
34324
34325 /* The offset must be an immediate though. */
34326 if (!CONST_INT_P (op))
34327 return false;
34328
34329 range = INTVAL (op);
34330
34331 /* Within the range of [-1020,1020]. */
34332 if (!IN_RANGE (range, -1020, 1020))
34333 return false;
34334
34335 /* And a multiple of 4. */
34336 return (range % 4) == 0;
34337 }
34338 case PRE_INC:
34339 case POST_INC:
34340 case PRE_DEC:
34341 case POST_DEC:
34342 return REG_P (XEXP (op, 0));
34343 default:
34344 gcc_unreachable ();
34345 }
34346 return false;
34347 }
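
/* A few accepted and rejected forms, for illustration: [r3], [r3, #8] and
   [r3, #-1020] are valid, as are pre/post increment and decrement of a plain
   register; [r3, #2] is rejected because the offset is not a multiple of 4,
   and [r3, #1024] because it is outside [-1020, 1020].  */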
34348
34349 /* Return the diagnostic message string if conversion from FROMTYPE to
34350 TOTYPE is not allowed, NULL otherwise. */
34351
34352 static const char *
34353 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34354 {
34355 if (element_mode (fromtype) != element_mode (totype))
34356 {
34357 /* Do not allow conversions to/from BFmode scalar types. */
34358 if (TYPE_MODE (fromtype) == BFmode)
34359 return N_("invalid conversion from type %<bfloat16_t%>");
34360 if (TYPE_MODE (totype) == BFmode)
34361 return N_("invalid conversion to type %<bfloat16_t%>");
34362 }
34363
34364 /* Conversion allowed. */
34365 return NULL;
34366 }
34367
34368 /* Return the diagnostic message string if the unary operation OP is
34369 not permitted on TYPE, NULL otherwise. */
34370
34371 static const char *
34372 arm_invalid_unary_op (int op, const_tree type)
34373 {
34374 /* Reject all single-operand operations on BFmode except for &. */
34375 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34376 return N_("operation not permitted on type %<bfloat16_t%>");
34377
34378 /* Operation allowed. */
34379 return NULL;
34380 }
34381
34382 /* Return the diagnostic message string if the binary operation OP is
34383 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34384
34385 static const char *
34386 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34387 const_tree type2)
34388 {
34389 /* Reject all 2-operand operations on BFmode. */
34390 if (element_mode (type1) == BFmode
34391 || element_mode (type2) == BFmode)
34392 return N_("operation not permitted on type %<bfloat16_t%>");
34393
34394 /* Operation allowed. */
34395 return NULL;
34396 }
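
/* Illustrative examples of source rejected by the three hooks above,
   assuming bfloat16_t operands:
     bfloat16_t x, y;
     float f = x;    // arm_invalid_conversion
     x = -y;         // arm_invalid_unary_op
     x = x + y;      // arm_invalid_binary_op
   Taking the address, &x, remains valid via the ADDR_EXPR exception.  */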
34397
34398 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34399
34400 In VFPv1, VFP registers could only be accessed in the mode they were
34401 set, so subregs would be invalid there. However, we don't support
34402 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34403
34404 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34405 VFP registers in little-endian order. We can't describe that accurately to
34406 GCC, so avoid taking subregs of such values.
34407
34408 The only exception is going from a 128-bit to a 64-bit type. In that
34409 case the data layout happens to be consistent for big-endian, so we
34410 explicitly allow that case. */
34411
34412 static bool
34413 arm_can_change_mode_class (machine_mode from, machine_mode to,
34414 reg_class_t rclass)
34415 {
34416 if (TARGET_BIG_END
34417 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34418 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34419 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34420 && reg_classes_intersect_p (VFP_REGS, rclass))
34421 return false;
34422 return true;
34423 }
34424
34425 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34426 strcpy from constants will be faster. */
34427
34428 static HOST_WIDE_INT
34429 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34430 {
34431 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34432 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34433 return MAX (align, BITS_PER_WORD * factor);
34434 return align;
34435 }
34436
34437 /* Emit a speculation barrier on target architectures that do not have
34438 DSB/ISB directly. Such systems probably don't need a barrier
34439 themselves, but if the code is ever run on a later architecture, it
34440 might become a problem. */
34441 void
34442 arm_emit_speculation_barrier_function ()
34443 {
34444 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34445 }
34446
34447 /* Have we recorded an explicit access to the Q bit of APSR?  */
34448 bool
34449 arm_q_bit_access (void)
34450 {
34451 if (cfun && cfun->decl)
34452 return lookup_attribute ("acle qbit",
34453 DECL_ATTRIBUTES (cfun->decl));
34454 return true;
34455 }
34456
34457 /* Have we recorded an explicit access to the GE bits of PSTATE?  */
34458 bool
34459 arm_ge_bits_access (void)
34460 {
34461 if (cfun && cfun->decl)
34462 return lookup_attribute ("acle gebits",
34463 DECL_ATTRIBUTES (cfun->decl));
34464 return true;
34465 }
34466
34467 /* Return NULL if the insn INSN is valid within a low-overhead loop.
34468 Otherwise return a string describing why doloop cannot be applied. */
34469
34470 static const char *
34471 arm_invalid_within_doloop (const rtx_insn *insn)
34472 {
34473 if (!TARGET_HAVE_LOB)
34474 return default_invalid_within_doloop (insn);
34475
34476 if (CALL_P (insn))
34477 return "Function call in the loop.";
34478
34479 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34480 return "LR is used inside loop.";
34481
34482 return NULL;
34483 }
34484
34485 bool
34486 arm_target_insn_ok_for_lob (rtx insn)
34487 {
34488 basic_block bb = BLOCK_FOR_INSN (insn);
34489 /* Make sure the basic block of the target insn is a simple latch
34490 whose single predecessor and single successor are both the loop
34491 body itself.  Only simple loops with a single basic block as the body
34492 are supported for low-overhead loops, which guarantees that the LE
34493 target is above LE itself in the generated code. */
34494
34495 return single_succ_p (bb)
34496 && single_pred_p (bb)
34497 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34498 && contains_no_active_insn_p (bb);
34499 }
34500
34501 #if CHECKING_P
34502 namespace selftest {
34503
34504 /* Scan the static data tables generated by parsecpu.awk looking for
34505 potential issues with the data. We primarily check for
34506 inconsistencies in the option extensions at present (extensions
34507 that duplicate others but aren't marked as aliases). Furthermore,
34508 for correct canonicalization, later options must never be a subset
34509 of an earlier option.  Any extension should also only specify other
34510 feature bits and never an architecture bit. The architecture is inferred
34511 from the declaration of the extension. */
34512 static void
34513 arm_test_cpu_arch_data (void)
34514 {
34515 const arch_option *arch;
34516 const cpu_option *cpu;
34517 auto_sbitmap target_isa (isa_num_bits);
34518 auto_sbitmap isa1 (isa_num_bits);
34519 auto_sbitmap isa2 (isa_num_bits);
34520
34521 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34522 {
34523 const cpu_arch_extension *ext1, *ext2;
34524
34525 if (arch->common.extensions == NULL)
34526 continue;
34527
34528 arm_initialize_isa (target_isa, arch->common.isa_bits);
34529
34530 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34531 {
34532 if (ext1->alias)
34533 continue;
34534
34535 arm_initialize_isa (isa1, ext1->isa_bits);
34536 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34537 {
34538 if (ext2->alias || ext1->remove != ext2->remove)
34539 continue;
34540
34541 arm_initialize_isa (isa2, ext2->isa_bits);
34542 /* If the option is a subset of the parent option, it doesn't
34543 add anything and so isn't useful. */
34544 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34545
34546 /* If the extension specifies any architectural bits then
34547 disallow it. Extensions should only specify feature bits. */
34548 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34549 }
34550 }
34551 }
34552
34553 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34554 {
34555 const cpu_arch_extension *ext1, *ext2;
34556
34557 if (cpu->common.extensions == NULL)
34558 continue;
34559
34560 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34561
34562 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34563 {
34564 if (ext1->alias)
34565 continue;
34566
34567 arm_initialize_isa (isa1, ext1->isa_bits);
34568 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34569 {
34570 if (ext2->alias || ext1->remove != ext2->remove)
34571 continue;
34572
34573 arm_initialize_isa (isa2, ext2->isa_bits);
34574 /* If the option is a subset of the parent option, it doesn't
34575 add anything and so isn't useful. */
34576 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34577
34578 /* If the extension specifies any architectural bits then
34579 disallow it. Extensions should only specify feature bits. */
34580 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34581 }
34582 }
34583 }
34584 }
34585
34586 /* Scan the static data tables generated by parsecpu.awk looking for
34587 potential issues with the data.  Here we check the consistency of the
34588 FPU bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
34589 a feature bit that is not defined by any FPU flag. */
34590 static void
34591 arm_test_fpu_data (void)
34592 {
34593 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34594 auto_sbitmap fpubits (isa_num_bits);
34595 auto_sbitmap tmpset (isa_num_bits);
34596
34597 static const enum isa_feature fpu_bitlist_internal[]
34598 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34599 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34600
34601 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34602 {
34603 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34604 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34605 bitmap_clear (isa_all_fpubits_internal);
34606 bitmap_copy (isa_all_fpubits_internal, tmpset);
34607 }
34608
34609 if (!bitmap_empty_p (isa_all_fpubits_internal))
34610 {
34611 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34612 " group that are not defined by any FPU.\n"
34613 " Check your arm-cpus.in.\n");
34614 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34615 }
34616 }
34617
34618 static void
34619 arm_run_selftests (void)
34620 {
34621 arm_test_cpu_arch_data ();
34622 arm_test_fpu_data ();
34623 }
34624 } /* Namespace selftest. */
34625
34626 #undef TARGET_RUN_TARGET_SELFTESTS
34627 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34628 #endif /* CHECKING_P */
34629
34630 /* Implement TARGET_STACK_PROTECT_GUARD.  If the guard is based on a
34631 global variable, use the default implementation; otherwise return
34632 NULL_TREE. */
34633 static tree
34634 arm_stack_protect_guard (void)
34635 {
34636 if (arm_stack_protector_guard == SSP_GLOBAL)
34637 return default_stack_protect_guard ();
34638
34639 return NULL_TREE;
34640 }
34641
34642 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34643 Unlike the arm version, we do NOT implement asm flag outputs. */
34644
34645 rtx_insn *
34646 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34647 vec<machine_mode> & /*input_modes*/,
34648 vec<const char *> &constraints,
34649 vec<rtx> &, vec<rtx> & /*clobbers*/,
34650 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34651 {
34652 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34653 if (startswith (constraints[i], "=@cc"))
34654 {
34655 sorry ("%<asm%> flags not supported in thumb1 mode");
34656 break;
34657 }
34658 return NULL;
34659 }
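
/* For illustration, an asm with a flag output such as
     asm ("cmp %1, %2" : "=@cceq" (r) : "r" (a), "r" (b));
   is handled by the Arm/Thumb-2 md_asm_adjust hook but hits the sorry ()
   above when compiling for Thumb-1.  */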
34660
34661 /* Generate code to enable conditional branches in functions over 1 MiB.
34662 Parameters are:
34663 operands: is the operands list of the asm insn (see arm_cond_branch or
34664 arm_cond_branch_reversed).
34665 pos_label: is an index into the operands array where operands[pos_label] is
34666 the asm label of the final jump destination.
34667 dest: is a string which is used to generate the asm label of the intermediate
34668 destination.
34669 branch_format: is a string denoting the intermediate branch format, e.g.
34670 "beq", "bne", etc. */
34671
34672 const char *
34673 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34674 const char * branch_format)
34675 {
34676 rtx_code_label * tmp_label = gen_label_rtx ();
34677 char label_buf[256];
34678 char buffer[128];
34679 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
34680 CODE_LABEL_NUMBER (tmp_label));
34681 const char *label_ptr = arm_strip_name_encoding (label_buf);
34682 rtx dest_label = operands[pos_label];
34683 operands[pos_label] = tmp_label;
34684
34685 snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
34686 output_asm_insn (buffer, operands);
34687
34688 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34689 operands[pos_label] = dest_label;
34690 output_asm_insn (buffer, operands);
34691 return "";
34692 }
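
/* Sketch of the emitted sequence (label name purely illustrative): the short
   conditional branch built from BRANCH_FORMAT targets a fresh local label,
   and an unconditional b covers the long range, e.g.

	beq	.LCB0
	b	<final destination>
.LCB0:

   so the conditional part stays within its limited branch range.  */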
34693
34694 /* Return the base register class for addresses in the given MODE; for some
34695 MVE vector modes the base register must be in LO_REGS (i.e. [Rn], Rn <= LO_REGS). */
34696 enum reg_class
34697 arm_mode_base_reg_class (machine_mode mode)
34698 {
34699 if (TARGET_HAVE_MVE
34700 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34701 return LO_REGS;
34702
34703 return MODE_BASE_REG_REG_CLASS (mode);
34704 }
34705
34706 struct gcc_target targetm = TARGET_INITIALIZER;
34707
34708 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34709
34710 opt_machine_mode
34711 arm_get_mask_mode (machine_mode mode)
34712 {
34713 if (TARGET_HAVE_MVE)
34714 return arm_mode_to_pred_mode (mode);
34715
34716 return default_get_mask_mode (mode);
34717 }
34718
34719 /* Output assembly to read the thread pointer from the appropriate TPIDR
34720 register into DST.  If PRED_P is true, also emit the %? that is used to
34721 output the predication code. */
34722
34723 const char *
34724 arm_output_load_tpidr (rtx dst, bool pred_p)
34725 {
34726 char buf[64];
34727 int tpidr_coproc_num = -1;
34728 switch (target_thread_pointer)
34729 {
34730 case TP_TPIDRURW:
34731 tpidr_coproc_num = 2;
34732 break;
34733 case TP_TPIDRURO:
34734 tpidr_coproc_num = 3;
34735 break;
34736 case TP_TPIDRPRW:
34737 tpidr_coproc_num = 4;
34738 break;
34739 default:
34740 gcc_unreachable ();
34741 }
34742 snprintf (buf, sizeof (buf),
34743 "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
34744 pred_p ? "%?" : "", tpidr_coproc_num);
34745 output_asm_insn (buf, &dst);
34746 return "";
34747 }
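
/* For example (illustrative), when the thread pointer lives in TPIDRURO, DST
   is r0 and PRED_P is false, the function above emits

	mrc	p15, 0, r0, c13, c0, 3	@ load_tp_hard

   with the final operand selected by target_thread_pointer as shown.  */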
34748
34749 #include "gt-arm.h"