1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "gimple-iterator.h"
73 #include "selftest.h"
74 #include "tree-vectorizer.h"
75 #include "opts.h"
76 #include "aarch-common.h"
77 #include "aarch-common-protos.h"
78
79 /* This file should be included last. */
80 #include "target-def.h"
81
82 /* Forward definitions of types. */
83 typedef struct minipool_node Mnode;
84 typedef struct minipool_fixup Mfix;
85
86 void (*arm_lang_output_object_attributes_hook)(void);
87
88 struct four_ints
89 {
90 int i[4];
91 };
92
93 /* Forward function declarations. */
94 static bool arm_const_not_ok_for_debug_p (rtx);
95 static int arm_needs_doubleword_align (machine_mode, const_tree);
96 static int arm_compute_static_chain_stack_bytes (void);
97 static arm_stack_offsets *arm_get_frame_offsets (void);
98 static void arm_compute_frame_layout (void);
99 static void arm_add_gc_roots (void);
100 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
101 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
102 static unsigned bit_count (unsigned long);
103 static unsigned bitmap_popcount (const sbitmap);
104 static int arm_address_register_rtx_p (rtx, int);
105 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
106 static bool is_called_in_ARM_mode (tree);
107 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
108 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
109 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
110 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
111 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
112 inline static int thumb1_index_register_rtx_p (rtx, int);
113 static int thumb_far_jump_used_p (void);
114 static bool thumb_force_lr_save (void);
115 static unsigned arm_size_return_regs (void);
116 static bool arm_assemble_integer (rtx, unsigned int, int);
117 static void arm_print_operand (FILE *, rtx, int);
118 static void arm_print_operand_address (FILE *, machine_mode, rtx);
119 static bool arm_print_operand_punct_valid_p (unsigned char code);
120 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
121 static arm_cc get_arm_condition_code (rtx);
122 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
123 static const char *output_multi_immediate (rtx *, const char *, const char *,
124 int, HOST_WIDE_INT);
125 static const char *shift_op (rtx, HOST_WIDE_INT *);
126 static struct machine_function *arm_init_machine_status (void);
127 static void thumb_exit (FILE *, int);
128 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
129 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_forward_ref (Mfix *);
131 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
132 static Mnode *add_minipool_backward_ref (Mfix *);
133 static void assign_minipool_offsets (Mfix *);
134 static void arm_print_value (FILE *, rtx);
135 static void dump_minipool (rtx_insn *);
136 static int arm_barrier_cost (rtx_insn *);
137 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
138 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
139 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
140 machine_mode, rtx);
141 static void arm_reorg (void);
142 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
143 static unsigned long arm_compute_save_reg0_reg12_mask (void);
144 static unsigned long arm_compute_save_core_reg_mask (void);
145 static unsigned long arm_isr_value (tree);
146 static unsigned long arm_compute_func_type (void);
147 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
149 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
151 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
152 #endif
153 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
154 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
155 static void arm_output_function_epilogue (FILE *);
156 static void arm_output_function_prologue (FILE *);
157 static int arm_comp_type_attributes (const_tree, const_tree);
158 static void arm_set_default_type_attributes (tree);
159 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence);
164 static int optimal_immediate_sequence_1 (enum rtx_code code,
165 unsigned HOST_WIDE_INT val,
166 struct four_ints *return_sequence,
167 int i);
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree, tree);
170 static machine_mode arm_promote_function_mode (const_tree,
171 machine_mode, int *,
172 const_tree, int);
173 static bool arm_return_in_memory (const_tree, const_tree);
174 static rtx arm_function_value (const_tree, const_tree, bool);
175 static rtx arm_libcall_value_1 (machine_mode);
176 static rtx arm_libcall_value (machine_mode, const_rtx);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
180 tree);
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode, rtx);
183 static bool arm_legitimate_constant_p (machine_mode, rtx);
184 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
185 static int arm_insn_cost (rtx_insn *, bool);
186 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
187 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
188 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
189 static void emit_constant_insn (rtx cond, rtx pattern);
190 static rtx_insn *emit_set_insn (rtx, rtx);
191 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
192 static rtx emit_multi_reg_push (unsigned long, unsigned long);
193 static void arm_emit_multi_reg_pop (unsigned long);
194 static int vfp_emit_fstmd (int, int);
195 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
196 static int arm_arg_partial_bytes (cumulative_args_t,
197 const function_arg_info &);
198 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
199 static void arm_function_arg_advance (cumulative_args_t,
200 const function_arg_info &);
201 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
202 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
203 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
204 const_tree);
205 static rtx aapcs_libcall_value (machine_mode);
206 static int aapcs_select_return_coproc (const_tree, const_tree);
207
208 #ifdef OBJECT_FORMAT_ELF
209 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
210 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
211 #endif
212 #ifndef ARM_PE
213 static void arm_encode_section_info (tree, rtx, int);
214 #endif
215
216 static void arm_file_end (void);
217 static void arm_file_start (void);
218 static void arm_insert_attributes (tree, tree *);
219
220 static void arm_setup_incoming_varargs (cumulative_args_t,
221 const function_arg_info &, int *, int);
222 static bool arm_pass_by_reference (cumulative_args_t,
223 const function_arg_info &);
224 static bool arm_promote_prototypes (const_tree);
225 static bool arm_default_short_enums (void);
226 static bool arm_align_anon_bitfield (void);
227 static bool arm_return_in_msb (const_tree);
228 static bool arm_must_pass_in_stack (const function_arg_info &);
229 static bool arm_return_in_memory (const_tree, const_tree);
230 #if ARM_UNWIND_INFO
231 static void arm_unwind_emit (FILE *, rtx_insn *);
232 static bool arm_output_ttype (rtx);
233 static void arm_asm_emit_except_personality (rtx);
234 #endif
235 static void arm_asm_init_sections (void);
236 static rtx arm_dwarf_register_span (rtx);
237
238 static tree arm_cxx_guard_type (void);
239 static bool arm_cxx_guard_mask_bit (void);
240 static tree arm_get_cookie_size (tree);
241 static bool arm_cookie_has_size (void);
242 static bool arm_cxx_cdtor_returns_this (void);
243 static bool arm_cxx_key_method_may_be_inline (void);
244 static void arm_cxx_determine_class_data_visibility (tree);
245 static bool arm_cxx_class_data_always_comdat (void);
246 static bool arm_cxx_use_aeabi_atexit (void);
247 static void arm_init_libfuncs (void);
248 static tree arm_build_builtin_va_list (void);
249 static void arm_expand_builtin_va_start (tree, rtx);
250 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
251 static void arm_option_override (void);
252 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
253 struct cl_target_option *);
254 static void arm_override_options_after_change (void);
255 static void arm_option_print (FILE *, int, struct cl_target_option *);
256 static void arm_set_current_function (tree);
257 static bool arm_can_inline_p (tree, tree);
258 static void arm_relayout_function (tree);
259 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
260 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
261 static bool arm_sched_can_speculate_insn (rtx_insn *);
262 static bool arm_macro_fusion_p (void);
263 static bool arm_cannot_copy_insn_p (rtx_insn *);
264 static int arm_issue_rate (void);
265 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
266 static int arm_first_cycle_multipass_dfa_lookahead (void);
267 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
268 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
269 static bool arm_output_addr_const_extra (FILE *, rtx);
270 static bool arm_allocate_stack_slots_for_args (void);
271 static bool arm_warn_func_return (tree);
272 static tree arm_promoted_type (const_tree t);
273 static bool arm_scalar_mode_supported_p (scalar_mode);
274 static bool arm_frame_pointer_required (void);
275 static bool arm_can_eliminate (const int, const int);
276 static void arm_asm_trampoline_template (FILE *);
277 static void arm_trampoline_init (rtx, tree, rtx);
278 static rtx arm_trampoline_adjust_address (rtx);
279 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
280 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
282 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
283 static bool arm_array_mode_supported_p (machine_mode,
284 unsigned HOST_WIDE_INT);
285 static machine_mode arm_preferred_simd_mode (scalar_mode);
286 static bool arm_class_likely_spilled_p (reg_class_t);
287 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
288 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
289 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
290 const_tree type,
291 int misalignment,
292 bool is_packed);
293 static void arm_conditional_register_usage (void);
294 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
295 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
296 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
297 static int arm_default_branch_cost (bool, bool);
298 static int arm_cortex_a5_branch_cost (bool, bool);
299 static int arm_cortex_m_branch_cost (bool, bool);
300 static int arm_cortex_m7_branch_cost (bool, bool);
301
302 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
303 rtx, const vec_perm_indices &);
304
305 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
306
307 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
308 tree vectype,
309 int misalign ATTRIBUTE_UNUSED);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
329 vec<machine_mode> &,
330 vec<const char *> &, vec<rtx> &,
331 vec<rtx> &, HARD_REG_SET &, location_t);
332 static const char *arm_identify_fpu_from_isa (sbitmap);
333 \f
334 /* Table of machine attributes. */
335 static const attribute_spec arm_gnu_attributes[] =
336 {
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
338 affects_type_identity, handler, exclude } */
339 /* Function calls made to this symbol must be done indirectly, because
340 it may lie outside of the 26 bit addressing range of a normal function
341 call. */
342 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
343 /* Whereas these functions are always known to reside within the 26 bit
344 addressing range. */
345 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
346 /* Specify the procedure call conventions for a function. */
347 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
348 NULL },
349 /* Interrupt Service Routines have special prologue and epilogue requirements. */
350 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
351 NULL },
352 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
353 NULL },
354 { "naked", 0, 0, true, false, false, false,
355 arm_handle_fndecl_attribute, NULL },
356 #ifdef ARM_PE
357 /* ARM/PE has three new attributes:
358 interfacearm - ?
359 dllexport - for exporting a function/variable that will live in a dll
360 dllimport - for importing a function/variable from a dll
361
362 Microsoft allows multiple declspecs in one __declspec, separating
363 them with spaces. We do NOT support this. Instead, use __declspec
364 multiple times.
365 */
366 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
367 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
368 { "interfacearm", 0, 0, true, false, false, false,
369 arm_handle_fndecl_attribute, NULL },
370 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
371 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
372 NULL },
373 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
374 NULL },
375 { "notshared", 0, 0, false, true, false, false,
376 arm_handle_notshared_attribute, NULL },
377 #endif
378 /* ARMv8-M Security Extensions support. */
379 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
380 arm_handle_cmse_nonsecure_entry, NULL },
381 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
382 arm_handle_cmse_nonsecure_call, NULL },
383 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
384 };
385
386 static const scoped_attribute_specs arm_gnu_attribute_table =
387 {
388 "gnu", { arm_gnu_attributes }
389 };
390
391 static const scoped_attribute_specs *const arm_attribute_table[] =
392 {
393 &arm_gnu_attribute_table
394 };
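/* Illustrative sketch only (hypothetical user declarations, not part of this
   backend): the GNU attributes registered in the table above are spelled
   like this at the C source level when compiling for Arm:

     void far_helper (void) __attribute__ ((long_call));
     void near_helper (void) __attribute__ ((short_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     int  entry_func (int) __attribute__ ((cmse_nonsecure_entry));

   The handler functions named in each table entry validate such uses, and
   the type-attribute comparison hooks defined further down decide how the
   attributes combine across declarations.  */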
395 \f
396 /* Initialize the GCC target structure. */
397 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
398 #undef TARGET_MERGE_DECL_ATTRIBUTES
399 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
400 #endif
401
402 #undef TARGET_CHECK_BUILTIN_CALL
403 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
404
405 #undef TARGET_LEGITIMIZE_ADDRESS
406 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
407
408 #undef TARGET_ATTRIBUTE_TABLE
409 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
410
411 #undef TARGET_INSERT_ATTRIBUTES
412 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
413
414 #undef TARGET_ASM_FILE_START
415 #define TARGET_ASM_FILE_START arm_file_start
416 #undef TARGET_ASM_FILE_END
417 #define TARGET_ASM_FILE_END arm_file_end
418
419 #undef TARGET_ASM_ALIGNED_SI_OP
420 #define TARGET_ASM_ALIGNED_SI_OP NULL
421 #undef TARGET_ASM_INTEGER
422 #define TARGET_ASM_INTEGER arm_assemble_integer
423
424 #undef TARGET_PRINT_OPERAND
425 #define TARGET_PRINT_OPERAND arm_print_operand
426 #undef TARGET_PRINT_OPERAND_ADDRESS
427 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
428 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
429 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
430
431 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
432 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
433
434 #undef TARGET_ASM_FUNCTION_PROLOGUE
435 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
436
437 #undef TARGET_ASM_FUNCTION_EPILOGUE
438 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
439
440 #undef TARGET_CAN_INLINE_P
441 #define TARGET_CAN_INLINE_P arm_can_inline_p
442
443 #undef TARGET_RELAYOUT_FUNCTION
444 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
445
446 #undef TARGET_OPTION_OVERRIDE
447 #define TARGET_OPTION_OVERRIDE arm_option_override
448
449 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
450 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
451
452 #undef TARGET_OPTION_RESTORE
453 #define TARGET_OPTION_RESTORE arm_option_restore
454
455 #undef TARGET_OPTION_PRINT
456 #define TARGET_OPTION_PRINT arm_option_print
457
458 #undef TARGET_COMP_TYPE_ATTRIBUTES
459 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
460
461 #undef TARGET_SCHED_CAN_SPECULATE_INSN
462 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
463
464 #undef TARGET_SCHED_MACRO_FUSION_P
465 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
466
467 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
468 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
469
470 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
471 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
472
473 #undef TARGET_SCHED_ADJUST_COST
474 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
475
476 #undef TARGET_SET_CURRENT_FUNCTION
477 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
478
479 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
480 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
481
482 #undef TARGET_SCHED_REORDER
483 #define TARGET_SCHED_REORDER arm_sched_reorder
484
485 #undef TARGET_REGISTER_MOVE_COST
486 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
487
488 #undef TARGET_MEMORY_MOVE_COST
489 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
490
491 #undef TARGET_ENCODE_SECTION_INFO
492 #ifdef ARM_PE
493 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
494 #else
495 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
496 #endif
497
498 #undef TARGET_STRIP_NAME_ENCODING
499 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
500
501 #undef TARGET_ASM_INTERNAL_LABEL
502 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
503
504 #undef TARGET_FLOATN_MODE
505 #define TARGET_FLOATN_MODE arm_floatn_mode
506
507 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
508 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
509
510 #undef TARGET_FUNCTION_VALUE
511 #define TARGET_FUNCTION_VALUE arm_function_value
512
513 #undef TARGET_LIBCALL_VALUE
514 #define TARGET_LIBCALL_VALUE arm_libcall_value
515
516 #undef TARGET_FUNCTION_VALUE_REGNO_P
517 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
518
519 #undef TARGET_GIMPLE_FOLD_BUILTIN
520 #define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin
521
522 #undef TARGET_ASM_OUTPUT_MI_THUNK
523 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
524 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
525 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
526
527 #undef TARGET_RTX_COSTS
528 #define TARGET_RTX_COSTS arm_rtx_costs
529 #undef TARGET_ADDRESS_COST
530 #define TARGET_ADDRESS_COST arm_address_cost
531 #undef TARGET_INSN_COST
532 #define TARGET_INSN_COST arm_insn_cost
533
534 #undef TARGET_SHIFT_TRUNCATION_MASK
535 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
536 #undef TARGET_VECTOR_MODE_SUPPORTED_P
537 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
538 #undef TARGET_ARRAY_MODE_SUPPORTED_P
539 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
540 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
541 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
542 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
543 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
544 arm_autovectorize_vector_modes
545
546 #undef TARGET_MACHINE_DEPENDENT_REORG
547 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
548
549 #undef TARGET_INIT_BUILTINS
550 #define TARGET_INIT_BUILTINS arm_init_builtins
551 #undef TARGET_EXPAND_BUILTIN
552 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
553 #undef TARGET_BUILTIN_DECL
554 #define TARGET_BUILTIN_DECL arm_builtin_decl
555
556 #undef TARGET_INIT_LIBFUNCS
557 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
558
559 #undef TARGET_PROMOTE_FUNCTION_MODE
560 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
561 #undef TARGET_PROMOTE_PROTOTYPES
562 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
563 #undef TARGET_PASS_BY_REFERENCE
564 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
565 #undef TARGET_ARG_PARTIAL_BYTES
566 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
567 #undef TARGET_FUNCTION_ARG
568 #define TARGET_FUNCTION_ARG arm_function_arg
569 #undef TARGET_FUNCTION_ARG_ADVANCE
570 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
571 #undef TARGET_FUNCTION_ARG_PADDING
572 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
573 #undef TARGET_FUNCTION_ARG_BOUNDARY
574 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
575
576 #undef TARGET_SETUP_INCOMING_VARARGS
577 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
578
579 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
580 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
581
582 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
583 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
584 #undef TARGET_TRAMPOLINE_INIT
585 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
586 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
587 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
588
589 #undef TARGET_WARN_FUNC_RETURN
590 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
591
592 #undef TARGET_DEFAULT_SHORT_ENUMS
593 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
594
595 #undef TARGET_ALIGN_ANON_BITFIELD
596 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
597
598 #undef TARGET_NARROW_VOLATILE_BITFIELD
599 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
600
601 #undef TARGET_CXX_GUARD_TYPE
602 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
603
604 #undef TARGET_CXX_GUARD_MASK_BIT
605 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
606
607 #undef TARGET_CXX_GET_COOKIE_SIZE
608 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
609
610 #undef TARGET_CXX_COOKIE_HAS_SIZE
611 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
612
613 #undef TARGET_CXX_CDTOR_RETURNS_THIS
614 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
615
616 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
617 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
618
619 #undef TARGET_CXX_USE_AEABI_ATEXIT
620 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
621
622 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
623 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
624 arm_cxx_determine_class_data_visibility
625
626 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
627 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
628
629 #undef TARGET_RETURN_IN_MSB
630 #define TARGET_RETURN_IN_MSB arm_return_in_msb
631
632 #undef TARGET_RETURN_IN_MEMORY
633 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
634
635 #undef TARGET_MUST_PASS_IN_STACK
636 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
637
638 #if ARM_UNWIND_INFO
639 #undef TARGET_ASM_UNWIND_EMIT
640 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
641
642 /* EABI unwinding tables use a different format for the typeinfo tables. */
643 #undef TARGET_ASM_TTYPE
644 #define TARGET_ASM_TTYPE arm_output_ttype
645
646 #undef TARGET_ARM_EABI_UNWINDER
647 #define TARGET_ARM_EABI_UNWINDER true
648
649 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
650 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
651
652 #endif /* ARM_UNWIND_INFO */
653
654 #undef TARGET_ASM_INIT_SECTIONS
655 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
656
657 #undef TARGET_DWARF_REGISTER_SPAN
658 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
659
660 #undef TARGET_CANNOT_COPY_INSN_P
661 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
662
663 #ifdef HAVE_AS_TLS
664 #undef TARGET_HAVE_TLS
665 #define TARGET_HAVE_TLS true
666 #endif
667
668 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
669 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
670
671 #undef TARGET_LEGITIMATE_CONSTANT_P
672 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
673
674 #undef TARGET_CANNOT_FORCE_CONST_MEM
675 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
676
677 #undef TARGET_MAX_ANCHOR_OFFSET
678 #define TARGET_MAX_ANCHOR_OFFSET 4095
679
680 /* The minimum is set such that the total size of the block
681 for a particular anchor is -4088 + 1 + 4095 bytes, which is
682 divisible by eight, ensuring natural spacing of anchors. */
683 #undef TARGET_MIN_ANCHOR_OFFSET
684 #define TARGET_MIN_ANCHOR_OFFSET -4088
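/* Worked example of the arithmetic behind the two anchor limits above
   (illustrative only): offsets run from -4088 to +4095 inclusive, so one
   anchor covers 4088 + 1 + 4095 = 8184 bytes, and 8184 is a multiple of
   eight, which keeps successive anchors naturally spaced.  */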
685
686 #undef TARGET_SCHED_ISSUE_RATE
687 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
688
689 #undef TARGET_SCHED_VARIABLE_ISSUE
690 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
691
692 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
693 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
694 arm_first_cycle_multipass_dfa_lookahead
695
696 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
697 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
698 arm_first_cycle_multipass_dfa_lookahead_guard
699
700 #undef TARGET_MANGLE_TYPE
701 #define TARGET_MANGLE_TYPE arm_mangle_type
702
703 #undef TARGET_INVALID_CONVERSION
704 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
705
706 #undef TARGET_INVALID_UNARY_OP
707 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
708
709 #undef TARGET_INVALID_BINARY_OP
710 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
711
712 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
713 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
714
715 #undef TARGET_BUILD_BUILTIN_VA_LIST
716 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
717 #undef TARGET_EXPAND_BUILTIN_VA_START
718 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
719 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
720 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
721
722 #ifdef HAVE_AS_TLS
723 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
724 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
725 #endif
726
727 #undef TARGET_LEGITIMATE_ADDRESS_P
728 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
729
730 #undef TARGET_PREFERRED_RELOAD_CLASS
731 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
732
733 #undef TARGET_PROMOTED_TYPE
734 #define TARGET_PROMOTED_TYPE arm_promoted_type
735
736 #undef TARGET_SCALAR_MODE_SUPPORTED_P
737 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
738
739 #undef TARGET_COMPUTE_FRAME_LAYOUT
740 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
741
742 #undef TARGET_FRAME_POINTER_REQUIRED
743 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
744
745 #undef TARGET_CAN_ELIMINATE
746 #define TARGET_CAN_ELIMINATE arm_can_eliminate
747
748 #undef TARGET_CONDITIONAL_REGISTER_USAGE
749 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
750
751 #undef TARGET_CLASS_LIKELY_SPILLED_P
752 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
753
754 #undef TARGET_VECTORIZE_BUILTINS
755 #define TARGET_VECTORIZE_BUILTINS
756
757 #undef TARGET_VECTOR_ALIGNMENT
758 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
759
760 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
761 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
762 arm_vector_alignment_reachable
763
764 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
765 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
766 arm_builtin_support_vector_misalignment
767
768 #undef TARGET_PREFERRED_RENAME_CLASS
769 #define TARGET_PREFERRED_RENAME_CLASS \
770 arm_preferred_rename_class
771
772 #undef TARGET_VECTORIZE_VEC_PERM_CONST
773 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
774
775 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
776 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
777 arm_builtin_vectorization_cost
778
779 #undef TARGET_CANONICALIZE_COMPARISON
780 #define TARGET_CANONICALIZE_COMPARISON \
781 arm_canonicalize_comparison
782
783 #undef TARGET_ASAN_SHADOW_OFFSET
784 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
785
786 #undef MAX_INSN_PER_IT_BLOCK
787 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
788
789 #undef TARGET_CAN_USE_DOLOOP_P
790 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
791
792 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
793 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
794
795 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
796 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
797
798 #undef TARGET_SCHED_FUSION_PRIORITY
799 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
800
801 #undef TARGET_ASM_FUNCTION_SECTION
802 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
803
804 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
805 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
806
807 #undef TARGET_SECTION_TYPE_FLAGS
808 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
809
810 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
811 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
812
813 #undef TARGET_C_EXCESS_PRECISION
814 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
815
816 /* Although the architecture reserves bits 0 and 1, only the former is
817 used for ARM/Thumb ISA selection in v7 and earlier versions. */
818 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
819 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
820
821 #undef TARGET_FIXED_CONDITION_CODE_REGS
822 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
823
824 #undef TARGET_HARD_REGNO_NREGS
825 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
826 #undef TARGET_HARD_REGNO_MODE_OK
827 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
828
829 #undef TARGET_MODES_TIEABLE_P
830 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
831
832 #undef TARGET_CAN_CHANGE_MODE_CLASS
833 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
834
835 #undef TARGET_CONSTANT_ALIGNMENT
836 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
837
838 #undef TARGET_INVALID_WITHIN_DOLOOP
839 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
840
841 #undef TARGET_MD_ASM_ADJUST
842 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
843
844 #undef TARGET_STACK_PROTECT_GUARD
845 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
846
847 #undef TARGET_VECTORIZE_GET_MASK_MODE
848 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
849 \f
850 /* Obstack for minipool constant handling. */
851 static struct obstack minipool_obstack;
852 static char * minipool_startobj;
853
854 /* The maximum number of insns skipped which
855 will be conditionalised if possible. */
856 static int max_insns_skipped = 5;
857
858 /* True if we are currently building a constant table. */
859 int making_const_table;
860
861 /* The processor for which instructions should be scheduled. */
862 enum processor_type arm_tune = TARGET_CPU_arm_none;
863
864 /* The current tuning set. */
865 const struct tune_params *current_tune;
866
867 /* Which floating point hardware to schedule for. */
868 int arm_fpu_attr;
869
870 /* Used for Thumb call_via trampolines. */
871 rtx thumb_call_via_label[14];
872 static int thumb_call_reg_needed;
873
874 /* The bits in this mask specify which instruction scheduling options should
875 be used. */
876 unsigned int tune_flags = 0;
877
878 /* The highest ARM architecture version supported by the
879 target. */
880 enum base_architecture arm_base_arch = BASE_ARCH_0;
881
882 /* Active target architecture and tuning. */
883
884 struct arm_build_target arm_active_target;
885
886 /* The following are used in the arm.md file as equivalents to bits
887 in the above two flag variables. */
888
889 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
890 int arm_arch4 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
893 int arm_arch4t = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
896 int arm_arch5t = 0;
897
898 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
899 int arm_arch5te = 0;
900
901 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
902 int arm_arch6 = 0;
903
904 /* Nonzero if this chip supports the ARM 6K extensions. */
905 int arm_arch6k = 0;
906
907 /* Nonzero if this chip supports the ARM 6KZ extensions. */
908 int arm_arch6kz = 0;
909
910 /* Nonzero if instructions present in ARMv6-M can be used. */
911 int arm_arch6m = 0;
912
913 /* Nonzero if this chip supports the ARM 7 extensions. */
914 int arm_arch7 = 0;
915
916 /* Nonzero if this chip supports the Large Physical Address Extension. */
917 int arm_arch_lpae = 0;
918
919 /* Nonzero if instructions not present in the 'M' profile can be used. */
920 int arm_arch_notm = 0;
921
922 /* Nonzero if instructions present in ARMv7E-M can be used. */
923 int arm_arch7em = 0;
924
925 /* Nonzero if instructions present in ARMv8 can be used. */
926 int arm_arch8 = 0;
927
928 /* Nonzero if this chip supports the ARMv8.1 extensions. */
929 int arm_arch8_1 = 0;
930
931 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
932 int arm_arch8_2 = 0;
933
934 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
935 int arm_arch8_3 = 0;
936
937 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
938 int arm_arch8_4 = 0;
939
940 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
941 extensions. */
942 int arm_arch8m_main = 0;
943
944 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
945 extensions. */
946 int arm_arch8_1m_main = 0;
947
948 /* Nonzero if this chip supports the FP16 instructions extension of ARM
949 Architecture 8.2. */
950 int arm_fp16_inst = 0;
951
952 /* Nonzero if this chip can benefit from load scheduling. */
953 int arm_ld_sched = 0;
954
955 /* Nonzero if this chip is a StrongARM. */
956 int arm_tune_strongarm = 0;
957
958 /* Nonzero if this chip supports Intel Wireless MMX technology. */
959 int arm_arch_iwmmxt = 0;
960
961 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
962 int arm_arch_iwmmxt2 = 0;
963
964 /* Nonzero if this chip is an XScale. */
965 int arm_arch_xscale = 0;
966
967 /* Nonzero if tuning for XScale */
968 int arm_tune_xscale = 0;
969
970 /* Nonzero if we want to tune for stores that access the write-buffer.
971 This typically means an ARM6 or ARM7 with MMU or MPU. */
972 int arm_tune_wbuf = 0;
973
974 /* Nonzero if tuning for Cortex-A9. */
975 int arm_tune_cortex_a9 = 0;
976
977 /* Nonzero if we should define __THUMB_INTERWORK__ in the
978 preprocessor.
979 XXX This is a bit of a hack, it's intended to help work around
980 problems in GLD which doesn't understand that armv5t code is
981 interworking clean. */
982 int arm_cpp_interwork = 0;
983
984 /* Nonzero if chip supports Thumb 1. */
985 int arm_arch_thumb1;
986
987 /* Nonzero if chip supports Thumb 2. */
988 int arm_arch_thumb2;
989
990 /* Nonzero if chip supports integer division instruction. */
991 int arm_arch_arm_hwdiv;
992 int arm_arch_thumb_hwdiv;
993
994 /* Nonzero if chip disallows volatile memory access in IT block. */
995 int arm_arch_no_volatile_ce;
996
997 /* Nonzero if we shouldn't use literal pools. */
998 bool arm_disable_literal_pool = false;
999
1000 /* The register number to be used for the PIC offset register. */
1001 unsigned arm_pic_register = INVALID_REGNUM;
1002
1003 enum arm_pcs arm_pcs_default;
1004
1005 /* For an explanation of these variables, see final_prescan_insn below. */
1006 int arm_ccfsm_state;
1007 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
1008 enum arm_cond_code arm_current_cc;
1009
1010 rtx arm_target_insn;
1011 int arm_target_label;
1012 /* The number of conditionally executed insns, including the current insn. */
1013 int arm_condexec_count = 0;
1014 /* A bitmask specifying the patterns for the IT block.
1015 Zero means do not output an IT block before this insn. */
1016 int arm_condexec_mask = 0;
1017 /* The number of bits used in arm_condexec_mask. */
1018 int arm_condexec_masklen = 0;
1019
1020 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1021 int arm_arch_crc = 0;
1022
1023 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1024 int arm_arch_dotprod = 0;
1025
1026 /* Nonzero if chip supports the ARMv8-M security extensions. */
1027 int arm_arch_cmse = 0;
1028
1029 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1030 int arm_m_profile_small_mul = 0;
1031
1032 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1033 int arm_arch_i8mm = 0;
1034
1035 /* Nonzero if chip supports the BFloat16 instructions. */
1036 int arm_arch_bf16 = 0;
1037
1038 /* Nonzero if chip supports the Custom Datapath Extension. */
1039 int arm_arch_cde = 0;
1040 int arm_arch_cde_coproc = 0;
1041 const int arm_arch_cde_coproc_bits[] = {
1042 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1043 };
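/* Sketch of how these two CDE variables fit together (an assumption based
   on the names above, kept hypothetical):

     bool cde_on_coproc_p
       = (arm_arch_cde_coproc & arm_arch_cde_coproc_bits[n]) != 0;

   arm_arch_cde records whether the Custom Datapath Extension is present at
   all, while arm_arch_cde_coproc accumulates one bit per coprocessor for
   which CDE is enabled.  */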
1044
1045 /* The condition codes of the ARM, and the inverse function. */
1046 static const char * const arm_condition_codes[] =
1047 {
1048 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1049 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1050 };
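/* Illustrative note (assuming the usual arm_cond_code ordering in arm.h):
   this array is indexed by arm_cond_code, so arm_condition_codes[ARM_EQ]
   is "eq".  The entries are laid out in inverse pairs ("eq"/"ne",
   "cs"/"cc", ...), which is why inverting a condition only requires
   flipping the low bit of the index.  */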
1051
1052 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1053 int arm_regs_in_sequence[] =
1054 {
1055 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1056 };
1057
1058 #define DEF_FP_SYSREG(reg) #reg,
1059 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1060 FP_SYSREGS
1061 };
1062 #undef DEF_FP_SYSREG
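/* How the X-macro above expands, as a sketch (the real FP_SYSREGS list is
   kept in arm.h; the two registers here are only examples): if FP_SYSREGS
   were

     #define FP_SYSREGS DEF_FP_SYSREG (FPSCR) DEF_FP_SYSREG (VPR)

   then, with DEF_FP_SYSREG (reg) stringizing its argument and appending a
   comma, the initializer becomes { "FPSCR", "VPR", }, after which
   DEF_FP_SYSREG is #undef'd again.  */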
1063
1064 #define ARM_LSL_NAME "lsl"
1065 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1066
1067 #define THUMB2_WORK_REGS \
1068 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1069 | (1 << SP_REGNUM) \
1070 | (1 << PC_REGNUM) \
1071 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1072 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1073 : 0)))
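/* Worked example for THUMB2_WORK_REGS (assuming the usual numbering:
   THUMB_HARD_FRAME_POINTER_REGNUM == 7, SP_REGNUM == 13, PC_REGNUM == 15,
   and no PIC register in use): the SP and PC bits already fall outside
   0xff, so the mask reduces to 0xff & ~(1 << 7) == 0x7f, i.e. the low
   registers r0-r6.  */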
1074 \f
1075 /* Initialization code. */
1076
1077 struct cpu_tune
1078 {
1079 enum processor_type scheduler;
1080 unsigned int tune_flags;
1081 const struct tune_params *tune;
1082 };
1083
1084 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1085 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1086 { \
1087 num_slots, \
1088 l1_size, \
1089 l1_line_size \
1090 }
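/* Example expansion (illustrative values): ARM_PREFETCH_BENEFICIAL (4,
   32768, 64) simply becomes { 4, 32768, 64 }, i.e. the number of prefetch
   slots followed by the L1 cache size and the L1 line size, whereas
   ARM_PREFETCH_NOT_BENEFICIAL leaves the cache geometry unknown as
   { 0, -1, -1 }.  */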
1091
1092 /* arm generic vectorizer costs. */
1093 static const
1094 struct cpu_vec_costs arm_default_vec_cost = {
1095 1, /* scalar_stmt_cost. */
1096 1, /* scalar load_cost. */
1097 1, /* scalar_store_cost. */
1098 1, /* vec_stmt_cost. */
1099 1, /* vec_to_scalar_cost. */
1100 1, /* scalar_to_vec_cost. */
1101 1, /* vec_align_load_cost. */
1102 1, /* vec_unalign_load_cost. */
1103 1, /* vec_unalign_store_cost. */
1104 1, /* vec_store_cost. */
1105 3, /* cond_taken_branch_cost. */
1106 1, /* cond_not_taken_branch_cost. */
1107 };
1108
1109 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1110 #include "aarch-cost-tables.h"
1111
1112
1113
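/* A note on reading the extra-cost tables that follow (a summary of the
   existing convention, not new data): each entry is the cost beyond a
   baseline instruction, expressed via COSTS_N_INSNS, where
   COSTS_N_INSNS (N) stands for the cost of N typical instructions.  A 0
   entry therefore means "no extra cost", while, for example,
   COSTS_N_INSNS (2) for arith_shift_reg in the Cortex-A9 table below means
   roughly two instructions' worth of extra cost.  */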
1114 const struct cpu_cost_table cortexa9_extra_costs =
1115 {
1116 /* ALU */
1117 {
1118 0, /* arith. */
1119 0, /* logical. */
1120 0, /* shift. */
1121 COSTS_N_INSNS (1), /* shift_reg. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 COSTS_N_INSNS (2), /* arith_shift_reg. */
1124 0, /* log_shift. */
1125 COSTS_N_INSNS (1), /* log_shift_reg. */
1126 COSTS_N_INSNS (1), /* extend. */
1127 COSTS_N_INSNS (2), /* extend_arith. */
1128 COSTS_N_INSNS (1), /* bfi. */
1129 COSTS_N_INSNS (1), /* bfx. */
1130 0, /* clz. */
1131 0, /* rev. */
1132 0, /* non_exec. */
1133 true /* non_exec_costs_exec. */
1134 },
1135 {
1136 /* MULT SImode */
1137 {
1138 COSTS_N_INSNS (3), /* simple. */
1139 COSTS_N_INSNS (3), /* flag_setting. */
1140 COSTS_N_INSNS (2), /* extend. */
1141 COSTS_N_INSNS (3), /* add. */
1142 COSTS_N_INSNS (2), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1144 },
1145 /* MULT DImode */
1146 {
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (4), /* extend. */
1150 0, /* add (N/A). */
1151 COSTS_N_INSNS (4), /* extend_add. */
1152 0 /* idiv (N/A). */
1153 }
1154 },
1155 /* LD/ST */
1156 {
1157 COSTS_N_INSNS (2), /* load. */
1158 COSTS_N_INSNS (2), /* load_sign_extend. */
1159 COSTS_N_INSNS (2), /* ldrd. */
1160 COSTS_N_INSNS (2), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (5), /* loadf. */
1164 COSTS_N_INSNS (5), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (2), /* store. */
1167 COSTS_N_INSNS (2), /* strd. */
1168 COSTS_N_INSNS (2), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1176 },
1177 {
1178 /* FP SFmode */
1179 {
1180 COSTS_N_INSNS (14), /* div. */
1181 COSTS_N_INSNS (4), /* mult. */
1182 COSTS_N_INSNS (7), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (3), /* addsub. */
1185 COSTS_N_INSNS (1), /* fpconst. */
1186 COSTS_N_INSNS (1), /* neg. */
1187 COSTS_N_INSNS (3), /* compare. */
1188 COSTS_N_INSNS (3), /* widen. */
1189 COSTS_N_INSNS (3), /* narrow. */
1190 COSTS_N_INSNS (3), /* toint. */
1191 COSTS_N_INSNS (3), /* fromint. */
1192 COSTS_N_INSNS (3) /* roundint. */
1193 },
1194 /* FP DFmode */
1195 {
1196 COSTS_N_INSNS (24), /* div. */
1197 COSTS_N_INSNS (5), /* mult. */
1198 COSTS_N_INSNS (8), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (3), /* addsub. */
1201 COSTS_N_INSNS (1), /* fpconst. */
1202 COSTS_N_INSNS (1), /* neg. */
1203 COSTS_N_INSNS (3), /* compare. */
1204 COSTS_N_INSNS (3), /* widen. */
1205 COSTS_N_INSNS (3), /* narrow. */
1206 COSTS_N_INSNS (3), /* toint. */
1207 COSTS_N_INSNS (3), /* fromint. */
1208 COSTS_N_INSNS (3) /* roundint. */
1209 }
1210 },
1211 /* Vector */
1212 {
1213 COSTS_N_INSNS (1), /* alu. */
1214 COSTS_N_INSNS (4), /* mult. */
1215 COSTS_N_INSNS (1), /* movi. */
1216 COSTS_N_INSNS (2), /* dup. */
1217 COSTS_N_INSNS (2) /* extract. */
1218 }
1219 };
1220
1221 const struct cpu_cost_table cortexa8_extra_costs =
1222 {
1223 /* ALU */
1224 {
1225 0, /* arith. */
1226 0, /* logical. */
1227 COSTS_N_INSNS (1), /* shift. */
1228 0, /* shift_reg. */
1229 COSTS_N_INSNS (1), /* arith_shift. */
1230 0, /* arith_shift_reg. */
1231 COSTS_N_INSNS (1), /* log_shift. */
1232 0, /* log_shift_reg. */
1233 0, /* extend. */
1234 0, /* extend_arith. */
1235 0, /* bfi. */
1236 0, /* bfx. */
1237 0, /* clz. */
1238 0, /* rev. */
1239 0, /* non_exec. */
1240 true /* non_exec_costs_exec. */
1241 },
1242 {
1243 /* MULT SImode */
1244 {
1245 COSTS_N_INSNS (1), /* simple. */
1246 COSTS_N_INSNS (1), /* flag_setting. */
1247 COSTS_N_INSNS (1), /* extend. */
1248 COSTS_N_INSNS (1), /* add. */
1249 COSTS_N_INSNS (1), /* extend_add. */
1250 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1251 },
1252 /* MULT DImode */
1253 {
1254 0, /* simple (N/A). */
1255 0, /* flag_setting (N/A). */
1256 COSTS_N_INSNS (2), /* extend. */
1257 0, /* add (N/A). */
1258 COSTS_N_INSNS (2), /* extend_add. */
1259 0 /* idiv (N/A). */
1260 }
1261 },
1262 /* LD/ST */
1263 {
1264 COSTS_N_INSNS (1), /* load. */
1265 COSTS_N_INSNS (1), /* load_sign_extend. */
1266 COSTS_N_INSNS (1), /* ldrd. */
1267 COSTS_N_INSNS (1), /* ldm_1st. */
1268 1, /* ldm_regs_per_insn_1st. */
1269 2, /* ldm_regs_per_insn_subsequent. */
1270 COSTS_N_INSNS (1), /* loadf. */
1271 COSTS_N_INSNS (1), /* loadd. */
1272 COSTS_N_INSNS (1), /* load_unaligned. */
1273 COSTS_N_INSNS (1), /* store. */
1274 COSTS_N_INSNS (1), /* strd. */
1275 COSTS_N_INSNS (1), /* stm_1st. */
1276 1, /* stm_regs_per_insn_1st. */
1277 2, /* stm_regs_per_insn_subsequent. */
1278 COSTS_N_INSNS (1), /* storef. */
1279 COSTS_N_INSNS (1), /* stored. */
1280 COSTS_N_INSNS (1), /* store_unaligned. */
1281 COSTS_N_INSNS (1), /* loadv. */
1282 COSTS_N_INSNS (1) /* storev. */
1283 },
1284 {
1285 /* FP SFmode */
1286 {
1287 COSTS_N_INSNS (36), /* div. */
1288 COSTS_N_INSNS (11), /* mult. */
1289 COSTS_N_INSNS (20), /* mult_addsub. */
1290 COSTS_N_INSNS (30), /* fma. */
1291 COSTS_N_INSNS (9), /* addsub. */
1292 COSTS_N_INSNS (3), /* fpconst. */
1293 COSTS_N_INSNS (3), /* neg. */
1294 COSTS_N_INSNS (6), /* compare. */
1295 COSTS_N_INSNS (4), /* widen. */
1296 COSTS_N_INSNS (4), /* narrow. */
1297 COSTS_N_INSNS (8), /* toint. */
1298 COSTS_N_INSNS (8), /* fromint. */
1299 COSTS_N_INSNS (8) /* roundint. */
1300 },
1301 /* FP DFmode */
1302 {
1303 COSTS_N_INSNS (64), /* div. */
1304 COSTS_N_INSNS (16), /* mult. */
1305 COSTS_N_INSNS (25), /* mult_addsub. */
1306 COSTS_N_INSNS (30), /* fma. */
1307 COSTS_N_INSNS (9), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (6), /* compare. */
1311 COSTS_N_INSNS (6), /* widen. */
1312 COSTS_N_INSNS (6), /* narrow. */
1313 COSTS_N_INSNS (8), /* toint. */
1314 COSTS_N_INSNS (8), /* fromint. */
1315 COSTS_N_INSNS (8) /* roundint. */
1316 }
1317 },
1318 /* Vector */
1319 {
1320 COSTS_N_INSNS (1), /* alu. */
1321 COSTS_N_INSNS (4), /* mult. */
1322 COSTS_N_INSNS (1), /* movi. */
1323 COSTS_N_INSNS (2), /* dup. */
1324 COSTS_N_INSNS (2) /* extract. */
1325 }
1326 };
1327
1328 const struct cpu_cost_table cortexa5_extra_costs =
1329 {
1330 /* ALU */
1331 {
1332 0, /* arith. */
1333 0, /* logical. */
1334 COSTS_N_INSNS (1), /* shift. */
1335 COSTS_N_INSNS (1), /* shift_reg. */
1336 COSTS_N_INSNS (1), /* arith_shift. */
1337 COSTS_N_INSNS (1), /* arith_shift_reg. */
1338 COSTS_N_INSNS (1), /* log_shift. */
1339 COSTS_N_INSNS (1), /* log_shift_reg. */
1340 COSTS_N_INSNS (1), /* extend. */
1341 COSTS_N_INSNS (1), /* extend_arith. */
1342 COSTS_N_INSNS (1), /* bfi. */
1343 COSTS_N_INSNS (1), /* bfx. */
1344 COSTS_N_INSNS (1), /* clz. */
1345 COSTS_N_INSNS (1), /* rev. */
1346 0, /* non_exec. */
1347 true /* non_exec_costs_exec. */
1348 },
1349
1350 {
1351 /* MULT SImode */
1352 {
1353 0, /* simple. */
1354 COSTS_N_INSNS (1), /* flag_setting. */
1355 COSTS_N_INSNS (1), /* extend. */
1356 COSTS_N_INSNS (1), /* add. */
1357 COSTS_N_INSNS (1), /* extend_add. */
1358 COSTS_N_INSNS (7) /* idiv. */
1359 },
1360 /* MULT DImode */
1361 {
1362 0, /* simple (N/A). */
1363 0, /* flag_setting (N/A). */
1364 COSTS_N_INSNS (1), /* extend. */
1365 0, /* add. */
1366 COSTS_N_INSNS (2), /* extend_add. */
1367 0 /* idiv (N/A). */
1368 }
1369 },
1370 /* LD/ST */
1371 {
1372 COSTS_N_INSNS (1), /* load. */
1373 COSTS_N_INSNS (1), /* load_sign_extend. */
1374 COSTS_N_INSNS (6), /* ldrd. */
1375 COSTS_N_INSNS (1), /* ldm_1st. */
1376 1, /* ldm_regs_per_insn_1st. */
1377 2, /* ldm_regs_per_insn_subsequent. */
1378 COSTS_N_INSNS (2), /* loadf. */
1379 COSTS_N_INSNS (4), /* loadd. */
1380 COSTS_N_INSNS (1), /* load_unaligned. */
1381 COSTS_N_INSNS (1), /* store. */
1382 COSTS_N_INSNS (3), /* strd. */
1383 COSTS_N_INSNS (1), /* stm_1st. */
1384 1, /* stm_regs_per_insn_1st. */
1385 2, /* stm_regs_per_insn_subsequent. */
1386 COSTS_N_INSNS (2), /* storef. */
1387 COSTS_N_INSNS (2), /* stored. */
1388 COSTS_N_INSNS (1), /* store_unaligned. */
1389 COSTS_N_INSNS (1), /* loadv. */
1390 COSTS_N_INSNS (1) /* storev. */
1391 },
1392 {
1393 /* FP SFmode */
1394 {
1395 COSTS_N_INSNS (15), /* div. */
1396 COSTS_N_INSNS (3), /* mult. */
1397 COSTS_N_INSNS (7), /* mult_addsub. */
1398 COSTS_N_INSNS (7), /* fma. */
1399 COSTS_N_INSNS (3), /* addsub. */
1400 COSTS_N_INSNS (3), /* fpconst. */
1401 COSTS_N_INSNS (3), /* neg. */
1402 COSTS_N_INSNS (3), /* compare. */
1403 COSTS_N_INSNS (3), /* widen. */
1404 COSTS_N_INSNS (3), /* narrow. */
1405 COSTS_N_INSNS (3), /* toint. */
1406 COSTS_N_INSNS (3), /* fromint. */
1407 COSTS_N_INSNS (3) /* roundint. */
1408 },
1409 /* FP DFmode */
1410 {
1411 COSTS_N_INSNS (30), /* div. */
1412 COSTS_N_INSNS (6), /* mult. */
1413 COSTS_N_INSNS (10), /* mult_addsub. */
1414 COSTS_N_INSNS (7), /* fma. */
1415 COSTS_N_INSNS (3), /* addsub. */
1416 COSTS_N_INSNS (3), /* fpconst. */
1417 COSTS_N_INSNS (3), /* neg. */
1418 COSTS_N_INSNS (3), /* compare. */
1419 COSTS_N_INSNS (3), /* widen. */
1420 COSTS_N_INSNS (3), /* narrow. */
1421 COSTS_N_INSNS (3), /* toint. */
1422 COSTS_N_INSNS (3), /* fromint. */
1423 COSTS_N_INSNS (3) /* roundint. */
1424 }
1425 },
1426 /* Vector */
1427 {
1428 COSTS_N_INSNS (1), /* alu. */
1429 COSTS_N_INSNS (4), /* mult. */
1430 COSTS_N_INSNS (1), /* movi. */
1431 COSTS_N_INSNS (2), /* dup. */
1432 COSTS_N_INSNS (2) /* extract. */
1433 }
1434 };
1435
1436
1437 const struct cpu_cost_table cortexa7_extra_costs =
1438 {
1439 /* ALU */
1440 {
1441 0, /* arith. */
1442 0, /* logical. */
1443 COSTS_N_INSNS (1), /* shift. */
1444 COSTS_N_INSNS (1), /* shift_reg. */
1445 COSTS_N_INSNS (1), /* arith_shift. */
1446 COSTS_N_INSNS (1), /* arith_shift_reg. */
1447 COSTS_N_INSNS (1), /* log_shift. */
1448 COSTS_N_INSNS (1), /* log_shift_reg. */
1449 COSTS_N_INSNS (1), /* extend. */
1450 COSTS_N_INSNS (1), /* extend_arith. */
1451 COSTS_N_INSNS (1), /* bfi. */
1452 COSTS_N_INSNS (1), /* bfx. */
1453 COSTS_N_INSNS (1), /* clz. */
1454 COSTS_N_INSNS (1), /* rev. */
1455 0, /* non_exec. */
1456 true /* non_exec_costs_exec. */
1457 },
1458
1459 {
1460 /* MULT SImode */
1461 {
1462 0, /* simple. */
1463 COSTS_N_INSNS (1), /* flag_setting. */
1464 COSTS_N_INSNS (1), /* extend. */
1465 COSTS_N_INSNS (1), /* add. */
1466 COSTS_N_INSNS (1), /* extend_add. */
1467 COSTS_N_INSNS (7) /* idiv. */
1468 },
1469 /* MULT DImode */
1470 {
1471 0, /* simple (N/A). */
1472 0, /* flag_setting (N/A). */
1473 COSTS_N_INSNS (1), /* extend. */
1474 0, /* add. */
1475 COSTS_N_INSNS (2), /* extend_add. */
1476 0 /* idiv (N/A). */
1477 }
1478 },
1479 /* LD/ST */
1480 {
1481 COSTS_N_INSNS (1), /* load. */
1482 COSTS_N_INSNS (1), /* load_sign_extend. */
1483 COSTS_N_INSNS (3), /* ldrd. */
1484 COSTS_N_INSNS (1), /* ldm_1st. */
1485 1, /* ldm_regs_per_insn_1st. */
1486 2, /* ldm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (2), /* loadf. */
1488 COSTS_N_INSNS (2), /* loadd. */
1489 COSTS_N_INSNS (1), /* load_unaligned. */
1490 COSTS_N_INSNS (1), /* store. */
1491 COSTS_N_INSNS (3), /* strd. */
1492 COSTS_N_INSNS (1), /* stm_1st. */
1493 1, /* stm_regs_per_insn_1st. */
1494 2, /* stm_regs_per_insn_subsequent. */
1495 COSTS_N_INSNS (2), /* storef. */
1496 COSTS_N_INSNS (2), /* stored. */
1497 COSTS_N_INSNS (1), /* store_unaligned. */
1498 COSTS_N_INSNS (1), /* loadv. */
1499 COSTS_N_INSNS (1) /* storev. */
1500 },
1501 {
1502 /* FP SFmode */
1503 {
1504 COSTS_N_INSNS (15), /* div. */
1505 COSTS_N_INSNS (3), /* mult. */
1506 COSTS_N_INSNS (7), /* mult_addsub. */
1507 COSTS_N_INSNS (7), /* fma. */
1508 COSTS_N_INSNS (3), /* addsub. */
1509 COSTS_N_INSNS (3), /* fpconst. */
1510 COSTS_N_INSNS (3), /* neg. */
1511 COSTS_N_INSNS (3), /* compare. */
1512 COSTS_N_INSNS (3), /* widen. */
1513 COSTS_N_INSNS (3), /* narrow. */
1514 COSTS_N_INSNS (3), /* toint. */
1515 COSTS_N_INSNS (3), /* fromint. */
1516 COSTS_N_INSNS (3) /* roundint. */
1517 },
1518 /* FP DFmode */
1519 {
1520 COSTS_N_INSNS (30), /* div. */
1521 COSTS_N_INSNS (6), /* mult. */
1522 COSTS_N_INSNS (10), /* mult_addsub. */
1523 COSTS_N_INSNS (7), /* fma. */
1524 COSTS_N_INSNS (3), /* addsub. */
1525 COSTS_N_INSNS (3), /* fpconst. */
1526 COSTS_N_INSNS (3), /* neg. */
1527 COSTS_N_INSNS (3), /* compare. */
1528 COSTS_N_INSNS (3), /* widen. */
1529 COSTS_N_INSNS (3), /* narrow. */
1530 COSTS_N_INSNS (3), /* toint. */
1531 COSTS_N_INSNS (3), /* fromint. */
1532 COSTS_N_INSNS (3) /* roundint. */
1533 }
1534 },
1535 /* Vector */
1536 {
1537 COSTS_N_INSNS (1), /* alu. */
1538 COSTS_N_INSNS (4), /* mult. */
1539 COSTS_N_INSNS (1), /* movi. */
1540 COSTS_N_INSNS (2), /* dup. */
1541 COSTS_N_INSNS (2) /* extract. */
1542 }
1543 };
1544
1545 const struct cpu_cost_table cortexa12_extra_costs =
1546 {
1547 /* ALU */
1548 {
1549 0, /* arith. */
1550 0, /* logical. */
1551 0, /* shift. */
1552 COSTS_N_INSNS (1), /* shift_reg. */
1553 COSTS_N_INSNS (1), /* arith_shift. */
1554 COSTS_N_INSNS (1), /* arith_shift_reg. */
1555 COSTS_N_INSNS (1), /* log_shift. */
1556 COSTS_N_INSNS (1), /* log_shift_reg. */
1557 0, /* extend. */
1558 COSTS_N_INSNS (1), /* extend_arith. */
1559 0, /* bfi. */
1560 COSTS_N_INSNS (1), /* bfx. */
1561 COSTS_N_INSNS (1), /* clz. */
1562 COSTS_N_INSNS (1), /* rev. */
1563 0, /* non_exec. */
1564 true /* non_exec_costs_exec. */
1565 },
1566 /* MULT SImode */
1567 {
1568 {
1569 COSTS_N_INSNS (2), /* simple. */
1570 COSTS_N_INSNS (3), /* flag_setting. */
1571 COSTS_N_INSNS (2), /* extend. */
1572 COSTS_N_INSNS (3), /* add. */
1573 COSTS_N_INSNS (2), /* extend_add. */
1574 COSTS_N_INSNS (18) /* idiv. */
1575 },
1576 /* MULT DImode */
1577 {
1578 0, /* simple (N/A). */
1579 0, /* flag_setting (N/A). */
1580 COSTS_N_INSNS (3), /* extend. */
1581 0, /* add (N/A). */
1582 COSTS_N_INSNS (3), /* extend_add. */
1583 0 /* idiv (N/A). */
1584 }
1585 },
1586 /* LD/ST */
1587 {
1588 COSTS_N_INSNS (3), /* load. */
1589 COSTS_N_INSNS (3), /* load_sign_extend. */
1590 COSTS_N_INSNS (3), /* ldrd. */
1591 COSTS_N_INSNS (3), /* ldm_1st. */
1592 1, /* ldm_regs_per_insn_1st. */
1593 2, /* ldm_regs_per_insn_subsequent. */
1594 COSTS_N_INSNS (3), /* loadf. */
1595 COSTS_N_INSNS (3), /* loadd. */
1596 0, /* load_unaligned. */
1597 0, /* store. */
1598 0, /* strd. */
1599 0, /* stm_1st. */
1600 1, /* stm_regs_per_insn_1st. */
1601 2, /* stm_regs_per_insn_subsequent. */
1602 COSTS_N_INSNS (2), /* storef. */
1603 COSTS_N_INSNS (2), /* stored. */
1604 0, /* store_unaligned. */
1605 COSTS_N_INSNS (1), /* loadv. */
1606 COSTS_N_INSNS (1) /* storev. */
1607 },
1608 {
1609 /* FP SFmode */
1610 {
1611 COSTS_N_INSNS (17), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1624 },
1625 /* FP DFmode */
1626 {
1627 COSTS_N_INSNS (31), /* div. */
1628 COSTS_N_INSNS (4), /* mult. */
1629 COSTS_N_INSNS (8), /* mult_addsub. */
1630 COSTS_N_INSNS (8), /* fma. */
1631 COSTS_N_INSNS (4), /* addsub. */
1632 COSTS_N_INSNS (2), /* fpconst. */
1633 COSTS_N_INSNS (2), /* neg. */
1634 COSTS_N_INSNS (2), /* compare. */
1635 COSTS_N_INSNS (4), /* widen. */
1636 COSTS_N_INSNS (4), /* narrow. */
1637 COSTS_N_INSNS (4), /* toint. */
1638 COSTS_N_INSNS (4), /* fromint. */
1639 COSTS_N_INSNS (4) /* roundint. */
1640 }
1641 },
1642 /* Vector */
1643 {
1644 COSTS_N_INSNS (1), /* alu. */
1645 COSTS_N_INSNS (4), /* mult. */
1646 COSTS_N_INSNS (1), /* movi. */
1647 COSTS_N_INSNS (2), /* dup. */
1648 COSTS_N_INSNS (2) /* extract. */
1649 }
1650 };
1651
1652 const struct cpu_cost_table cortexa15_extra_costs =
1653 {
1654 /* ALU */
1655 {
1656 0, /* arith. */
1657 0, /* logical. */
1658 0, /* shift. */
1659 0, /* shift_reg. */
1660 COSTS_N_INSNS (1), /* arith_shift. */
1661 COSTS_N_INSNS (1), /* arith_shift_reg. */
1662 COSTS_N_INSNS (1), /* log_shift. */
1663 COSTS_N_INSNS (1), /* log_shift_reg. */
1664 0, /* extend. */
1665 COSTS_N_INSNS (1), /* extend_arith. */
1666 COSTS_N_INSNS (1), /* bfi. */
1667 0, /* bfx. */
1668 0, /* clz. */
1669 0, /* rev. */
1670 0, /* non_exec. */
1671 true /* non_exec_costs_exec. */
1672 },
1673 /* MULT SImode */
1674 {
1675 {
1676 COSTS_N_INSNS (2), /* simple. */
1677 COSTS_N_INSNS (3), /* flag_setting. */
1678 COSTS_N_INSNS (2), /* extend. */
1679 COSTS_N_INSNS (2), /* add. */
1680 COSTS_N_INSNS (2), /* extend_add. */
1681 COSTS_N_INSNS (18) /* idiv. */
1682 },
1683 /* MULT DImode */
1684 {
1685 0, /* simple (N/A). */
1686 0, /* flag_setting (N/A). */
1687 COSTS_N_INSNS (3), /* extend. */
1688 0, /* add (N/A). */
1689 COSTS_N_INSNS (3), /* extend_add. */
1690 0 /* idiv (N/A). */
1691 }
1692 },
1693 /* LD/ST */
1694 {
1695 COSTS_N_INSNS (3), /* load. */
1696 COSTS_N_INSNS (3), /* load_sign_extend. */
1697 COSTS_N_INSNS (3), /* ldrd. */
1698 COSTS_N_INSNS (4), /* ldm_1st. */
1699 1, /* ldm_regs_per_insn_1st. */
1700 2, /* ldm_regs_per_insn_subsequent. */
1701 COSTS_N_INSNS (4), /* loadf. */
1702 COSTS_N_INSNS (4), /* loadd. */
1703 0, /* load_unaligned. */
1704 0, /* store. */
1705 0, /* strd. */
1706 COSTS_N_INSNS (1), /* stm_1st. */
1707 1, /* stm_regs_per_insn_1st. */
1708 2, /* stm_regs_per_insn_subsequent. */
1709 0, /* storef. */
1710 0, /* stored. */
1711 0, /* store_unaligned. */
1712 COSTS_N_INSNS (1), /* loadv. */
1713 COSTS_N_INSNS (1) /* storev. */
1714 },
1715 {
1716 /* FP SFmode */
1717 {
1718 COSTS_N_INSNS (17), /* div. */
1719 COSTS_N_INSNS (4), /* mult. */
1720 COSTS_N_INSNS (8), /* mult_addsub. */
1721 COSTS_N_INSNS (8), /* fma. */
1722 COSTS_N_INSNS (4), /* addsub. */
1723 COSTS_N_INSNS (2), /* fpconst. */
1724 COSTS_N_INSNS (2), /* neg. */
1725 COSTS_N_INSNS (5), /* compare. */
1726 COSTS_N_INSNS (4), /* widen. */
1727 COSTS_N_INSNS (4), /* narrow. */
1728 COSTS_N_INSNS (4), /* toint. */
1729 COSTS_N_INSNS (4), /* fromint. */
1730 COSTS_N_INSNS (4) /* roundint. */
1731 },
1732 /* FP DFmode */
1733 {
1734 COSTS_N_INSNS (31), /* div. */
1735 COSTS_N_INSNS (4), /* mult. */
1736 COSTS_N_INSNS (8), /* mult_addsub. */
1737 COSTS_N_INSNS (8), /* fma. */
1738 COSTS_N_INSNS (4), /* addsub. */
1739 COSTS_N_INSNS (2), /* fpconst. */
1740 COSTS_N_INSNS (2), /* neg. */
1741 COSTS_N_INSNS (2), /* compare. */
1742 COSTS_N_INSNS (4), /* widen. */
1743 COSTS_N_INSNS (4), /* narrow. */
1744 COSTS_N_INSNS (4), /* toint. */
1745 COSTS_N_INSNS (4), /* fromint. */
1746 COSTS_N_INSNS (4) /* roundint. */
1747 }
1748 },
1749 /* Vector */
1750 {
1751 COSTS_N_INSNS (1), /* alu. */
1752 COSTS_N_INSNS (4), /* mult. */
1753 COSTS_N_INSNS (1), /* movi. */
1754 COSTS_N_INSNS (2), /* dup. */
1755 COSTS_N_INSNS (2) /* extract. */
1756 }
1757 };
1758
1759 const struct cpu_cost_table v7m_extra_costs =
1760 {
1761 /* ALU */
1762 {
1763 0, /* arith. */
1764 0, /* logical. */
1765 0, /* shift. */
1766 0, /* shift_reg. */
1767 0, /* arith_shift. */
1768 COSTS_N_INSNS (1), /* arith_shift_reg. */
1769 0, /* log_shift. */
1770 COSTS_N_INSNS (1), /* log_shift_reg. */
1771 0, /* extend. */
1772 COSTS_N_INSNS (1), /* extend_arith. */
1773 0, /* bfi. */
1774 0, /* bfx. */
1775 0, /* clz. */
1776 0, /* rev. */
1777 COSTS_N_INSNS (1), /* non_exec. */
1778 false /* non_exec_costs_exec. */
1779 },
1780 {
1781 /* MULT SImode */
1782 {
1783 COSTS_N_INSNS (1), /* simple. */
1784 COSTS_N_INSNS (1), /* flag_setting. */
1785 COSTS_N_INSNS (2), /* extend. */
1786 COSTS_N_INSNS (1), /* add. */
1787 COSTS_N_INSNS (3), /* extend_add. */
1788 COSTS_N_INSNS (8) /* idiv. */
1789 },
1790 /* MULT DImode */
1791 {
1792 0, /* simple (N/A). */
1793 0, /* flag_setting (N/A). */
1794 COSTS_N_INSNS (2), /* extend. */
1795 0, /* add (N/A). */
1796 COSTS_N_INSNS (3), /* extend_add. */
1797 0 /* idiv (N/A). */
1798 }
1799 },
1800 /* LD/ST */
1801 {
1802 COSTS_N_INSNS (2), /* load. */
1803 0, /* load_sign_extend. */
1804 COSTS_N_INSNS (3), /* ldrd. */
1805 COSTS_N_INSNS (2), /* ldm_1st. */
1806 1, /* ldm_regs_per_insn_1st. */
1807 1, /* ldm_regs_per_insn_subsequent. */
1808 COSTS_N_INSNS (2), /* loadf. */
1809 COSTS_N_INSNS (3), /* loadd. */
1810 COSTS_N_INSNS (1), /* load_unaligned. */
1811 COSTS_N_INSNS (2), /* store. */
1812 COSTS_N_INSNS (3), /* strd. */
1813 COSTS_N_INSNS (2), /* stm_1st. */
1814 1, /* stm_regs_per_insn_1st. */
1815 1, /* stm_regs_per_insn_subsequent. */
1816 COSTS_N_INSNS (2), /* storef. */
1817 COSTS_N_INSNS (3), /* stored. */
1818 COSTS_N_INSNS (1), /* store_unaligned. */
1819 COSTS_N_INSNS (1), /* loadv. */
1820 COSTS_N_INSNS (1) /* storev. */
1821 },
1822 {
1823 /* FP SFmode */
1824 {
1825 COSTS_N_INSNS (7), /* div. */
1826 COSTS_N_INSNS (2), /* mult. */
1827 COSTS_N_INSNS (5), /* mult_addsub. */
1828 COSTS_N_INSNS (3), /* fma. */
1829 COSTS_N_INSNS (1), /* addsub. */
1830 0, /* fpconst. */
1831 0, /* neg. */
1832 0, /* compare. */
1833 0, /* widen. */
1834 0, /* narrow. */
1835 0, /* toint. */
1836 0, /* fromint. */
1837 0 /* roundint. */
1838 },
1839 /* FP DFmode */
1840 {
1841 COSTS_N_INSNS (15), /* div. */
1842 COSTS_N_INSNS (5), /* mult. */
1843 COSTS_N_INSNS (7), /* mult_addsub. */
1844 COSTS_N_INSNS (7), /* fma. */
1845 COSTS_N_INSNS (3), /* addsub. */
1846 0, /* fpconst. */
1847 0, /* neg. */
1848 0, /* compare. */
1849 0, /* widen. */
1850 0, /* narrow. */
1851 0, /* toint. */
1852 0, /* fromint. */
1853 0 /* roundint. */
1854 }
1855 },
1856 /* Vector */
1857 {
1858 COSTS_N_INSNS (1), /* alu. */
1859 COSTS_N_INSNS (4), /* mult. */
1860 COSTS_N_INSNS (1), /* movi. */
1861 COSTS_N_INSNS (2), /* dup. */
1862 COSTS_N_INSNS (2) /* extract. */
1863 }
1864 };
1865
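/* The three buckets per access kind below are: AMO_DEFAULT for a plain
   addressing mode, AMO_NO_WB for one without base-register writeback, and
   AMO_WB for one with writeback (for example a pre-indexed access such as
   "ldr r0, [r1, #4]!").  The generic table charges no extra cost for any
   of them.  */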
1866 const struct addr_mode_cost_table generic_addr_mode_costs =
1867 {
1868 /* int. */
1869 {
1870 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1871 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1872 COSTS_N_INSNS (0) /* AMO_WB. */
1873 },
1874 /* float. */
1875 {
1876 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1877 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1878 COSTS_N_INSNS (0) /* AMO_WB. */
1879 },
1880 /* vector. */
1881 {
1882 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1883 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1884 COSTS_N_INSNS (0) /* AMO_WB. */
1885 }
1886 };
1887
1888 const struct tune_params arm_slowmul_tune =
1889 {
1890 &generic_extra_costs, /* Insn extra costs. */
1891 &generic_addr_mode_costs, /* Addressing mode costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 3, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_TRUE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_STRINGOPS_FALSE,
1906 tune_params::FUSE_NOTHING,
1907 tune_params::SCHED_AUTOPREF_OFF
1908 };
1909
1910 const struct tune_params arm_fastmul_tune =
1911 {
1912 &generic_extra_costs, /* Insn extra costs. */
1913 &generic_addr_mode_costs, /* Addressing mode costs. */
1914 NULL, /* Sched adj cost. */
1915 arm_default_branch_cost,
1916 &arm_default_vec_cost,
1917 1, /* Constant limit. */
1918 5, /* Max cond insns. */
1919 8, /* Memset max inline. */
1920 1, /* Issue rate. */
1921 ARM_PREFETCH_NOT_BENEFICIAL,
1922 tune_params::PREF_CONST_POOL_TRUE,
1923 tune_params::PREF_LDRD_FALSE,
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1926 tune_params::DISPARAGE_FLAGS_NEITHER,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 /* StrongARM has early execution of branches, so a sequence that is worth
1933 skipping is shorter. Set max_insns_skipped to a lower value. */
1934
1935 const struct tune_params arm_strongarm_tune =
1936 {
1937 &generic_extra_costs, /* Insn extra costs. */
1938 &generic_addr_mode_costs, /* Addressing mode costs. */
1939 NULL, /* Sched adj cost. */
1940 arm_default_branch_cost,
1941 &arm_default_vec_cost,
1942 1, /* Constant limit. */
1943 3, /* Max cond insns. */
1944 8, /* Memset max inline. */
1945 1, /* Issue rate. */
1946 ARM_PREFETCH_NOT_BENEFICIAL,
1947 tune_params::PREF_CONST_POOL_TRUE,
1948 tune_params::PREF_LDRD_FALSE,
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1950 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1951 tune_params::DISPARAGE_FLAGS_NEITHER,
1952 tune_params::PREF_NEON_STRINGOPS_FALSE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1955 };
1956
1957 const struct tune_params arm_xscale_tune =
1958 {
1959 &generic_extra_costs, /* Insn extra costs. */
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 xscale_sched_adjust_cost,
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 2, /* Constant limit. */
1965 3, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 1, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_TRUE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_STRINGOPS_FALSE,
1975 tune_params::FUSE_NOTHING,
1976 tune_params::SCHED_AUTOPREF_OFF
1977 };
1978
1979 const struct tune_params arm_9e_tune =
1980 {
1981 &generic_extra_costs, /* Insn extra costs. */
1982 &generic_addr_mode_costs, /* Addressing mode costs. */
1983 NULL, /* Sched adj cost. */
1984 arm_default_branch_cost,
1985 &arm_default_vec_cost,
1986 1, /* Constant limit. */
1987 5, /* Max cond insns. */
1988 8, /* Memset max inline. */
1989 1, /* Issue rate. */
1990 ARM_PREFETCH_NOT_BENEFICIAL,
1991 tune_params::PREF_CONST_POOL_TRUE,
1992 tune_params::PREF_LDRD_FALSE,
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1995 tune_params::DISPARAGE_FLAGS_NEITHER,
1996 tune_params::PREF_NEON_STRINGOPS_FALSE,
1997 tune_params::FUSE_NOTHING,
1998 tune_params::SCHED_AUTOPREF_OFF
1999 };
2000
2001 const struct tune_params arm_marvell_pj4_tune =
2002 {
2003 &generic_extra_costs, /* Insn extra costs. */
2004 &generic_addr_mode_costs, /* Addressing mode costs. */
2005 NULL, /* Sched adj cost. */
2006 arm_default_branch_cost,
2007 &arm_default_vec_cost,
2008 1, /* Constant limit. */
2009 5, /* Max cond insns. */
2010 8, /* Memset max inline. */
2011 2, /* Issue rate. */
2012 ARM_PREFETCH_NOT_BENEFICIAL,
2013 tune_params::PREF_CONST_POOL_TRUE,
2014 tune_params::PREF_LDRD_FALSE,
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2017 tune_params::DISPARAGE_FLAGS_NEITHER,
2018 tune_params::PREF_NEON_STRINGOPS_FALSE,
2019 tune_params::FUSE_NOTHING,
2020 tune_params::SCHED_AUTOPREF_OFF
2021 };
2022
2023 const struct tune_params arm_v6t2_tune =
2024 {
2025 &generic_extra_costs, /* Insn extra costs. */
2026 &generic_addr_mode_costs, /* Addressing mode costs. */
2027 NULL, /* Sched adj cost. */
2028 arm_default_branch_cost,
2029 &arm_default_vec_cost,
2030 1, /* Constant limit. */
2031 5, /* Max cond insns. */
2032 8, /* Memset max inline. */
2033 1, /* Issue rate. */
2034 ARM_PREFETCH_NOT_BENEFICIAL,
2035 tune_params::PREF_CONST_POOL_FALSE,
2036 tune_params::PREF_LDRD_FALSE,
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2039 tune_params::DISPARAGE_FLAGS_NEITHER,
2040 tune_params::PREF_NEON_STRINGOPS_FALSE,
2041 tune_params::FUSE_NOTHING,
2042 tune_params::SCHED_AUTOPREF_OFF
2043 };
2044
2045
2046 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2047 const struct tune_params arm_cortex_tune =
2048 {
2049 &generic_extra_costs,
2050 &generic_addr_mode_costs, /* Addressing mode costs. */
2051 NULL, /* Sched adj cost. */
2052 arm_default_branch_cost,
2053 &arm_default_vec_cost,
2054 1, /* Constant limit. */
2055 5, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 2, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL,
2059 tune_params::PREF_CONST_POOL_FALSE,
2060 tune_params::PREF_LDRD_FALSE,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_NEITHER,
2064 tune_params::PREF_NEON_STRINGOPS_FALSE,
2065 tune_params::FUSE_NOTHING,
2066 tune_params::SCHED_AUTOPREF_OFF
2067 };
2068
2069 const struct tune_params arm_cortex_a8_tune =
2070 {
2071 &cortexa8_extra_costs,
2072 &generic_addr_mode_costs, /* Addressing mode costs. */
2073 NULL, /* Sched adj cost. */
2074 arm_default_branch_cost,
2075 &arm_default_vec_cost,
2076 1, /* Constant limit. */
2077 5, /* Max cond insns. */
2078 8, /* Memset max inline. */
2079 2, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 tune_params::PREF_CONST_POOL_FALSE,
2082 tune_params::PREF_LDRD_FALSE,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_NEITHER,
2086 tune_params::PREF_NEON_STRINGOPS_TRUE,
2087 tune_params::FUSE_NOTHING,
2088 tune_params::SCHED_AUTOPREF_OFF
2089 };
2090
2091 const struct tune_params arm_cortex_a7_tune =
2092 {
2093 &cortexa7_extra_costs,
2094 &generic_addr_mode_costs, /* Addressing mode costs. */
2095 NULL, /* Sched adj cost. */
2096 arm_default_branch_cost,
2097 &arm_default_vec_cost,
2098 1, /* Constant limit. */
2099 5, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 2, /* Issue rate. */
2102 ARM_PREFETCH_NOT_BENEFICIAL,
2103 tune_params::PREF_CONST_POOL_FALSE,
2104 tune_params::PREF_LDRD_FALSE,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_NEITHER,
2108 tune_params::PREF_NEON_STRINGOPS_TRUE,
2109 tune_params::FUSE_NOTHING,
2110 tune_params::SCHED_AUTOPREF_OFF
2111 };
2112
2113 const struct tune_params arm_cortex_a15_tune =
2114 {
2115 &cortexa15_extra_costs,
2116 &generic_addr_mode_costs, /* Addressing mode costs. */
2117 NULL, /* Sched adj cost. */
2118 arm_default_branch_cost,
2119 &arm_default_vec_cost,
2120 1, /* Constant limit. */
2121 2, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 3, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL,
2125 tune_params::PREF_CONST_POOL_FALSE,
2126 tune_params::PREF_LDRD_TRUE,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_ALL,
2130 tune_params::PREF_NEON_STRINGOPS_TRUE,
2131 tune_params::FUSE_NOTHING,
2132 tune_params::SCHED_AUTOPREF_FULL
2133 };
2134
2135 const struct tune_params arm_cortex_a35_tune =
2136 {
2137 &cortexa53_extra_costs,
2138 &generic_addr_mode_costs, /* Addressing mode costs. */
2139 NULL, /* Sched adj cost. */
2140 arm_default_branch_cost,
2141 &arm_default_vec_cost,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 1, /* Issue rate. */
2146 ARM_PREFETCH_NOT_BENEFICIAL,
2147 tune_params::PREF_CONST_POOL_FALSE,
2148 tune_params::PREF_LDRD_FALSE,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER,
2152 tune_params::PREF_NEON_STRINGOPS_TRUE,
2153 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2154 tune_params::SCHED_AUTOPREF_OFF
2155 };
2156
2157 const struct tune_params arm_cortex_a53_tune =
2158 {
2159 &cortexa53_extra_costs,
2160 &generic_addr_mode_costs, /* Addressing mode costs. */
2161 NULL, /* Sched adj cost. */
2162 arm_default_branch_cost,
2163 &arm_default_vec_cost,
2164 1, /* Constant limit. */
2165 5, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL,
2169 tune_params::PREF_CONST_POOL_FALSE,
2170 tune_params::PREF_LDRD_FALSE,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_NEITHER,
2174 tune_params::PREF_NEON_STRINGOPS_TRUE,
2175 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2176 tune_params::SCHED_AUTOPREF_OFF
2177 };
2178
2179 const struct tune_params arm_cortex_a57_tune =
2180 {
2181 &cortexa57_extra_costs,
2182 &generic_addr_mode_costs, /* addressing mode costs */
2183 NULL, /* Sched adj cost. */
2184 arm_default_branch_cost,
2185 &arm_default_vec_cost,
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 3, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL,
2191 tune_params::PREF_CONST_POOL_FALSE,
2192 tune_params::PREF_LDRD_TRUE,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL,
2196 tune_params::PREF_NEON_STRINGOPS_TRUE,
2197 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2198 tune_params::SCHED_AUTOPREF_FULL
2199 };
2200
2201 const struct tune_params arm_exynosm1_tune =
2202 {
2203 &exynosm1_extra_costs,
2204 &generic_addr_mode_costs, /* Addressing mode costs. */
2205 NULL, /* Sched adj cost. */
2206 arm_default_branch_cost,
2207 &arm_default_vec_cost,
2208 1, /* Constant limit. */
2209 2, /* Max cond insns. */
2210 8, /* Memset max inline. */
2211 3, /* Issue rate. */
2212 ARM_PREFETCH_NOT_BENEFICIAL,
2213 tune_params::PREF_CONST_POOL_FALSE,
2214 tune_params::PREF_LDRD_TRUE,
2215 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2217 tune_params::DISPARAGE_FLAGS_ALL,
2218 tune_params::PREF_NEON_STRINGOPS_TRUE,
2219 tune_params::FUSE_NOTHING,
2220 tune_params::SCHED_AUTOPREF_OFF
2221 };
2222
2223 const struct tune_params arm_xgene1_tune =
2224 {
2225 &xgene1_extra_costs,
2226 &generic_addr_mode_costs, /* Addressing mode costs. */
2227 NULL, /* Sched adj cost. */
2228 arm_default_branch_cost,
2229 &arm_default_vec_cost,
2230 1, /* Constant limit. */
2231 2, /* Max cond insns. */
2232 32, /* Memset max inline. */
2233 4, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL,
2235 tune_params::PREF_CONST_POOL_FALSE,
2236 tune_params::PREF_LDRD_TRUE,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_ALL,
2240 tune_params::PREF_NEON_STRINGOPS_FALSE,
2241 tune_params::FUSE_NOTHING,
2242 tune_params::SCHED_AUTOPREF_OFF
2243 };
2244
2245 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2246 less appealing. Set max_insns_skipped to a low value. */
2247
2248 const struct tune_params arm_cortex_a5_tune =
2249 {
2250 &cortexa5_extra_costs,
2251 &generic_addr_mode_costs, /* Addressing mode costs. */
2252 NULL, /* Sched adj cost. */
2253 arm_cortex_a5_branch_cost,
2254 &arm_default_vec_cost,
2255 1, /* Constant limit. */
2256 1, /* Max cond insns. */
2257 8, /* Memset max inline. */
2258 2, /* Issue rate. */
2259 ARM_PREFETCH_NOT_BENEFICIAL,
2260 tune_params::PREF_CONST_POOL_FALSE,
2261 tune_params::PREF_LDRD_FALSE,
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2263 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2264 tune_params::DISPARAGE_FLAGS_NEITHER,
2265 tune_params::PREF_NEON_STRINGOPS_TRUE,
2266 tune_params::FUSE_NOTHING,
2267 tune_params::SCHED_AUTOPREF_OFF
2268 };
2269
2270 const struct tune_params arm_cortex_a9_tune =
2271 {
2272 &cortexa9_extra_costs,
2273 &generic_addr_mode_costs, /* Addressing mode costs. */
2274 cortex_a9_sched_adjust_cost,
2275 arm_default_branch_cost,
2276 &arm_default_vec_cost,
2277 1, /* Constant limit. */
2278 5, /* Max cond insns. */
2279 8, /* Memset max inline. */
2280 2, /* Issue rate. */
2281 ARM_PREFETCH_BENEFICIAL(4,32,32),
2282 tune_params::PREF_CONST_POOL_FALSE,
2283 tune_params::PREF_LDRD_FALSE,
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2286 tune_params::DISPARAGE_FLAGS_NEITHER,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2290 };
2291
2292 const struct tune_params arm_cortex_a12_tune =
2293 {
2294 &cortexa12_extra_costs,
2295 &generic_addr_mode_costs, /* Addressing mode costs. */
2296 NULL, /* Sched adj cost. */
2297 arm_default_branch_cost,
2298 &arm_default_vec_cost, /* Vectorizer costs. */
2299 1, /* Constant limit. */
2300 2, /* Max cond insns. */
2301 8, /* Memset max inline. */
2302 2, /* Issue rate. */
2303 ARM_PREFETCH_NOT_BENEFICIAL,
2304 tune_params::PREF_CONST_POOL_FALSE,
2305 tune_params::PREF_LDRD_TRUE,
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2307 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2308 tune_params::DISPARAGE_FLAGS_ALL,
2309 tune_params::PREF_NEON_STRINGOPS_TRUE,
2310 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 const struct tune_params arm_cortex_a73_tune =
2315 {
2316 &cortexa57_extra_costs,
2317 &generic_addr_mode_costs, /* Addressing mode costs. */
2318 NULL, /* Sched adj cost. */
2319 arm_default_branch_cost,
2320 &arm_default_vec_cost, /* Vectorizer costs. */
2321 1, /* Constant limit. */
2322 2, /* Max cond insns. */
2323 8, /* Memset max inline. */
2324 2, /* Issue rate. */
2325 ARM_PREFETCH_NOT_BENEFICIAL,
2326 tune_params::PREF_CONST_POOL_FALSE,
2327 tune_params::PREF_LDRD_TRUE,
2328 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2329 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2330 tune_params::DISPARAGE_FLAGS_ALL,
2331 tune_params::PREF_NEON_STRINGOPS_TRUE,
2332 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2333 tune_params::SCHED_AUTOPREF_FULL
2334 };
2335
2336 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2337 cycle to execute each. An LDR from the constant pool also takes two cycles
2338 to execute, but mildly increases pipelining opportunity (consecutive
2339 loads/stores can be pipelined together, saving one cycle), and may also
2340 improve icache utilisation. Hence we prefer the constant pool for such
2341 processors. */
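/* A rough sketch of the trade-off described above (illustrative only, not
   code GCC emits verbatim):

     movw  r0, #0x5678      @ 1 cycle
     movt  r0, #0x1234      @ 1 cycle, r0 = 0x12345678

   versus

     ldr   r0, =0x12345678  @ 2 cycles via the literal pool, but can
                            @ pipeline with neighbouring loads/stores

   hence PREF_CONST_POOL_TRUE in the tuning below.  */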
2342
2343 const struct tune_params arm_v7m_tune =
2344 {
2345 &v7m_extra_costs,
2346 &generic_addr_mode_costs, /* Addressing mode costs. */
2347 NULL, /* Sched adj cost. */
2348 arm_cortex_m_branch_cost,
2349 &arm_default_vec_cost,
2350 1, /* Constant limit. */
2351 2, /* Max cond insns. */
2352 8, /* Memset max inline. */
2353 1, /* Issue rate. */
2354 ARM_PREFETCH_NOT_BENEFICIAL,
2355 tune_params::PREF_CONST_POOL_TRUE,
2356 tune_params::PREF_LDRD_FALSE,
2357 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2358 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2359 tune_params::DISPARAGE_FLAGS_NEITHER,
2360 tune_params::PREF_NEON_STRINGOPS_FALSE,
2361 tune_params::FUSE_NOTHING,
2362 tune_params::SCHED_AUTOPREF_OFF
2363 };
2364
2365 /* Cortex-M7 tuning. */
2366
2367 const struct tune_params arm_cortex_m7_tune =
2368 {
2369 &v7m_extra_costs,
2370 &generic_addr_mode_costs, /* Addressing mode costs. */
2371 NULL, /* Sched adj cost. */
2372 arm_cortex_m7_branch_cost,
2373 &arm_default_vec_cost,
2374 0, /* Constant limit. */
2375 1, /* Max cond insns. */
2376 8, /* Memset max inline. */
2377 2, /* Issue rate. */
2378 ARM_PREFETCH_NOT_BENEFICIAL,
2379 tune_params::PREF_CONST_POOL_TRUE,
2380 tune_params::PREF_LDRD_FALSE,
2381 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2382 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2383 tune_params::DISPARAGE_FLAGS_NEITHER,
2384 tune_params::PREF_NEON_STRINGOPS_FALSE,
2385 tune_params::FUSE_NOTHING,
2386 tune_params::SCHED_AUTOPREF_OFF
2387 };
2388
2389 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2390 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2391 cortex-m23. */
2392 const struct tune_params arm_v6m_tune =
2393 {
2394 &generic_extra_costs, /* Insn extra costs. */
2395 &generic_addr_mode_costs, /* Addressing mode costs. */
2396 NULL, /* Sched adj cost. */
2397 arm_default_branch_cost,
2398 &arm_default_vec_cost, /* Vectorizer costs. */
2399 1, /* Constant limit. */
2400 5, /* Max cond insns. */
2401 8, /* Memset max inline. */
2402 1, /* Issue rate. */
2403 ARM_PREFETCH_NOT_BENEFICIAL,
2404 tune_params::PREF_CONST_POOL_FALSE,
2405 tune_params::PREF_LDRD_FALSE,
2406 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2407 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2408 tune_params::DISPARAGE_FLAGS_NEITHER,
2409 tune_params::PREF_NEON_STRINGOPS_FALSE,
2410 tune_params::FUSE_NOTHING,
2411 tune_params::SCHED_AUTOPREF_OFF
2412 };
2413
2414 const struct tune_params arm_fa726te_tune =
2415 {
2416 &generic_extra_costs, /* Insn extra costs. */
2417 &generic_addr_mode_costs, /* Addressing mode costs. */
2418 fa726te_sched_adjust_cost,
2419 arm_default_branch_cost,
2420 &arm_default_vec_cost,
2421 1, /* Constant limit. */
2422 5, /* Max cond insns. */
2423 8, /* Memset max inline. */
2424 2, /* Issue rate. */
2425 ARM_PREFETCH_NOT_BENEFICIAL,
2426 tune_params::PREF_CONST_POOL_TRUE,
2427 tune_params::PREF_LDRD_FALSE,
2428 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2429 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2430 tune_params::DISPARAGE_FLAGS_NEITHER,
2431 tune_params::PREF_NEON_STRINGOPS_FALSE,
2432 tune_params::FUSE_NOTHING,
2433 tune_params::SCHED_AUTOPREF_OFF
2434 };
2435
2436 /* Auto-generated CPU, FPU and architecture tables. */
2437 #include "arm-cpu-data.h"
2438
2439 /* The name of the preprocessor macro to define for this architecture. PROFILE
2440 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2441 is thus chosen to be big enough to hold the longest architecture name. */
2442
2443 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2444
2445 /* Supported TLS relocations. */
2446
2447 enum tls_reloc {
2448 TLS_GD32,
2449 TLS_GD32_FDPIC,
2450 TLS_LDM32,
2451 TLS_LDM32_FDPIC,
2452 TLS_LDO32,
2453 TLS_IE32,
2454 TLS_IE32_FDPIC,
2455 TLS_LE32,
2456 TLS_DESCSEQ /* GNU scheme */
2457 };
2458
2459 /* The maximum number of insns to be used when loading a constant. */
2460 inline static int
2461 arm_constant_limit (bool size_p)
2462 {
2463 return size_p ? 1 : current_tune->constant_limit;
2464 }
2465
2466 /* Emit an insn that's a simple single-set. Both the operands must be known
2467 to be valid. */
2468 inline static rtx_insn *
2469 emit_set_insn (rtx x, rtx y)
2470 {
2471 return emit_insn (gen_rtx_SET (x, y));
2472 }
2473
2474 /* Return the number of bits set in VALUE. */
2475 static unsigned
2476 bit_count (unsigned long value)
2477 {
2478 unsigned long count = 0;
2479
2480 while (value)
2481 {
2482 count++;
2483 value &= value - 1; /* Clear the least-significant set bit. */
2484 }
2485
2486 return count;
2487 }
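/* Worked example of the trick above: 0b101100 -> 0b101000 -> 0b100000 -> 0,
   so three iterations yield a count of 3.  */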
2488
2489 /* Return the number of bits set in BMAP. */
2490 static unsigned
2491 bitmap_popcount (const sbitmap bmap)
2492 {
2493 unsigned int count = 0;
2494 unsigned int n = 0;
2495 sbitmap_iterator sbi;
2496
2497 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2498 count++;
2499 return count;
2500 }
2501
2502 typedef struct
2503 {
2504 machine_mode mode;
2505 const char *name;
2506 } arm_fixed_mode_set;
2507
2508 /* A small helper for setting fixed-point library libfuncs. */
2509
2510 static void
2511 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2512 const char *funcname, const char *modename,
2513 int num_suffix)
2514 {
2515 char buffer[50];
2516
2517 if (num_suffix == 0)
2518 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2519 else
2520 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2521
2522 set_optab_libfunc (optable, mode, buffer);
2523 }
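/* For example, arm_set_fixed_optab_libfunc (ssadd_optab, E_SQmode, "ssadd",
   "sq", 3), as used in the loop further below, records "__gnu_ssaddsq3" as
   the libcall backing saturating SQmode addition.  */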
2524
2525 static void
2526 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2527 machine_mode from, const char *funcname,
2528 const char *toname, const char *fromname)
2529 {
2530 char buffer[50];
2531 const char *maybe_suffix_2 = "";
2532
2533 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2534 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2535 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2536 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2537 maybe_suffix_2 = "2";
2538
2539 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2540 maybe_suffix_2);
2541
2542 set_conv_libfunc (optable, to, from, buffer);
2543 }
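/* Likewise for conversions: fract_optab from E_QQmode ("qq") to E_HQmode
   ("hq"), both signed fractional modes, picks up the "2" suffix and
   registers "__gnu_fractqqhq2"; a conversion whose target is not a
   fixed-point mode, such as SQmode to SImode, gets no suffix and becomes
   "__gnu_fractsqsi".  */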
2544
2545 static GTY(()) rtx speculation_barrier_libfunc;
2546
2547 /* Record that we have no arithmetic or comparison libfuncs for
2548 machine mode MODE. */
2549
2550 static void
2551 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2552 {
2553 /* Arithmetic. */
2554 set_optab_libfunc (add_optab, mode, NULL);
2555 set_optab_libfunc (sdiv_optab, mode, NULL);
2556 set_optab_libfunc (smul_optab, mode, NULL);
2557 set_optab_libfunc (neg_optab, mode, NULL);
2558 set_optab_libfunc (sub_optab, mode, NULL);
2559
2560 /* Comparisons. */
2561 set_optab_libfunc (eq_optab, mode, NULL);
2562 set_optab_libfunc (ne_optab, mode, NULL);
2563 set_optab_libfunc (lt_optab, mode, NULL);
2564 set_optab_libfunc (le_optab, mode, NULL);
2565 set_optab_libfunc (ge_optab, mode, NULL);
2566 set_optab_libfunc (gt_optab, mode, NULL);
2567 set_optab_libfunc (unord_optab, mode, NULL);
2568 }
2569
2570 /* Set up library functions unique to ARM. */
2571 static void
2572 arm_init_libfuncs (void)
2573 {
2574 machine_mode mode_iter;
2575
2576 /* For Linux, we have access to kernel support for atomic operations. */
2577 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2578 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2579
2580 /* There are no special library functions unless we are using the
2581 ARM BPABI. */
2582 if (!TARGET_BPABI)
2583 return;
2584
2585 /* The functions below are described in Section 4 of the "Run-Time
2586 ABI for the ARM architecture", Version 1.0. */
2587
2588 /* Double-precision floating-point arithmetic. Table 2. */
2589 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2590 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2591 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2592 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2593 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2594
2595 /* Double-precision comparisons. Table 3. */
2596 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2597 set_optab_libfunc (ne_optab, DFmode, NULL);
2598 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2599 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2600 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2601 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2602 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2603
2604 /* Single-precision floating-point arithmetic. Table 4. */
2605 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2606 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2607 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2608 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2609 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2610
2611 /* Single-precision comparisons. Table 5. */
2612 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2613 set_optab_libfunc (ne_optab, SFmode, NULL);
2614 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2615 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2616 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2617 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2618 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2619
2620 /* Floating-point to integer conversions. Table 6. */
2621 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2622 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2623 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2624 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2625 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2626 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2627 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2628 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2629
2630 /* Conversions between floating types. Table 7. */
2631 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2632 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2633
2634 /* Integer to floating-point conversions. Table 8. */
2635 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2636 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2637 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2638 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2639 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2640 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2641 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2642 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2643
2644 /* Long long. Table 9. */
2645 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2646 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2647 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2648 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2649 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2650 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2651 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2652 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2653
2654 /* Integer (32/32->32) division. \S 4.3.1. */
2655 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2656 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2657
2658 /* The divmod functions are designed so that they can be used for
2659 plain division, even though they return both the quotient and the
2660 remainder. The quotient is returned in the usual location (i.e.,
2661 r0 for SImode, {r0, r1} for DImode), just as would be expected
2662 for an ordinary division routine. Because the AAPCS calling
2663 conventions specify that all of { r0, r1, r2, r3 } are
2664 call-clobbered registers, there is no need to tell the compiler
2665 explicitly that those registers are clobbered by these
2666 routines. */
2667 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2668 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
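/* Concretely, __aeabi_idivmod returns the quotient in r0 and the remainder
   in r1, while __aeabi_ldivmod returns the quotient in {r0, r1} and the
   remainder in {r2, r3}; a plain division simply ignores the remainder
   half.  */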
2669
2670 /* For SImode division the ABI provides div-without-mod routines,
2671 which are faster. */
2672 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2673 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2674
2675 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2676 divmod libcalls instead. */
2677 set_optab_libfunc (smod_optab, DImode, NULL);
2678 set_optab_libfunc (umod_optab, DImode, NULL);
2679 set_optab_libfunc (smod_optab, SImode, NULL);
2680 set_optab_libfunc (umod_optab, SImode, NULL);
2681
2682 /* Half-precision float operations. The compiler handles all operations
2683 with NULL libfuncs by converting to SFmode. */
2684 switch (arm_fp16_format)
2685 {
2686 case ARM_FP16_FORMAT_IEEE:
2687 case ARM_FP16_FORMAT_ALTERNATIVE:
2688
2689 /* Conversions. */
2690 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2691 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2692 ? "__gnu_f2h_ieee"
2693 : "__gnu_f2h_alternative"));
2694 set_conv_libfunc (sext_optab, SFmode, HFmode,
2695 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2696 ? "__gnu_h2f_ieee"
2697 : "__gnu_h2f_alternative"));
2698
2699 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2700 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2701 ? "__gnu_d2h_ieee"
2702 : "__gnu_d2h_alternative"));
2703
2704 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2705 break;
2706
2707 default:
2708 break;
2709 }
2710
2711 /* For all possible libcalls in BFmode, record NULL. */
2712 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2713 {
2714 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2715 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2716 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2717 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2718 }
2719 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2720
2721 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2722 {
2723 const arm_fixed_mode_set fixed_arith_modes[] =
2724 {
2725 { E_QQmode, "qq" },
2726 { E_UQQmode, "uqq" },
2727 { E_HQmode, "hq" },
2728 { E_UHQmode, "uhq" },
2729 { E_SQmode, "sq" },
2730 { E_USQmode, "usq" },
2731 { E_DQmode, "dq" },
2732 { E_UDQmode, "udq" },
2733 { E_TQmode, "tq" },
2734 { E_UTQmode, "utq" },
2735 { E_HAmode, "ha" },
2736 { E_UHAmode, "uha" },
2737 { E_SAmode, "sa" },
2738 { E_USAmode, "usa" },
2739 { E_DAmode, "da" },
2740 { E_UDAmode, "uda" },
2741 { E_TAmode, "ta" },
2742 { E_UTAmode, "uta" }
2743 };
2744 const arm_fixed_mode_set fixed_conv_modes[] =
2745 {
2746 { E_QQmode, "qq" },
2747 { E_UQQmode, "uqq" },
2748 { E_HQmode, "hq" },
2749 { E_UHQmode, "uhq" },
2750 { E_SQmode, "sq" },
2751 { E_USQmode, "usq" },
2752 { E_DQmode, "dq" },
2753 { E_UDQmode, "udq" },
2754 { E_TQmode, "tq" },
2755 { E_UTQmode, "utq" },
2756 { E_HAmode, "ha" },
2757 { E_UHAmode, "uha" },
2758 { E_SAmode, "sa" },
2759 { E_USAmode, "usa" },
2760 { E_DAmode, "da" },
2761 { E_UDAmode, "uda" },
2762 { E_TAmode, "ta" },
2763 { E_UTAmode, "uta" },
2764 { E_QImode, "qi" },
2765 { E_HImode, "hi" },
2766 { E_SImode, "si" },
2767 { E_DImode, "di" },
2768 { E_TImode, "ti" },
2769 { E_SFmode, "sf" },
2770 { E_DFmode, "df" }
2771 };
2772 unsigned int i, j;
2773
2774 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2775 {
2776 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2777 "add", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2779 "ssadd", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2781 "usadd", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2783 "sub", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2785 "sssub", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2787 "ussub", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2789 "mul", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2791 "ssmul", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2793 "usmul", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2795 "div", fixed_arith_modes[i].name, 3);
2796 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2797 "udiv", fixed_arith_modes[i].name, 3);
2798 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2799 "ssdiv", fixed_arith_modes[i].name, 3);
2800 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2801 "usdiv", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2803 "neg", fixed_arith_modes[i].name, 2);
2804 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2805 "ssneg", fixed_arith_modes[i].name, 2);
2806 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2807 "usneg", fixed_arith_modes[i].name, 2);
2808 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2809 "ashl", fixed_arith_modes[i].name, 3);
2810 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2811 "ashr", fixed_arith_modes[i].name, 3);
2812 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2813 "lshr", fixed_arith_modes[i].name, 3);
2814 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2815 "ssashl", fixed_arith_modes[i].name, 3);
2816 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2817 "usashl", fixed_arith_modes[i].name, 3);
2818 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2819 "cmp", fixed_arith_modes[i].name, 2);
2820 }
2821
2822 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2823 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2824 {
2825 if (i == j
2826 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2827 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2828 continue;
2829
2830 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2831 fixed_conv_modes[j].mode, "fract",
2832 fixed_conv_modes[i].name,
2833 fixed_conv_modes[j].name);
2834 arm_set_fixed_conv_libfunc (satfract_optab,
2835 fixed_conv_modes[i].mode,
2836 fixed_conv_modes[j].mode, "satfract",
2837 fixed_conv_modes[i].name,
2838 fixed_conv_modes[j].name);
2839 arm_set_fixed_conv_libfunc (fractuns_optab,
2840 fixed_conv_modes[i].mode,
2841 fixed_conv_modes[j].mode, "fractuns",
2842 fixed_conv_modes[i].name,
2843 fixed_conv_modes[j].name);
2844 arm_set_fixed_conv_libfunc (satfractuns_optab,
2845 fixed_conv_modes[i].mode,
2846 fixed_conv_modes[j].mode, "satfractuns",
2847 fixed_conv_modes[i].name,
2848 fixed_conv_modes[j].name);
2849 }
2850 }
2851
2852 if (TARGET_AAPCS_BASED)
2853 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2854
2855 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2856 }
2857
2858 /* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
2859 static bool
2860 arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
2861 {
2862 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
2863 tree fndecl = gimple_call_fndecl (stmt);
2864 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
2865 unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
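/* The MD function code packs the builtin class into the bits covered by
   ARM_BUILTIN_CLASS and the per-class builtin number into the bits from
   ARM_BUILTIN_SHIFT upwards, hence the shift above and the mask in the
   switch below.  */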
2866 gimple *new_stmt = NULL;
2867 switch (code & ARM_BUILTIN_CLASS)
2868 {
2869 case ARM_BUILTIN_GENERAL:
2870 break;
2871 case ARM_BUILTIN_MVE:
2872 new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
2873 }
2874 if (!new_stmt)
2875 return false;
2876
2877 gsi_replace (gsi, new_stmt, true);
2878 return true;
2879 }
2880
2881 /* On AAPCS systems, this is the "struct __va_list". */
2882 static GTY(()) tree va_list_type;
2883
2884 /* Return the type to use as __builtin_va_list. */
2885 static tree
2886 arm_build_builtin_va_list (void)
2887 {
2888 tree va_list_name;
2889 tree ap_field;
2890
2891 if (!TARGET_AAPCS_BASED)
2892 return std_build_builtin_va_list ();
2893
2894 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2895 defined as:
2896
2897 struct __va_list
2898 {
2899 void *__ap;
2900 };
2901
2902 The C Library ABI further reinforces this definition in \S
2903 4.1.
2904
2905 We must follow this definition exactly. The structure tag
2906 name is visible in C++ mangled names, and thus forms a part
2907 of the ABI. The field name may be used by people who
2908 #include <stdarg.h>. */
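/* As an illustration of the mangling point (on a typical AAPCS C++
   toolchain, e.g. arm-linux-gnueabihf g++): "void f (va_list);" mangles as
   _Z1fSt9__va_list, because the ABI treats __va_list as living in
   namespace std for mangling, so changing the tag name here would change
   mangled names.  */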
2909 /* Create the type. */
2910 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2911 /* Give it the required name. */
2912 va_list_name = build_decl (BUILTINS_LOCATION,
2913 TYPE_DECL,
2914 get_identifier ("__va_list"),
2915 va_list_type);
2916 DECL_ARTIFICIAL (va_list_name) = 1;
2917 TYPE_NAME (va_list_type) = va_list_name;
2918 TYPE_STUB_DECL (va_list_type) = va_list_name;
2919 /* Create the __ap field. */
2920 ap_field = build_decl (BUILTINS_LOCATION,
2921 FIELD_DECL,
2922 get_identifier ("__ap"),
2923 ptr_type_node);
2924 DECL_ARTIFICIAL (ap_field) = 1;
2925 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2926 TYPE_FIELDS (va_list_type) = ap_field;
2927 /* Compute its layout. */
2928 layout_type (va_list_type);
2929
2930 return va_list_type;
2931 }
2932
2933 /* Return an expression of type "void *" pointing to the next
2934 available argument in a variable-argument list. VALIST is the
2935 user-level va_list object, of type __builtin_va_list. */
2936 static tree
2937 arm_extract_valist_ptr (tree valist)
2938 {
2939 if (TREE_TYPE (valist) == error_mark_node)
2940 return error_mark_node;
2941
2942 /* On an AAPCS target, the pointer is stored within "struct
2943 va_list". */
2944 if (TARGET_AAPCS_BASED)
2945 {
2946 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2947 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2948 valist, ap_field, NULL_TREE);
2949 }
2950
2951 return valist;
2952 }
2953
2954 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2955 static void
2956 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2957 {
2958 valist = arm_extract_valist_ptr (valist);
2959 std_expand_builtin_va_start (valist, nextarg);
2960 }
2961
2962 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2963 static tree
2964 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2965 gimple_seq *post_p)
2966 {
2967 valist = arm_extract_valist_ptr (valist);
2968 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2969 }
2970
2971 /* Check any incompatible options that the user has specified. */
2972 static void
2973 arm_option_check_internal (struct gcc_options *opts)
2974 {
2975 int flags = opts->x_target_flags;
2976
2977 /* iWMMXt and NEON are incompatible. */
2978 if (TARGET_IWMMXT
2979 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2980 error ("iWMMXt and NEON are incompatible");
2981
2982 /* Make sure that the processor choice does not conflict with any of the
2983 other command line choices. */
2984 if (TARGET_ARM_P (flags)
2985 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2986 error ("target CPU does not support ARM mode");
2987
2988 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2989 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2990 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2991
2992 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2993 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2994
2995 /* If this target is normally configured to use APCS frames, warn if they
2996 are turned off and debugging is turned on. */
2997 if (TARGET_ARM_P (flags)
2998 && write_symbols != NO_DEBUG
2999 && !TARGET_APCS_FRAME
3000 && (TARGET_DEFAULT & MASK_APCS_FRAME))
3001 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
3002 "debugging");
3003
3004 /* iWMMXt unsupported under Thumb mode. */
3005 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
3006 error ("iWMMXt unsupported under Thumb mode");
3007
3008 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
3009 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
3010
3011 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
3012 {
3013 error ("RTP PIC is incompatible with Thumb");
3014 flag_pic = 0;
3015 }
3016
3017 if (target_pure_code || target_slow_flash_data)
3018 {
3019 const char *flag = (target_pure_code ? "-mpure-code" :
3020 "-mslow-flash-data");
3021 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
3022
3023 /* We only support -mslow-flash-data on M-profile targets with
3024 MOVT. */
3025 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
3026 error ("%s only supports non-pic code on M-profile targets with the "
3027 "MOVT instruction", flag);
3028
3029 /* We only support -mpure-code on M-profile targets. */
3030 if (target_pure_code && common_unsupported_modes)
3031 error ("%s only supports non-pic code on M-profile targets", flag);
3032
3033 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3034 -mword-relocations forbids relocation of MOVT/MOVW. */
3035 if (target_word_relocations)
3036 error ("%s incompatible with %<-mword-relocations%>", flag);
3037 }
3038 }
3039
3040 /* Recompute the global settings depending on target attribute options. */
3041
3042 static void
3043 arm_option_params_internal (void)
3044 {
3045 /* If we are not using the default (ARM mode) section anchor offset
3046 ranges, then set the correct ranges now. */
3047 if (TARGET_THUMB1)
3048 {
3049 /* Thumb-1 LDR instructions cannot have negative offsets.
3050 Permissible positive offset ranges are 5-bit (for byte loads),
3051 6-bit (for halfword loads), or 7-bit (for word loads).
3052 Empirical results suggest a 7-bit anchor range gives the best
3053 overall code size. */
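/* In byte terms (a sketch of the underlying arithmetic): Thumb-1 LDRB
   reaches offsets 0-31, LDRH 0-62 in steps of 2, and word LDR 0-124 in
   steps of 4, so an upper bound of 127 covers everything a single
   immediate-offset load can reach from an anchor.  */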
3054 targetm.min_anchor_offset = 0;
3055 targetm.max_anchor_offset = 127;
3056 }
3057 else if (TARGET_THUMB2)
3058 {
3059 /* The minimum is set such that the total size of the block
3060 for a particular anchor is 248 + 1 + 4095 bytes, which is
3061 divisible by eight, ensuring natural spacing of anchors. */
3062 targetm.min_anchor_offset = -248;
3063 targetm.max_anchor_offset = 4095;
3064 }
3065 else
3066 {
3067 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3068 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3069 }
3070
3071 /* Increase the number of conditional instructions with -Os. */
3072 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3073
3074 /* For THUMB2, we limit the conditional sequence to one IT block. */
3075 if (TARGET_THUMB2)
3076 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
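/* An IT instruction can predicate at most four subsequent instructions,
   which is the limit MAX_INSN_PER_IT_BLOCK encodes, so the MIN above never
   lets a conditionalised run spill over into a second IT block.  */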
3077
3078 if (TARGET_THUMB1)
3079 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3080 else
3081 targetm.md_asm_adjust = arm_md_asm_adjust;
3082 }
3083
3084 /* True if -mflip-thumb should next add an attribute for the default
3085 mode, false if it should next add an attribute for the opposite mode. */
3086 static GTY(()) bool thumb_flipper;
3087
3088 /* Options after initial target override. */
3089 static GTY(()) tree init_optimize;
3090
3091 static void
3092 arm_override_options_after_change_1 (struct gcc_options *opts,
3093 struct gcc_options *opts_set)
3094 {
3095 /* -falign-functions without argument: supply one. */
3096 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3097 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3098 && opts->x_optimize_size ? "2" : "4";
3099 }
3100
3101 /* Implement targetm.override_options_after_change. */
3102
3103 static void
3104 arm_override_options_after_change (void)
3105 {
3106 arm_override_options_after_change_1 (&global_options, &global_options_set);
3107 }
3108
3109 /* Implement TARGET_OPTION_RESTORE. */
3110 static void
3111 arm_option_restore (struct gcc_options */* opts */,
3112 struct gcc_options */* opts_set */,
3113 struct cl_target_option *ptr)
3114 {
3115 arm_configure_build_target (&arm_active_target, ptr, false);
3116 arm_option_reconfigure_globals ();
3117 }
3118
3119 /* Reset options between modes that the user has specified. */
3120 static void
3121 arm_option_override_internal (struct gcc_options *opts,
3122 struct gcc_options *opts_set)
3123 {
3124 arm_override_options_after_change_1 (opts, opts_set);
3125
3126 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3127 {
3128 /* The default is to enable interworking, so this warning message would
3129 be confusing to users who have just compiled with
3130 eg, -march=armv4. */
3131 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3132 opts->x_target_flags &= ~MASK_INTERWORK;
3133 }
3134
3135 if (TARGET_THUMB_P (opts->x_target_flags)
3136 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3137 {
3138 warning (0, "target CPU does not support THUMB instructions");
3139 opts->x_target_flags &= ~MASK_THUMB;
3140 }
3141
3142 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3143 {
3144 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3145 opts->x_target_flags &= ~MASK_APCS_FRAME;
3146 }
3147
3148 /* Callee super interworking implies thumb interworking. Adding
3149 this to the flags here simplifies the logic elsewhere. */
3150 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3151 opts->x_target_flags |= MASK_INTERWORK;
3152
3153 /* Need to remember initial values so combinations of options like
3154 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3155 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3156
3157 if (! opts_set->x_arm_restrict_it)
3158 opts->x_arm_restrict_it = arm_arch8;
3159
3160 /* ARM execution state and M profile don't have [restrict] IT. */
3161 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3162 opts->x_arm_restrict_it = 0;
3163
3164 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3165 if (!opts_set->x_arm_restrict_it
3166 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3167 opts->x_arm_restrict_it = 0;
3168
3169 /* Enable -munaligned-access by default for
3170 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3171 i.e. Thumb2 and ARM state only.
3172 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3173 - ARMv8 architecture-based processors.
3174
3175 Disable -munaligned-access by default for
3176 - all pre-ARMv6 architecture-based processors
3177 - ARMv6-M architecture-based processors
3178 - ARMv8-M Baseline processors. */
3179
3180 if (! opts_set->x_unaligned_access)
3181 {
3182 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3183 && arm_arch6 && (arm_arch_notm || arm_arch7));
3184 }
3185 else if (opts->x_unaligned_access == 1
3186 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3187 {
3188 warning (0, "target CPU does not support unaligned accesses");
3189 opts->x_unaligned_access = 0;
3190 }
3191
3192 /* Don't warn since it's on by default in -O2. */
3193 if (TARGET_THUMB1_P (opts->x_target_flags))
3194 opts->x_flag_schedule_insns = 0;
3195 else
3196 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3197
3198 /* Disable shrink-wrap when optimizing function for size, since it tends to
3199 generate additional returns. */
3200 if (optimize_function_for_size_p (cfun)
3201 && TARGET_THUMB2_P (opts->x_target_flags))
3202 opts->x_flag_shrink_wrap = false;
3203 else
3204 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3205
3206 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3207 - epilogue_insns - does not accurately model the corresponding insns
3208 emitted in the asm file. In particular, see the comment in thumb_exit
3209 'Find out how many of the (return) argument registers we can corrupt'.
3210 As a consequence, the epilogue may clobber registers without fipa-ra
3211 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3212 TODO: Accurately model clobbers for epilogue_insns and reenable
3213 fipa-ra. */
3214 if (TARGET_THUMB1_P (opts->x_target_flags))
3215 opts->x_flag_ipa_ra = 0;
3216 else
3217 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3218
3219 /* Thumb2 inline assembly code should always use unified syntax.
3220 This will apply to ARM and Thumb1 eventually. */
3221 if (TARGET_THUMB2_P (opts->x_target_flags))
3222 opts->x_inline_asm_unified = true;
3223
3224 if (arm_stack_protector_guard == SSP_GLOBAL
3225 && opts->x_arm_stack_protector_guard_offset_str)
3226 {
3227 error ("incompatible options %<-mstack-protector-guard=global%> and "
3228 "%<-mstack-protector-guard-offset=%s%>",
3229 arm_stack_protector_guard_offset_str);
3230 }
3231
3232 if (opts->x_arm_stack_protector_guard_offset_str)
3233 {
3234 char *end;
3235 const char *str = arm_stack_protector_guard_offset_str;
3236 errno = 0;
3237 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3238 if (!*str || *end || errno)
3239 error ("%qs is not a valid offset in %qs", str,
3240 "-mstack-protector-guard-offset=");
3241 arm_stack_protector_guard_offset = offs;
3242 }
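/* A usage sketch for the two checks above (illustrative command lines only):
   the offset is parsed by strtol with base 0, so decimal and hexadecimal
   spellings both work, and the offset only makes sense with the TLS guard:

     gcc -mstack-protector-guard=tls -mstack-protector-guard-offset=0x20 ...   # accepted
     gcc -mstack-protector-guard=global -mstack-protector-guard-offset=32 ...  # rejected above  */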
3243
3244 if (arm_current_function_pac_enabled_p ())
3245 {
3246 if (!arm_arch8m_main)
3247 error ("This architecture does not support branch protection "
3248 "instructions");
3249 if (TARGET_TPCS_FRAME)
3250 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3251 }
3252
3253 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3254 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3255 #endif
3256 }
3257
3258 static sbitmap isa_all_fpubits_internal;
3259 static sbitmap isa_all_fpbits;
3260 static sbitmap isa_quirkbits;
3261
3262 /* Configure a build target TARGET from the user-specified options OPTS.
3263 If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3264 architecture have been specified, but the two are not identical.  */
3265 void
3266 arm_configure_build_target (struct arm_build_target *target,
3267 struct cl_target_option *opts,
3268 bool warn_compatible)
3269 {
3270 const cpu_option *arm_selected_tune = NULL;
3271 const arch_option *arm_selected_arch = NULL;
3272 const cpu_option *arm_selected_cpu = NULL;
3273 const arm_fpu_desc *arm_selected_fpu = NULL;
3274 const char *tune_opts = NULL;
3275 const char *arch_opts = NULL;
3276 const char *cpu_opts = NULL;
3277
3278 bitmap_clear (target->isa);
3279 target->core_name = NULL;
3280 target->arch_name = NULL;
3281
3282 if (opts->x_arm_arch_string)
3283 {
3284 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3285 "-march",
3286 opts->x_arm_arch_string);
3287 arch_opts = strchr (opts->x_arm_arch_string, '+');
3288 }
3289
3290 if (opts->x_arm_cpu_string)
3291 {
3292 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3293 opts->x_arm_cpu_string);
3294 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3295 arm_selected_tune = arm_selected_cpu;
3296 /* If taking the tuning from -mcpu, we don't need to rescan the
3297 options for tuning. */
3298 }
3299
3300 if (opts->x_arm_tune_string)
3301 {
3302 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3303 opts->x_arm_tune_string);
3304 tune_opts = strchr (opts->x_arm_tune_string, '+');
3305 }
3306
3307 if (opts->x_arm_branch_protection_string)
3308 {
3309 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);
3310
3311 if (aarch_ra_sign_key != AARCH_KEY_A)
3312 {
3313 warning (0, "invalid key type for %<-mbranch-protection=%>");
3314 aarch_ra_sign_key = AARCH_KEY_A;
3315 }
3316 }
3317
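/* An illustrative example of how the reconciliation below plays out (not an
   exhaustive description): -march=armv7-a -mcpu=cortex-m4 selects two
   different ISA sets, so with WARN_COMPATIBLE a "switch -mcpu=... conflicts
   with switch -march=..." warning is issued; -march=armv7-a then wins for
   code generation while cortex-m4 still supplies the default tuning.  */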
3318 if (arm_selected_arch)
3319 {
3320 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3321 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3322 arch_opts);
3323
3324 if (arm_selected_cpu)
3325 {
3326 auto_sbitmap cpu_isa (isa_num_bits);
3327 auto_sbitmap isa_delta (isa_num_bits);
3328
3329 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3330 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3331 cpu_opts);
3332 bitmap_xor (isa_delta, cpu_isa, target->isa);
3333 /* Ignore any bits that are quirk bits. */
3334 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3335 /* If the user (or the default configuration) has specified a
3336 specific FPU, then ignore any bits that depend on the FPU
3337 configuration. Do similarly if using the soft-float
3338 ABI. */
3339 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3340 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3341 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3342
3343 if (!bitmap_empty_p (isa_delta))
3344 {
3345 if (warn_compatible)
3346 warning (0, "switch %<-mcpu=%s%> conflicts "
3347 "with switch %<-march=%s%>",
3348 opts->x_arm_cpu_string,
3349 opts->x_arm_arch_string);
3350
3351 /* -march wins for code generation.
3352 -mcpu wins for default tuning. */
3353 if (!arm_selected_tune)
3354 arm_selected_tune = arm_selected_cpu;
3355
3356 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3357 target->arch_name = arm_selected_arch->common.name;
3358 }
3359 else
3360 {
3361 /* Architecture and CPU are essentially the same.
3362 Prefer the CPU setting. */
3363 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3364 target->core_name = arm_selected_cpu->common.name;
3365 /* Copy the CPU's capabilities, so that we inherit the
3366 appropriate extensions and quirks. */
3367 bitmap_copy (target->isa, cpu_isa);
3368 }
3369 }
3370 else
3371 {
3372 /* Pick a CPU based on the architecture. */
3373 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3374 target->arch_name = arm_selected_arch->common.name;
3375 /* Note: target->core_name is left unset in this path. */
3376 }
3377 }
3378 else if (arm_selected_cpu)
3379 {
3380 target->core_name = arm_selected_cpu->common.name;
3381 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3382 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3383 cpu_opts);
3384 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3385 }
3386 /* If the user did not specify a processor or architecture, choose
3387 one for them. */
3388 else
3389 {
3390 const cpu_option *sel;
3391 auto_sbitmap sought_isa (isa_num_bits);
3392 bitmap_clear (sought_isa);
3393 auto_sbitmap default_isa (isa_num_bits);
3394
3395 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3396 TARGET_CPU_DEFAULT);
3397 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3398 gcc_assert (arm_selected_cpu->common.name);
3399
3400 /* RWE: All of the selection logic below (to the end of this
3401 'if' clause) looks somewhat suspect. It appears to be mostly
3402 there to support forcing thumb support when the default CPU
3403 does not have thumb (somewhat dubious in terms of what the
3404 user might be expecting). I think it should be removed once
3405 support for the pre-thumb era cores is removed. */
3406 sel = arm_selected_cpu;
3407 arm_initialize_isa (default_isa, sel->common.isa_bits);
3408 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3409 cpu_opts);
3410
3411 /* Now check to see if the user has specified any command line
3412 switches that require certain abilities from the cpu. */
3413
3414 if (TARGET_INTERWORK || TARGET_THUMB)
3415 bitmap_set_bit (sought_isa, isa_bit_thumb);
3416
3417 /* If there are such requirements and the default CPU does not
3418 satisfy them, we need to run over the complete list of
3419 cores looking for one that is satisfactory. */
3420 if (!bitmap_empty_p (sought_isa)
3421 && !bitmap_subset_p (sought_isa, default_isa))
3422 {
3423 auto_sbitmap candidate_isa (isa_num_bits);
3424 /* We're only interested in a CPU with at least the
3425 capabilities of the default CPU and the required
3426 additional features. */
3427 bitmap_ior (default_isa, default_isa, sought_isa);
3428
3429 /* Try to locate a CPU type that supports all of the abilities
3430 of the default CPU, plus the extra abilities requested by
3431 the user. */
3432 for (sel = all_cores; sel->common.name != NULL; sel++)
3433 {
3434 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3435 /* An exact match? */
3436 if (bitmap_equal_p (default_isa, candidate_isa))
3437 break;
3438 }
3439
3440 if (sel->common.name == NULL)
3441 {
3442 unsigned current_bit_count = isa_num_bits;
3443 const cpu_option *best_fit = NULL;
3444
3445 /* Ideally we would like to issue an error message here
3446 saying that it was not possible to find a CPU compatible
3447 with the default CPU, but which also supports the command
3448 line options specified by the programmer, and so they
3449 ought to use the -mcpu=<name> command line option to
3450 override the default CPU type.
3451
3452 If we cannot find a CPU that has exactly the
3453 characteristics of the default CPU and the given
3454 command line options we scan the array again looking
3455 for a best match. The best match must have at least
3456 the capabilities of the perfect match. */
3457 for (sel = all_cores; sel->common.name != NULL; sel++)
3458 {
3459 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3460
3461 if (bitmap_subset_p (default_isa, candidate_isa))
3462 {
3463 unsigned count;
3464
3465 bitmap_and_compl (candidate_isa, candidate_isa,
3466 default_isa);
3467 count = bitmap_popcount (candidate_isa);
3468
3469 if (count < current_bit_count)
3470 {
3471 best_fit = sel;
3472 current_bit_count = count;
3473 }
3474 }
3475
3476 gcc_assert (best_fit);
3477 sel = best_fit;
3478 }
3479 }
3480 arm_selected_cpu = sel;
3481 }
3482
3483 /* Now we know the CPU, we can finally initialize the target
3484 structure. */
3485 target->core_name = arm_selected_cpu->common.name;
3486 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3487 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3488 cpu_opts);
3489 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3490 }
3491
3492 gcc_assert (arm_selected_cpu);
3493 gcc_assert (arm_selected_arch);
3494
3495 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3496 {
3497 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3498 auto_sbitmap fpu_bits (isa_num_bits);
3499
3500 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3501 /* This should clear out ALL bits relating to the FPU/simd
3502 extensions, to avoid potentially invalid combinations later on
3503 that we can't match. At present we only clear out those bits
3504 that can be set by -mfpu. This should be fixed in GCC-12. */
3505 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3506 bitmap_ior (target->isa, target->isa, fpu_bits);
3507 }
3508
3509 /* If we have the soft-float ABI, clear any feature bits relating to use of
3510 floating-point operations. They'll just confuse things later on. */
3511 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3512 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3513
3514 /* There may be implied bits which we still need to enable. These are
3515 non-named features which are needed to complete other sets of features,
3516 but cannot be enabled from arm-cpus.in due to being shared between
3517 multiple fgroups. Each entry in all_implied_fbits is of the form
3518 ante -> cons, meaning that if the feature "ante" is enabled, we should
3519 implicitly enable "cons". */
3520 const struct fbit_implication *impl = all_implied_fbits;
3521 while (impl->ante)
3522 {
3523 if (bitmap_bit_p (target->isa, impl->ante))
3524 bitmap_set_bit (target->isa, impl->cons);
3525 impl++;
3526 }
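/* A minimal sketch of what one implication entry means (the feature names
   here are hypothetical; the real table is generated from arm-cpus.in):

     { .ante = isa_bit_foo, .cons = isa_bit_bar }   -- enabling foo enables bar

   with the table terminated by an entry whose ante field is zero.  */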
3527
3528 if (!arm_selected_tune)
3529 arm_selected_tune = arm_selected_cpu;
3530 else /* Validate the features passed to -mtune. */
3531 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3532
3533 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3534
3535 /* Finish initializing the target structure. */
3536 if (!target->arch_name)
3537 target->arch_name = arm_selected_arch->common.name;
3538 target->arch_pp_name = arm_selected_arch->arch;
3539 target->base_arch = arm_selected_arch->base_arch;
3540 target->profile = arm_selected_arch->profile;
3541
3542 target->tune_flags = tune_data->tune_flags;
3543 target->tune = tune_data->tune;
3544 target->tune_core = tune_data->scheduler;
3545 }
3546
3547 /* Fix up any incompatible options that the user has specified. */
3548 static void
3549 arm_option_override (void)
3550 {
3551 static const enum isa_feature fpu_bitlist_internal[]
3552 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3553 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3554 static const enum isa_feature fp_bitlist[]
3555 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3556 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3557 cl_target_option opts;
3558
3559 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3560 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3561
3562 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3563 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3564 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3565 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3566
3567 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3568
3569 if (!OPTION_SET_P (arm_fpu_index))
3570 {
3571 bool ok;
3572 int fpu_index;
3573
3574 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3575 CL_TARGET);
3576 gcc_assert (ok);
3577 arm_fpu_index = (enum fpu_type) fpu_index;
3578 }
3579
3580 cl_target_option_save (&opts, &global_options, &global_options_set);
3581 arm_configure_build_target (&arm_active_target, &opts, true);
3582
3583 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3584 SUBTARGET_OVERRIDE_OPTIONS;
3585 #endif
3586
3587 /* Initialize boolean versions of the architectural flags, for use
3588 in the arm.md file and for enabling feature flags. */
3589 arm_option_reconfigure_globals ();
3590
3591 arm_tune = arm_active_target.tune_core;
3592 tune_flags = arm_active_target.tune_flags;
3593 current_tune = arm_active_target.tune;
3594
3595 /* TBD: Dwarf info for apcs frame is not handled yet. */
3596 if (TARGET_APCS_FRAME)
3597 flag_shrink_wrap = false;
3598
3599 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3600 {
3601 warning (0, "%<-mapcs-stack-check%> incompatible with "
3602 "%<-mno-apcs-frame%>");
3603 target_flags |= MASK_APCS_FRAME;
3604 }
3605
3606 if (TARGET_POKE_FUNCTION_NAME)
3607 target_flags |= MASK_APCS_FRAME;
3608
3609 if (TARGET_APCS_REENT && flag_pic)
3610 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3611
3612 if (TARGET_APCS_REENT)
3613 warning (0, "APCS reentrant code not supported. Ignored");
3614
3615 /* Set up some tuning parameters. */
3616 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3617 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3618 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3619 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3620 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3621 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3622
3623 /* For arm2/3 there is no need to do any scheduling if we are doing
3624 software floating-point. */
3625 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3626 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3627
3628 /* Override the default structure alignment for AAPCS ABI. */
3629 if (!OPTION_SET_P (arm_structure_size_boundary))
3630 {
3631 if (TARGET_AAPCS_BASED)
3632 arm_structure_size_boundary = 8;
3633 }
3634 else
3635 {
3636 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3637
3638 if (arm_structure_size_boundary != 8
3639 && arm_structure_size_boundary != 32
3640 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3641 {
3642 if (ARM_DOUBLEWORD_ALIGN)
3643 warning (0,
3644 "structure size boundary can only be set to 8, 32 or 64");
3645 else
3646 warning (0, "structure size boundary can only be set to 8 or 32");
3647 arm_structure_size_boundary
3648 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3649 }
3650 }
3651
3652 if (TARGET_VXWORKS_RTP)
3653 {
3654 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3655 arm_pic_data_is_text_relative = 0;
3656 }
3657 else if (flag_pic
3658 && !arm_pic_data_is_text_relative
3659 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3660 /* When the text and data segments don't have a fixed displacement, the
3661 intended use is with a single, read-only PIC base register.
3662 Unless the user explicitly requested not to do that, set
3663 it.  */
3664 target_flags |= MASK_SINGLE_PIC_BASE;
3665
3666 /* If stack checking is disabled, we can use r10 as the PIC register,
3667 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3668 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3669 {
3670 if (TARGET_VXWORKS_RTP)
3671 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3672 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3673 }
3674
3675 if (flag_pic && TARGET_VXWORKS_RTP)
3676 arm_pic_register = 9;
3677
3678 /* If in FDPIC mode then force arm_pic_register to be r9. */
3679 if (TARGET_FDPIC)
3680 {
3681 arm_pic_register = FDPIC_REGNUM;
3682 if (TARGET_THUMB1)
3683 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3684 }
3685
3686 if (arm_pic_register_string != NULL)
3687 {
3688 int pic_register = decode_reg_name (arm_pic_register_string);
3689
3690 if (!flag_pic)
3691 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3692
3693 /* Prevent the user from choosing an obviously stupid PIC register. */
3694 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3695 || pic_register == HARD_FRAME_POINTER_REGNUM
3696 || pic_register == STACK_POINTER_REGNUM
3697 || pic_register >= PC_REGNUM
3698 || (TARGET_VXWORKS_RTP
3699 && (unsigned int) pic_register != arm_pic_register))
3700 error ("unable to use %qs for PIC register", arm_pic_register_string);
3701 else
3702 arm_pic_register = pic_register;
3703 }
3704
3705 if (flag_pic)
3706 target_word_relocations = 1;
3707
3708 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3709 if (fix_cm3_ldrd == 2)
3710 {
3711 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3712 fix_cm3_ldrd = 1;
3713 else
3714 fix_cm3_ldrd = 0;
3715 }
3716
3717 /* Enable fix_vlldm by default if required. */
3718 if (fix_vlldm == 2)
3719 {
3720 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3721 fix_vlldm = 1;
3722 else
3723 fix_vlldm = 0;
3724 }
3725
3726 /* Enable fix_aes by default if required. */
3727 if (fix_aes_erratum_1742098 == 2)
3728 {
3729 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3730 fix_aes_erratum_1742098 = 1;
3731 else
3732 fix_aes_erratum_1742098 = 0;
3733 }
3734
3735 /* Hot/Cold partitioning is not currently supported, since we can't
3736 handle literal pool placement in that case. */
3737 if (flag_reorder_blocks_and_partition)
3738 {
3739 inform (input_location,
3740 "%<-freorder-blocks-and-partition%> not supported "
3741 "on this architecture");
3742 flag_reorder_blocks_and_partition = 0;
3743 flag_reorder_blocks = 1;
3744 }
3745
3746 if (flag_pic)
3747 /* Hoisting PIC address calculations more aggressively provides a small,
3748 but measurable, size reduction for PIC code. Therefore, we decrease
3749 the bar for unrestricted expression hoisting to the cost of PIC address
3750 calculation, which is 2 instructions. */
3751 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3752 param_gcse_unrestricted_cost, 2);
3753
3754 /* ARM EABI defaults to strict volatile bitfields. */
3755 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3756 && abi_version_at_least(2))
3757 flag_strict_volatile_bitfields = 1;
3758
3759 /* Enable software prefetching at -O3 for CPUs that have prefetch and for
3760 which we have deemed it beneficial (signified by setting
3761 prefetch.num_slots to 1 or more).  */
3762 if (flag_prefetch_loop_arrays < 0
3763 && HAVE_prefetch
3764 && optimize >= 3
3765 && current_tune->prefetch.num_slots > 0)
3766 flag_prefetch_loop_arrays = 1;
3767
3768 /* Set up parameters to be used in prefetching algorithm. Do not
3769 override the defaults unless we are tuning for a core we have
3770 researched values for. */
3771 if (current_tune->prefetch.num_slots > 0)
3772 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3773 param_simultaneous_prefetches,
3774 current_tune->prefetch.num_slots);
3775 if (current_tune->prefetch.l1_cache_line_size >= 0)
3776 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3777 param_l1_cache_line_size,
3778 current_tune->prefetch.l1_cache_line_size);
3779 if (current_tune->prefetch.l1_cache_line_size >= 0)
3780 {
3781 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3782 param_destruct_interfere_size,
3783 current_tune->prefetch.l1_cache_line_size);
3784 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3785 param_construct_interfere_size,
3786 current_tune->prefetch.l1_cache_line_size);
3787 }
3788 else
3789 {
3790 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3791 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3792 constructive? */
3793 /* More recent Cortex chips have a 64-byte cache line, but are marked
3794 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3795 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3796 param_destruct_interfere_size, 64);
3797 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3798 param_construct_interfere_size, 64);
3799 }
3800
3801 if (current_tune->prefetch.l1_cache_size >= 0)
3802 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3803 param_l1_cache_size,
3804 current_tune->prefetch.l1_cache_size);
3805
3806 /* Look through ready list and all of queue for instructions
3807 relevant for L2 auto-prefetcher. */
3808 int sched_autopref_queue_depth;
3809
3810 switch (current_tune->sched_autopref)
3811 {
3812 case tune_params::SCHED_AUTOPREF_OFF:
3813 sched_autopref_queue_depth = -1;
3814 break;
3815
3816 case tune_params::SCHED_AUTOPREF_RANK:
3817 sched_autopref_queue_depth = 0;
3818 break;
3819
3820 case tune_params::SCHED_AUTOPREF_FULL:
3821 sched_autopref_queue_depth = max_insn_queue_index + 1;
3822 break;
3823
3824 default:
3825 gcc_unreachable ();
3826 }
3827
3828 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3829 param_sched_autopref_queue_depth,
3830 sched_autopref_queue_depth);
3831
3832 /* Currently, for slow flash data, we just disable literal pools.  We also
3833 disable them for pure-code.  */
3834 if (target_slow_flash_data || target_pure_code)
3835 arm_disable_literal_pool = true;
3836
3837 /* Disable scheduling fusion by default if the processor is not ARMv7
3838 or does not prefer ldrd/strd.  */
3839 if (flag_schedule_fusion == 2
3840 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3841 flag_schedule_fusion = 0;
3842
3843 /* Need to remember initial options before they are overridden.  */
3844 init_optimize = build_optimization_node (&global_options,
3845 &global_options_set);
3846
3847 arm_options_perform_arch_sanity_checks ();
3848 arm_option_override_internal (&global_options, &global_options_set);
3849 arm_option_check_internal (&global_options);
3850 arm_option_params_internal ();
3851
3852 /* Create the default target_options structure. */
3853 target_option_default_node = target_option_current_node
3854 = build_target_option_node (&global_options, &global_options_set);
3855
3856 /* Register global variables with the garbage collector. */
3857 arm_add_gc_roots ();
3858
3859 /* Record the initial mode, for -mflip-thumb testing.  */
3860 thumb_flipper = TARGET_THUMB;
3861 }
3862
3863
3864 /* Reconfigure global status flags from the active_target.isa. */
3865 void
3866 arm_option_reconfigure_globals (void)
3867 {
3868 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3869 arm_base_arch = arm_active_target.base_arch;
3870
3871 /* Initialize boolean versions of the architectural flags, for use
3872 in the arm.md file. */
3873 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3874 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3875 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3876 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3877 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3878 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3879 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3880 arm_arch6m = arm_arch6 && !arm_arch_notm;
3881 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3882 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3883 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3884 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3885 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3886 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3887 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3888 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3889 isa_bit_armv8_1m_main);
3890 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3891 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3892 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3893 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3894 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3895 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3896 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3897 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3898 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3899 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3900 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3901 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3902 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3903
3904 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3905 if (arm_fp16_inst)
3906 {
3907 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3908 error ("selected fp16 options are incompatible");
3909 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3910 }
3911
3912 arm_arch_cde = 0;
3913 arm_arch_cde_coproc = 0;
3914 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3915 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3916 isa_bit_cdecp6, isa_bit_cdecp7};
3917 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3918 {
3919 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3920 if (cde_bit)
3921 {
3922 arm_arch_cde |= cde_bit;
3923 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3924 }
3925 }
3926
3927 /* And finally, set up some quirks. */
3928 arm_arch_no_volatile_ce
3929 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3930 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3931 isa_bit_quirk_armv6kz);
3932
3933 /* Use the cp15 method if it is available. */
3934 if (target_thread_pointer == TP_AUTO)
3935 {
3936 if (arm_arch6k && !TARGET_THUMB1)
3937 target_thread_pointer = TP_TPIDRURO;
3938 else
3939 target_thread_pointer = TP_SOFT;
3940 }
3941
3942 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3943 error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3944 }
3945
3946 /* Perform some validation of the desired architecture against the rest of the
3947 options.  */
3948 void
3949 arm_options_perform_arch_sanity_checks (void)
3950 {
3951 /* V5T code we generate is completely interworking capable, so we turn off
3952 TARGET_INTERWORK here to avoid many tests later on. */
3953
3954 /* XXX However, we must pass the right pre-processor defines to CPP
3955 or GLD can get confused. This is a hack. */
3956 if (TARGET_INTERWORK)
3957 arm_cpp_interwork = 1;
3958
3959 if (arm_arch5t)
3960 target_flags &= ~MASK_INTERWORK;
3961
3962 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3963 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3964
3965 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3966 error ("iwmmxt abi requires an iwmmxt capable cpu");
3967
3968 /* BPABI targets use linker tricks to allow interworking on cores
3969 without thumb support. */
3970 if (TARGET_INTERWORK
3971 && !TARGET_BPABI
3972 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3973 {
3974 warning (0, "target CPU does not support interworking" );
3975 target_flags &= ~MASK_INTERWORK;
3976 }
3977
3978 /* If soft-float is specified then don't use FPU. */
3979 if (TARGET_SOFT_FLOAT)
3980 arm_fpu_attr = FPU_NONE;
3981 else
3982 arm_fpu_attr = FPU_VFP;
3983
3984 if (TARGET_AAPCS_BASED)
3985 {
3986 if (TARGET_CALLER_INTERWORKING)
3987 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3988 else
3989 if (TARGET_CALLEE_INTERWORKING)
3990 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3991 }
3992
3993 /* __fp16 support currently assumes the core has ldrh. */
3994 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3995 sorry ("%<__fp16%> and no ldrh");
3996
3997 if (use_cmse && !arm_arch_cmse)
3998 error ("target CPU does not support ARMv8-M Security Extensions");
3999
4000 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
4001 and ARMv8-M Baseline and Mainline do not allow such a configuration.  */
4002 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
4003 error ("ARMv8-M Security Extensions incompatible with selected FPU");
4004
4005
4006 if (TARGET_AAPCS_BASED)
4007 {
4008 if (arm_abi == ARM_ABI_IWMMXT)
4009 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
4010 else if (TARGET_HARD_FLOAT_ABI)
4011 {
4012 arm_pcs_default = ARM_PCS_AAPCS_VFP;
4013 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
4014 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
4015 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
4016 }
4017 else
4018 arm_pcs_default = ARM_PCS_AAPCS;
4019 }
4020 else
4021 {
4022 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
4023 sorry ("%<-mfloat-abi=hard%> and VFP");
4024
4025 if (arm_abi == ARM_ABI_APCS)
4026 arm_pcs_default = ARM_PCS_APCS;
4027 else
4028 arm_pcs_default = ARM_PCS_ATPCS;
4029 }
4030 }
4031
4032 /* Test whether a local function descriptor is canonical, i.e.,
4033 whether we can use GOTOFFFUNCDESC to compute the address of the
4034 function. */
4035 static bool
4036 arm_fdpic_local_funcdesc_p (rtx fnx)
4037 {
4038 tree fn;
4039 enum symbol_visibility vis;
4040 bool ret;
4041
4042 if (!TARGET_FDPIC)
4043 return true;
4044
4045 if (! SYMBOL_REF_LOCAL_P (fnx))
4046 return false;
4047
4048 fn = SYMBOL_REF_DECL (fnx);
4049
4050 if (! fn)
4051 return false;
4052
4053 vis = DECL_VISIBILITY (fn);
4054
4055 if (vis == VISIBILITY_PROTECTED)
4056 /* Private function descriptors for protected functions are not
4057 canonical. Temporarily change the visibility to global so that
4058 we can ensure uniqueness of funcdesc pointers. */
4059 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4060
4061 ret = default_binds_local_p_1 (fn, flag_pic);
4062
4063 DECL_VISIBILITY (fn) = vis;
4064
4065 return ret;
4066 }
4067
4068 static void
4069 arm_add_gc_roots (void)
4070 {
4071 gcc_obstack_init(&minipool_obstack);
4072 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4073 }
4074 \f
4075 /* A table of known ARM exception types.
4076 For use with the interrupt function attribute. */
4077
4078 typedef struct
4079 {
4080 const char *const arg;
4081 const unsigned long return_value;
4082 }
4083 isr_attribute_arg;
4084
4085 static const isr_attribute_arg isr_attribute_args [] =
4086 {
4087 { "IRQ", ARM_FT_ISR },
4088 { "irq", ARM_FT_ISR },
4089 { "FIQ", ARM_FT_FIQ },
4090 { "fiq", ARM_FT_FIQ },
4091 { "ABORT", ARM_FT_ISR },
4092 { "abort", ARM_FT_ISR },
4093 { "UNDEF", ARM_FT_EXCEPTION },
4094 { "undef", ARM_FT_EXCEPTION },
4095 { "SWI", ARM_FT_EXCEPTION },
4096 { "swi", ARM_FT_EXCEPTION },
4097 { NULL, ARM_FT_NORMAL }
4098 };
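/* Example uses of these attributes in user code (a sketch; the string must
   match one of the entries above, and omitting it defaults to IRQ, as
   handled in arm_isr_value below):

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
     void __attribute__ ((isr ("FIQ"))) fiq_handler (void);  */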
4099
4100 /* Return the interrupt function type encoded by ARGUMENT (the argument of an
4101 "isr"/"interrupt" attribute), or ARM_FT_UNKNOWN if it cannot be determined.  */
4102
4103 static unsigned long
4104 arm_isr_value (tree argument)
4105 {
4106 const isr_attribute_arg * ptr;
4107 const char * arg;
4108
4109 if (!arm_arch_notm)
4110 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4111
4112 /* No argument - default to IRQ. */
4113 if (argument == NULL_TREE)
4114 return ARM_FT_ISR;
4115
4116 /* Get the value of the argument. */
4117 if (TREE_VALUE (argument) == NULL_TREE
4118 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4119 return ARM_FT_UNKNOWN;
4120
4121 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4122
4123 /* Check it against the list of known arguments. */
4124 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4125 if (streq (arg, ptr->arg))
4126 return ptr->return_value;
4127
4128 /* An unrecognized interrupt type. */
4129 return ARM_FT_UNKNOWN;
4130 }
4131
4132 /* Computes the type of the current function. */
4133
4134 static unsigned long
4135 arm_compute_func_type (void)
4136 {
4137 unsigned long type = ARM_FT_UNKNOWN;
4138 tree a;
4139 tree attr;
4140
4141 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4142
4143 /* Decide if the current function is volatile. Such functions
4144 never return, and many memory cycles can be saved by not storing
4145 register values that will never be needed again. This optimization
4146 was added to speed up context switching in a kernel application. */
4147 if (optimize > 0
4148 && (TREE_NOTHROW (current_function_decl)
4149 || !(flag_unwind_tables
4150 || (flag_exceptions
4151 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4152 && TREE_THIS_VOLATILE (current_function_decl))
4153 type |= ARM_FT_VOLATILE;
4154
4155 if (cfun->static_chain_decl != NULL)
4156 type |= ARM_FT_NESTED;
4157
4158 attr = DECL_ATTRIBUTES (current_function_decl);
4159
4160 a = lookup_attribute ("naked", attr);
4161 if (a != NULL_TREE)
4162 type |= ARM_FT_NAKED;
4163
4164 a = lookup_attribute ("isr", attr);
4165 if (a == NULL_TREE)
4166 a = lookup_attribute ("interrupt", attr);
4167
4168 if (a == NULL_TREE)
4169 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4170 else
4171 type |= arm_isr_value (TREE_VALUE (a));
4172
4173 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4174 type |= ARM_FT_CMSE_ENTRY;
4175
4176 return type;
4177 }
4178
4179 /* Returns the type of the current function. */
4180
4181 unsigned long
4182 arm_current_func_type (void)
4183 {
4184 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4185 cfun->machine->func_type = arm_compute_func_type ();
4186
4187 return cfun->machine->func_type;
4188 }
4189
4190 bool
4191 arm_allocate_stack_slots_for_args (void)
4192 {
4193 /* Naked functions should not allocate stack slots for arguments. */
4194 return !IS_NAKED (arm_current_func_type ());
4195 }
4196
4197 static bool
4198 arm_warn_func_return (tree decl)
4199 {
4200 /* Naked functions are implemented entirely in assembly, including the
4201 return sequence, so suppress warnings about this. */
4202 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4203 }
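/* For instance (a hypothetical example, not from this file), no
   missing-return warning is wanted for

     int __attribute__ ((naked)) f (void) { __asm__ ("bx lr"); }

   because the return sequence lives entirely in the asm.  */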
4204
4205 \f
4206 /* Output assembler code for a block containing the constant parts
4207 of a trampoline, leaving space for the variable parts.
4208
4209 On the ARM, (if r8 is the static chain regnum, and remembering that
4210 referencing pc adds an offset of 8) the trampoline looks like:
4211 ldr r8, [pc, #0]
4212 ldr pc, [pc]
4213 .word static chain value
4214 .word function's address
4215 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4216
4217 In FDPIC mode, the trampoline looks like:
4218 .word trampoline address
4219 .word trampoline GOT address
4220 ldr r12, [pc, #8] ; #4 for Arm mode
4221 ldr r9, [pc, #8] ; #4 for Arm mode
4222 ldr pc, [pc, #8] ; #4 for Arm mode
4223 .word static chain value
4224 .word GOT address
4225 .word function's address
4226 */
4227
4228 static void
4229 arm_asm_trampoline_template (FILE *f)
4230 {
4231 fprintf (f, "\t.syntax unified\n");
4232
4233 if (TARGET_FDPIC)
4234 {
4235 /* The first two words are a function descriptor pointing to the
4236 trampoline code just below. */
4237 if (TARGET_ARM)
4238 fprintf (f, "\t.arm\n");
4239 else if (TARGET_THUMB2)
4240 fprintf (f, "\t.thumb\n");
4241 else
4242 /* Only ARM and Thumb-2 are supported. */
4243 gcc_unreachable ();
4244
4245 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4246 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4247 /* Trampoline code which sets both the static chain register and the
4248 PIC register before jumping into the real code.  */
4249 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4250 STATIC_CHAIN_REGNUM, PC_REGNUM,
4251 TARGET_THUMB2 ? 8 : 4);
4252 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4253 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4254 TARGET_THUMB2 ? 8 : 4);
4255 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4256 PC_REGNUM, PC_REGNUM,
4257 TARGET_THUMB2 ? 8 : 4);
4258 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4259 }
4260 else if (TARGET_ARM)
4261 {
4262 fprintf (f, "\t.arm\n");
4263 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4264 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4265 }
4266 else if (TARGET_THUMB2)
4267 {
4268 fprintf (f, "\t.thumb\n");
4269 /* The Thumb-2 trampoline is similar to the arm implementation.
4270 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4271 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4272 STATIC_CHAIN_REGNUM, PC_REGNUM);
4273 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4274 }
4275 else
4276 {
4277 ASM_OUTPUT_ALIGN (f, 2);
4278 fprintf (f, "\t.code\t16\n");
4279 fprintf (f, ".Ltrampoline_start:\n");
4280 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4281 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4282 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4283 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4284 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4285 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4286 }
4287 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4288 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4289 }
4290
4291 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4292
4293 static void
4294 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4295 {
4296 rtx fnaddr, mem, a_tramp;
4297
4298 emit_block_move (m_tramp, assemble_trampoline_template (),
4299 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4300
4301 if (TARGET_FDPIC)
4302 {
4303 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4304 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4305 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4306 /* The function start address is at offset 8, but in Thumb mode
4307 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4308 below. */
4309 rtx trampoline_code_start
4310 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4311
4312 /* Write initial funcdesc which points to the trampoline. */
4313 mem = adjust_address (m_tramp, SImode, 0);
4314 emit_move_insn (mem, trampoline_code_start);
4315 mem = adjust_address (m_tramp, SImode, 4);
4316 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4317 /* Set up the static chain.  */
4318 mem = adjust_address (m_tramp, SImode, 20);
4319 emit_move_insn (mem, chain_value);
4320 /* GOT + real function entry point. */
4321 mem = adjust_address (m_tramp, SImode, 24);
4322 emit_move_insn (mem, gotaddr);
4323 mem = adjust_address (m_tramp, SImode, 28);
4324 emit_move_insn (mem, fnaddr);
4325 }
4326 else
4327 {
4328 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4329 emit_move_insn (mem, chain_value);
4330
4331 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4332 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4333 emit_move_insn (mem, fnaddr);
4334 }
4335
4336 a_tramp = XEXP (m_tramp, 0);
4337 maybe_emit_call_builtin___clear_cache (a_tramp,
4338 plus_constant (ptr_mode,
4339 a_tramp,
4340 TRAMPOLINE_SIZE));
4341 }
4342
4343 /* Thumb trampolines should be entered in thumb mode, so set
4344 the bottom bit of the address. */
4345
4346 static rtx
4347 arm_trampoline_adjust_address (rtx addr)
4348 {
4349 /* For FDPIC don't fix trampoline address since it's a function
4350 descriptor and not a function address. */
4351 if (TARGET_THUMB && !TARGET_FDPIC)
4352 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4353 NULL, 0, OPTAB_LIB_WIDEN);
4354 return addr;
4355 }
4356 \f
4357 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4358 includes call-clobbered registers too. If this is a leaf function
4359 we can just examine the registers used by the RTL, but otherwise we
4360 have to assume that whatever function is called might clobber
4361 anything, and so we have to save all the call-clobbered registers
4362 as well. */
4363 static inline bool reg_needs_saving_p (unsigned reg)
4364 {
4365 unsigned long func_type = arm_current_func_type ();
4366
4367 if (IS_INTERRUPT (func_type))
4368 if (df_regs_ever_live_p (reg)
4369 /* Save call-clobbered core registers. */
4370 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4371 return true;
4372 else
4373 return false;
4374 else
4375 if (!df_regs_ever_live_p (reg)
4376 || call_used_or_fixed_reg_p (reg))
4377 return false;
4378 else
4379 return true;
4380 }
4381
4382 /* Return 1 if it is possible to return using a single instruction.
4383 If SIBLING is non-null, this is a test for a return before a sibling
4384 call. SIBLING is the call insn, so we can examine its register usage. */
4385
4386 int
4387 use_return_insn (int iscond, rtx sibling)
4388 {
4389 int regno;
4390 unsigned int func_type;
4391 unsigned long saved_int_regs;
4392 unsigned HOST_WIDE_INT stack_adjust;
4393 arm_stack_offsets *offsets;
4394
4395 /* Never use a return instruction before reload has run. */
4396 if (!reload_completed)
4397 return 0;
4398
4399 /* Never use a return instruction when return address signing
4400 mechanism is enabled as it requires more than one
4401 instruction. */
4402 if (arm_current_function_pac_enabled_p ())
4403 return 0;
4404
4405 func_type = arm_current_func_type ();
4406
4407 /* Naked, volatile and stack alignment functions need special
4408 consideration. */
4409 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4410 return 0;
4411
4412 /* So do interrupt functions that use the frame pointer and Thumb
4413 interrupt functions. */
4414 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4415 return 0;
4416
4417 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4418 && !optimize_function_for_size_p (cfun))
4419 return 0;
4420
4421 offsets = arm_get_frame_offsets ();
4422 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4423
4424 /* As do variadic functions. */
4425 if (crtl->args.pretend_args_size
4426 || cfun->machine->uses_anonymous_args
4427 /* Or if the function calls __builtin_eh_return () */
4428 || crtl->calls_eh_return
4429 /* Or if the function calls alloca */
4430 || cfun->calls_alloca
4431 /* Or if there is a stack adjustment. However, if the stack pointer
4432 is saved on the stack, we can use a pre-incrementing stack load. */
4433 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4434 && stack_adjust == 4))
4435 /* Or if the static chain register was saved above the frame, under the
4436 assumption that the stack pointer isn't saved on the stack. */
4437 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4438 && arm_compute_static_chain_stack_bytes() != 0))
4439 return 0;
4440
4441 saved_int_regs = offsets->saved_regs_mask;
4442
4443 /* Unfortunately, the insn
4444
4445 ldmib sp, {..., sp, ...}
4446
4447 triggers a bug on most SA-110 based devices, such that the stack
4448 pointer won't be correctly restored if the instruction takes a
4449 page fault. We work around this problem by popping r3 along with
4450 the other registers, since that is never slower than executing
4451 another instruction.
4452
4453 We test for !arm_arch5t here, because code for any architecture
4454 less than this could potentially be run on one of the buggy
4455 chips. */
4456 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4457 {
4458 /* Validate that r3 is a call-clobbered register (always true in
4459 the default abi) ... */
4460 if (!call_used_or_fixed_reg_p (3))
4461 return 0;
4462
4463 /* ... that it isn't being used for a return value ... */
4464 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4465 return 0;
4466
4467 /* ... or for a tail-call argument ... */
4468 if (sibling)
4469 {
4470 gcc_assert (CALL_P (sibling));
4471
4472 if (find_regno_fusage (sibling, USE, 3))
4473 return 0;
4474 }
4475
4476 /* ... and that there are no call-saved registers in r0-r2
4477 (always true in the default ABI). */
4478 if (saved_int_regs & 0x7)
4479 return 0;
4480 }
4481
4482 /* Can't be done if interworking with Thumb, and any registers have been
4483 stacked. */
4484 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4485 return 0;
4486
4487 /* On StrongARM, conditional returns are expensive if they aren't
4488 taken and multiple registers have been stacked. */
4489 if (iscond && arm_tune_strongarm)
4490 {
4491 /* Conditional return when just the LR is stored is a simple
4492 conditional-load instruction, that's not expensive. */
4493 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4494 return 0;
4495
4496 if (flag_pic
4497 && arm_pic_register != INVALID_REGNUM
4498 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4499 return 0;
4500 }
4501
4502 /* ARMv8-M non-secure entry functions need to use bxns to return and thus need
4503 several instructions if anything needs to be popped. Armv8.1-M Mainline
4504 also needs several instructions to save and restore FP context. */
4505 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4506 return 0;
4507
4508 /* If there are saved registers but the LR isn't saved, then we need
4509 two instructions for the return. */
4510 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4511 return 0;
4512
4513 /* Can't be done if any of the VFP regs are pushed,
4514 since this also requires an insn. */
4515 if (TARGET_VFP_BASE)
4516 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4517 if (reg_needs_saving_p (regno))
4518 return 0;
4519
4520 if (TARGET_REALLY_IWMMXT)
4521 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4522 if (reg_needs_saving_p (regno))
4523 return 0;
4524
4525 return 1;
4526 }
4527
4528 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4529 shrink-wrapping if possible. This is the case if we need to emit a
4530 prologue, which we can test by looking at the offsets. */
4531 bool
4532 use_simple_return_p (void)
4533 {
4534 arm_stack_offsets *offsets;
4535
4536 /* Note this function can be called before or after reload. */
4537 if (!reload_completed)
4538 arm_compute_frame_layout ();
4539
4540 offsets = arm_get_frame_offsets ();
4541 return offsets->outgoing_args != 0;
4542 }
4543
4544 /* Return TRUE if int I is a valid immediate ARM constant. */
4545
4546 int
4547 const_ok_for_arm (HOST_WIDE_INT i)
4548 {
4549 int lowbit;
4550
4551 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4552 be all zero, or all one. */
4553 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4554 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4555 != ((~(unsigned HOST_WIDE_INT) 0)
4556 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4557 return FALSE;
4558
4559 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4560
4561 /* Fast return for 0 and small values. We must do this for zero, since
4562 the code below can't handle that one case. */
4563 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4564 return TRUE;
4565
4566 /* Get the number of trailing zeros. */
4567 lowbit = ffs((int) i) - 1;
4568
4569 /* Only even shifts are allowed in ARM mode so round down to the
4570 nearest even number. */
4571 if (TARGET_ARM)
4572 lowbit &= ~1;
4573
4574 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4575 return TRUE;
4576
4577 if (TARGET_ARM)
4578 {
4579 /* Allow rotated constants in ARM mode. */
4580 if (lowbit <= 4
4581 && ((i & ~0xc000003f) == 0
4582 || (i & ~0xf000000f) == 0
4583 || (i & ~0xfc000003) == 0))
4584 return TRUE;
4585 }
4586 else if (TARGET_THUMB2)
4587 {
4588 HOST_WIDE_INT v;
4589
4590 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4591 v = i & 0xff;
4592 v |= v << 16;
4593 if (i == v || i == (v | (v << 8)))
4594 return TRUE;
4595
4596 /* Allow repeated pattern 0xXY00XY00. */
4597 v = i & 0xff00;
4598 v |= v << 16;
4599 if (i == v)
4600 return TRUE;
4601 }
4602 else if (TARGET_HAVE_MOVT)
4603 {
4604 /* Thumb-1 Targets with MOVT. */
4605 if (i > 0xffff)
4606 return FALSE;
4607 else
4608 return TRUE;
4609 }
4610
4611 return FALSE;
4612 }
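/* Some worked examples for const_ok_for_arm above (illustrative only):

     0x000000ff  valid everywhere: fits in 8 bits.
     0xff000000  valid in ARM mode: 0xff rotated right by 8 (an even rotation).
     0x00000102  not valid in ARM mode (it would need an odd rotation), but
                 valid in Thumb-2 as 0x81 shifted left by 1.
     0x00ff00ff  not an 8-bit rotated value, but matches the Thumb-2
                 replicated pattern 0x00XY00XY.  */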
4613
4614 /* Return true if I is a valid constant for the operation CODE. */
4615 int
4616 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4617 {
4618 if (const_ok_for_arm (i))
4619 return 1;
4620
4621 switch (code)
4622 {
4623 case SET:
4624 /* See if we can use movw. */
4625 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4626 return 1;
4627 else
4628 /* Otherwise, try mvn. */
4629 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4630
4631 case PLUS:
4632 /* See if we can use addw or subw. */
4633 if (TARGET_THUMB2
4634 && ((i & 0xfffff000) == 0
4635 || ((-i) & 0xfffff000) == 0))
4636 return 1;
4637 /* Fall through. */
4638 case COMPARE:
4639 case EQ:
4640 case NE:
4641 case GT:
4642 case LE:
4643 case LT:
4644 case GE:
4645 case GEU:
4646 case LTU:
4647 case GTU:
4648 case LEU:
4649 case UNORDERED:
4650 case ORDERED:
4651 case UNEQ:
4652 case UNGE:
4653 case UNLT:
4654 case UNGT:
4655 case UNLE:
4656 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4657
4658 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4659 case XOR:
4660 return 0;
4661
4662 case IOR:
4663 if (TARGET_THUMB2)
4664 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4665 return 0;
4666
4667 case AND:
4668 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4669
4670 default:
4671 gcc_unreachable ();
4672 }
4673 }
4674
4675 /* Return true if I is a valid DImode constant for the operation CODE.  */
4676 int
4677 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4678 {
4679 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4680 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4681 rtx hi = GEN_INT (hi_val);
4682 rtx lo = GEN_INT (lo_val);
4683
4684 if (TARGET_THUMB1)
4685 return 0;
4686
4687 switch (code)
4688 {
4689 case AND:
4690 case IOR:
4691 case XOR:
4692 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4693 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4694 case PLUS:
4695 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4696
4697 default:
4698 return 0;
4699 }
4700 }
4701
4702 /* Emit a sequence of insns to handle a large constant.
4703 CODE is the code of the operation required, it can be any of SET, PLUS,
4704 IOR, AND, XOR, MINUS;
4705 MODE is the mode in which the operation is being performed;
4706 VAL is the integer to operate on;
4707 SOURCE is the other operand (a register, or a null-pointer for SET);
4708 SUBTARGETS means it is safe to create scratch registers if that will
4709 either produce a simpler sequence, or we will want to cse the values.
4710 Return value is the number of insns emitted. */
4711
4712 /* ??? Tweak this for thumb2. */
4713 int
4714 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4715 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4716 {
4717 rtx cond;
4718
4719 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4720 cond = COND_EXEC_TEST (PATTERN (insn));
4721 else
4722 cond = NULL_RTX;
4723
4724 if (subtargets || code == SET
4725 || (REG_P (target) && REG_P (source)
4726 && REGNO (target) != REGNO (source)))
4727 {
4728 /* After arm_reorg has been called, we can't fix up expensive
4729 constants by pushing them into memory so we must synthesize
4730 them in-line, regardless of the cost. This is only likely to
4731 be more costly on chips that have load delay slots and we are
4732 compiling without running the scheduler (so no splitting
4733 occurred before the final instruction emission).
4734
4735 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4736 */
4737 if (!cfun->machine->after_arm_reorg
4738 && !cond
4739 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4740 1, 0)
4741 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4742 + (code != SET))))
4743 {
4744 if (code == SET)
4745 {
4746 /* Currently SET is the only monadic value for CODE; all
4747 the rest are dyadic.  */
4748 if (TARGET_USE_MOVT)
4749 arm_emit_movpair (target, GEN_INT (val));
4750 else
4751 emit_set_insn (target, GEN_INT (val));
4752
4753 return 1;
4754 }
4755 else
4756 {
4757 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4758
4759 if (TARGET_USE_MOVT)
4760 arm_emit_movpair (temp, GEN_INT (val));
4761 else
4762 emit_set_insn (temp, GEN_INT (val));
4763
4764 /* For MINUS, VAL is the minuend (the value subtracted from), since we
4765 never have subtraction of a constant.  */
4766 if (code == MINUS)
4767 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4768 else
4769 emit_set_insn (target,
4770 gen_rtx_fmt_ee (code, mode, source, temp));
4771 return 2;
4772 }
4773 }
4774 }
4775
4776 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4777 1);
4778 }
4779
4780 /* Return, in RETURN_SEQUENCE, a sequence of integers that fit into
4781 ARM/Thumb-2 immediates and that add up to VAL.
4782 The function return value gives the number of insns required.  */
4783 static int
4784 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4785 struct four_ints *return_sequence)
4786 {
4787 int best_consecutive_zeros = 0;
4788 int i;
4789 int best_start = 0;
4790 int insns1, insns2;
4791 struct four_ints tmp_sequence;
4792
4793 /* If we aren't targeting ARM, the best place to start is always at
4794 the bottom, otherwise look more closely. */
4795 if (TARGET_ARM)
4796 {
4797 for (i = 0; i < 32; i += 2)
4798 {
4799 int consecutive_zeros = 0;
4800
4801 if (!(val & (3 << i)))
4802 {
4803 while ((i < 32) && !(val & (3 << i)))
4804 {
4805 consecutive_zeros += 2;
4806 i += 2;
4807 }
4808 if (consecutive_zeros > best_consecutive_zeros)
4809 {
4810 best_consecutive_zeros = consecutive_zeros;
4811 best_start = i - consecutive_zeros;
4812 }
4813 i -= 2;
4814 }
4815 }
4816 }
4817
4818 /* So long as it won't require any more insns to do so, it's
4819 desirable to emit a small constant (in bits 0...9) in the last
4820 insn. This way there is more chance that it can be combined with
4821 a later addressing insn to form a pre-indexed load or store
4822 operation. Consider:
4823
4824 *((volatile int *)0xe0000100) = 1;
4825 *((volatile int *)0xe0000110) = 2;
4826
4827 We want this to wind up as:
4828
4829 mov rA, #0xe0000000
4830 mov rB, #1
4831 str rB, [rA, #0x100]
4832 mov rB, #2
4833 str rB, [rA, #0x110]
4834
4835 rather than having to synthesize both large constants from scratch.
4836
4837 Therefore, we calculate how many insns would be required to emit
4838 the constant starting from `best_start', and also starting from
4839 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4840 yield a shorter sequence, we may as well use zero. */
4841 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4842 if (best_start != 0
4843 && ((HOST_WIDE_INT_1U << best_start) < val))
4844 {
4845 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4846 if (insns2 <= insns1)
4847 {
4848 *return_sequence = tmp_sequence;
4849 insns1 = insns2;
4850 }
4851 }
4852
4853 return insns1;
4854 }
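
/* Editorial sketch, not part of the GCC sources: a stand-alone version
   of the ARM-mode scan above, ignoring the preference for leaving a
   small low-order chunk until last.  It returns the starting bit of the
   largest run of zero bits that begins on a 2-bit boundary, which is
   the position optimal_immediate_sequence_1 is then given to start
   chunking from.  The helper name is hypothetical.  */

static int
sketch_largest_zero_run_start (unsigned int val)
{
  int best_len = 0, best_start = 0;
  int i;

  for (i = 0; i < 32; i += 2)
    {
      int len = 0;

      while (i + len < 32 && !(val & (3u << (i + len))))
        len += 2;

      if (len > best_len)
        {
          best_len = len;
          best_start = i;
        }

      i += len;   /* Skip over the run just measured.  */
    }

  return best_start;
}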
4855
4856 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4857 static int
4858 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4859 struct four_ints *return_sequence, int i)
4860 {
4861 int remainder = val & 0xffffffff;
4862 int insns = 0;
4863
4864 /* Try and find a way of doing the job in either two or three
4865 instructions.
4866
4867 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4868 location. We start at position I. This may be the MSB, or
4869 optimal_immediate_sequence may have positioned it at the largest block
4870 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4871 wrapping around to the top of the word when we drop off the bottom.
4872 In the worst case this code should produce no more than four insns.
4873
4874 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4875 constants, shifted to any arbitrary location. We should always start
4876 at the MSB. */
4877 do
4878 {
4879 int end;
4880 unsigned int b1, b2, b3, b4;
4881 unsigned HOST_WIDE_INT result;
4882 int loc;
4883
4884 gcc_assert (insns < 4);
4885
4886 if (i <= 0)
4887 i += 32;
4888
4889 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4890 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4891 {
4892 loc = i;
4893 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4894 /* We can use addw/subw for the last 12 bits. */
4895 result = remainder;
4896 else
4897 {
4898 /* Use an 8-bit shifted/rotated immediate. */
4899 end = i - 8;
4900 if (end < 0)
4901 end += 32;
4902 result = remainder & ((0x0ff << end)
4903 | ((i < end) ? (0xff >> (32 - end))
4904 : 0));
4905 i -= 8;
4906 }
4907 }
4908 else
4909 {
4910 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4911 arbitrary shifts. */
4912 i -= TARGET_ARM ? 2 : 1;
4913 continue;
4914 }
4915
4916 /* Next, see if we can do a better job with a thumb2 replicated
4917 constant.
4918
4919 We do it this way around to catch the cases like 0x01F001E0 where
4920 two 8-bit immediates would work, but a replicated constant would
4921 make it worse.
4922
4923 TODO: 16-bit constants that don't clear all the bits, but still win.
4924 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4925 if (TARGET_THUMB2)
4926 {
4927 b1 = (remainder & 0xff000000) >> 24;
4928 b2 = (remainder & 0x00ff0000) >> 16;
4929 b3 = (remainder & 0x0000ff00) >> 8;
4930 b4 = remainder & 0xff;
4931
4932 if (loc > 24)
4933 {
4934 /* The 8-bit immediate already found clears b1 (and maybe b2),
4935 but must leave b3 and b4 alone. */
4936
4937 /* First try to find a 32-bit replicated constant that clears
4938 almost everything. We can assume that we can't do it in one,
4939 or else we wouldn't be here. */
4940 unsigned int tmp = b1 & b2 & b3 & b4;
4941 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4942 + (tmp << 24);
4943 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4944 + (tmp == b3) + (tmp == b4);
4945 if (tmp
4946 && (matching_bytes >= 3
4947 || (matching_bytes == 2
4948 && const_ok_for_op (remainder & ~tmp2, code))))
4949 {
4950 /* At least 3 of the bytes match, and the fourth has at
4951 least as many bits set, or two of the bytes match
4952 and it will only require one more insn to finish. */
4953 result = tmp2;
4954 i = tmp != b1 ? 32
4955 : tmp != b2 ? 24
4956 : tmp != b3 ? 16
4957 : 8;
4958 }
4959
4960 /* Second, try to find a 16-bit replicated constant that can
4961 leave three of the bytes clear. If b2 or b4 is already
4962 zero, then we can. If the 8-bit from above would not
4963 clear b2 anyway, then we still win. */
4964 else if (b1 == b3 && (!b2 || !b4
4965 || (remainder & 0x00ff0000 & ~result)))
4966 {
4967 result = remainder & 0xff00ff00;
4968 i = 24;
4969 }
4970 }
4971 else if (loc > 16)
4972 {
4973 /* The 8-bit immediate already found clears b2 (and maybe b3)
4974 and we don't get here unless b1 is already clear, but it will
4975 leave b4 unchanged. */
4976
4977 /* If we can clear b2 and b4 at once, then we win, since the
4978 8-bits couldn't possibly reach that far. */
4979 if (b2 == b4)
4980 {
4981 result = remainder & 0x00ff00ff;
4982 i = 16;
4983 }
4984 }
4985 }
4986
4987 return_sequence->i[insns++] = result;
4988 remainder &= ~result;
4989
4990 if (code == SET || code == MINUS)
4991 code = PLUS;
4992 }
4993 while (remainder);
4994
4995 return insns;
4996 }
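
/* Editorial sketch, not part of the GCC sources: the immediate form
   that the ARM-mode chunks chosen above must take -- an 8-bit value
   rotated right by an even amount.  const_ok_for_arm is the real test;
   this stand-alone model (with a hypothetical name) only restates the
   encoding.  Thumb-2 additionally accepts the replicated patterns
   0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY handled in the
   replicated-constant block above.  */

static int
sketch_arm_dp_immediate_p (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT; if the original was an 8-bit value rotated
         right by ROT, this puts it back into bits 0..7.  */
      unsigned int y = (x << rot) | (rot ? x >> (32 - rot) : 0);

      if ((y & 0xff) == y)
        return 1;
    }

  return 0;
}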
4997
4998 /* Emit an instruction with the indicated PATTERN. If COND is
4999 non-NULL, conditionalize the execution of the instruction on COND
5000 being true. */
5001
5002 static void
5003 emit_constant_insn (rtx cond, rtx pattern)
5004 {
5005 if (cond)
5006 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
5007 emit_insn (pattern);
5008 }
5009
5010 /* As above, but extra parameter GENERATE which, if clear, suppresses
5011 RTL generation. */
5012
5013 static int
5014 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
5015 unsigned HOST_WIDE_INT val, rtx target, rtx source,
5016 int subtargets, int generate)
5017 {
5018 int can_invert = 0;
5019 int can_negate = 0;
5020 int final_invert = 0;
5021 int i;
5022 int set_sign_bit_copies = 0;
5023 int clear_sign_bit_copies = 0;
5024 int clear_zero_bit_copies = 0;
5025 int set_zero_bit_copies = 0;
5026 int insns = 0, neg_insns, inv_insns;
5027 unsigned HOST_WIDE_INT temp1, temp2;
5028 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
5029 struct four_ints *immediates;
5030 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5031
5032 /* Find out which operations are safe for a given CODE. Also do a quick
5033 check for degenerate cases; these can occur when DImode operations
5034 are split. */
5035 switch (code)
5036 {
5037 case SET:
5038 can_invert = 1;
5039 break;
5040
5041 case PLUS:
5042 can_negate = 1;
5043 break;
5044
5045 case IOR:
5046 if (remainder == 0xffffffff)
5047 {
5048 if (generate)
5049 emit_constant_insn (cond,
5050 gen_rtx_SET (target,
5051 GEN_INT (ARM_SIGN_EXTEND (val))));
5052 return 1;
5053 }
5054
5055 if (remainder == 0)
5056 {
5057 if (reload_completed && rtx_equal_p (target, source))
5058 return 0;
5059
5060 if (generate)
5061 emit_constant_insn (cond, gen_rtx_SET (target, source));
5062 return 1;
5063 }
5064 break;
5065
5066 case AND:
5067 if (remainder == 0)
5068 {
5069 if (generate)
5070 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5071 return 1;
5072 }
5073 if (remainder == 0xffffffff)
5074 {
5075 if (reload_completed && rtx_equal_p (target, source))
5076 return 0;
5077 if (generate)
5078 emit_constant_insn (cond, gen_rtx_SET (target, source));
5079 return 1;
5080 }
5081 can_invert = 1;
5082 break;
5083
5084 case XOR:
5085 if (remainder == 0)
5086 {
5087 if (reload_completed && rtx_equal_p (target, source))
5088 return 0;
5089 if (generate)
5090 emit_constant_insn (cond, gen_rtx_SET (target, source));
5091 return 1;
5092 }
5093
5094 if (remainder == 0xffffffff)
5095 {
5096 if (generate)
5097 emit_constant_insn (cond,
5098 gen_rtx_SET (target,
5099 gen_rtx_NOT (mode, source)));
5100 return 1;
5101 }
5102 final_invert = 1;
5103 break;
5104
5105 case MINUS:
5106 /* We treat MINUS as (val - source), since (source - val) is always
5107 passed as (source + (-val)). */
5108 if (remainder == 0)
5109 {
5110 if (generate)
5111 emit_constant_insn (cond,
5112 gen_rtx_SET (target,
5113 gen_rtx_NEG (mode, source)));
5114 return 1;
5115 }
5116 if (const_ok_for_arm (val))
5117 {
5118 if (generate)
5119 emit_constant_insn (cond,
5120 gen_rtx_SET (target,
5121 gen_rtx_MINUS (mode, GEN_INT (val),
5122 source)));
5123 return 1;
5124 }
5125
5126 break;
5127
5128 default:
5129 gcc_unreachable ();
5130 }
5131
5132 /* If we can do it in one insn get out quickly. */
5133 if (const_ok_for_op (val, code))
5134 {
5135 if (generate)
5136 emit_constant_insn (cond,
5137 gen_rtx_SET (target,
5138 (source
5139 ? gen_rtx_fmt_ee (code, mode, source,
5140 GEN_INT (val))
5141 : GEN_INT (val))));
5142 return 1;
5143 }
5144
5145 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5146 insn. */
5147 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5148 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5149 {
5150 if (generate)
5151 {
5152 if (mode == SImode && i == 16)
5153 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5154 smaller insn. */
5155 emit_constant_insn (cond,
5156 gen_zero_extendhisi2
5157 (target, gen_lowpart (HImode, source)));
5158 else
5159 /* Extz only supports SImode, but we can coerce the operands
5160 into that mode. */
5161 emit_constant_insn (cond,
5162 gen_extzv_t2 (gen_lowpart (SImode, target),
5163 gen_lowpart (SImode, source),
5164 GEN_INT (i), const0_rtx));
5165 }
5166
5167 return 1;
5168 }
5169
5170 /* Calculate a few attributes that may be useful for specific
5171 optimizations. */
5172 /* Count number of leading zeros. */
5173 for (i = 31; i >= 0; i--)
5174 {
5175 if ((remainder & (1 << i)) == 0)
5176 clear_sign_bit_copies++;
5177 else
5178 break;
5179 }
5180
5181 /* Count number of leading 1's. */
5182 for (i = 31; i >= 0; i--)
5183 {
5184 if ((remainder & (1 << i)) != 0)
5185 set_sign_bit_copies++;
5186 else
5187 break;
5188 }
5189
5190 /* Count number of trailing zero's. */
5191 for (i = 0; i <= 31; i++)
5192 {
5193 if ((remainder & (1 << i)) == 0)
5194 clear_zero_bit_copies++;
5195 else
5196 break;
5197 }
5198
5199 /* Count number of trailing 1's. */
5200 for (i = 0; i <= 31; i++)
5201 {
5202 if ((remainder & (1 << i)) != 0)
5203 set_zero_bit_copies++;
5204 else
5205 break;
5206 }
5207
5208 switch (code)
5209 {
5210 case SET:
5211 /* See if we can do this by sign_extending a constant that is known
5212 to be negative. This is a good way of doing it, since the shift
5213 may well merge into a subsequent insn. */
5214 if (set_sign_bit_copies > 1)
5215 {
5216 if (const_ok_for_arm
5217 (temp1 = ARM_SIGN_EXTEND (remainder
5218 << (set_sign_bit_copies - 1))))
5219 {
5220 if (generate)
5221 {
5222 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5223 emit_constant_insn (cond,
5224 gen_rtx_SET (new_src, GEN_INT (temp1)));
5225 emit_constant_insn (cond,
5226 gen_ashrsi3 (target, new_src,
5227 GEN_INT (set_sign_bit_copies - 1)));
5228 }
5229 return 2;
5230 }
5231 /* For an inverted constant, we will need to set the low bits,
5232 these will be shifted out of harm's way. */
5233 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5234 if (const_ok_for_arm (~temp1))
5235 {
5236 if (generate)
5237 {
5238 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5239 emit_constant_insn (cond,
5240 gen_rtx_SET (new_src, GEN_INT (temp1)));
5241 emit_constant_insn (cond,
5242 gen_ashrsi3 (target, new_src,
5243 GEN_INT (set_sign_bit_copies - 1)));
5244 }
5245 return 2;
5246 }
5247 }
5248
5249 /* See if we can calculate the value as the difference between two
5250 valid immediates. */
5251 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5252 {
5253 int topshift = clear_sign_bit_copies & ~1;
5254
5255 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5256 & (0xff000000 >> topshift));
5257
5258 /* If temp1 is zero, then that means the 9 most significant
5259 bits of remainder were 1 and we've caused it to overflow.
5260 When topshift is 0 we don't need to do anything since we
5261 can borrow from 'bit 32'. */
5262 if (temp1 == 0 && topshift != 0)
5263 temp1 = 0x80000000 >> (topshift - 1);
5264
5265 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5266
5267 if (const_ok_for_arm (temp2))
5268 {
5269 if (generate)
5270 {
5271 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5272 emit_constant_insn (cond,
5273 gen_rtx_SET (new_src, GEN_INT (temp1)));
5274 emit_constant_insn (cond,
5275 gen_addsi3 (target, new_src,
5276 GEN_INT (-temp2)));
5277 }
5278
5279 return 2;
5280 }
5281 }
5282
5283 /* See if we can generate this by setting the bottom (or the top)
5284 16 bits, and then shifting these into the other half of the
5285 word. We only look for the simplest cases, to do more would cost
5286 too much. Be careful, however, not to generate this when the
5287 alternative would take fewer insns. */
5288 if (val & 0xffff0000)
5289 {
5290 temp1 = remainder & 0xffff0000;
5291 temp2 = remainder & 0x0000ffff;
5292
5293 /* Overlaps outside this range are best done using other methods. */
5294 for (i = 9; i < 24; i++)
5295 {
5296 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5297 && !const_ok_for_arm (temp2))
5298 {
5299 rtx new_src = (subtargets
5300 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5301 : target);
5302 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5303 source, subtargets, generate);
5304 source = new_src;
5305 if (generate)
5306 emit_constant_insn
5307 (cond,
5308 gen_rtx_SET
5309 (target,
5310 gen_rtx_IOR (mode,
5311 gen_rtx_ASHIFT (mode, source,
5312 GEN_INT (i)),
5313 source)));
5314 return insns + 1;
5315 }
5316 }
5317
5318 /* Don't duplicate cases already considered. */
5319 for (i = 17; i < 24; i++)
5320 {
5321 if (((temp1 | (temp1 >> i)) == remainder)
5322 && !const_ok_for_arm (temp1))
5323 {
5324 rtx new_src = (subtargets
5325 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5326 : target);
5327 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5328 source, subtargets, generate);
5329 source = new_src;
5330 if (generate)
5331 emit_constant_insn
5332 (cond,
5333 gen_rtx_SET (target,
5334 gen_rtx_IOR
5335 (mode,
5336 gen_rtx_LSHIFTRT (mode, source,
5337 GEN_INT (i)),
5338 source)));
5339 return insns + 1;
5340 }
5341 }
5342 }
5343 break;
5344
5345 case IOR:
5346 case XOR:
5347 /* If we have IOR or XOR, and the constant can be loaded in a
5348 single instruction, and we can find a temporary to put it in,
5349 then this can be done in two instructions instead of 3-4. */
5350 if (subtargets
5351 /* TARGET can't be NULL if SUBTARGETS is 0. */
5352 || (reload_completed && !reg_mentioned_p (target, source)))
5353 {
5354 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5355 {
5356 if (generate)
5357 {
5358 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5359
5360 emit_constant_insn (cond,
5361 gen_rtx_SET (sub, GEN_INT (val)));
5362 emit_constant_insn (cond,
5363 gen_rtx_SET (target,
5364 gen_rtx_fmt_ee (code, mode,
5365 source, sub)));
5366 }
5367 return 2;
5368 }
5369 }
5370
5371 if (code == XOR)
5372 break;
5373
5374 /* Convert.
5375 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
5376 and the remainder 0s, e.g. 0xfff00000)
5377 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5378
5379 This can be done in 2 instructions by using shifts with mov or mvn.
5380 e.g. for
5381 x = x | 0xfff00000;
5382 we generate.
5383 mvn r0, r0, asl #12
5384 mvn r0, r0, lsr #12 */
5385 if (set_sign_bit_copies > 8
5386 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5387 {
5388 if (generate)
5389 {
5390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5391 rtx shift = GEN_INT (set_sign_bit_copies);
5392
5393 emit_constant_insn
5394 (cond,
5395 gen_rtx_SET (sub,
5396 gen_rtx_NOT (mode,
5397 gen_rtx_ASHIFT (mode,
5398 source,
5399 shift))));
5400 emit_constant_insn
5401 (cond,
5402 gen_rtx_SET (target,
5403 gen_rtx_NOT (mode,
5404 gen_rtx_LSHIFTRT (mode, sub,
5405 shift))));
5406 }
5407 return 2;
5408 }
5409
5410 /* Convert
5411 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5412 to
5413 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5414
5415 For example, r0 = r0 | 0xfff
5416 mvn r0, r0, lsr #12
5417 mvn r0, r0, asl #12
5418
5419 */
5420 if (set_zero_bit_copies > 8
5421 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5422 {
5423 if (generate)
5424 {
5425 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5426 rtx shift = GEN_INT (set_zero_bit_copies);
5427
5428 emit_constant_insn
5429 (cond,
5430 gen_rtx_SET (sub,
5431 gen_rtx_NOT (mode,
5432 gen_rtx_LSHIFTRT (mode,
5433 source,
5434 shift))));
5435 emit_constant_insn
5436 (cond,
5437 gen_rtx_SET (target,
5438 gen_rtx_NOT (mode,
5439 gen_rtx_ASHIFT (mode, sub,
5440 shift))));
5441 }
5442 return 2;
5443 }
5444
5445 /* This will never be reached for Thumb2 because orn is a valid
5446 instruction. This is for Thumb1 and the ARM 32 bit cases.
5447
5448 x = y | constant (such that ~constant is a valid constant)
5449 Transform this to
5450 x = ~(~y & ~constant).
5451 */
5452 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5453 {
5454 if (generate)
5455 {
5456 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5457 emit_constant_insn (cond,
5458 gen_rtx_SET (sub,
5459 gen_rtx_NOT (mode, source)));
5460 source = sub;
5461 if (subtargets)
5462 sub = gen_reg_rtx (mode);
5463 emit_constant_insn (cond,
5464 gen_rtx_SET (sub,
5465 gen_rtx_AND (mode, source,
5466 GEN_INT (temp1))));
5467 emit_constant_insn (cond,
5468 gen_rtx_SET (target,
5469 gen_rtx_NOT (mode, sub)));
5470 }
5471 return 3;
5472 }
5473 break;
5474
5475 case AND:
5476 /* See if two shifts will do 2 or more insn's worth of work. */
5477 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5478 {
5479 HOST_WIDE_INT shift_mask = ((0xffffffff
5480 << (32 - clear_sign_bit_copies))
5481 & 0xffffffff);
5482
5483 if ((remainder | shift_mask) != 0xffffffff)
5484 {
5485 HOST_WIDE_INT new_val
5486 = ARM_SIGN_EXTEND (remainder | shift_mask);
5487
5488 if (generate)
5489 {
5490 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5491 insns = arm_gen_constant (AND, SImode, cond, new_val,
5492 new_src, source, subtargets, 1);
5493 source = new_src;
5494 }
5495 else
5496 {
5497 rtx targ = subtargets ? NULL_RTX : target;
5498 insns = arm_gen_constant (AND, mode, cond, new_val,
5499 targ, source, subtargets, 0);
5500 }
5501 }
5502
5503 if (generate)
5504 {
5505 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5506 rtx shift = GEN_INT (clear_sign_bit_copies);
5507
5508 emit_insn (gen_ashlsi3 (new_src, source, shift));
5509 emit_insn (gen_lshrsi3 (target, new_src, shift));
5510 }
5511
5512 return insns + 2;
5513 }
5514
5515 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5516 {
5517 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5518
5519 if ((remainder | shift_mask) != 0xffffffff)
5520 {
5521 HOST_WIDE_INT new_val
5522 = ARM_SIGN_EXTEND (remainder | shift_mask);
5523 if (generate)
5524 {
5525 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5526
5527 insns = arm_gen_constant (AND, mode, cond, new_val,
5528 new_src, source, subtargets, 1);
5529 source = new_src;
5530 }
5531 else
5532 {
5533 rtx targ = subtargets ? NULL_RTX : target;
5534
5535 insns = arm_gen_constant (AND, mode, cond, new_val,
5536 targ, source, subtargets, 0);
5537 }
5538 }
5539
5540 if (generate)
5541 {
5542 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5543 rtx shift = GEN_INT (clear_zero_bit_copies);
5544
5545 emit_insn (gen_lshrsi3 (new_src, source, shift));
5546 emit_insn (gen_ashlsi3 (target, new_src, shift));
5547 }
5548
5549 return insns + 2;
5550 }
5551
5552 break;
5553
5554 default:
5555 break;
5556 }
5557
5558 /* Calculate what the instruction sequences would be if we generated it
5559 normally, negated, or inverted. */
5560 if (code == AND)
5561 /* AND cannot be split into multiple insns, so invert and use BIC. */
5562 insns = 99;
5563 else
5564 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5565
5566 if (can_negate)
5567 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5568 &neg_immediates);
5569 else
5570 neg_insns = 99;
5571
5572 if (can_invert || final_invert)
5573 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5574 &inv_immediates);
5575 else
5576 inv_insns = 99;
5577
5578 immediates = &pos_immediates;
5579
5580 /* Is the negated immediate sequence more efficient? */
5581 if (neg_insns < insns && neg_insns <= inv_insns)
5582 {
5583 insns = neg_insns;
5584 immediates = &neg_immediates;
5585 }
5586 else
5587 can_negate = 0;
5588
5589 /* Is the inverted immediate sequence more efficient?
5590 We must allow for an extra NOT instruction for XOR operations, although
5591 there is some chance that the final 'mvn' will get optimized later. */
5592 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5593 {
5594 insns = inv_insns;
5595 immediates = &inv_immediates;
5596 }
5597 else
5598 {
5599 can_invert = 0;
5600 final_invert = 0;
5601 }
5602
5603 /* Now output the chosen sequence as instructions. */
5604 if (generate)
5605 {
5606 for (i = 0; i < insns; i++)
5607 {
5608 rtx new_src, temp1_rtx;
5609
5610 temp1 = immediates->i[i];
5611
5612 if (code == SET || code == MINUS)
5613 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5614 else if ((final_invert || i < (insns - 1)) && subtargets)
5615 new_src = gen_reg_rtx (mode);
5616 else
5617 new_src = target;
5618
5619 if (can_invert)
5620 temp1 = ~temp1;
5621 else if (can_negate)
5622 temp1 = -temp1;
5623
5624 temp1 = trunc_int_for_mode (temp1, mode);
5625 temp1_rtx = GEN_INT (temp1);
5626
5627 if (code == SET)
5628 ;
5629 else if (code == MINUS)
5630 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5631 else
5632 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5633
5634 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5635 source = new_src;
5636
5637 if (code == SET)
5638 {
5639 can_negate = can_invert;
5640 can_invert = 0;
5641 code = PLUS;
5642 }
5643 else if (code == MINUS)
5644 code = PLUS;
5645 }
5646 }
5647
5648 if (final_invert)
5649 {
5650 if (generate)
5651 emit_constant_insn (cond, gen_rtx_SET (target,
5652 gen_rtx_NOT (mode, source)));
5653 insns++;
5654 }
5655
5656 return insns;
5657 }
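
/* Editorial illustration, not part of the GCC sources: the
   invert/negate bookkeeping above in miniature for AND.  The positive
   form is never split (insns is forced to 99), so a mask such as
   0xfffffffe is handled by inverting it to 0x00000001 -- a single
   data-processing immediate -- and letting the AND with an inverted
   constant match the BIC pattern.  The helper name is hypothetical.  */

static bool
sketch_and_as_single_bic_p (unsigned HOST_WIDE_INT mask)
{
  /* BIC rd, rn, #imm computes rn & ~imm, so an AND with MASK needs only
     one instruction whenever ~MASK is itself a valid immediate.  */
  return const_ok_for_arm (ARM_SIGN_EXTEND (~mask)) != 0;
}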
5658
5659 /* Return TRUE if op is a constant where both the low and top words are
5660 suitable for RSB/RSC instructions. This is never true for Thumb, since
5661 we do not have RSC in that case. */
5662 static bool
5663 arm_const_double_prefer_rsbs_rsc (rtx op)
5664 {
5665 /* Thumb lacks RSC, so we never prefer that sequence. */
5666 if (TARGET_THUMB || !CONST_INT_P (op))
5667 return false;
5668 HOST_WIDE_INT hi, lo;
5669 lo = UINTVAL (op) & 0xffffffffULL;
5670 hi = UINTVAL (op) >> 32;
5671 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5672 }
5673
5674 /* Canonicalize a comparison so that we are more likely to recognize it.
5675 This can be done for a few constant compares, where we can make the
5676 immediate value easier to load. */
5677
5678 static void
5679 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5680 bool op0_preserve_value)
5681 {
5682 machine_mode mode;
5683 unsigned HOST_WIDE_INT i, maxval;
5684
5685 mode = GET_MODE (*op0);
5686 if (mode == VOIDmode)
5687 mode = GET_MODE (*op1);
5688
5689 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5690
5691 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5692 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5693 either reversed or (for constant OP1) adjusted to GE/LT.
5694 Similarly for GTU/LEU in Thumb mode. */
5695 if (mode == DImode)
5696 {
5697
5698 if (*code == GT || *code == LE
5699 || *code == GTU || *code == LEU)
5700 {
5701 /* Missing comparison. First try to use an available
5702 comparison. */
5703 if (CONST_INT_P (*op1))
5704 {
5705 i = INTVAL (*op1);
5706 switch (*code)
5707 {
5708 case GT:
5709 case LE:
5710 if (i != maxval)
5711 {
5712 /* Try to convert to GE/LT, unless that would be more
5713 expensive. */
5714 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5715 && arm_const_double_prefer_rsbs_rsc (*op1))
5716 return;
5717 *op1 = GEN_INT (i + 1);
5718 *code = *code == GT ? GE : LT;
5719 }
5720 else
5721 {
5722 /* GT maxval is always false, LE maxval is always true.
5723 We can't fold that away here as we must make a
5724 comparison, but we can fold them to comparisons
5725 with the same result that can be handled:
5726 op0 GT maxval -> op0 LT minval
5727 op0 LE maxval -> op0 GE minval
5728 where minval = (-maxval - 1). */
5729 *op1 = GEN_INT (-maxval - 1);
5730 *code = *code == GT ? LT : GE;
5731 }
5732 return;
5733
5734 case GTU:
5735 case LEU:
5736 if (i != ~((unsigned HOST_WIDE_INT) 0))
5737 {
5738 /* Try to convert to GEU/LTU, unless that would
5739 be more expensive. */
5740 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5741 && arm_const_double_prefer_rsbs_rsc (*op1))
5742 return;
5743 *op1 = GEN_INT (i + 1);
5744 *code = *code == GTU ? GEU : LTU;
5745 }
5746 else
5747 {
5748 /* GTU ~0 is always false, LEU ~0 is always true.
5749 We can't fold that away here as we must make a
5750 comparison, but we can fold them to comparisons
5751 with the same result that can be handled:
5752 op0 GTU ~0 -> op0 LTU 0
5753 op0 LEU ~0 -> op0 GEU 0. */
5754 *op1 = const0_rtx;
5755 *code = *code == GTU ? LTU : GEU;
5756 }
5757 return;
5758
5759 default:
5760 gcc_unreachable ();
5761 }
5762 }
5763
5764 if (!op0_preserve_value)
5765 {
5766 std::swap (*op0, *op1);
5767 *code = (int)swap_condition ((enum rtx_code)*code);
5768 }
5769 }
5770 return;
5771 }
5772
5773 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5774 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5775 to facilitate possible combining with a cmp into 'ands'. */
5776 if (mode == SImode
5777 && GET_CODE (*op0) == ZERO_EXTEND
5778 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5779 && GET_MODE (XEXP (*op0, 0)) == QImode
5780 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5781 && subreg_lowpart_p (XEXP (*op0, 0))
5782 && *op1 == const0_rtx)
5783 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5784 GEN_INT (255));
5785
5786 /* Comparisons smaller than DImode. Only adjust comparisons against
5787 an out-of-range constant. */
5788 if (!CONST_INT_P (*op1)
5789 || const_ok_for_arm (INTVAL (*op1))
5790 || const_ok_for_arm (- INTVAL (*op1)))
5791 return;
5792
5793 i = INTVAL (*op1);
5794
5795 switch (*code)
5796 {
5797 case EQ:
5798 case NE:
5799 return;
5800
5801 case GT:
5802 case LE:
5803 if (i != maxval
5804 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5805 {
5806 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5807 *code = *code == GT ? GE : LT;
5808 return;
5809 }
5810 break;
5811
5812 case GE:
5813 case LT:
5814 if (i != ~maxval
5815 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5816 {
5817 *op1 = GEN_INT (i - 1);
5818 *code = *code == GE ? GT : LE;
5819 return;
5820 }
5821 break;
5822
5823 case GTU:
5824 case LEU:
5825 if (i != ~((unsigned HOST_WIDE_INT) 0)
5826 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5827 {
5828 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5829 *code = *code == GTU ? GEU : LTU;
5830 return;
5831 }
5832 break;
5833
5834 case GEU:
5835 case LTU:
5836 if (i != 0
5837 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5838 {
5839 *op1 = GEN_INT (i - 1);
5840 *code = *code == GEU ? GTU : LEU;
5841 return;
5842 }
5843 break;
5844
5845 default:
5846 gcc_unreachable ();
5847 }
5848 }
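
/* Editorial illustration, not part of the GCC sources: the GT/LE
   adjustment above for a concrete SImode case.  4095 (0xfff) is not a
   valid data-processing immediate, but 4096 is, so "x > 4095" is
   rewritten as "x >= 4096" and a single CMP suffices.  The helper name
   is hypothetical and only models that one leg.  */

static bool
sketch_canonicalize_gt (unsigned HOST_WIDE_INT *imm)
{
  if (!const_ok_for_arm (*imm)
      && *imm != 0x7fffffff   /* SImode maxval: GT would wrap.  */
      && (const_ok_for_arm (*imm + 1) || const_ok_for_arm (-(*imm + 1))))
    {
      *imm += 1;              /* GT imm  ->  GE (imm + 1).  */
      return true;
    }

  return false;
}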
5849
5850
5851 /* Define how to find the value returned by a function. */
5852
5853 static rtx
5854 arm_function_value(const_tree type, const_tree func,
5855 bool outgoing ATTRIBUTE_UNUSED)
5856 {
5857 machine_mode mode;
5858 int unsignedp ATTRIBUTE_UNUSED;
5859 rtx r ATTRIBUTE_UNUSED;
5860
5861 mode = TYPE_MODE (type);
5862
5863 if (TARGET_AAPCS_BASED)
5864 return aapcs_allocate_return_reg (mode, type, func);
5865
5866 /* Promote integer types. */
5867 if (INTEGRAL_TYPE_P (type))
5868 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5869
5870 /* Promote small structs returned in a register to full-word size
5871 for big-endian AAPCS. */
5872 if (arm_return_in_msb (type))
5873 {
5874 HOST_WIDE_INT size = int_size_in_bytes (type);
5875 if (size % UNITS_PER_WORD != 0)
5876 {
5877 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5878 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5879 }
5880 }
5881
5882 return arm_libcall_value_1 (mode);
5883 }
5884
5885 /* libcall hashtable helpers. */
5886
5887 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5888 {
5889 static inline hashval_t hash (const rtx_def *);
5890 static inline bool equal (const rtx_def *, const rtx_def *);
5891 static inline void remove (rtx_def *);
5892 };
5893
5894 inline bool
5895 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5896 {
5897 return rtx_equal_p (p1, p2);
5898 }
5899
5900 inline hashval_t
5901 libcall_hasher::hash (const rtx_def *p1)
5902 {
5903 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5904 }
5905
5906 typedef hash_table<libcall_hasher> libcall_table_type;
5907
5908 static void
5909 add_libcall (libcall_table_type *htab, rtx libcall)
5910 {
5911 *htab->find_slot (libcall, INSERT) = libcall;
5912 }
5913
5914 static bool
5915 arm_libcall_uses_aapcs_base (const_rtx libcall)
5916 {
5917 static bool init_done = false;
5918 static libcall_table_type *libcall_htab = NULL;
5919
5920 if (!init_done)
5921 {
5922 init_done = true;
5923
5924 libcall_htab = new libcall_table_type (31);
5925 add_libcall (libcall_htab,
5926 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5927 add_libcall (libcall_htab,
5928 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5929 add_libcall (libcall_htab,
5930 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5931 add_libcall (libcall_htab,
5932 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5933
5934 add_libcall (libcall_htab,
5935 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5936 add_libcall (libcall_htab,
5937 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5938 add_libcall (libcall_htab,
5939 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5940 add_libcall (libcall_htab,
5941 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5942
5943 add_libcall (libcall_htab,
5944 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5945 add_libcall (libcall_htab,
5946 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5947 add_libcall (libcall_htab,
5948 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5949 add_libcall (libcall_htab,
5950 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5951 add_libcall (libcall_htab,
5952 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5953 add_libcall (libcall_htab,
5954 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5955 add_libcall (libcall_htab,
5956 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5957 add_libcall (libcall_htab,
5958 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5959 add_libcall (libcall_htab,
5960 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5961 add_libcall (libcall_htab,
5962 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5963
5964 /* Values from double-precision helper functions are returned in core
5965 registers if the selected core only supports single-precision
5966 arithmetic, even if we are using the hard-float ABI. The same is
5967 true for single-precision helpers except in case of MVE, because in
5968 MVE we will be using the hard-float ABI on a CPU which doesn't support
5969 single-precision operations in hardware. In MVE the following check
5970 enables use of emulation for the single-precision arithmetic
5971 operations. */
5972 if (TARGET_HAVE_MVE)
5973 {
5974 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5975 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5976 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5977 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5978 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5979 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5980 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5981 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5982 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5983 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5984 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5985 }
5986 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5987 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5988 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5989 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5990 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5991 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5992 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5993 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5994 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5995 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5996 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5997 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5998 SFmode));
5999 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
6000 DFmode));
6001 add_libcall (libcall_htab,
6002 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
6003 }
6004
6005 return libcall && libcall_htab->find (libcall) != NULL;
6006 }
6007
6008 static rtx
6009 arm_libcall_value_1 (machine_mode mode)
6010 {
6011 if (TARGET_AAPCS_BASED)
6012 return aapcs_libcall_value (mode);
6013 else if (TARGET_IWMMXT_ABI
6014 && arm_vector_mode_supported_p (mode))
6015 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
6016 else
6017 return gen_rtx_REG (mode, ARG_REGISTER (1));
6018 }
6019
6020 /* Define how to find the value returned by a library function
6021 assuming the value has mode MODE. */
6022
6023 static rtx
6024 arm_libcall_value (machine_mode mode, const_rtx libcall)
6025 {
6026 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
6027 && GET_MODE_CLASS (mode) == MODE_FLOAT)
6028 {
6029 /* The following libcalls return their result in integer registers,
6030 even though they return a floating point value. */
6031 if (arm_libcall_uses_aapcs_base (libcall))
6032 return gen_rtx_REG (mode, ARG_REGISTER (1));
6033
6034 }
6035
6036 return arm_libcall_value_1 (mode);
6037 }
6038
6039 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6040
6041 static bool
6042 arm_function_value_regno_p (const unsigned int regno)
6043 {
6044 if (regno == ARG_REGISTER (1)
6045 || (TARGET_32BIT
6046 && TARGET_AAPCS_BASED
6047 && TARGET_HARD_FLOAT
6048 && regno == FIRST_VFP_REGNUM)
6049 || (TARGET_IWMMXT_ABI
6050 && regno == FIRST_IWMMXT_REGNUM))
6051 return true;
6052
6053 return false;
6054 }
6055
6056 /* Determine the amount of memory needed to store the possible return
6057 registers of an untyped call. */
6058 int
6059 arm_apply_result_size (void)
6060 {
6061 int size = 16;
6062
6063 if (TARGET_32BIT)
6064 {
6065 if (TARGET_HARD_FLOAT_ABI)
6066 size += 32;
6067 if (TARGET_IWMMXT_ABI)
6068 size += 8;
6069 }
6070
6071 return size;
6072 }
6073
6074 /* Decide whether TYPE should be returned in memory (true)
6075 or in a register (false). FNTYPE is the type of the function making
6076 the call. */
6077 static bool
6078 arm_return_in_memory (const_tree type, const_tree fntype)
6079 {
6080 HOST_WIDE_INT size;
6081
6082 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6083
6084 if (TARGET_AAPCS_BASED)
6085 {
6086 /* Simple, non-aggregate types (i.e. not including vectors and
6087 complex) are always returned in a register (or registers).
6088 We don't care about which register here, so we can short-cut
6089 some of the detail. */
6090 if (!AGGREGATE_TYPE_P (type)
6091 && TREE_CODE (type) != VECTOR_TYPE
6092 && TREE_CODE (type) != COMPLEX_TYPE)
6093 return false;
6094
6095 /* Any return value that is no larger than one word can be
6096 returned in r0. */
6097 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6098 return false;
6099
6100 /* Check any available co-processors to see if they accept the
6101 type as a register candidate (VFP, for example, can return
6102 some aggregates in consecutive registers). These aren't
6103 available if the call is variadic. */
6104 if (aapcs_select_return_coproc (type, fntype) >= 0)
6105 return false;
6106
6107 /* Vector values should be returned using ARM registers, not
6108 memory (unless they're over 16 bytes, which will break since
6109 we only have four call-clobbered registers to play with). */
6110 if (TREE_CODE (type) == VECTOR_TYPE)
6111 return (size < 0 || size > (4 * UNITS_PER_WORD));
6112
6113 /* The rest go in memory. */
6114 return true;
6115 }
6116
6117 if (TREE_CODE (type) == VECTOR_TYPE)
6118 return (size < 0 || size > (4 * UNITS_PER_WORD));
6119
6120 if (!AGGREGATE_TYPE_P (type)
6121 && (TREE_CODE (type) != VECTOR_TYPE))
6122 /* All simple types are returned in registers. */
6123 return false;
6124
6125 if (arm_abi != ARM_ABI_APCS)
6126 {
6127 /* ATPCS and later return aggregate types in memory only if they are
6128 larger than a word (or are variable size). */
6129 return (size < 0 || size > UNITS_PER_WORD);
6130 }
6131
6132 /* For the arm-wince targets we choose to be compatible with Microsoft's
6133 ARM and Thumb compilers, which always return aggregates in memory. */
6134 #ifndef ARM_WINCE
6135 /* All structures/unions bigger than one word are returned in memory.
6136 Also catch the case where int_size_in_bytes returns -1. In this case
6137 the aggregate is either huge or of variable size, and in either case
6138 we will want to return it via memory and not in a register. */
6139 if (size < 0 || size > UNITS_PER_WORD)
6140 return true;
6141
6142 if (TREE_CODE (type) == RECORD_TYPE)
6143 {
6144 tree field;
6145
6146 /* For a struct the APCS says that we only return in a register
6147 if the type is 'integer like' and every addressable element
6148 has an offset of zero. For practical purposes this means
6149 that the structure can have at most one non bit-field element
6150 and that this element must be the first one in the structure. */
6151
6152 /* Find the first field, ignoring non FIELD_DECL things which will
6153 have been created by C++. */
6154 /* NOTE: This code is deprecated and has not been updated to handle
6155 DECL_FIELD_ABI_IGNORED. */
6156 for (field = TYPE_FIELDS (type);
6157 field && TREE_CODE (field) != FIELD_DECL;
6158 field = DECL_CHAIN (field))
6159 continue;
6160
6161 if (field == NULL)
6162 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6163
6164 /* Check that the first field is valid for returning in a register. */
6165
6166 /* ... Floats are not allowed */
6167 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6168 return true;
6169
6170 /* ... Aggregates that are not themselves valid for returning in
6171 a register are not allowed. */
6172 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6173 return true;
6174
6175 /* Now check the remaining fields, if any. Only bitfields are allowed,
6176 since they are not addressable. */
6177 for (field = DECL_CHAIN (field);
6178 field;
6179 field = DECL_CHAIN (field))
6180 {
6181 if (TREE_CODE (field) != FIELD_DECL)
6182 continue;
6183
6184 if (!DECL_BIT_FIELD_TYPE (field))
6185 return true;
6186 }
6187
6188 return false;
6189 }
6190
6191 if (TREE_CODE (type) == UNION_TYPE)
6192 {
6193 tree field;
6194
6195 /* Unions can be returned in registers if every element is
6196 integral, or can be returned in an integer register. */
6197 for (field = TYPE_FIELDS (type);
6198 field;
6199 field = DECL_CHAIN (field))
6200 {
6201 if (TREE_CODE (field) != FIELD_DECL)
6202 continue;
6203
6204 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6205 return true;
6206
6207 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6208 return true;
6209 }
6210
6211 return false;
6212 }
6213 #endif /* not ARM_WINCE */
6214
6215 /* Return all other types in memory. */
6216 return true;
6217 }
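
/* Editorial illustration, not part of the GCC sources: how the AAPCS
   rules above classify two simple aggregates.

     struct one_word  { int a; };         // <= UNITS_PER_WORD: in r0
     struct three_int { int a, b, c; };   // larger, no co-processor
                                          // claims it: returned via memory

   A homogeneous floating-point aggregate of up to four elements would
   instead be picked up by aapcs_select_return_coproc and come back in
   VFP registers when a hard-float variant is in use.  */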
6218
6219 const struct pcs_attribute_arg
6220 {
6221 const char *arg;
6222 enum arm_pcs value;
6223 } pcs_attribute_args[] =
6224 {
6225 {"aapcs", ARM_PCS_AAPCS},
6226 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6227 #if 0
6228 /* We could recognize these, but changes would be needed elsewhere
6229 * to implement them. */
6230 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6231 {"atpcs", ARM_PCS_ATPCS},
6232 {"apcs", ARM_PCS_APCS},
6233 #endif
6234 {NULL, ARM_PCS_UNKNOWN}
6235 };
6236
6237 static enum arm_pcs
6238 arm_pcs_from_attribute (tree attr)
6239 {
6240 const struct pcs_attribute_arg *ptr;
6241 const char *arg;
6242
6243 /* Get the value of the argument. */
6244 if (TREE_VALUE (attr) == NULL_TREE
6245 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6246 return ARM_PCS_UNKNOWN;
6247
6248 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6249
6250 /* Check it against the list of known arguments. */
6251 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6252 if (streq (arg, ptr->arg))
6253 return ptr->value;
6254
6255 /* An unrecognized PCS variant. */
6256 return ARM_PCS_UNKNOWN;
6257 }
6258
6259 /* Get the PCS variant to use for this call. TYPE is the function's type
6260 specification, DECL is the specific declaration. DECL may be null if
6261 the call could be indirect or if this is a library call. */
6262 static enum arm_pcs
6263 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6264 {
6265 bool user_convention = false;
6266 enum arm_pcs user_pcs = arm_pcs_default;
6267 tree attr;
6268
6269 gcc_assert (type);
6270
6271 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6272 if (attr)
6273 {
6274 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6275 user_convention = true;
6276 }
6277
6278 if (TARGET_AAPCS_BASED)
6279 {
6280 /* Detect varargs functions. These always use the base rules
6281 (no argument is ever a candidate for a co-processor
6282 register). */
6283 bool base_rules = stdarg_p (type);
6284
6285 if (user_convention)
6286 {
6287 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6288 sorry ("non-AAPCS derived PCS variant");
6289 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6290 error ("variadic functions must use the base AAPCS variant");
6291 }
6292
6293 if (base_rules)
6294 return ARM_PCS_AAPCS;
6295 else if (user_convention)
6296 return user_pcs;
6297 #if 0
6298 /* Unfortunately, this is not safe and can lead to wrong code
6299 being generated (PR96882). Not all calls into the back-end
6300 pass the DECL, so it is unsafe to make any PCS-changing
6301 decisions based on it. In particular the RETURN_IN_MEMORY
6302 hook is only ever passed a TYPE. This needs revisiting to
6303 see if there are any partial improvements that can be
6304 re-enabled. */
6305 else if (decl && flag_unit_at_a_time)
6306 {
6307 /* Local functions never leak outside this compilation unit,
6308 so we are free to use whatever conventions are
6309 appropriate. */
6310 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6311 cgraph_node *local_info_node
6312 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6313 if (local_info_node && local_info_node->local)
6314 return ARM_PCS_AAPCS_LOCAL;
6315 }
6316 #endif
6317 }
6318 else if (user_convention && user_pcs != arm_pcs_default)
6319 sorry ("PCS variant");
6320
6321 /* For everything else we use the target's default. */
6322 return arm_pcs_default;
6323 }
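
/* Editorial illustration, not part of the GCC sources: the attribute
   this function inspects, as it appears in user code.

     double fma3 (double, double, double) __attribute__ ((pcs ("aapcs-vfp")));

   On an AAPCS-based target this selects the VFP calling variant for the
   declared function, while variadic functions always get the base
   variant (a conflicting attribute is diagnosed above).  */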
6324
6325
6326 static void
6327 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6328 const_tree fntype ATTRIBUTE_UNUSED,
6329 rtx libcall ATTRIBUTE_UNUSED,
6330 const_tree fndecl ATTRIBUTE_UNUSED)
6331 {
6332 /* Record the unallocated VFP registers. */
6333 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6334 pcum->aapcs_vfp_reg_alloc = 0;
6335 }
6336
6337 /* Bitmasks that indicate whether earlier versions of GCC would have
6338 taken a different path through the ABI logic. This should result in
6339 a -Wpsabi warning if the earlier path led to a different ABI decision.
6340
6341 WARN_PSABI_EMPTY_CXX17_BASE
6342 Indicates that the type includes an artificial empty C++17 base field
6343 that, prior to GCC 10.1, would prevent the type from being treated as
6344 a HFA or HVA. See PR94711 for details.
6345
6346 WARN_PSABI_NO_UNIQUE_ADDRESS
6347 Indicates that the type includes an empty [[no_unique_address]] field
6348 that, prior to GCC 10.1, would prevent the type from being treated as
6349 a HFA or HVA. */
6350 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6351 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6352 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
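
/* Editorial illustration, not part of the GCC sources: the kinds of C++
   types these flags describe.

     struct empty {};
     struct hfa : empty { float x, y; };                         // empty C++17 base
     struct pair { [[no_unique_address]] empty e; float x, y; }; // empty member

   Before GCC 10.1 the artificial empty member stopped such types from
   being treated as homogeneous FP aggregates; the flags let the
   -Wpsabi machinery report that the passing convention has since
   changed.  */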
6353
6354 /* Walk down the type tree of TYPE counting consecutive base elements.
6355 If *MODEP is VOIDmode, then set it to the first valid floating point
6356 type. If a non-floating point type is found, or if a floating point
6357 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6358 otherwise return the count in the sub-tree.
6359
6360 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6361 function has changed its behavior relative to earlier versions of GCC.
6362 Normally the argument should be nonnull and point to a zero-initialized
6363 variable. The function then records whether the ABI decision might
6364 be affected by a known fix to the ABI logic, setting the associated
6365 WARN_PSABI_* bits if so.
6366
6367 When the argument is instead a null pointer, the function tries to
6368 simulate the behavior of GCC before all such ABI fixes were made.
6369 This is useful to check whether the function returns something
6370 different after the ABI fixes. */
6371 static int
6372 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6373 unsigned int *warn_psabi_flags)
6374 {
6375 machine_mode mode;
6376 HOST_WIDE_INT size;
6377
6378 switch (TREE_CODE (type))
6379 {
6380 case REAL_TYPE:
6381 mode = TYPE_MODE (type);
6382 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6383 return -1;
6384
6385 if (*modep == VOIDmode)
6386 *modep = mode;
6387
6388 if (*modep == mode)
6389 return 1;
6390
6391 break;
6392
6393 case COMPLEX_TYPE:
6394 mode = TYPE_MODE (TREE_TYPE (type));
6395 if (mode != DFmode && mode != SFmode)
6396 return -1;
6397
6398 if (*modep == VOIDmode)
6399 *modep = mode;
6400
6401 if (*modep == mode)
6402 return 2;
6403
6404 break;
6405
6406 case VECTOR_TYPE:
6407 /* Use V2SImode and V4SImode as representatives of all 64-bit
6408 and 128-bit vector types, whether or not those modes are
6409 supported with the present options. */
6410 size = int_size_in_bytes (type);
6411 switch (size)
6412 {
6413 case 8:
6414 mode = V2SImode;
6415 break;
6416 case 16:
6417 mode = V4SImode;
6418 break;
6419 default:
6420 return -1;
6421 }
6422
6423 if (*modep == VOIDmode)
6424 *modep = mode;
6425
6426 /* Vector modes are considered to be opaque: two vectors are
6427 equivalent for the purposes of being homogeneous aggregates
6428 if they are the same size. */
6429 if (*modep == mode)
6430 return 1;
6431
6432 break;
6433
6434 case ARRAY_TYPE:
6435 {
6436 int count;
6437 tree index = TYPE_DOMAIN (type);
6438
6439 /* Can't handle incomplete types nor sizes that are not
6440 fixed. */
6441 if (!COMPLETE_TYPE_P (type)
6442 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6443 return -1;
6444
6445 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6446 warn_psabi_flags);
6447 if (count == -1
6448 || !index
6449 || !TYPE_MAX_VALUE (index)
6450 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6451 || !TYPE_MIN_VALUE (index)
6452 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6453 || count < 0)
6454 return -1;
6455
6456 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6457 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6458
6459 /* There must be no padding. */
6460 if (wi::to_wide (TYPE_SIZE (type))
6461 != count * GET_MODE_BITSIZE (*modep))
6462 return -1;
6463
6464 return count;
6465 }
6466
6467 case RECORD_TYPE:
6468 {
6469 int count = 0;
6470 int sub_count;
6471 tree field;
6472
6473 /* Can't handle incomplete types nor sizes that are not
6474 fixed. */
6475 if (!COMPLETE_TYPE_P (type)
6476 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6477 return -1;
6478
6479 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6480 {
6481 if (TREE_CODE (field) != FIELD_DECL)
6482 continue;
6483
6484 if (DECL_FIELD_ABI_IGNORED (field))
6485 {
6486 /* See whether this is something that earlier versions of
6487 GCC failed to ignore. */
6488 unsigned int flag;
6489 if (lookup_attribute ("no_unique_address",
6490 DECL_ATTRIBUTES (field)))
6491 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6492 else if (cxx17_empty_base_field_p (field))
6493 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6494 else
6495 /* No compatibility problem. */
6496 continue;
6497
6498 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6499 if (warn_psabi_flags)
6500 {
6501 *warn_psabi_flags |= flag;
6502 continue;
6503 }
6504 }
6505 /* A zero-width bitfield may affect layout in some
6506 circumstances, but adds no members. The determination
6507 of whether or not a type is an HFA is performed after
6508 layout is complete, so if the type still looks like an
6509 HFA afterwards, it is still classed as one. This is
6510 potentially an ABI break for the hard-float ABI. */
6511 else if (DECL_BIT_FIELD (field)
6512 && integer_zerop (DECL_SIZE (field)))
6513 {
6514 /* Prior to GCC-12 these fields were stripped early,
6515 hiding them from the back-end entirely and
6516 resulting in the correct behaviour for argument
6517 passing. Simulate that old behaviour without
6518 generating a warning. */
6519 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6520 continue;
6521 if (warn_psabi_flags)
6522 {
6523 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6524 continue;
6525 }
6526 }
6527
6528 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6529 warn_psabi_flags);
6530 if (sub_count < 0)
6531 return -1;
6532 count += sub_count;
6533 }
6534
6535 /* There must be no padding. */
6536 if (wi::to_wide (TYPE_SIZE (type))
6537 != count * GET_MODE_BITSIZE (*modep))
6538 return -1;
6539
6540 return count;
6541 }
6542
6543 case UNION_TYPE:
6544 case QUAL_UNION_TYPE:
6545 {
6546 /* These aren't very interesting except in a degenerate case. */
6547 int count = 0;
6548 int sub_count;
6549 tree field;
6550
6551 /* Can't handle incomplete types nor sizes that are not
6552 fixed. */
6553 if (!COMPLETE_TYPE_P (type)
6554 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6555 return -1;
6556
6557 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6558 {
6559 if (TREE_CODE (field) != FIELD_DECL)
6560 continue;
6561
6562 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6563 warn_psabi_flags);
6564 if (sub_count < 0)
6565 return -1;
6566 count = count > sub_count ? count : sub_count;
6567 }
6568
6569 /* There must be no padding. */
6570 if (wi::to_wide (TYPE_SIZE (type))
6571 != count * GET_MODE_BITSIZE (*modep))
6572 return -1;
6573
6574 return count;
6575 }
6576
6577 default:
6578 break;
6579 }
6580
6581 return -1;
6582 }
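
/* Editorial illustration, not part of the GCC sources: how the walk
   above classifies a few argument types.

     struct v3  { float x, y, z; };        // count 3, base mode SFmode
     struct d2  { double d[2]; };          // count 2, base mode DFmode
     struct mix { float x; double y; };    // mixed modes: returns -1

   A count of 1..4 with a single floating-point or vector base mode is
   what later makes the type a VFP call/return candidate.  */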
6583
6584 /* Return true if PCS_VARIANT should use VFP registers. */
6585 static bool
6586 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6587 {
6588 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6589 {
6590 static bool seen_thumb1_vfp = false;
6591
6592 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6593 {
6594 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6595 /* sorry() is not immediately fatal, so only display this once. */
6596 seen_thumb1_vfp = true;
6597 }
6598
6599 return true;
6600 }
6601
6602 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6603 return false;
6604
6605 return (TARGET_32BIT && TARGET_HARD_FLOAT
6606 && (TARGET_VFP_DOUBLE || !is_double));
6607 }
6608
6609 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6610 suitable for passing or returning in VFP registers for the PCS
6611 variant selected. If it is, then *BASE_MODE is updated to contain
6612 a machine mode describing each element of the argument's type and
6613 *COUNT to hold the number of such elements. */
6614 static bool
6615 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6616 machine_mode mode, const_tree type,
6617 machine_mode *base_mode, int *count)
6618 {
6619 machine_mode new_mode = VOIDmode;
6620
6621 /* If we have the type information, prefer that to working things
6622 out from the mode. */
6623 if (type)
6624 {
6625 unsigned int warn_psabi_flags = 0;
6626 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6627 &warn_psabi_flags);
6628 if (ag_count > 0 && ag_count <= 4)
6629 {
6630 static unsigned last_reported_type_uid;
6631 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6632 int alt;
6633 if (warn_psabi
6634 && warn_psabi_flags
6635 && uid != last_reported_type_uid
6636 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6637 != ag_count))
6638 {
6639 const char *url10
6640 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6641 const char *url12
6642 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6643 gcc_assert (alt == -1);
6644 last_reported_type_uid = uid;
6645 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6646 qualification. */
6647 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6648 inform (input_location, "parameter passing for argument of "
6649 "type %qT with %<[[no_unique_address]]%> members "
6650 "changed %{in GCC 10.1%}",
6651 TYPE_MAIN_VARIANT (type), url10);
6652 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6653 inform (input_location, "parameter passing for argument of "
6654 "type %qT when C++17 is enabled changed to match "
6655 "C++14 %{in GCC 10.1%}",
6656 TYPE_MAIN_VARIANT (type), url10);
6657 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6658 inform (input_location, "parameter passing for argument of "
6659 "type %qT changed %{in GCC 12.1%}",
6660 TYPE_MAIN_VARIANT (type), url12);
6661 }
6662 *count = ag_count;
6663 }
6664 else
6665 return false;
6666 }
6667 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6668 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6669 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6670 {
6671 *count = 1;
6672 new_mode = mode;
6673 }
6674 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6675 {
6676 *count = 2;
6677 new_mode = (mode == DCmode ? DFmode : SFmode);
6678 }
6679 else
6680 return false;
6681
6682
6683 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6684 return false;
6685
6686 *base_mode = new_mode;
6687
6688 if (TARGET_GENERAL_REGS_ONLY)
6689 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6690 type);
6691
6692 return true;
6693 }
6694
6695 static bool
6696 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6697 machine_mode mode, const_tree type)
6698 {
6699 int count ATTRIBUTE_UNUSED;
6700 machine_mode ag_mode ATTRIBUTE_UNUSED;
6701
6702 if (!use_vfp_abi (pcs_variant, false))
6703 return false;
6704 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6705 &ag_mode, &count);
6706 }
6707
6708 static bool
6709 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6710 const_tree type)
6711 {
6712 if (!use_vfp_abi (pcum->pcs_variant, false))
6713 return false;
6714
6715 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6716 &pcum->aapcs_vfp_rmode,
6717 &pcum->aapcs_vfp_rcount);
6718 }
6719
6720 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6721 for the behaviour of this function. */
6722
6723 static bool
6724 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6725 const_tree type ATTRIBUTE_UNUSED)
6726 {
6727 int rmode_size
6728 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6729 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6730 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6731 int regno;
6732
6733 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6734 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6735 {
6736 pcum->aapcs_vfp_reg_alloc = mask << regno;
6737 if (mode == BLKmode
6738 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6739 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6740 {
6741 int i;
6742 int rcount = pcum->aapcs_vfp_rcount;
6743 int rshift = shift;
6744 machine_mode rmode = pcum->aapcs_vfp_rmode;
6745 rtx par;
6746 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6747 {
6748 /* Avoid using unsupported vector modes. */
6749 if (rmode == V2SImode)
6750 rmode = DImode;
6751 else if (rmode == V4SImode)
6752 {
6753 rmode = DImode;
6754 rcount *= 2;
6755 rshift /= 2;
6756 }
6757 }
6758 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6759 for (i = 0; i < rcount; i++)
6760 {
6761 rtx tmp = gen_rtx_REG (rmode,
6762 FIRST_VFP_REGNUM + regno + i * rshift);
6763 tmp = gen_rtx_EXPR_LIST
6764 (VOIDmode, tmp,
6765 GEN_INT (i * GET_MODE_SIZE (rmode)));
6766 XVECEXP (par, 0, i) = tmp;
6767 }
6768
6769 pcum->aapcs_reg = par;
6770 }
6771 else
6772 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6773 return true;
6774 }
6775 return false;
6776 }
6777
6778 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6779 comment there for the behaviour of this function. */
6780
6781 static rtx
6782 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6783 machine_mode mode,
6784 const_tree type ATTRIBUTE_UNUSED)
6785 {
6786 if (!use_vfp_abi (pcs_variant, false))
6787 return NULL;
6788
6789 if (mode == BLKmode
6790 || (GET_MODE_CLASS (mode) == MODE_INT
6791 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6792 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6793 {
6794 int count;
6795 machine_mode ag_mode;
6796 int i;
6797 rtx par;
6798 int shift;
6799
6800 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6801 &ag_mode, &count);
6802
6803 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6804 {
6805 if (ag_mode == V2SImode)
6806 ag_mode = DImode;
6807 else if (ag_mode == V4SImode)
6808 {
6809 ag_mode = DImode;
6810 count *= 2;
6811 }
6812 }
6813 	      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6814 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6815 for (i = 0; i < count; i++)
6816 {
6817 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6818 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6819 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6820 XVECEXP (par, 0, i) = tmp;
6821 }
6822
6823 return par;
6824 }
6825
6826 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6827 }
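
/* For illustration: when the candidate is, say, a homogeneous aggregate of
   two doubles passed as BLKmode, the PARALLEL built above describes DFmode
   pieces in s0/s1 and s2/s3 (i.e. d0 and d1) at byte offsets 0 and 8;
   otherwise the whole value is simply returned in a single register
   starting at FIRST_VFP_REGNUM (s0).  */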
6828
6829 static void
6830 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6831 machine_mode mode ATTRIBUTE_UNUSED,
6832 const_tree type ATTRIBUTE_UNUSED)
6833 {
6834 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6835 pcum->aapcs_vfp_reg_alloc = 0;
6836 return;
6837 }
6838
6839 #define AAPCS_CP(X) \
6840 { \
6841 aapcs_ ## X ## _cum_init, \
6842 aapcs_ ## X ## _is_call_candidate, \
6843 aapcs_ ## X ## _allocate, \
6844 aapcs_ ## X ## _is_return_candidate, \
6845 aapcs_ ## X ## _allocate_return_reg, \
6846 aapcs_ ## X ## _advance \
6847 }
6848
6849 /* Table of co-processors that can be used to pass arguments in
6850    registers.  Ideally no argument should be a candidate for more than
6851 one co-processor table entry, but the table is processed in order
6852 and stops after the first match. If that entry then fails to put
6853 the argument into a co-processor register, the argument will go on
6854 the stack. */
6855 static struct
6856 {
6857 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6858 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6859
6860 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6861 BLKmode) is a candidate for this co-processor's registers; this
6862 function should ignore any position-dependent state in
6863 CUMULATIVE_ARGS and only use call-type dependent information. */
6864 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6865
6866 /* Return true if the argument does get a co-processor register; it
6867 should set aapcs_reg to an RTX of the register allocated as is
6868 required for a return from FUNCTION_ARG. */
6869 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6870
6871 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6872 be returned in this co-processor's registers. */
6873 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6874
6875 /* Allocate and return an RTX element to hold the return type of a call. This
6876 routine must not fail and will only be called if is_return_candidate
6877 returned true with the same parameters. */
6878 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6879
6880 /* Finish processing this argument and prepare to start processing
6881 the next one. */
6882 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6883 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6884 {
6885 AAPCS_CP(vfp)
6886 };
6887
6888 #undef AAPCS_CP
6889
6890 static int
6891 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6892 const_tree type)
6893 {
6894 int i;
6895
6896 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6897 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6898 return i;
6899
6900 return -1;
6901 }
6902
6903 static int
6904 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6905 {
6906 /* We aren't passed a decl, so we can't check that a call is local.
6907 However, it isn't clear that that would be a win anyway, since it
6908 might limit some tail-calling opportunities. */
6909 enum arm_pcs pcs_variant;
6910
6911 if (fntype)
6912 {
6913 const_tree fndecl = NULL_TREE;
6914
6915 if (TREE_CODE (fntype) == FUNCTION_DECL)
6916 {
6917 fndecl = fntype;
6918 fntype = TREE_TYPE (fntype);
6919 }
6920
6921 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6922 }
6923 else
6924 pcs_variant = arm_pcs_default;
6925
6926 if (pcs_variant != ARM_PCS_AAPCS)
6927 {
6928 int i;
6929
6930 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6931 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6932 TYPE_MODE (type),
6933 type))
6934 return i;
6935 }
6936 return -1;
6937 }
6938
6939 static rtx
6940 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6941 const_tree fntype)
6942 {
6943 /* We aren't passed a decl, so we can't check that a call is local.
6944 However, it isn't clear that that would be a win anyway, since it
6945 might limit some tail-calling opportunities. */
6946 enum arm_pcs pcs_variant;
6947 int unsignedp ATTRIBUTE_UNUSED;
6948
6949 if (fntype)
6950 {
6951 const_tree fndecl = NULL_TREE;
6952
6953 if (TREE_CODE (fntype) == FUNCTION_DECL)
6954 {
6955 fndecl = fntype;
6956 fntype = TREE_TYPE (fntype);
6957 }
6958
6959 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6960 }
6961 else
6962 pcs_variant = arm_pcs_default;
6963
6964 /* Promote integer types. */
6965 if (type && INTEGRAL_TYPE_P (type))
6966 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6967
6968 if (pcs_variant != ARM_PCS_AAPCS)
6969 {
6970 int i;
6971
6972 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6973 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6974 type))
6975 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6976 mode, type);
6977 }
6978
6979 /* Promotes small structs returned in a register to full-word size
6980 for big-endian AAPCS. */
6981 if (type && arm_return_in_msb (type))
6982 {
6983 HOST_WIDE_INT size = int_size_in_bytes (type);
6984 if (size % UNITS_PER_WORD != 0)
6985 {
6986 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6987 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6988 }
6989 }
6990
6991 return gen_rtx_REG (mode, R0_REGNUM);
6992 }
6993
6994 static rtx
6995 aapcs_libcall_value (machine_mode mode)
6996 {
6997 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6998 && GET_MODE_SIZE (mode) <= 4)
6999 mode = SImode;
7000
7001 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
7002 }
7003
7004 /* Lay out a function argument using the AAPCS rules. The rule
7005 numbers referred to here are those in the AAPCS. */
7006 static void
7007 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
7008 const_tree type, bool named)
7009 {
7010 int nregs, nregs2;
7011 int ncrn;
7012
7013 /* We only need to do this once per argument. */
7014 if (pcum->aapcs_arg_processed)
7015 return;
7016
7017 pcum->aapcs_arg_processed = true;
7018
7019 /* Special case: if named is false then we are handling an incoming
7020 anonymous argument which is on the stack. */
7021 if (!named)
7022 return;
7023
7024 /* Is this a potential co-processor register candidate? */
7025 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7026 {
7027 int slot = aapcs_select_call_coproc (pcum, mode, type);
7028 pcum->aapcs_cprc_slot = slot;
7029
7030 /* We don't have to apply any of the rules from part B of the
7031 preparation phase, these are handled elsewhere in the
7032 compiler. */
7033
7034 if (slot >= 0)
7035 {
7036 /* A Co-processor register candidate goes either in its own
7037 class of registers or on the stack. */
7038 if (!pcum->aapcs_cprc_failed[slot])
7039 {
7040 /* C1.cp - Try to allocate the argument to co-processor
7041 registers. */
7042 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7043 return;
7044
7045 /* C2.cp - Put the argument on the stack and note that we
7046 can't assign any more candidates in this slot. We also
7047 need to note that we have allocated stack space, so that
7048 we won't later try to split a non-cprc candidate between
7049 core registers and the stack. */
7050 pcum->aapcs_cprc_failed[slot] = true;
7051 pcum->can_split = false;
7052 }
7053
7054 /* We didn't get a register, so this argument goes on the
7055 stack. */
7056 gcc_assert (pcum->can_split == false);
7057 return;
7058 }
7059 }
7060
7061 /* C3 - For double-word aligned arguments, round the NCRN up to the
7062 next even number. */
7063 ncrn = pcum->aapcs_ncrn;
7064 if (ncrn & 1)
7065 {
7066 int res = arm_needs_doubleword_align (mode, type);
7067 /* Only warn during RTL expansion of call stmts, otherwise we would
7068 warn e.g. during gimplification even on functions that will be
7069 always inlined, and we'd warn multiple times. Don't warn when
7070 called in expand_function_start either, as we warn instead in
7071 arm_function_arg_boundary in that case. */
7072 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7073 inform (input_location, "parameter passing for argument of type "
7074 "%qT changed in GCC 7.1", type);
7075 else if (res > 0)
7076 ncrn++;
7077 }
7078
7079   nregs = ARM_NUM_REGS2 (mode, type);
7080
7081 /* Sigh, this test should really assert that nregs > 0, but a GCC
7082      extension allows empty structs and then gives them zero size; it
7083 then allows such a structure to be passed by value. For some of
7084 the code below we have to pretend that such an argument has
7085 non-zero size so that we 'locate' it correctly either in
7086 registers or on the stack. */
7087 gcc_assert (nregs >= 0);
7088
7089 nregs2 = nregs ? nregs : 1;
7090
7091 /* C4 - Argument fits entirely in core registers. */
7092 if (ncrn + nregs2 <= NUM_ARG_REGS)
7093 {
7094 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7095 pcum->aapcs_next_ncrn = ncrn + nregs;
7096 return;
7097 }
7098
7099 /* C5 - Some core registers left and there are no arguments already
7100 on the stack: split this argument between the remaining core
7101 registers and the stack. */
7102 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7103 {
7104 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7105 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7106 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7107 return;
7108 }
7109
7110 /* C6 - NCRN is set to 4. */
7111 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7112
7113   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
7114 return;
7115 }
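
/* A rough example of rules C3-C5 above, under the AAPCS base variant (so a
   "double" is not a VFP candidate): assume r0 already holds an "int"
   argument (NCRN == 1) and the next argument is a "double".
   arm_needs_doubleword_align returns 1, so C3 rounds NCRN up to 2 and C4
   places the double in r2/r3.  If instead NCRN were 3, C5 would split the
   double between r3 and the stack (aapcs_partial == 4), provided no earlier
   argument had already been placed on the stack.  */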
7116
7117 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7118 for a call to a function whose data type is FNTYPE.
7119 For a library call, FNTYPE is NULL. */
7120 void
7121 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7122 rtx libname,
7123 tree fndecl ATTRIBUTE_UNUSED)
7124 {
7125 /* Long call handling. */
7126 if (fntype)
7127 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7128 else
7129 pcum->pcs_variant = arm_pcs_default;
7130
7131 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7132 {
7133 if (arm_libcall_uses_aapcs_base (libname))
7134 pcum->pcs_variant = ARM_PCS_AAPCS;
7135
7136 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7137 pcum->aapcs_reg = NULL_RTX;
7138 pcum->aapcs_partial = 0;
7139 pcum->aapcs_arg_processed = false;
7140 pcum->aapcs_cprc_slot = -1;
7141 pcum->can_split = true;
7142
7143 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7144 {
7145 int i;
7146
7147 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7148 {
7149 pcum->aapcs_cprc_failed[i] = false;
7150 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7151 }
7152 }
7153 return;
7154 }
7155
7156 /* Legacy ABIs */
7157
7158 /* On the ARM, the offset starts at 0. */
7159 pcum->nregs = 0;
7160 pcum->iwmmxt_nregs = 0;
7161 pcum->can_split = true;
7162
7163 /* Varargs vectors are treated the same as long long.
7164      named_count avoids having to change the way arm handles 'named'.  */
7165 pcum->named_count = 0;
7166 pcum->nargs = 0;
7167
7168 if (TARGET_REALLY_IWMMXT && fntype)
7169 {
7170 tree fn_arg;
7171
7172 for (fn_arg = TYPE_ARG_TYPES (fntype);
7173 fn_arg;
7174 fn_arg = TREE_CHAIN (fn_arg))
7175 pcum->named_count += 1;
7176
7177 if (! pcum->named_count)
7178 pcum->named_count = INT_MAX;
7179 }
7180 }
7181
7182 /* Return 2 if double word alignment is required for argument passing,
7183 but wasn't required before the fix for PR88469.
7184 Return 1 if double word alignment is required for argument passing.
7185 Return -1 if double word alignment used to be required for argument
7186 passing before PR77728 ABI fix, but is not required anymore.
7187    Return 0 if double word alignment is not required and wasn't required
7188 before either. */
7189 static int
7190 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7191 {
7192 if (!type)
7193 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7194
7195 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7196 if (!AGGREGATE_TYPE_P (type))
7197 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7198
7199 /* Array types: Use member alignment of element type. */
7200 if (TREE_CODE (type) == ARRAY_TYPE)
7201 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7202
7203 int ret = 0;
7204 int ret2 = 0;
7205 /* Record/aggregate types: Use greatest member alignment of any member.
7206
7207 Note that we explicitly consider zero-sized fields here, even though
7208 they don't map to AAPCS machine types. For example, in:
7209
7210 struct __attribute__((aligned(8))) empty {};
7211
7212 struct s {
7213 [[no_unique_address]] empty e;
7214 int x;
7215 };
7216
7217 "s" contains only one Fundamental Data Type (the int field)
7218 but gains 8-byte alignment and size thanks to "e". */
7219 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7220 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7221 {
7222 if (TREE_CODE (field) == FIELD_DECL)
7223 return 1;
7224 else
7225 /* Before PR77728 fix, we were incorrectly considering also
7226 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7227 Make sure we can warn about that with -Wpsabi. */
7228 ret = -1;
7229 }
7230 else if (TREE_CODE (field) == FIELD_DECL
7231 && DECL_BIT_FIELD_TYPE (field)
7232 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7233 ret2 = 1;
7234
7235 if (ret2)
7236 return 2;
7237
7238 return ret;
7239 }
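
/* Some concrete examples of the above: "long long" and "double" (and any
   struct containing such a member as a FIELD_DECL) have 8-byte alignment
   and so yield 1, while plain "int" yields 0.  A struct whose only
   over-aligned member is a bit-field with an 8-byte aligned declared type
   yields 2, so the caller can emit the GCC 9.1 -Wpsabi note; a struct
   whose over-alignment comes only from a non-FIELD_DECL (the pre-PR77728
   behaviour) yields -1.  */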
7240
7241
7242 /* Determine where to put an argument to a function.
7243 Value is zero to push the argument on the stack,
7244 or a hard register in which to store the argument.
7245
7246 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7247 the preceding args and about the function being called.
7248 ARG is a description of the argument.
7249
7250 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7251 other arguments are passed on the stack. If (NAMED == 0) (which happens
7252 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7253 defined), say it is passed in the stack (function_prologue will
7254 indeed make it pass in the stack if necessary). */
7255
7256 static rtx
7257 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7258 {
7259 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7260 int nregs;
7261
7262 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7263 a call insn (op3 of a call_value insn). */
7264 if (arg.end_marker_p ())
7265 return const0_rtx;
7266
7267 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7268 {
7269 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7270 return pcum->aapcs_reg;
7271 }
7272
7273 /* Varargs vectors are treated the same as long long.
7274      named_count avoids having to change the way arm handles 'named'.  */
7275 if (TARGET_IWMMXT_ABI
7276 && arm_vector_mode_supported_p (arg.mode)
7277 && pcum->named_count > pcum->nargs + 1)
7278 {
7279 if (pcum->iwmmxt_nregs <= 9)
7280 return gen_rtx_REG (arg.mode,
7281 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7282 else
7283 {
7284 pcum->can_split = false;
7285 return NULL_RTX;
7286 }
7287 }
7288
7289 /* Put doubleword aligned quantities in even register pairs. */
7290 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7291 {
7292 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7293 if (res < 0 && warn_psabi)
7294 inform (input_location, "parameter passing for argument of type "
7295 "%qT changed in GCC 7.1", arg.type);
7296 else if (res > 0)
7297 {
7298 pcum->nregs++;
7299 if (res > 1 && warn_psabi)
7300 inform (input_location, "parameter passing for argument of type "
7301 "%qT changed in GCC 9.1", arg.type);
7302 }
7303 }
7304
7305 /* Only allow splitting an arg between regs and memory if all preceding
7306 args were allocated to regs. For args passed by reference we only count
7307 the reference pointer. */
7308 if (pcum->can_split)
7309 nregs = 1;
7310 else
7311 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7312
7313 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7314 return NULL_RTX;
7315
7316 return gen_rtx_REG (arg.mode, pcum->nregs);
7317 }
7318
7319 static unsigned int
7320 arm_function_arg_boundary (machine_mode mode, const_tree type)
7321 {
7322 if (!ARM_DOUBLEWORD_ALIGN)
7323 return PARM_BOUNDARY;
7324
7325 int res = arm_needs_doubleword_align (mode, type);
7326 if (res < 0 && warn_psabi)
7327 inform (input_location, "parameter passing for argument of type %qT "
7328 "changed in GCC 7.1", type);
7329 if (res > 1 && warn_psabi)
7330 inform (input_location, "parameter passing for argument of type "
7331 "%qT changed in GCC 9.1", type);
7332
7333 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7334 }
7335
7336 static int
7337 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7338 {
7339 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7340 int nregs = pcum->nregs;
7341
7342 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7343 {
7344 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7345 return pcum->aapcs_partial;
7346 }
7347
7348 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7349 return 0;
7350
7351 if (NUM_ARG_REGS > nregs
7352 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7353 && pcum->can_split)
7354 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7355
7356 return 0;
7357 }
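
/* For example, in the legacy (non-AAPCS) path above: if three core argument
   registers are already in use (nregs == 3) and the next argument needs two
   registers, NUM_ARG_REGS (4) is greater than 3 but less than 3 + 2, so
   4 bytes of the argument go in r3 and the remainder is passed on the
   stack.  */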
7358
7359 /* Update the data in PCUM to advance over argument ARG. */
7360
7361 static void
7362 arm_function_arg_advance (cumulative_args_t pcum_v,
7363 const function_arg_info &arg)
7364 {
7365 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7366
7367 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7368 {
7369 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7370
7371 if (pcum->aapcs_cprc_slot >= 0)
7372 {
7373 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7374 arg.type);
7375 pcum->aapcs_cprc_slot = -1;
7376 }
7377
7378 /* Generic stuff. */
7379 pcum->aapcs_arg_processed = false;
7380 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7381 pcum->aapcs_reg = NULL_RTX;
7382 pcum->aapcs_partial = 0;
7383 }
7384 else
7385 {
7386 pcum->nargs += 1;
7387 if (arm_vector_mode_supported_p (arg.mode)
7388 && pcum->named_count > pcum->nargs
7389 && TARGET_IWMMXT_ABI)
7390 pcum->iwmmxt_nregs += 1;
7391 else
7392 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7393 }
7394 }
7395
7396 /* Variable sized types are passed by reference. This is a GCC
7397 extension to the ARM ABI. */
7398
7399 static bool
7400 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7401 {
7402 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7403 }
7404 \f
7405 /* Encode the current state of the #pragma [no_]long_calls. */
7406 typedef enum
7407 {
7408 OFF, /* No #pragma [no_]long_calls is in effect. */
7409 LONG, /* #pragma long_calls is in effect. */
7410 SHORT /* #pragma no_long_calls is in effect. */
7411 } arm_pragma_enum;
7412
7413 static arm_pragma_enum arm_pragma_long_calls = OFF;
7414
7415 void
7416 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7417 {
7418 arm_pragma_long_calls = LONG;
7419 }
7420
7421 void
7422 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7423 {
7424 arm_pragma_long_calls = SHORT;
7425 }
7426
7427 void
7428 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7429 {
7430 arm_pragma_long_calls = OFF;
7431 }
7432 \f
7433 /* Handle an attribute requiring a FUNCTION_DECL;
7434 arguments as in struct attribute_spec.handler. */
7435 static tree
7436 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7437 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7438 {
7439 if (TREE_CODE (*node) != FUNCTION_DECL)
7440 {
7441 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7442 name);
7443 *no_add_attrs = true;
7444 }
7445
7446 return NULL_TREE;
7447 }
7448
7449 /* Handle an "interrupt" or "isr" attribute;
7450 arguments as in struct attribute_spec.handler. */
7451 static tree
7452 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7453 bool *no_add_attrs)
7454 {
7455 if (DECL_P (*node))
7456 {
7457 if (TREE_CODE (*node) != FUNCTION_DECL)
7458 {
7459 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7460 name);
7461 *no_add_attrs = true;
7462 }
7463 else if (TARGET_VFP_BASE)
7464 {
7465 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7466 name);
7467 }
7468 /* FIXME: the argument if any is checked for type attributes;
7469 should it be checked for decl ones? */
7470 }
7471 else
7472 {
7473 if (FUNC_OR_METHOD_TYPE_P (*node))
7474 {
7475 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7476 {
7477 warning (OPT_Wattributes, "%qE attribute ignored",
7478 name);
7479 *no_add_attrs = true;
7480 }
7481 }
7482 else if (TREE_CODE (*node) == POINTER_TYPE
7483 && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node))
7484 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7485 {
7486 *node = build_variant_type_copy (*node);
7487 TREE_TYPE (*node) = build_type_attribute_variant
7488 (TREE_TYPE (*node),
7489 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7490 *no_add_attrs = true;
7491 }
7492 else
7493 {
7494 /* Possibly pass this attribute on from the type to a decl. */
7495 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7496 | (int) ATTR_FLAG_FUNCTION_NEXT
7497 | (int) ATTR_FLAG_ARRAY_NEXT))
7498 {
7499 *no_add_attrs = true;
7500 return tree_cons (name, args, NULL_TREE);
7501 }
7502 else
7503 {
7504 warning (OPT_Wattributes, "%qE attribute ignored",
7505 name);
7506 }
7507 }
7508 }
7509
7510 return NULL_TREE;
7511 }
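
/* Typical uses of the attribute handled above look roughly like (the
   function names are only examples):

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   An unrecognised argument makes arm_isr_value return ARM_FT_UNKNOWN and
   the attribute is ignored with a warning.  */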
7512
7513 /* Handle a "pcs" attribute; arguments as in struct
7514 attribute_spec.handler. */
7515 static tree
7516 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7517 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7518 {
7519 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7520 {
7521 warning (OPT_Wattributes, "%qE attribute ignored", name);
7522 *no_add_attrs = true;
7523 }
7524 return NULL_TREE;
7525 }
7526
7527 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7528 /* Handle the "notshared" attribute. This attribute is another way of
7529 requesting hidden visibility. ARM's compiler supports
7530 "__declspec(notshared)"; we support the same thing via an
7531 attribute. */
7532
7533 static tree
7534 arm_handle_notshared_attribute (tree *node,
7535 tree name ATTRIBUTE_UNUSED,
7536 tree args ATTRIBUTE_UNUSED,
7537 int flags ATTRIBUTE_UNUSED,
7538 bool *no_add_attrs)
7539 {
7540 tree decl = TYPE_NAME (*node);
7541
7542 if (decl)
7543 {
7544 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7545 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7546 *no_add_attrs = false;
7547 }
7548 return NULL_TREE;
7549 }
7550 #endif
7551
7552 /* This function returns true if a function with declaration FNDECL and type
7553    FNTYPE uses the stack to pass arguments or return values and false
7554 otherwise. This is used for functions with the attributes
7555 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7556 diagnostic messages if the stack is used. NAME is the name of the attribute
7557 used. */
7558
7559 static bool
7560 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7561 {
7562 function_args_iterator args_iter;
7563 CUMULATIVE_ARGS args_so_far_v;
7564 cumulative_args_t args_so_far;
7565 bool first_param = true;
7566 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7567
7568 /* Error out if any argument is passed on the stack. */
7569 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7570 args_so_far = pack_cumulative_args (&args_so_far_v);
7571 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7572 {
7573 rtx arg_rtx;
7574
7575 prev_arg_type = arg_type;
7576 if (VOID_TYPE_P (arg_type))
7577 continue;
7578
7579 function_arg_info arg (arg_type, /*named=*/true);
7580 if (!first_param)
7581 /* ??? We should advance after processing the argument and pass
7582 the argument we're advancing past. */
7583 arm_function_arg_advance (args_so_far, arg);
7584 arg_rtx = arm_function_arg (args_so_far, arg);
7585 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7586 {
7587 error ("%qE attribute not available to functions with arguments "
7588 "passed on the stack", name);
7589 return true;
7590 }
7591 first_param = false;
7592 }
7593
7594 /* Error out for variadic functions since we cannot control how many
7595      arguments will be passed and thus the stack could be used.  stdarg_p () is not
7596      used for this check to avoid walking the argument list twice.  */
7597 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7598 {
7599 error ("%qE attribute not available to functions with variable number "
7600 "of arguments", name);
7601 return true;
7602 }
7603
7604 /* Error out if return value is passed on the stack. */
7605 ret_type = TREE_TYPE (fntype);
7606 if (arm_return_in_memory (ret_type, fntype))
7607 {
7608 error ("%qE attribute not available to functions that return value on "
7609 "the stack", name);
7610 return true;
7611 }
7612 return false;
7613 }
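
/* For instance, with -mcmse a declaration along the lines of

     struct many { int a[6]; };
     void __attribute__ ((cmse_nonsecure_entry)) f (struct many m);

   is rejected by the check above, because a 24-byte aggregate cannot be
   passed entirely in r0-r3 and part of it would spill to the stack.  */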
7614
7615 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7616 function will check whether the attribute is allowed here and will add the
7617 attribute to the function declaration tree or otherwise issue a warning. */
7618
7619 static tree
7620 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7621 tree /* args */,
7622 int /* flags */,
7623 bool *no_add_attrs)
7624 {
7625 tree fndecl;
7626
7627 if (!use_cmse)
7628 {
7629 *no_add_attrs = true;
7630 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7631 "option", name);
7632 return NULL_TREE;
7633 }
7634
7635 /* Ignore attribute for function types. */
7636 if (TREE_CODE (*node) != FUNCTION_DECL)
7637 {
7638 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7639 name);
7640 *no_add_attrs = true;
7641 return NULL_TREE;
7642 }
7643
7644 fndecl = *node;
7645
7646 /* Warn for static linkage functions. */
7647 if (!TREE_PUBLIC (fndecl))
7648 {
7649 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7650 "with static linkage", name);
7651 *no_add_attrs = true;
7652 return NULL_TREE;
7653 }
7654
7655 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7656 TREE_TYPE (fndecl));
7657 return NULL_TREE;
7658 }
7659
7660
7661 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7662 function will check whether the attribute is allowed here and will add the
7663 attribute to the function type tree or otherwise issue a diagnostic. The
7664 reason we check this at declaration time is to only allow the use of the
7665 attribute with declarations of function pointers and not function
7666 declarations. This function checks NODE is of the expected type and issues
7667 diagnostics otherwise using NAME. If it is not of the expected type
7668 *NO_ADD_ATTRS will be set to true. */
7669
7670 static tree
7671 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7672 tree /* args */,
7673 int /* flags */,
7674 bool *no_add_attrs)
7675 {
7676 tree decl = NULL_TREE;
7677 tree fntype, type;
7678
7679 if (!use_cmse)
7680 {
7681 *no_add_attrs = true;
7682 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7683 "option", name);
7684 return NULL_TREE;
7685 }
7686
7687 if (DECL_P (*node))
7688 {
7689 fntype = TREE_TYPE (*node);
7690
7691 if (VAR_P (*node) || TREE_CODE (*node) == TYPE_DECL)
7692 decl = *node;
7693 }
7694 else
7695 fntype = *node;
7696
7697 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7698 fntype = TREE_TYPE (fntype);
7699
7700 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7701 {
7702 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7703 "function pointer", name);
7704 *no_add_attrs = true;
7705 return NULL_TREE;
7706 }
7707
7708 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7709
7710 if (*no_add_attrs)
7711 return NULL_TREE;
7712
7713 /* Prevent trees being shared among function types with and without
7714 cmse_nonsecure_call attribute. */
7715 if (decl)
7716 {
7717 type = build_distinct_type_copy (TREE_TYPE (decl));
7718 TREE_TYPE (decl) = type;
7719 }
7720 else
7721 {
7722 type = build_distinct_type_copy (*node);
7723 *node = type;
7724 }
7725
7726 fntype = type;
7727
7728 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7729 {
7730 type = fntype;
7731 fntype = TREE_TYPE (fntype);
7732 fntype = build_distinct_type_copy (fntype);
7733 TREE_TYPE (type) = fntype;
7734 }
7735
7736 /* Construct a type attribute and add it to the function type. */
7737 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7738 TYPE_ATTRIBUTES (fntype));
7739 TYPE_ATTRIBUTES (fntype) = attrs;
7740 return NULL_TREE;
7741 }
7742
7743 /* Return 0 if the attributes for two types are incompatible, 1 if they
7744 are compatible, and 2 if they are nearly compatible (which causes a
7745 warning to be generated). */
7746 static int
7747 arm_comp_type_attributes (const_tree type1, const_tree type2)
7748 {
7749 int l1, l2, s1, s2;
7750
7751 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7752 TYPE_ATTRIBUTES (type1));
7753 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7754 TYPE_ATTRIBUTES (type2));
7755 if (bool (attrs1) != bool (attrs2))
7756 return 0;
7757 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7758 return 0;
7759
7760 /* Check for mismatch of non-default calling convention. */
7761 if (TREE_CODE (type1) != FUNCTION_TYPE)
7762 return 1;
7763
7764 /* Check for mismatched call attributes. */
7765 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7766 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7767 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7768 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7769
7770 /* Only bother to check if an attribute is defined. */
7771 if (l1 | l2 | s1 | s2)
7772 {
7773 /* If one type has an attribute, the other must have the same attribute. */
7774 if ((l1 != l2) || (s1 != s2))
7775 return 0;
7776
7777 /* Disallow mixed attributes. */
7778 if ((l1 & s2) || (l2 & s1))
7779 return 0;
7780 }
7781
7782 /* Check for mismatched ISR attribute. */
7783 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7784 if (! l1)
7785 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7786 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7787 if (! l2)
7788     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7789 if (l1 != l2)
7790 return 0;
7791
7792 l1 = lookup_attribute ("cmse_nonsecure_call",
7793 TYPE_ATTRIBUTES (type1)) != NULL;
7794 l2 = lookup_attribute ("cmse_nonsecure_call",
7795 TYPE_ATTRIBUTES (type2)) != NULL;
7796
7797 if (l1 != l2)
7798 return 0;
7799
7800 return 1;
7801 }
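
/* As an example of the checks above: a function type declared with
   __attribute__ ((long_call)) is not compatible (result 0) with one
   declared short_call, nor with one carrying cmse_nonsecure_call when the
   other lacks it, while two function types that agree on all of these
   attributes compare as compatible (result 1).  */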
7802
7803 /* Assigns default attributes to newly defined type. This is used to
7804 set short_call/long_call attributes for function types of
7805 functions defined inside corresponding #pragma scopes. */
7806 static void
7807 arm_set_default_type_attributes (tree type)
7808 {
7809 /* Add __attribute__ ((long_call)) to all functions, when
7810 inside #pragma long_calls or __attribute__ ((short_call)),
7811 when inside #pragma no_long_calls. */
7812 if (FUNC_OR_METHOD_TYPE_P (type))
7813 {
7814 tree type_attr_list, attr_name;
7815 type_attr_list = TYPE_ATTRIBUTES (type);
7816
7817 if (arm_pragma_long_calls == LONG)
7818 attr_name = get_identifier ("long_call");
7819 else if (arm_pragma_long_calls == SHORT)
7820 attr_name = get_identifier ("short_call");
7821 else
7822 return;
7823
7824 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7825 TYPE_ATTRIBUTES (type) = type_attr_list;
7826 }
7827 }
7828 \f
7829 /* Return true if DECL is known to be linked into section SECTION. */
7830
7831 static bool
7832 arm_function_in_section_p (tree decl, section *section)
7833 {
7834 /* We can only be certain about the prevailing symbol definition. */
7835 if (!decl_binds_to_current_def_p (decl))
7836 return false;
7837
7838 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7839 if (!DECL_SECTION_NAME (decl))
7840 {
7841 /* Make sure that we will not create a unique section for DECL. */
7842 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7843 return false;
7844 }
7845
7846 return function_section (decl) == section;
7847 }
7848
7849 /* Return nonzero if a 32-bit "long_call" should be generated for
7850 a call from the current function to DECL. We generate a long_call
7851 if the function:
7852
7853      a.  has an __attribute__((long_call))
7854 or b. is within the scope of a #pragma long_calls
7855 or c. the -mlong-calls command line switch has been specified
7856
7857 However we do not generate a long call if the function:
7858
7859 d. has an __attribute__ ((short_call))
7860 or e. is inside the scope of a #pragma no_long_calls
7861 or f. is defined in the same section as the current function. */
7862
7863 bool
7864 arm_is_long_call_p (tree decl)
7865 {
7866 tree attrs;
7867
7868 if (!decl)
7869 return TARGET_LONG_CALLS;
7870
7871 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7872 if (lookup_attribute ("short_call", attrs))
7873 return false;
7874
7875 /* For "f", be conservative, and only cater for cases in which the
7876 whole of the current function is placed in the same section. */
7877 if (!flag_reorder_blocks_and_partition
7878 && TREE_CODE (decl) == FUNCTION_DECL
7879 && arm_function_in_section_p (decl, current_function_section ()))
7880 return false;
7881
7882 if (lookup_attribute ("long_call", attrs))
7883 return true;
7884
7885 return TARGET_LONG_CALLS;
7886 }
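
/* In source code the per-function overrides handled above are written as
   (example declarations):

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));

   so that e.g. near_func is still called with a plain BL even when
   -mlong-calls is in effect.  */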
7887
7888 /* Return nonzero if it is ok to make a tail-call to DECL. */
7889 static bool
7890 arm_function_ok_for_sibcall (tree decl, tree exp)
7891 {
7892 unsigned long func_type;
7893
7894 if (cfun->machine->sibcall_blocked)
7895 return false;
7896
7897 if (TARGET_FDPIC)
7898 {
7899 /* In FDPIC, never tailcall something for which we have no decl:
7900 the target function could be in a different module, requiring
7901 a different FDPIC register value. */
7902 if (decl == NULL)
7903 return false;
7904 }
7905
7906 /* Never tailcall something if we are generating code for Thumb-1. */
7907 if (TARGET_THUMB1)
7908 return false;
7909
7910 /* The PIC register is live on entry to VxWorks PLT entries, so we
7911 must make the call before restoring the PIC register. */
7912 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7913 return false;
7914
7915 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7916 may be used both as target of the call and base register for restoring
7917      the VFP registers.  */
7918 if (TARGET_APCS_FRAME && TARGET_ARM
7919 && TARGET_HARD_FLOAT
7920 && decl && arm_is_long_call_p (decl))
7921 return false;
7922
7923 /* If we are interworking and the function is not declared static
7924 then we can't tail-call it unless we know that it exists in this
7925 compilation unit (since it might be a Thumb routine). */
7926 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7927 && !TREE_ASM_WRITTEN (decl))
7928 return false;
7929
7930 func_type = arm_current_func_type ();
7931 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7932 if (IS_INTERRUPT (func_type))
7933 return false;
7934
7935 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7936 generated for entry functions themselves. */
7937 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7938 return false;
7939
7940 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7941 this would complicate matters for later code generation. */
7942 if (TREE_CODE (exp) == CALL_EXPR)
7943 {
7944 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7945 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7946 return false;
7947 }
7948
7949 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7950 {
7951 /* Check that the return value locations are the same. For
7952 example that we aren't returning a value from the sibling in
7953 a VFP register but then need to transfer it to a core
7954 register. */
7955 rtx a, b;
7956 tree decl_or_type = decl;
7957
7958 /* If it is an indirect function pointer, get the function type. */
7959 if (!decl)
7960 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7961
7962 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7963 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7964 cfun->decl, false);
7965 if (!rtx_equal_p (a, b))
7966 return false;
7967 }
7968
7969 /* Never tailcall if function may be called with a misaligned SP. */
7970 if (IS_STACKALIGN (func_type))
7971 return false;
7972
7973 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7974 references should become a NOP. Don't convert such calls into
7975 sibling calls. */
7976 if (TARGET_AAPCS_BASED
7977 && arm_abi == ARM_ABI_AAPCS
7978 && decl
7979 && DECL_WEAK (decl))
7980 return false;
7981
7982 /* We cannot do a tailcall for an indirect call by descriptor if all the
7983 argument registers are used because the only register left to load the
7984 address is IP and it will already contain the static chain. */
7985 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7986 {
7987 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7988 CUMULATIVE_ARGS cum;
7989 cumulative_args_t cum_v;
7990
7991 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7992 cum_v = pack_cumulative_args (&cum);
7993
7994 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7995 {
7996 tree type = TREE_VALUE (t);
7997 if (!VOID_TYPE_P (type))
7998 {
7999 function_arg_info arg (type, /*named=*/true);
8000 arm_function_arg_advance (cum_v, arg);
8001 }
8002 }
8003
8004 function_arg_info arg (integer_type_node, /*named=*/true);
8005 if (!arm_function_arg (cum_v, arg))
8006 return false;
8007 }
8008
8009 /* Everything else is ok. */
8010 return true;
8011 }
8012
8013 \f
8014 /* Addressing mode support functions. */
8015
8016 /* Return nonzero if X is a legitimate immediate operand when compiling
8017 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
8018 int
8019 legitimate_pic_operand_p (rtx x)
8020 {
8021 if (SYMBOL_REF_P (x)
8022 || (GET_CODE (x) == CONST
8023 && GET_CODE (XEXP (x, 0)) == PLUS
8024 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
8025 return 0;
8026
8027 return 1;
8028 }
8029
8030 /* Record that the current function needs a PIC register. If PIC_REG is null,
8031 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8032    both cases cfun->machine->pic_reg is initialized if we have not already done
8033    so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
8034    the PIC register is reloaded in the current position of the instruction stream
8035    regardless of whether it was loaded before.  Otherwise, it is only loaded
8036 if not already done so (crtl->uses_pic_offset_table is null). Note that
8037 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8038 is only supported iff COMPUTE_NOW is false. */
8039
8040 static void
8041 require_pic_register (rtx pic_reg, bool compute_now)
8042 {
8043 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8044
8045 /* A lot of the logic here is made obscure by the fact that this
8046 routine gets called as part of the rtx cost estimation process.
8047 We don't want those calls to affect any assumptions about the real
8048 function; and further, we can't call entry_of_function() until we
8049 start the real expansion process. */
8050 if (!crtl->uses_pic_offset_table || compute_now)
8051 {
8052 gcc_assert (can_create_pseudo_p ()
8053 || (pic_reg != NULL_RTX
8054 && REG_P (pic_reg)
8055 && GET_MODE (pic_reg) == Pmode));
8056 if (arm_pic_register != INVALID_REGNUM
8057 && !compute_now
8058 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8059 {
8060 if (!cfun->machine->pic_reg)
8061 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8062
8063 /* Play games to avoid marking the function as needing pic
8064 if we are being called as part of the cost-estimation
8065 process. */
8066 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8067 crtl->uses_pic_offset_table = 1;
8068 }
8069 else
8070 {
8071 rtx_insn *seq, *insn;
8072
8073 if (pic_reg == NULL_RTX)
8074 pic_reg = gen_reg_rtx (Pmode);
8075 if (!cfun->machine->pic_reg)
8076 cfun->machine->pic_reg = pic_reg;
8077
8078 /* Play games to avoid marking the function as needing pic
8079 if we are being called as part of the cost-estimation
8080 process. */
8081 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8082 {
8083 crtl->uses_pic_offset_table = 1;
8084 start_sequence ();
8085
8086 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8087 && arm_pic_register > LAST_LO_REGNUM
8088 && !compute_now)
8089 emit_move_insn (cfun->machine->pic_reg,
8090 gen_rtx_REG (Pmode, arm_pic_register));
8091 else
8092 arm_load_pic_register (0UL, pic_reg);
8093
8094 seq = get_insns ();
8095 end_sequence ();
8096
8097 for (insn = seq; insn; insn = NEXT_INSN (insn))
8098 if (INSN_P (insn))
8099 INSN_LOCATION (insn) = prologue_location;
8100
8101 /* We can be called during expansion of PHI nodes, where
8102 we can't yet emit instructions directly in the final
8103 insn stream. Queue the insns on the entry edge, they will
8104 be committed after everything else is expanded. */
8105 if (currently_expanding_to_rtl)
8106 insert_insn_on_edge (seq,
8107 single_succ_edge
8108 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8109 else
8110 emit_insn (seq);
8111 }
8112 }
8113 }
8114 }
8115
8116 /* Generate insns to calculate the address of ORIG in pic mode. */
8117 static rtx_insn *
8118 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8119 {
8120 rtx pat;
8121 rtx mem;
8122
8123 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8124
8125 /* Make the MEM as close to a constant as possible. */
8126 mem = SET_SRC (pat);
8127 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8128 MEM_READONLY_P (mem) = 1;
8129 MEM_NOTRAP_P (mem) = 1;
8130
8131 return emit_insn (pat);
8132 }
8133
8134 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8135 created to hold the result of the load. If not NULL, PIC_REG indicates
8136 which register to use as PIC register, otherwise it is decided by register
8137 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8138    location in the instruction stream, regardless of whether it was loaded
8139 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8140 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8141
8142 Returns the register REG into which the PIC load is performed. */
8143
8144 rtx
8145 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8146 bool compute_now)
8147 {
8148 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8149
8150 if (SYMBOL_REF_P (orig)
8151 || LABEL_REF_P (orig))
8152 {
8153 if (reg == 0)
8154 {
8155 gcc_assert (can_create_pseudo_p ());
8156 reg = gen_reg_rtx (Pmode);
8157 }
8158
8159 /* VxWorks does not impose a fixed gap between segments; the run-time
8160 gap can be different from the object-file gap. We therefore can't
8161 use GOTOFF unless we are absolutely sure that the symbol is in the
8162 same segment as the GOT. Unfortunately, the flexibility of linker
8163 scripts means that we can't be sure of that in general, so assume
8164 that GOTOFF is never valid on VxWorks. */
8165 /* References to weak symbols cannot be resolved locally: they
8166 may be overridden by a non-weak definition at link time. */
8167 rtx_insn *insn;
8168 if ((LABEL_REF_P (orig)
8169 || (SYMBOL_REF_P (orig)
8170 && SYMBOL_REF_LOCAL_P (orig)
8171 && (SYMBOL_REF_DECL (orig)
8172 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8173 && (!SYMBOL_REF_FUNCTION_P (orig)
8174 || arm_fdpic_local_funcdesc_p (orig))))
8175 && NEED_GOT_RELOC
8176 && arm_pic_data_is_text_relative)
8177 insn = arm_pic_static_addr (orig, reg);
8178 else
8179 {
8180 /* If this function doesn't have a pic register, create one now. */
8181 require_pic_register (pic_reg, compute_now);
8182
8183 if (pic_reg == NULL_RTX)
8184 pic_reg = cfun->machine->pic_reg;
8185
8186 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8187 }
8188
8189 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8190 by loop. */
8191 set_unique_reg_note (insn, REG_EQUAL, orig);
8192
8193 return reg;
8194 }
8195 else if (GET_CODE (orig) == CONST)
8196 {
8197 rtx base, offset;
8198
8199 if (GET_CODE (XEXP (orig, 0)) == PLUS
8200 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8201 return orig;
8202
8203 /* Handle the case where we have: const (UNSPEC_TLS). */
8204 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8205 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8206 return orig;
8207
8208 /* Handle the case where we have:
8209 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8210 CONST_INT. */
8211 if (GET_CODE (XEXP (orig, 0)) == PLUS
8212 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8213 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8214 {
8215 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8216 return orig;
8217 }
8218
8219 if (reg == 0)
8220 {
8221 gcc_assert (can_create_pseudo_p ());
8222 reg = gen_reg_rtx (Pmode);
8223 }
8224
8225 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8226
8227 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8228 pic_reg, compute_now);
8229 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8230 base == reg ? 0 : reg, pic_reg,
8231 compute_now);
8232
8233 if (CONST_INT_P (offset))
8234 {
8235 /* The base register doesn't really matter, we only want to
8236 test the index for the appropriate mode. */
8237 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8238 {
8239 gcc_assert (can_create_pseudo_p ());
8240 offset = force_reg (Pmode, offset);
8241 }
8242
8243 if (CONST_INT_P (offset))
8244 return plus_constant (Pmode, base, INTVAL (offset));
8245 }
8246
8247 if (GET_MODE_SIZE (mode) > 4
8248 && (GET_MODE_CLASS (mode) == MODE_INT
8249 || TARGET_SOFT_FLOAT))
8250 {
8251 emit_insn (gen_addsi3 (reg, base, offset));
8252 return reg;
8253 }
8254
8255 return gen_rtx_PLUS (Pmode, base, offset);
8256 }
8257
8258 return orig;
8259 }
8260
8261
8262 /* Generate insns that produce the address of the stack canary */
8263 rtx
8264 arm_stack_protect_tls_canary_mem (bool reload)
8265 {
8266 rtx tp = gen_reg_rtx (SImode);
8267 if (reload)
8268 emit_insn (gen_reload_tp_hard (tp));
8269 else
8270 emit_insn (gen_load_tp_hard (tp));
8271
8272 rtx reg = gen_reg_rtx (SImode);
8273 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8274 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8275 return gen_rtx_MEM (SImode, reg);
8276 }
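
/* This builds the -mstack-protector-guard=tls canary address, i.e. roughly
   the equivalent of

     canary_addr = (char *) __builtin_thread_pointer ()
		   + arm_stack_protector_guard_offset;

   where the offset comes from -mstack-protector-guard-offset=.  */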
8277
8278
8279 /* Whether a register is callee saved or not. This is necessary because high
8280 registers are marked as caller saved when optimizing for size on Thumb-1
8281    targets, despite being callee saved, in order to avoid using them.  */
8282 #define callee_saved_reg_p(reg) \
8283 (!call_used_or_fixed_reg_p (reg) \
8284 || (TARGET_THUMB1 && optimize_size \
8285 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8286
8287 /* Return a mask for the call-clobbered low registers that are unused
8288 at the end of the prologue. */
8289 static unsigned long
8290 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8291 {
8292 unsigned long mask = 0;
8293 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8294
8295 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8296 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8297 mask |= 1 << (reg - FIRST_LO_REGNUM);
8298 return mask;
8299 }
8300
8301 /* Similarly for the start of the epilogue. */
8302 static unsigned long
8303 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8304 {
8305 unsigned long mask = 0;
8306 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8307
8308 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8309 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8310 mask |= 1 << (reg - FIRST_LO_REGNUM);
8311 return mask;
8312 }
8313
8314 /* Find a spare register to use during the prolog of a function. */
8315
8316 static int
8317 thumb_find_work_register (unsigned long pushed_regs_mask)
8318 {
8319 int reg;
8320
8321 unsigned long unused_regs
8322 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8323
8324 /* Check the argument registers first as these are call-used. The
8325 register allocation order means that sometimes r3 might be used
8326 but earlier argument registers might not, so check them all. */
8327 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8328 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8329 return reg;
8330
8331 /* Otherwise look for a call-saved register that is going to be pushed. */
8332 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8333 if (pushed_regs_mask & (1 << reg))
8334 return reg;
8335
8336 if (TARGET_THUMB2)
8337 {
8338 /* Thumb-2 can use high regs. */
8339 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8340 if (pushed_regs_mask & (1 << reg))
8341 return reg;
8342 }
8343 /* Something went wrong - thumb_compute_save_reg_mask()
8344 should have arranged for a suitable register to be pushed. */
8345 gcc_unreachable ();
8346 }
8347
8348 static GTY(()) int pic_labelno;
8349
8350 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8351 low register. */
8352
8353 void
8354 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8355 {
8356 rtx l1, labelno, pic_tmp, pic_rtx;
8357
8358 if (crtl->uses_pic_offset_table == 0
8359 || TARGET_SINGLE_PIC_BASE
8360 || TARGET_FDPIC)
8361 return;
8362
8363 gcc_assert (flag_pic);
8364
8365 if (pic_reg == NULL_RTX)
8366 pic_reg = cfun->machine->pic_reg;
8367 if (TARGET_VXWORKS_RTP)
8368 {
8369 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8370 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8371 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8372
8373 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8374
8375 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8376 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8377 }
8378 else
8379 {
8380 /* We use an UNSPEC rather than a LABEL_REF because this label
8381 never appears in the code stream. */
8382
8383 labelno = GEN_INT (pic_labelno++);
8384 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8385 l1 = gen_rtx_CONST (VOIDmode, l1);
8386
8387 /* On the ARM the PC register contains 'dot + 8' at the time of the
8388 addition, on the Thumb it is 'dot + 4'. */
8389 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8390 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8391 UNSPEC_GOTSYM_OFF);
8392 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8393
8394 if (TARGET_32BIT)
8395 {
8396 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8397 }
8398 else /* TARGET_THUMB1 */
8399 {
8400 if (arm_pic_register != INVALID_REGNUM
8401 && REGNO (pic_reg) > LAST_LO_REGNUM)
8402 {
8403 /* We will have pushed the pic register, so we should always be
8404 able to find a work register. */
8405 pic_tmp = gen_rtx_REG (SImode,
8406 thumb_find_work_register (saved_regs));
8407 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8408 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8409 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8410 }
8411 else if (arm_pic_register != INVALID_REGNUM
8412 && arm_pic_register > LAST_LO_REGNUM
8413 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8414 {
8415 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8416 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8417 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8418 }
8419 else
8420 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8421 }
8422 }
8423
8424 /* Need to emit this whether or not we obey regdecls,
8425 since setjmp/longjmp can cause life info to screw up. */
8426 emit_use (pic_reg);
8427 }
8428
8429 /* Try to determine whether an object, referenced via ORIG, will be
8430 placed in the text or data segment. This is used in FDPIC mode, to
8431 decide which relocations to use when accessing ORIG. *IS_READONLY
8432 is set to true if ORIG is a read-only location, false otherwise.
8433 Return true if we could determine the location of ORIG, false
8434 otherwise. *IS_READONLY is valid only when we return true. */
8435 static bool
8436 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8437 {
8438 *is_readonly = false;
8439
8440 if (LABEL_REF_P (orig))
8441 {
8442 *is_readonly = true;
8443 return true;
8444 }
8445
8446 if (SYMBOL_REF_P (orig))
8447 {
8448 if (CONSTANT_POOL_ADDRESS_P (orig))
8449 {
8450 *is_readonly = true;
8451 return true;
8452 }
8453 if (SYMBOL_REF_LOCAL_P (orig)
8454 && !SYMBOL_REF_EXTERNAL_P (orig)
8455 && SYMBOL_REF_DECL (orig)
8456 && (!DECL_P (SYMBOL_REF_DECL (orig))
8457 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8458 {
8459 tree decl = SYMBOL_REF_DECL (orig);
8460 tree init = VAR_P (decl)
8461 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8462 ? decl : 0;
8463 int reloc = 0;
8464 bool named_section, readonly;
8465
8466 if (init && init != error_mark_node)
8467 reloc = compute_reloc_for_constant (init);
8468
8469 named_section = VAR_P (decl)
8470 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8471 readonly = decl_readonly_section (decl, reloc);
8472
8473 /* We don't know where the link script will put a named
8474 section, so return false in such a case. */
8475 if (named_section)
8476 return false;
8477
8478 *is_readonly = readonly;
8479 return true;
8480 }
8481
8482 /* We don't know. */
8483 return false;
8484 }
8485
8486 gcc_unreachable ();
8487 }
8488
8489 /* Generate code to load the address of a static var when flag_pic is set. */
8490 static rtx_insn *
8491 arm_pic_static_addr (rtx orig, rtx reg)
8492 {
8493 rtx l1, labelno, offset_rtx;
8494 rtx_insn *insn;
8495
8496 gcc_assert (flag_pic);
8497
8498 bool is_readonly = false;
8499 bool info_known = false;
8500
8501 if (TARGET_FDPIC
8502 && SYMBOL_REF_P (orig)
8503 && !SYMBOL_REF_FUNCTION_P (orig))
8504 info_known = arm_is_segment_info_known (orig, &is_readonly);
8505
8506 if (TARGET_FDPIC
8507 && SYMBOL_REF_P (orig)
8508 && !SYMBOL_REF_FUNCTION_P (orig)
8509 && !info_known)
8510 {
8511       /* We don't know where orig is stored, so we have to be
8512 pessimistic and use a GOT relocation. */
8513 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8514
8515 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8516 }
8517 else if (TARGET_FDPIC
8518 && SYMBOL_REF_P (orig)
8519 && (SYMBOL_REF_FUNCTION_P (orig)
8520 || !is_readonly))
8521 {
8522 /* We use the GOTOFF relocation. */
8523 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8524
8525 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8526 emit_insn (gen_movsi (reg, l1));
8527 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8528 }
8529 else
8530 {
8531 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8532 PC-relative access. */
8533 /* We use an UNSPEC rather than a LABEL_REF because this label
8534 never appears in the code stream. */
8535 labelno = GEN_INT (pic_labelno++);
8536 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8537 l1 = gen_rtx_CONST (VOIDmode, l1);
8538
8539 /* On the ARM the PC register contains 'dot + 8' at the time of the
8540 addition, on the Thumb it is 'dot + 4'. */
8541 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8542 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8543 UNSPEC_SYMBOL_OFFSET);
8544 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8545
8546 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8547 labelno));
8548 }
8549
8550 return insn;
8551 }
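/* Illustrative note on the 'dot + 8' / 'dot + 4' adjustment above (an
   informal sketch, not a statement about the generated pattern): in ARM
   state a read of the PC yields the address of the current instruction
   plus 8, and in Thumb state plus 4, so the constant folded into
   OFFSET_RTX must compensate for that bias when the loaded value is
   later added to the PC.  */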
8552
8553 /* Return nonzero if X is valid as an ARM state addressing register. */
8554 static int
8555 arm_address_register_rtx_p (rtx x, int strict_p)
8556 {
8557 int regno;
8558
8559 if (!REG_P (x))
8560 return 0;
8561
8562 regno = REGNO (x);
8563
8564 if (strict_p)
8565 return ARM_REGNO_OK_FOR_BASE_P (regno);
8566
8567 return (regno <= LAST_ARM_REGNUM
8568 || regno >= FIRST_PSEUDO_REGISTER
8569 || regno == FRAME_POINTER_REGNUM
8570 || regno == ARG_POINTER_REGNUM);
8571 }
8572
8573 /* Return TRUE if this rtx is the difference of a symbol and a label,
8574 and will reduce to a PC-relative relocation in the object file.
8575 Expressions like this can be left alone when generating PIC, rather
8576 than forced through the GOT. */
8577 static int
8578 pcrel_constant_p (rtx x)
8579 {
8580 if (GET_CODE (x) == MINUS)
8581 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8582
8583 return FALSE;
8584 }
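/* For example (illustrative only), an expression such as
     (minus (symbol_ref "foo") (label_ref 23))
   satisfies pcrel_constant_p: the linker can resolve it to a fixed
   PC-relative offset, so no GOT entry is needed.  */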
8585
8586 /* Return true if X will surely end up in an index register after the next
8587 splitting pass. */
8588 static bool
8589 will_be_in_index_register (const_rtx x)
8590 {
8591 /* arm.md: calculate_pic_address will split this into a register. */
8592 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8593 }
8594
8595 /* Return nonzero if X is a valid ARM state address operand. */
8596 int
8597 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8598 int strict_p)
8599 {
8600 bool use_ldrd;
8601 enum rtx_code code = GET_CODE (x);
8602
8603 if (arm_address_register_rtx_p (x, strict_p))
8604 return 1;
8605
8606 use_ldrd = (TARGET_LDRD
8607 && (mode == DImode || mode == DFmode));
8608
8609 if (code == POST_INC || code == PRE_DEC
8610 || ((code == PRE_INC || code == POST_DEC)
8611 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8612 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8613
8614 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8615 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8616 && GET_CODE (XEXP (x, 1)) == PLUS
8617 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8618 {
8619 rtx addend = XEXP (XEXP (x, 1), 1);
8620
8621 /* Don't allow ldrd post-increment by register because it's hard
8622 to fix up invalid register choices. */
8623 if (use_ldrd
8624 && GET_CODE (x) == POST_MODIFY
8625 && REG_P (addend))
8626 return 0;
8627
8628 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8629 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8630 }
8631
8632 /* After reload constants split into minipools will have addresses
8633 from a LABEL_REF. */
8634 else if (reload_completed
8635 && (code == LABEL_REF
8636 || (code == CONST
8637 && GET_CODE (XEXP (x, 0)) == PLUS
8638 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8639 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8640 return 1;
8641
8642 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8643 return 0;
8644
8645 else if (code == PLUS)
8646 {
8647 rtx xop0 = XEXP (x, 0);
8648 rtx xop1 = XEXP (x, 1);
8649
8650 return ((arm_address_register_rtx_p (xop0, strict_p)
8651 && ((CONST_INT_P (xop1)
8652 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8653 || (!strict_p && will_be_in_index_register (xop1))))
8654 || (arm_address_register_rtx_p (xop1, strict_p)
8655 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8656 }
8657
8658 #if 0
8659 /* Reload currently can't handle MINUS, so disable this for now */
8660 else if (GET_CODE (x) == MINUS)
8661 {
8662 rtx xop0 = XEXP (x, 0);
8663 rtx xop1 = XEXP (x, 1);
8664
8665 return (arm_address_register_rtx_p (xop0, strict_p)
8666 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8667 }
8668 #endif
8669
8670 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8671 && code == SYMBOL_REF
8672 && CONSTANT_POOL_ADDRESS_P (x)
8673 && ! (flag_pic
8674 && symbol_mentioned_p (get_pool_constant (x))
8675 && ! pcrel_constant_p (get_pool_constant (x))))
8676 return 1;
8677
8678 return 0;
8679 }
8680
8681 /* Return true if we can avoid creating a constant pool entry for x. */
8682 static bool
8683 can_avoid_literal_pool_for_label_p (rtx x)
8684 {
8685 /* Normally we can assign constant values to target registers without
8686 the help of the constant pool. But there are cases where we have to use
8687 the constant pool, such as:
8688 1) assigning a label to a register.
8689 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8690
8691 A constant pool access of the form:
8692 (set (reg r0) (mem (symbol_ref (".LC0"))))
8693 will cause the use of the literal pool (later on, in arm_reorg).
8694 So here we mark such a form as invalid, and the compiler will then
8695 adjust it into:
8696 (set (reg r0) (symbol_ref (".LC0")))
8697 (set (reg r0) (mem (reg r0))).
8698 No extra register is required, and (mem (reg r0)) won't cause the use
8699 of literal pools. */
8700 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8701 && CONSTANT_POOL_ADDRESS_P (x))
8702 return 1;
8703 return 0;
8704 }
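/* A rough, hedged illustration (assuming a target with MOVW/MOVT and
   arm_disable_literal_pool set by -mpure-code or -mslow-flash-data):
   the rejected form above typically ends up rendered along the lines of
     movw  r0, #:lower16:.LC0
     movt  r0, #:upper16:.LC0
     ldr   r0, [r0]
   rather than a PC-relative literal load, keeping constant data out of
   the code section.  */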
8705
8706
8707 /* Return nonzero if X is a valid Thumb-2 address operand. */
8708 static int
8709 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8710 {
8711 bool use_ldrd;
8712 enum rtx_code code = GET_CODE (x);
8713
8714 /* If we are dealing with an MVE predicate mode, then treat it as HImode, as
8715 we can store and load it like any other 16-bit value. */
8716 if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
8717 mode = HImode;
8718
8719 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8720 return mve_vector_mem_operand (mode, x, strict_p);
8721
8722 if (arm_address_register_rtx_p (x, strict_p))
8723 return 1;
8724
8725 use_ldrd = (TARGET_LDRD
8726 && (mode == DImode || mode == DFmode));
8727
8728 if (code == POST_INC || code == PRE_DEC
8729 || ((code == PRE_INC || code == POST_DEC)
8730 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8731 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8732
8733 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8734 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8735 && GET_CODE (XEXP (x, 1)) == PLUS
8736 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8737 {
8738 /* Thumb-2 only has autoincrement by constant. */
8739 rtx addend = XEXP (XEXP (x, 1), 1);
8740 HOST_WIDE_INT offset;
8741
8742 if (!CONST_INT_P (addend))
8743 return 0;
8744
8745 offset = INTVAL (addend);
8746 if (GET_MODE_SIZE (mode) <= 4)
8747 return (offset > -256 && offset < 256);
8748
8749 return (use_ldrd && offset > -1024 && offset < 1024
8750 && (offset & 3) == 0);
8751 }
8752
8753 /* After reload constants split into minipools will have addresses
8754 from a LABEL_REF. */
8755 else if (reload_completed
8756 && (code == LABEL_REF
8757 || (code == CONST
8758 && GET_CODE (XEXP (x, 0)) == PLUS
8759 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8760 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8761 return 1;
8762
8763 else if (mode == TImode
8764 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8765 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8766 return 0;
8767
8768 else if (code == PLUS)
8769 {
8770 rtx xop0 = XEXP (x, 0);
8771 rtx xop1 = XEXP (x, 1);
8772
8773 return ((arm_address_register_rtx_p (xop0, strict_p)
8774 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8775 || (!strict_p && will_be_in_index_register (xop1))))
8776 || (arm_address_register_rtx_p (xop1, strict_p)
8777 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8778 }
8779
8780 else if (can_avoid_literal_pool_for_label_p (x))
8781 return 0;
8782
8783 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8784 && code == SYMBOL_REF
8785 && CONSTANT_POOL_ADDRESS_P (x)
8786 && ! (flag_pic
8787 && symbol_mentioned_p (get_pool_constant (x))
8788 && ! pcrel_constant_p (get_pool_constant (x))))
8789 return 1;
8790
8791 return 0;
8792 }
8793
8794 /* Return nonzero if INDEX is valid for an address index operand in
8795 ARM state. */
8796 static int
8797 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8798 int strict_p)
8799 {
8800 HOST_WIDE_INT range;
8801 enum rtx_code code = GET_CODE (index);
8802
8803 /* Standard coprocessor addressing modes. */
8804 if (TARGET_HARD_FLOAT
8805 && (mode == SFmode || mode == DFmode))
8806 return (code == CONST_INT && INTVAL (index) < 1024
8807 && INTVAL (index) > -1024
8808 && (INTVAL (index) & 3) == 0);
8809
8810 /* For quad modes, we restrict the constant offset to be slightly less
8811 than what the instruction format permits. We do this because for
8812 quad mode moves, we will actually decompose them into two separate
8813 double-mode reads or writes. INDEX must therefore be a valid
8814 (double-mode) offset and so should INDEX+8. */
8815 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8816 return (code == CONST_INT
8817 && INTVAL (index) < 1016
8818 && INTVAL (index) > -1024
8819 && (INTVAL (index) & 3) == 0);
8820
8821 /* We have no such constraint on double mode offsets, so we permit the
8822 full range of the instruction format. */
8823 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8824 return (code == CONST_INT
8825 && INTVAL (index) < 1024
8826 && INTVAL (index) > -1024
8827 && (INTVAL (index) & 3) == 0);
8828
8829 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8830 return (code == CONST_INT
8831 && INTVAL (index) < 1024
8832 && INTVAL (index) > -1024
8833 && (INTVAL (index) & 3) == 0);
8834
8835 if (arm_address_register_rtx_p (index, strict_p)
8836 && (GET_MODE_SIZE (mode) <= 4))
8837 return 1;
8838
8839 if (mode == DImode || mode == DFmode)
8840 {
8841 if (code == CONST_INT)
8842 {
8843 HOST_WIDE_INT val = INTVAL (index);
8844
8845 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8846 If vldr is selected it uses arm_coproc_mem_operand. */
8847 if (TARGET_LDRD)
8848 return val > -256 && val < 256;
8849 else
8850 return val > -4096 && val < 4092;
8851 }
8852
8853 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8854 }
8855
8856 if (GET_MODE_SIZE (mode) <= 4
8857 && ! (arm_arch4
8858 && (mode == HImode
8859 || mode == HFmode
8860 || (mode == QImode && outer == SIGN_EXTEND))))
8861 {
8862 if (code == MULT)
8863 {
8864 rtx xiop0 = XEXP (index, 0);
8865 rtx xiop1 = XEXP (index, 1);
8866
8867 return ((arm_address_register_rtx_p (xiop0, strict_p)
8868 && power_of_two_operand (xiop1, SImode))
8869 || (arm_address_register_rtx_p (xiop1, strict_p)
8870 && power_of_two_operand (xiop0, SImode)));
8871 }
8872 else if (code == LSHIFTRT || code == ASHIFTRT
8873 || code == ASHIFT || code == ROTATERT)
8874 {
8875 rtx op = XEXP (index, 1);
8876
8877 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8878 && CONST_INT_P (op)
8879 && INTVAL (op) > 0
8880 && INTVAL (op) <= 31);
8881 }
8882 }
8883
8884 /* For ARM v4 we may be doing a sign-extend operation during the
8885 load. */
8886 if (arm_arch4)
8887 {
8888 if (mode == HImode
8889 || mode == HFmode
8890 || (outer == SIGN_EXTEND && mode == QImode))
8891 range = 256;
8892 else
8893 range = 4096;
8894 }
8895 else
8896 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8897
8898 return (code == CONST_INT
8899 && INTVAL (index) < range
8900 && INTVAL (index) > -range);
8901 }
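/* Worked example of the range checks above (illustrative): with arm_arch4
   and mode == HImode the accepted immediate range is (-256, 256), so
   (plus (reg r4) (const_int 250)) is a legitimate HImode address while
   (plus (reg r4) (const_int 300)) is not; for SImode the range widens to
   (-4096, 4096).  */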
8902
8903 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8904 index operand. i.e. 1, 2, 4 or 8. */
8905 static bool
8906 thumb2_index_mul_operand (rtx op)
8907 {
8908 HOST_WIDE_INT val;
8909
8910 if (!CONST_INT_P (op))
8911 return false;
8912
8913 val = INTVAL (op);
8914 return (val == 1 || val == 2 || val == 4 || val == 8);
8915 }
8916
8917 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8918 static int
8919 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8920 {
8921 enum rtx_code code = GET_CODE (index);
8922
8923 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8924 /* Standard coprocessor addressing modes. */
8925 if (TARGET_VFP_BASE
8926 && (mode == SFmode || mode == DFmode))
8927 return (code == CONST_INT && INTVAL (index) < 1024
8928 /* Thumb-2 allows only a > -256 index range for its core register
8929 load/stores. Since we allow SF/DF in core registers, we have
8930 to use the intersection between -256~4096 (core) and -1024~1024
8931 (coprocessor). */
8932 && INTVAL (index) > -256
8933 && (INTVAL (index) & 3) == 0);
8934
8935 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8936 {
8937 /* For DImode assume values will usually live in core regs
8938 and only allow LDRD addressing modes. */
8939 if (!TARGET_LDRD || mode != DImode)
8940 return (code == CONST_INT
8941 && INTVAL (index) < 1024
8942 && INTVAL (index) > -1024
8943 && (INTVAL (index) & 3) == 0);
8944 }
8945
8946 /* For quad modes, we restrict the constant offset to be slightly less
8947 than what the instruction format permits. We do this because for
8948 quad mode moves, we will actually decompose them into two separate
8949 double-mode reads or writes. INDEX must therefore be a valid
8950 (double-mode) offset and so should INDEX+8. */
8951 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8952 return (code == CONST_INT
8953 && INTVAL (index) < 1016
8954 && INTVAL (index) > -1024
8955 && (INTVAL (index) & 3) == 0);
8956
8957 /* We have no such constraint on double mode offsets, so we permit the
8958 full range of the instruction format. */
8959 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8960 return (code == CONST_INT
8961 && INTVAL (index) < 1024
8962 && INTVAL (index) > -1024
8963 && (INTVAL (index) & 3) == 0);
8964
8965 if (arm_address_register_rtx_p (index, strict_p)
8966 && (GET_MODE_SIZE (mode) <= 4))
8967 return 1;
8968
8969 if (mode == DImode || mode == DFmode)
8970 {
8971 if (code == CONST_INT)
8972 {
8973 HOST_WIDE_INT val = INTVAL (index);
8974 /* Thumb-2 ldrd only has reg+const addressing modes.
8975 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8976 If vldr is selected it uses arm_coproc_mem_operand. */
8977 if (TARGET_LDRD)
8978 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8979 else
8980 return IN_RANGE (val, -255, 4095 - 4);
8981 }
8982 else
8983 return 0;
8984 }
8985
8986 if (code == MULT)
8987 {
8988 rtx xiop0 = XEXP (index, 0);
8989 rtx xiop1 = XEXP (index, 1);
8990
8991 return ((arm_address_register_rtx_p (xiop0, strict_p)
8992 && thumb2_index_mul_operand (xiop1))
8993 || (arm_address_register_rtx_p (xiop1, strict_p)
8994 && thumb2_index_mul_operand (xiop0)));
8995 }
8996 else if (code == ASHIFT)
8997 {
8998 rtx op = XEXP (index, 1);
8999
9000 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
9001 && CONST_INT_P (op)
9002 && INTVAL (op) > 0
9003 && INTVAL (op) <= 3);
9004 }
9005
9006 return (code == CONST_INT
9007 && INTVAL (index) < 4096
9008 && INTVAL (index) > -256);
9009 }
9010
9011 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
9012 static int
9013 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
9014 {
9015 int regno;
9016
9017 if (!REG_P (x))
9018 return 0;
9019
9020 regno = REGNO (x);
9021
9022 if (strict_p)
9023 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
9024
9025 return (regno <= LAST_LO_REGNUM
9026 || regno > LAST_VIRTUAL_REGISTER
9027 || regno == FRAME_POINTER_REGNUM
9028 || (GET_MODE_SIZE (mode) >= 4
9029 && (regno == STACK_POINTER_REGNUM
9030 || regno >= FIRST_PSEUDO_REGISTER
9031 || x == hard_frame_pointer_rtx
9032 || x == arg_pointer_rtx)));
9033 }
9034
9035 /* Return nonzero if x is a legitimate index register. This is the case
9036 for any base register that can access a QImode object. */
9037 inline static int
9038 thumb1_index_register_rtx_p (rtx x, int strict_p)
9039 {
9040 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9041 }
9042
9043 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9044
9045 The AP may be eliminated to either the SP or the FP, so we use the
9046 least common denominator, e.g. SImode, and offsets from 0 to 64.
9047
9048 ??? Verify whether the above is the right approach.
9049
9050 ??? Also, the FP may be eliminated to the SP, so perhaps that
9051 needs special handling also.
9052
9053 ??? Look at how the mips16 port solves this problem. It probably uses
9054 better ways to solve some of these problems.
9055
9056 Although it is not incorrect, we don't accept QImode and HImode
9057 addresses based on the frame pointer or arg pointer until the
9058 reload pass starts. This is so that eliminating such addresses
9059 into stack based ones won't produce impossible code. */
9060 int
9061 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9062 {
9063 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9064 return 0;
9065
9066 /* ??? Not clear if this is right. Experiment. */
9067 if (GET_MODE_SIZE (mode) < 4
9068 && !(reload_in_progress || reload_completed)
9069 && (reg_mentioned_p (frame_pointer_rtx, x)
9070 || reg_mentioned_p (arg_pointer_rtx, x)
9071 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9072 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9073 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9074 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9075 return 0;
9076
9077 /* Accept any base register. SP only in SImode or larger. */
9078 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9079 return 1;
9080
9081 /* This is PC relative data before arm_reorg runs. */
9082 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9083 && SYMBOL_REF_P (x)
9084 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9085 && !arm_disable_literal_pool)
9086 return 1;
9087
9088 /* This is PC relative data after arm_reorg runs. */
9089 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9090 && reload_completed
9091 && (LABEL_REF_P (x)
9092 || (GET_CODE (x) == CONST
9093 && GET_CODE (XEXP (x, 0)) == PLUS
9094 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9095 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9096 return 1;
9097
9098 /* Post-inc indexing only supported for SImode and larger. */
9099 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9100 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9101 return 1;
9102
9103 else if (GET_CODE (x) == PLUS)
9104 {
9105 /* REG+REG address can be any two index registers. */
9106 /* We disallow FRAME+REG addressing since we know that FRAME
9107 will be replaced with STACK, and SP relative addressing only
9108 permits SP+OFFSET. */
9109 if (GET_MODE_SIZE (mode) <= 4
9110 && XEXP (x, 0) != frame_pointer_rtx
9111 && XEXP (x, 1) != frame_pointer_rtx
9112 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9113 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9114 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9115 return 1;
9116
9117 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
9118 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9119 || XEXP (x, 0) == arg_pointer_rtx)
9120 && CONST_INT_P (XEXP (x, 1))
9121 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9122 return 1;
9123
9124 /* REG+const has a 10-bit offset for SP, but only SImode and
9125 larger are supported. */
9126 /* ??? Should probably check for DI/DFmode overflow here
9127 just like GO_IF_LEGITIMATE_OFFSET does. */
9128 else if (REG_P (XEXP (x, 0))
9129 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9130 && GET_MODE_SIZE (mode) >= 4
9131 && CONST_INT_P (XEXP (x, 1))
9132 && INTVAL (XEXP (x, 1)) >= 0
9133 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9134 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9135 return 1;
9136
9137 else if (REG_P (XEXP (x, 0))
9138 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9139 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9140 || VIRTUAL_REGISTER_P (XEXP (x, 0)))
9141 && GET_MODE_SIZE (mode) >= 4
9142 && CONST_INT_P (XEXP (x, 1))
9143 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9144 return 1;
9145 }
9146
9147 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9148 && GET_MODE_SIZE (mode) == 4
9149 && SYMBOL_REF_P (x)
9150 && CONSTANT_POOL_ADDRESS_P (x)
9151 && !arm_disable_literal_pool
9152 && ! (flag_pic
9153 && symbol_mentioned_p (get_pool_constant (x))
9154 && ! pcrel_constant_p (get_pool_constant (x))))
9155 return 1;
9156
9157 return 0;
9158 }
9159
9160 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9161 instruction of mode MODE. */
9162 int
9163 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9164 {
9165 switch (GET_MODE_SIZE (mode))
9166 {
9167 case 1:
9168 return val >= 0 && val < 32;
9169
9170 case 2:
9171 return val >= 0 && val < 64 && (val & 1) == 0;
9172
9173 default:
9174 return (val >= 0
9175 && (val + GET_MODE_SIZE (mode)) <= 128
9176 && (val & 3) == 0);
9177 }
9178 }
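/* For illustration, the checks above amount to: QImode offsets 0-31,
   HImode offsets 0-62 (even), and SImode offsets 0-124 in multiples of
   four; larger modes are constrained so that VAL plus the mode size
   stays within 128 bytes.  These match the Thumb-1 ldrb/ldrh/ldr
   immediate forms.  */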
9179
9180 bool
9181 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, code_helper)
9182 {
9183 if (TARGET_ARM)
9184 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9185 else if (TARGET_THUMB2)
9186 return thumb2_legitimate_address_p (mode, x, strict_p);
9187 else /* if (TARGET_THUMB1) */
9188 return thumb1_legitimate_address_p (mode, x, strict_p);
9189 }
9190
9191 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9192
9193 Given an rtx X being reloaded into a reg required to be
9194 in class CLASS, return the class of reg to actually use.
9195 In general this is just CLASS, but for the Thumb core registers and
9196 immediate constants we prefer a LO_REGS class or a subset. */
9197
9198 static reg_class_t
9199 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9200 {
9201 if (TARGET_32BIT)
9202 return rclass;
9203 else
9204 {
9205 if (rclass == GENERAL_REGS)
9206 return LO_REGS;
9207 else
9208 return rclass;
9209 }
9210 }
9211
9212 /* Build the SYMBOL_REF for __tls_get_addr. */
9213
9214 static GTY(()) rtx tls_get_addr_libfunc;
9215
9216 static rtx
9217 get_tls_get_addr (void)
9218 {
9219 if (!tls_get_addr_libfunc)
9220 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9221 return tls_get_addr_libfunc;
9222 }
9223
9224 rtx
9225 arm_load_tp (rtx target)
9226 {
9227 if (!target)
9228 target = gen_reg_rtx (SImode);
9229
9230 if (TARGET_HARD_TP)
9231 {
9232 /* Can return in any reg. */
9233 emit_insn (gen_load_tp_hard (target));
9234 }
9235 else
9236 {
9237 /* Always returned in r0. Immediately copy the result into a pseudo,
9238 otherwise other uses of r0 (e.g. setting up function arguments) may
9239 clobber the value. */
9240
9241 rtx tmp;
9242
9243 if (TARGET_FDPIC)
9244 {
9245 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9246 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9247
9248 emit_insn (gen_load_tp_soft_fdpic ());
9249
9250 /* Restore r9. */
9251 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9252 }
9253 else
9254 emit_insn (gen_load_tp_soft ());
9255
9256 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9257 emit_move_insn (target, tmp);
9258 }
9259 return target;
9260 }
9261
9262 static rtx
9263 load_tls_operand (rtx x, rtx reg)
9264 {
9265 rtx tmp;
9266
9267 if (reg == NULL_RTX)
9268 reg = gen_reg_rtx (SImode);
9269
9270 tmp = gen_rtx_CONST (SImode, x);
9271
9272 emit_move_insn (reg, tmp);
9273
9274 return reg;
9275 }
9276
9277 static rtx_insn *
9278 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9279 {
9280 rtx label, labelno = NULL_RTX, sum;
9281
9282 gcc_assert (reloc != TLS_DESCSEQ);
9283 start_sequence ();
9284
9285 if (TARGET_FDPIC)
9286 {
9287 sum = gen_rtx_UNSPEC (Pmode,
9288 gen_rtvec (2, x, GEN_INT (reloc)),
9289 UNSPEC_TLS);
9290 }
9291 else
9292 {
9293 labelno = GEN_INT (pic_labelno++);
9294 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9295 label = gen_rtx_CONST (VOIDmode, label);
9296
9297 sum = gen_rtx_UNSPEC (Pmode,
9298 gen_rtvec (4, x, GEN_INT (reloc), label,
9299 GEN_INT (TARGET_ARM ? 8 : 4)),
9300 UNSPEC_TLS);
9301 }
9302 reg = load_tls_operand (sum, reg);
9303
9304 if (TARGET_FDPIC)
9305 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9306 else if (TARGET_ARM)
9307 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9308 else
9309 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9310
9311 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9312 LCT_PURE, /* LCT_CONST? */
9313 Pmode, reg, Pmode);
9314
9315 rtx_insn *insns = get_insns ();
9316 end_sequence ();
9317
9318 return insns;
9319 }
9320
9321 static rtx
9322 arm_tls_descseq_addr (rtx x, rtx reg)
9323 {
9324 rtx labelno = GEN_INT (pic_labelno++);
9325 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9326 rtx sum = gen_rtx_UNSPEC (Pmode,
9327 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9328 gen_rtx_CONST (VOIDmode, label),
9329 GEN_INT (!TARGET_ARM)),
9330 UNSPEC_TLS);
9331 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9332
9333 emit_insn (gen_tlscall (x, labelno));
9334 if (!reg)
9335 reg = gen_reg_rtx (SImode);
9336 else
9337 gcc_assert (REGNO (reg) != R0_REGNUM);
9338
9339 emit_move_insn (reg, reg0);
9340
9341 return reg;
9342 }
9343
9344
9345 rtx
9346 legitimize_tls_address (rtx x, rtx reg)
9347 {
9348 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9349 rtx_insn *insns;
9350 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9351
9352 switch (model)
9353 {
9354 case TLS_MODEL_GLOBAL_DYNAMIC:
9355 if (TARGET_GNU2_TLS)
9356 {
9357 gcc_assert (!TARGET_FDPIC);
9358
9359 reg = arm_tls_descseq_addr (x, reg);
9360
9361 tp = arm_load_tp (NULL_RTX);
9362
9363 dest = gen_rtx_PLUS (Pmode, tp, reg);
9364 }
9365 else
9366 {
9367 /* Original scheme */
9368 if (TARGET_FDPIC)
9369 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9370 else
9371 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9372 dest = gen_reg_rtx (Pmode);
9373 emit_libcall_block (insns, dest, ret, x);
9374 }
9375 return dest;
9376
9377 case TLS_MODEL_LOCAL_DYNAMIC:
9378 if (TARGET_GNU2_TLS)
9379 {
9380 gcc_assert (!TARGET_FDPIC);
9381
9382 reg = arm_tls_descseq_addr (x, reg);
9383
9384 tp = arm_load_tp (NULL_RTX);
9385
9386 dest = gen_rtx_PLUS (Pmode, tp, reg);
9387 }
9388 else
9389 {
9390 if (TARGET_FDPIC)
9391 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9392 else
9393 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9394
9395 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9396 share the LDM result with other LD model accesses. */
9397 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9398 UNSPEC_TLS);
9399 dest = gen_reg_rtx (Pmode);
9400 emit_libcall_block (insns, dest, ret, eqv);
9401
9402 /* Load the addend. */
9403 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9404 GEN_INT (TLS_LDO32)),
9405 UNSPEC_TLS);
9406 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9407 dest = gen_rtx_PLUS (Pmode, dest, addend);
9408 }
9409 return dest;
9410
9411 case TLS_MODEL_INITIAL_EXEC:
9412 if (TARGET_FDPIC)
9413 {
9414 sum = gen_rtx_UNSPEC (Pmode,
9415 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9416 UNSPEC_TLS);
9417 reg = load_tls_operand (sum, reg);
9418 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9419 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9420 }
9421 else
9422 {
9423 labelno = GEN_INT (pic_labelno++);
9424 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9425 label = gen_rtx_CONST (VOIDmode, label);
9426 sum = gen_rtx_UNSPEC (Pmode,
9427 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9428 GEN_INT (TARGET_ARM ? 8 : 4)),
9429 UNSPEC_TLS);
9430 reg = load_tls_operand (sum, reg);
9431
9432 if (TARGET_ARM)
9433 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9434 else if (TARGET_THUMB2)
9435 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9436 else
9437 {
9438 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9439 emit_move_insn (reg, gen_const_mem (SImode, reg));
9440 }
9441 }
9442
9443 tp = arm_load_tp (NULL_RTX);
9444
9445 return gen_rtx_PLUS (Pmode, tp, reg);
9446
9447 case TLS_MODEL_LOCAL_EXEC:
9448 tp = arm_load_tp (NULL_RTX);
9449
9450 reg = gen_rtx_UNSPEC (Pmode,
9451 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9452 UNSPEC_TLS);
9453 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9454
9455 return gen_rtx_PLUS (Pmode, tp, reg);
9456
9457 default:
9458 abort ();
9459 }
9460 }
9461
9462 /* Try machine-dependent ways of modifying an illegitimate address
9463 to be legitimate. If we find one, return the new, valid address. */
9464 rtx
9465 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9466 {
9467 if (arm_tls_referenced_p (x))
9468 {
9469 rtx addend = NULL;
9470
9471 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9472 {
9473 addend = XEXP (XEXP (x, 0), 1);
9474 x = XEXP (XEXP (x, 0), 0);
9475 }
9476
9477 if (!SYMBOL_REF_P (x))
9478 return x;
9479
9480 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9481
9482 x = legitimize_tls_address (x, NULL_RTX);
9483
9484 if (addend)
9485 {
9486 x = gen_rtx_PLUS (SImode, x, addend);
9487 orig_x = x;
9488 }
9489 else
9490 return x;
9491 }
9492
9493 if (TARGET_THUMB1)
9494 return thumb_legitimize_address (x, orig_x, mode);
9495
9496 if (GET_CODE (x) == PLUS)
9497 {
9498 rtx xop0 = XEXP (x, 0);
9499 rtx xop1 = XEXP (x, 1);
9500
9501 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9502 xop0 = force_reg (SImode, xop0);
9503
9504 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9505 && !symbol_mentioned_p (xop1))
9506 xop1 = force_reg (SImode, xop1);
9507
9508 if (ARM_BASE_REGISTER_RTX_P (xop0)
9509 && CONST_INT_P (xop1))
9510 {
9511 HOST_WIDE_INT n, low_n;
9512 rtx base_reg, val;
9513 n = INTVAL (xop1);
9514
9515 /* VFP addressing modes actually allow greater offsets, but for
9516 now we just stick with the lowest common denominator. */
9517 if (mode == DImode || mode == DFmode)
9518 {
9519 low_n = n & 0x0f;
9520 n &= ~0x0f;
9521 if (low_n > 4)
9522 {
9523 n += 16;
9524 low_n -= 16;
9525 }
9526 }
9527 else
9528 {
9529 low_n = ((mode) == TImode ? 0
9530 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9531 n -= low_n;
9532 }
9533
9534 base_reg = gen_reg_rtx (SImode);
9535 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9536 emit_move_insn (base_reg, val);
9537 x = plus_constant (Pmode, base_reg, low_n);
9538 }
9539 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9540 x = gen_rtx_PLUS (SImode, xop0, xop1);
9541 }
9542
9543 /* XXX We don't allow MINUS any more -- see comment in
9544 arm_legitimate_address_outer_p (). */
9545 else if (GET_CODE (x) == MINUS)
9546 {
9547 rtx xop0 = XEXP (x, 0);
9548 rtx xop1 = XEXP (x, 1);
9549
9550 if (CONSTANT_P (xop0))
9551 xop0 = force_reg (SImode, xop0);
9552
9553 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9554 xop1 = force_reg (SImode, xop1);
9555
9556 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9557 x = gen_rtx_MINUS (SImode, xop0, xop1);
9558 }
9559
9560 /* Make sure to take full advantage of the pre-indexed addressing mode
9561 with absolute addresses, which often allows the base register to
9562 be factorized for multiple adjacent memory references, and it might
9563 even allow the minipool to be avoided entirely. */
9564 else if (CONST_INT_P (x) && optimize > 0)
9565 {
9566 unsigned int bits;
9567 HOST_WIDE_INT mask, base, index;
9568 rtx base_reg;
9569
9570 /* LDR and LDRB can use a 12-bit index; LDRSB and the rest can
9571 only use an 8-bit index. So let's use a 12-bit index for
9572 SImode only and hope that arm_gen_constant will enable LDRB
9573 to use more bits. */
9574 bits = (mode == SImode) ? 12 : 8;
9575 mask = (1 << bits) - 1;
9576 base = INTVAL (x) & ~mask;
9577 index = INTVAL (x) & mask;
9578 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9579 {
9580 /* It'll most probably be more efficient to generate the
9581 base with more bits set and use a negative index instead.
9582 Don't do this for Thumb as negative offsets are much more
9583 limited. */
9584 base |= mask;
9585 index -= mask;
9586 }
9587 base_reg = force_reg (SImode, GEN_INT (base));
9588 x = plus_constant (Pmode, base_reg, index);
9589 }
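/* A worked example of the split above (illustrative): legitimizing the
   SImode address (const_int 0x12345) uses a 12-bit mask, giving
   base = 0x12000 and index = 0x345, so neighbouring accesses around
   0x12000 can share the same base register.  */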
9590
9591 if (flag_pic)
9592 {
9593 /* We need to find and carefully transform any SYMBOL and LABEL
9594 references; so go back to the original address expression. */
9595 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9596 false /*compute_now*/);
9597
9598 if (new_x != orig_x)
9599 x = new_x;
9600 }
9601
9602 return x;
9603 }
9604
9605
9606 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9607 to be legitimate. If we find one, return the new, valid address. */
9608 rtx
9609 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9610 {
9611 if (GET_CODE (x) == PLUS
9612 && CONST_INT_P (XEXP (x, 1))
9613 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9614 || INTVAL (XEXP (x, 1)) < 0))
9615 {
9616 rtx xop0 = XEXP (x, 0);
9617 rtx xop1 = XEXP (x, 1);
9618 HOST_WIDE_INT offset = INTVAL (xop1);
9619
9620 /* Try and fold the offset into a biasing of the base register and
9621 then offsetting that. Don't do this when optimizing for space
9622 since it can cause too many CSEs. */
9623 if (optimize_size && offset >= 0
9624 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9625 {
9626 HOST_WIDE_INT delta;
9627
9628 if (offset >= 256)
9629 delta = offset - (256 - GET_MODE_SIZE (mode));
9630 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9631 delta = 31 * GET_MODE_SIZE (mode);
9632 else
9633 delta = offset & (~31 * GET_MODE_SIZE (mode));
9634
9635 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9636 NULL_RTX);
9637 x = plus_constant (Pmode, xop0, delta);
9638 }
9639 else if (offset < 0 && offset > -256)
9640 /* Small negative offsets are best done with a subtract before the
9641 dereference; forcing these into a register normally takes two
9642 instructions. */
9643 x = force_operand (x, NULL_RTX);
9644 else
9645 {
9646 /* For the remaining cases, force the constant into a register. */
9647 xop1 = force_reg (SImode, xop1);
9648 x = gen_rtx_PLUS (SImode, xop0, xop1);
9649 }
9650 }
9651 else if (GET_CODE (x) == PLUS
9652 && s_register_operand (XEXP (x, 1), SImode)
9653 && !s_register_operand (XEXP (x, 0), SImode))
9654 {
9655 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9656
9657 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9658 }
9659
9660 if (flag_pic)
9661 {
9662 /* We need to find and carefully transform any SYMBOL and LABEL
9663 references; so go back to the original address expression. */
9664 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9665 false /*compute_now*/);
9666
9667 if (new_x != orig_x)
9668 x = new_x;
9669 }
9670
9671 return x;
9672 }
9673
9674 /* Return TRUE if X contains any TLS symbol references. */
9675
9676 bool
9677 arm_tls_referenced_p (rtx x)
9678 {
9679 if (! TARGET_HAVE_TLS)
9680 return false;
9681
9682 subrtx_iterator::array_type array;
9683 FOR_EACH_SUBRTX (iter, array, x, ALL)
9684 {
9685 const_rtx x = *iter;
9686 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9687 {
9688 /* ARM currently does not provide relocations that encode TLS variables
9689 into AArch32 instructions (only into data), so there is currently
9690 no way to implement these if the literal pool is disabled. */
9691 if (arm_disable_literal_pool)
9692 sorry ("accessing thread-local storage is not currently supported "
9693 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9694
9695 return true;
9696 }
9697
9698 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9699 TLS offsets, not real symbol references. */
9700 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9701 iter.skip_subrtxes ();
9702 }
9703 return false;
9704 }
9705
9706 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9707
9708 On the ARM, allow any integer (invalid ones are removed later by insn
9709 patterns), nice doubles and symbol_refs which refer to the function's
9710 constant pool XXX.
9711
9712 When generating pic allow anything. */
9713
9714 static bool
9715 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9716 {
9717 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9718 return false;
9719
9720 return flag_pic || !label_mentioned_p (x);
9721 }
9722
9723 static bool
9724 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9725 {
9726 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9727 RTXs. These RTXs must therefore be allowed for Thumb-1 so that when run
9728 for ARMv8-M Baseline or later the result is valid. */
9729 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9730 x = XEXP (x, 0);
9731
9732 return (CONST_INT_P (x)
9733 || CONST_DOUBLE_P (x)
9734 || CONSTANT_ADDRESS_P (x)
9735 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9736 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9737 we build the symbol address with upper/lower
9738 relocations. */
9739 || (TARGET_THUMB1
9740 && !label_mentioned_p (x)
9741 && arm_valid_symbolic_address_p (x)
9742 && arm_disable_literal_pool)
9743 || flag_pic);
9744 }
9745
9746 static bool
9747 arm_legitimate_constant_p (machine_mode mode, rtx x)
9748 {
9749 return (!arm_cannot_force_const_mem (mode, x)
9750 && (TARGET_32BIT
9751 ? arm_legitimate_constant_p_1 (mode, x)
9752 : thumb_legitimate_constant_p (mode, x)));
9753 }
9754
9755 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9756
9757 static bool
9758 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9759 {
9760 rtx base, offset;
9761 split_const (x, &base, &offset);
9762
9763 if (SYMBOL_REF_P (base))
9764 {
9765 /* Function symbols cannot have an offset due to the Thumb bit. */
9766 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9767 && INTVAL (offset) != 0)
9768 return true;
9769
9770 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9771 && !offset_within_block_p (base, INTVAL (offset)))
9772 return true;
9773 }
9774 return arm_tls_referenced_p (x);
9775 }
9776 \f
9777 #define REG_OR_SUBREG_REG(X) \
9778 (REG_P (X) \
9779 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9780
9781 #define REG_OR_SUBREG_RTX(X) \
9782 (REG_P (X) ? (X) : SUBREG_REG (X))
9783
9784 static inline int
9785 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9786 {
9787 machine_mode mode = GET_MODE (x);
9788 int total, words;
9789
9790 switch (code)
9791 {
9792 case ASHIFT:
9793 case ASHIFTRT:
9794 case LSHIFTRT:
9795 case ROTATERT:
9796 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9797
9798 case PLUS:
9799 case MINUS:
9800 case COMPARE:
9801 case NEG:
9802 case NOT:
9803 return COSTS_N_INSNS (1);
9804
9805 case MULT:
9806 if (arm_arch6m && arm_m_profile_small_mul)
9807 return COSTS_N_INSNS (32);
9808
9809 if (CONST_INT_P (XEXP (x, 1)))
9810 {
9811 int cycles = 0;
9812 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9813
9814 while (i)
9815 {
9816 i >>= 2;
9817 cycles++;
9818 }
9819 return COSTS_N_INSNS (2) + cycles;
9820 }
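/* For the constant-multiplier loop above, e.g. INTVAL == 100 takes four
   iterations (100 -> 25 -> 6 -> 1 -> 0 when shifting right by two bits
   each time), giving COSTS_N_INSNS (2) + 4 -- a rough, illustrative
   proxy for an iterative multiply.  */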
9821 return COSTS_N_INSNS (1) + 16;
9822
9823 case SET:
9824 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9825 the mode. */
9826 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9827 return (COSTS_N_INSNS (words)
9828 + 4 * ((MEM_P (SET_SRC (x)))
9829 + MEM_P (SET_DEST (x))));
9830
9831 case CONST_INT:
9832 if (outer == SET)
9833 {
9834 if (UINTVAL (x) < 256
9835 /* 16-bit constant. */
9836 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9837 return 0;
9838 if (thumb_shiftable_const (INTVAL (x)))
9839 return COSTS_N_INSNS (2);
9840 return arm_disable_literal_pool
9841 ? COSTS_N_INSNS (8)
9842 : COSTS_N_INSNS (3);
9843 }
9844 else if ((outer == PLUS || outer == COMPARE)
9845 && INTVAL (x) < 256 && INTVAL (x) > -256)
9846 return 0;
9847 else if ((outer == IOR || outer == XOR || outer == AND)
9848 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9849 return COSTS_N_INSNS (1);
9850 else if (outer == AND)
9851 {
9852 int i;
9853 /* This duplicates the tests in the andsi3 expander. */
9854 for (i = 9; i <= 31; i++)
9855 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9856 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9857 return COSTS_N_INSNS (2);
9858 }
9859 else if (outer == ASHIFT || outer == ASHIFTRT
9860 || outer == LSHIFTRT)
9861 return 0;
9862 return COSTS_N_INSNS (2);
9863
9864 case CONST:
9865 case CONST_DOUBLE:
9866 case LABEL_REF:
9867 case SYMBOL_REF:
9868 return COSTS_N_INSNS (3);
9869
9870 case UDIV:
9871 case UMOD:
9872 case DIV:
9873 case MOD:
9874 return 100;
9875
9876 case TRUNCATE:
9877 return 99;
9878
9879 case AND:
9880 case XOR:
9881 case IOR:
9882 /* XXX guess. */
9883 return 8;
9884
9885 case MEM:
9886 /* XXX another guess. */
9887 /* Memory costs quite a lot for the first word, but subsequent words
9888 load at the equivalent of a single insn each. */
9889 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9890 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9891 ? 4 : 0));
9892
9893 case IF_THEN_ELSE:
9894 /* XXX a guess. */
9895 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9896 return 14;
9897 return 2;
9898
9899 case SIGN_EXTEND:
9900 case ZERO_EXTEND:
9901 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9902 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9903
9904 if (mode == SImode)
9905 return total;
9906
9907 if (arm_arch6)
9908 return total + COSTS_N_INSNS (1);
9909
9910 /* Assume a two-shift sequence. Increase the cost slightly so
9911 we prefer actual shifts over an extend operation. */
9912 return total + 1 + COSTS_N_INSNS (2);
9913
9914 default:
9915 return 99;
9916 }
9917 }
9918
9919 /* Estimates the size cost of Thumb-1 instructions.
9920 For now most of the code is copied from thumb1_rtx_costs. We need more
9921 fine-grained tuning when we have more related test cases. */
9922 static inline int
9923 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9924 {
9925 machine_mode mode = GET_MODE (x);
9926 int words, cost;
9927
9928 switch (code)
9929 {
9930 case ASHIFT:
9931 case ASHIFTRT:
9932 case LSHIFTRT:
9933 case ROTATERT:
9934 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9935
9936 case PLUS:
9937 case MINUS:
9938 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9939 steps used by RTL expansion, especially when expanding a
9940 multiplication. */
9941 if ((GET_CODE (XEXP (x, 0)) == MULT
9942 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9943 || (GET_CODE (XEXP (x, 1)) == MULT
9944 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9945 return COSTS_N_INSNS (2);
9946 /* Fall through. */
9947 case COMPARE:
9948 case NEG:
9949 case NOT:
9950 return COSTS_N_INSNS (1);
9951
9952 case MULT:
9953 if (CONST_INT_P (XEXP (x, 1)))
9954 {
9955 /* The Thumb-1 mul instruction can't operate on a constant; we must
9956 load it into a register first. */
9957 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9958 /* For the targets which have a very small and high-latency multiply
9959 unit, we prefer to synthesize the mult with up to 5 instructions,
9960 giving a good balance between size and performance. */
9961 if (arm_arch6m && arm_m_profile_small_mul)
9962 return COSTS_N_INSNS (5);
9963 else
9964 return COSTS_N_INSNS (1) + const_size;
9965 }
9966 return COSTS_N_INSNS (1);
9967
9968 case SET:
9969 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9970 the mode. */
9971 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9972 cost = COSTS_N_INSNS (words);
9973 if (satisfies_constraint_J (SET_SRC (x))
9974 || satisfies_constraint_K (SET_SRC (x))
9975 /* Too big an immediate for a 2-byte mov, using MOVT. */
9976 || (CONST_INT_P (SET_SRC (x))
9977 && UINTVAL (SET_SRC (x)) >= 256
9978 && TARGET_HAVE_MOVT
9979 && satisfies_constraint_j (SET_SRC (x)))
9980 /* thumb1_movdi_insn. */
9981 || ((words > 1) && MEM_P (SET_SRC (x))))
9982 cost += COSTS_N_INSNS (1);
9983 return cost;
9984
9985 case CONST_INT:
9986 if (outer == SET)
9987 {
9988 if (UINTVAL (x) < 256)
9989 return COSTS_N_INSNS (1);
9990 /* movw is 4 bytes long. */
9991 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9992 return COSTS_N_INSNS (2);
9993 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9994 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9995 return COSTS_N_INSNS (2);
9996 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9997 if (thumb_shiftable_const (INTVAL (x)))
9998 return COSTS_N_INSNS (2);
9999 return arm_disable_literal_pool
10000 ? COSTS_N_INSNS (8)
10001 : COSTS_N_INSNS (3);
10002 }
10003 else if ((outer == PLUS || outer == COMPARE)
10004 && INTVAL (x) < 256 && INTVAL (x) > -256)
10005 return 0;
10006 else if ((outer == IOR || outer == XOR || outer == AND)
10007 && INTVAL (x) < 256 && INTVAL (x) >= -256)
10008 return COSTS_N_INSNS (1);
10009 else if (outer == AND)
10010 {
10011 int i;
10012 /* This duplicates the tests in the andsi3 expander. */
10013 for (i = 9; i <= 31; i++)
10014 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
10015 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
10016 return COSTS_N_INSNS (2);
10017 }
10018 else if (outer == ASHIFT || outer == ASHIFTRT
10019 || outer == LSHIFTRT)
10020 return 0;
10021 return COSTS_N_INSNS (2);
10022
10023 case CONST:
10024 case CONST_DOUBLE:
10025 case LABEL_REF:
10026 case SYMBOL_REF:
10027 return COSTS_N_INSNS (3);
10028
10029 case UDIV:
10030 case UMOD:
10031 case DIV:
10032 case MOD:
10033 return 100;
10034
10035 case TRUNCATE:
10036 return 99;
10037
10038 case AND:
10039 case XOR:
10040 case IOR:
10041 return COSTS_N_INSNS (1);
10042
10043 case MEM:
10044 return (COSTS_N_INSNS (1)
10045 + COSTS_N_INSNS (1)
10046 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10047 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10048 ? COSTS_N_INSNS (1) : 0));
10049
10050 case IF_THEN_ELSE:
10051 /* XXX a guess. */
10052 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10053 return 14;
10054 return 2;
10055
10056 case ZERO_EXTEND:
10057 /* XXX still guessing. */
10058 switch (GET_MODE (XEXP (x, 0)))
10059 {
10060 case E_QImode:
10061 return (1 + (mode == DImode ? 4 : 0)
10062 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10063
10064 case E_HImode:
10065 return (4 + (mode == DImode ? 4 : 0)
10066 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10067
10068 case E_SImode:
10069 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10070
10071 default:
10072 return 99;
10073 }
10074
10075 default:
10076 return 99;
10077 }
10078 }
10079
10080 /* Helper function for arm_rtx_costs. If one operand of OP (a PLUS)
10081 adds the carry flag, then return the other operand. If neither
10082 operand is a carry, return OP unchanged. */
10083 static rtx
10084 strip_carry_operation (rtx op)
10085 {
10086 gcc_assert (GET_CODE (op) == PLUS);
10087 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10088 return XEXP (op, 1);
10089 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10090 return XEXP (op, 0);
10091 return op;
10092 }
10093
10094 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10095 operand, then return the operand that is being shifted. If the shift
10096 is not by a constant, then set SHIFT_REG to point to the shift-amount
10097 operand. Return NULL if OP is not a shifter operand. */
10098 static rtx
10099 shifter_op_p (rtx op, rtx *shift_reg)
10100 {
10101 enum rtx_code code = GET_CODE (op);
10102
10103 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10104 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10105 return XEXP (op, 0);
10106 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10107 return XEXP (op, 0);
10108 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10109 || code == ASHIFTRT)
10110 {
10111 if (!CONST_INT_P (XEXP (op, 1)))
10112 *shift_reg = XEXP (op, 1);
10113 return XEXP (op, 0);
10114 }
10115
10116 return NULL;
10117 }
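/* Illustrative examples for shifter_op_p (not exhaustive):
     (ashift (reg r1) (const_int 2))  returns (reg r1);
     (mult (reg r1) (const_int 4))    also returns (reg r1), a multiply
                                      by a power of two being a left
                                      shift in disguise;
     (ashift (reg r1) (reg r2))       returns (reg r1) and sets
                                      *SHIFT_REG to (reg r2).  */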
10118
10119 static bool
10120 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10121 {
10122 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10123 rtx_code code = GET_CODE (x);
10124 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10125
10126 switch (XINT (x, 1))
10127 {
10128 case UNSPEC_UNALIGNED_LOAD:
10129 /* We can only do unaligned loads into the integer unit, and we can't
10130 use LDM or LDRD. */
10131 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10132 if (speed_p)
10133 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10134 + extra_cost->ldst.load_unaligned);
10135
10136 #ifdef NOT_YET
10137 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10138 ADDR_SPACE_GENERIC, speed_p);
10139 #endif
10140 return true;
10141
10142 case UNSPEC_UNALIGNED_STORE:
10143 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10144 if (speed_p)
10145 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10146 + extra_cost->ldst.store_unaligned);
10147
10148 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10149 #ifdef NOT_YET
10150 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10151 ADDR_SPACE_GENERIC, speed_p);
10152 #endif
10153 return true;
10154
10155 case UNSPEC_VRINTZ:
10156 case UNSPEC_VRINTP:
10157 case UNSPEC_VRINTM:
10158 case UNSPEC_VRINTR:
10159 case UNSPEC_VRINTX:
10160 case UNSPEC_VRINTA:
10161 if (speed_p)
10162 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10163
10164 return true;
10165 default:
10166 *cost = COSTS_N_INSNS (2);
10167 break;
10168 }
10169 return true;
10170 }
10171
10172 /* Cost of a libcall. We assume one insn per argument, an amount for the
10173 call (one insn for -Os) and then one for processing the result. */
10174 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
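/* E.g. LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when optimizing
   for speed and COSTS_N_INSNS (4) when optimizing for size.  */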
10175
10176 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10177 do \
10178 { \
10179 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10180 if (shift_op != NULL \
10181 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10182 { \
10183 if (shift_reg) \
10184 { \
10185 if (speed_p) \
10186 *cost += extra_cost->alu.arith_shift_reg; \
10187 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10188 ASHIFT, 1, speed_p); \
10189 } \
10190 else if (speed_p) \
10191 *cost += extra_cost->alu.arith_shift; \
10192 \
10193 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10194 ASHIFT, 0, speed_p) \
10195 + rtx_cost (XEXP (x, 1 - IDX), \
10196 GET_MODE (shift_op), \
10197 OP, 1, speed_p)); \
10198 return true; \
10199 } \
10200 } \
10201 while (0)
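/* The macro above is intended to be invoked from the narrow-mode
   PLUS/MINUS cost cases, roughly as in this hypothetical usage sketch:
     HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
     HANDLE_NARROW_SHIFT_ARITH (PLUS, 1);
   i.e. once per operand position, returning early when a
   shift-and-arithmetic operand is recognised.  */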
10202
10203 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10204 considering the costs of the addressing mode and memory access
10205 separately. */
10206 static bool
10207 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10208 int *cost, bool speed_p)
10209 {
10210 machine_mode mode = GET_MODE (x);
10211
10212 *cost = COSTS_N_INSNS (1);
10213
10214 if (flag_pic
10215 && GET_CODE (XEXP (x, 0)) == PLUS
10216 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10217 /* This will be split into two instructions. Add the cost of the
10218 additional instruction here. The cost of the memory access is computed
10219 below. See arm.md:calculate_pic_address. */
10220 *cost += COSTS_N_INSNS (1);
10221
10222 /* Calculate cost of the addressing mode. */
10223 if (speed_p)
10224 {
10225 arm_addr_mode_op op_type;
10226 switch (GET_CODE (XEXP (x, 0)))
10227 {
10228 default:
10229 case REG:
10230 op_type = AMO_DEFAULT;
10231 break;
10232 case MINUS:
10233 /* MINUS does not appear in RTL, but the architecture supports it,
10234 so handle this case defensively. */
10235 /* fall through */
10236 case PLUS:
10237 op_type = AMO_NO_WB;
10238 break;
10239 case PRE_INC:
10240 case PRE_DEC:
10241 case POST_INC:
10242 case POST_DEC:
10243 case PRE_MODIFY:
10244 case POST_MODIFY:
10245 op_type = AMO_WB;
10246 break;
10247 }
10248
10249 if (VECTOR_MODE_P (mode))
10250 *cost += current_tune->addr_mode_costs->vector[op_type];
10251 else if (FLOAT_MODE_P (mode))
10252 *cost += current_tune->addr_mode_costs->fp[op_type];
10253 else
10254 *cost += current_tune->addr_mode_costs->integer[op_type];
10255 }
10256
10257 /* Calculate cost of memory access. */
10258 if (speed_p)
10259 {
10260 if (FLOAT_MODE_P (mode))
10261 {
10262 if (GET_MODE_SIZE (mode) == 8)
10263 *cost += extra_cost->ldst.loadd;
10264 else
10265 *cost += extra_cost->ldst.loadf;
10266 }
10267 else if (VECTOR_MODE_P (mode))
10268 *cost += extra_cost->ldst.loadv;
10269 else
10270 {
10271 /* Integer modes */
10272 if (GET_MODE_SIZE (mode) == 8)
10273 *cost += extra_cost->ldst.ldrd;
10274 else
10275 *cost += extra_cost->ldst.load;
10276 }
10277 }
10278
10279 return true;
10280 }
10281
10282 /* Helper for arm_bfi_p. */
10283 static bool
10284 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10285 {
10286 unsigned HOST_WIDE_INT const1;
10287 unsigned HOST_WIDE_INT const2 = 0;
10288
10289 if (!CONST_INT_P (XEXP (op0, 1)))
10290 return false;
10291
10292 const1 = UINTVAL (XEXP (op0, 1));
10293 if (!CONST_INT_P (XEXP (op1, 1))
10294 || ~UINTVAL (XEXP (op1, 1)) != const1)
10295 return false;
10296
10297 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10298 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10299 {
10300 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10301 *sub0 = XEXP (XEXP (op0, 0), 0);
10302 }
10303 else
10304 *sub0 = XEXP (op0, 0);
10305
10306 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10307 return false;
10308
10309 *sub1 = XEXP (op1, 0);
10310 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10311 }
10312
10313 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10314 format looks something like:
10315
10316 (IOR (AND (reg1) (~const1))
10317 (AND (ASHIFT (reg2) (const2))
10318 (const1)))
10319
10320 where const1 is a consecutive sequence of 1-bits with the
10321 least-significant non-zero bit starting at bit position const2. If
10322 const2 is zero, then the shift will not appear at all, due to
10323 canonicalization. The two arms of the IOR expression may be
10324 flipped. */
10325 static bool
10326 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10327 {
10328 if (GET_CODE (x) != IOR)
10329 return false;
10330 if (GET_CODE (XEXP (x, 0)) != AND
10331 || GET_CODE (XEXP (x, 1)) != AND)
10332 return false;
10333 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10334 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10335 }
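/* A concrete instance of the idiom above (illustrative): inserting bits
   of r2 into bits 4..7 of r1 looks like
     (ior (and (reg r1) (const_int -241))           ; ~0xf0
          (and (ashift (reg r2) (const_int 4))
               (const_int 240)))                    ;  0xf0
   Here const1 == 0xf0, const2 == 4, and
   exact_log2 (0xf0 + (1 << 4)) == 8, so arm_bfi_p accepts it.  */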
10336
10337 /* RTX costs. Make an estimate of the cost of executing the operation
10338 X, which is contained within an operation with code OUTER_CODE.
10339 SPEED_P indicates whether the cost desired is the performance cost,
10340 or the size cost. The estimate is stored in COST and the return
10341 value is TRUE if the cost calculation is final, or FALSE if the
10342 caller should recurse through the operands of X to add additional
10343 costs.
10344
10345 We currently make no attempt to model the size savings of Thumb-2
10346 16-bit instructions. At the normal points in compilation where
10347 this code is called we have no measure of whether the condition
10348 flags are live or not, and thus no realistic way to determine what
10349 the size will eventually be. */
10350 static bool
10351 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10352 const struct cpu_cost_table *extra_cost,
10353 int *cost, bool speed_p)
10354 {
10355 machine_mode mode = GET_MODE (x);
10356
10357 *cost = COSTS_N_INSNS (1);
10358
10359 if (TARGET_THUMB1)
10360 {
10361 if (speed_p)
10362 *cost = thumb1_rtx_costs (x, code, outer_code);
10363 else
10364 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10365 return true;
10366 }
10367
10368 switch (code)
10369 {
10370 case SET:
10371 *cost = 0;
10372 /* SET RTXs don't have a mode so we get it from the destination. */
10373 mode = GET_MODE (SET_DEST (x));
10374
10375 if (REG_P (SET_SRC (x))
10376 && REG_P (SET_DEST (x)))
10377 {
10378 /* Assume that most copies can be done with a single insn,
10379 unless we don't have HW FP, in which case everything
10380 larger than word mode will require two insns. */
10381 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10382 && GET_MODE_SIZE (mode) > 4)
10383 || mode == DImode)
10384 ? 2 : 1);
10385 /* Conditional register moves can be encoded
10386 in 16 bits in Thumb mode. */
10387 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10388 *cost >>= 1;
10389
10390 return true;
10391 }
10392
10393 if (CONST_INT_P (SET_SRC (x)))
10394 {
10395 /* Handle CONST_INT here, since the value doesn't have a mode
10396 and we would otherwise be unable to work out the true cost. */
10397 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10398 0, speed_p);
10399 outer_code = SET;
10400 /* Slightly lower the cost of setting a core reg to a constant.
10401 This helps break up chains and allows for better scheduling. */
10402 if (REG_P (SET_DEST (x))
10403 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10404 *cost -= 1;
10405 x = SET_SRC (x);
10406 /* Immediate moves with an immediate in the range [0, 255] can be
10407 encoded in 16 bits in Thumb mode. */
10408 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10409 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10410 *cost >>= 1;
10411 goto const_int_cost;
10412 }
10413
10414 return false;
10415
10416 case MEM:
10417 return arm_mem_costs (x, extra_cost, cost, speed_p);
10418
10419 case PARALLEL:
10420 {
10421 /* Calculations of LDM costs are complex. We assume an initial cost
10422 (ldm_1st) which will load the number of registers mentioned in
10423 ldm_regs_per_insn_1st registers; then each additional
10424 ldm_regs_per_insn_subsequent registers cost one more insn. The
10425 formula for N regs is thus:
10426
10427 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10428 + ldm_regs_per_insn_subsequent - 1)
10429 / ldm_regs_per_insn_subsequent).
10430
10431 Additional costs may also be added for addressing. A similar
10432 formula is used for STM. */
10433
10434 bool is_ldm = load_multiple_operation (x, SImode);
10435 bool is_stm = store_multiple_operation (x, SImode);
10436
10437 if (is_ldm || is_stm)
10438 {
10439 if (speed_p)
10440 {
10441 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10442 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10443 ? extra_cost->ldst.ldm_regs_per_insn_1st
10444 : extra_cost->ldst.stm_regs_per_insn_1st;
10445 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10446 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10447 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10448
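/* Worked example of the formula above, with hypothetical tuning values:
   for nregs == 5, regs_per_insn_1st == 3 and regs_per_insn_sub == 2 the
   second term is COSTS_N_INSNS ((MAX (5 - 3, 0) + 2 - 1) / 2), i.e. one
   extra insn.  */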
10449 *cost += regs_per_insn_1st
10450 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10451 + regs_per_insn_sub - 1)
10452 / regs_per_insn_sub);
10453 return true;
10454 }
10455
10456 }
10457 return false;
10458 }
10459 case DIV:
10460 case UDIV:
10461 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10462 && (mode == SFmode || !TARGET_VFP_SINGLE))
10463 *cost += COSTS_N_INSNS (speed_p
10464 ? extra_cost->fp[mode != SFmode].div : 0);
10465 else if (mode == SImode && TARGET_IDIV)
10466 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10467 else
10468 *cost = LIBCALL_COST (2);
10469
10470 /* Make sdiv more expensive so that when both sdiv and udiv are
10471 possible, udiv is preferred. */
10472 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10473 return false; /* All arguments must be in registers. */
10474
10475 case MOD:
10476 /* MOD by a power of 2 can be expanded as:
10477 rsbs r1, r0, #0
10478 and r0, r0, #(n - 1)
10479 and r1, r1, #(n - 1)
10480 rsbpl r0, r1, #0. */
10481 if (CONST_INT_P (XEXP (x, 1))
10482 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10483 && mode == SImode)
10484 {
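/* The expansion above is four insns; one is already covered by the base
   cost, so add three more.  */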
10485 *cost += COSTS_N_INSNS (3);
10486
10487 if (speed_p)
10488 *cost += 2 * extra_cost->alu.logical
10489 + extra_cost->alu.arith;
10490 return true;
10491 }
10492
10493 /* Fall-through. */
10494 case UMOD:
10495 /* Make sdiv more expensive so that when both sdiv and udiv are
10496 possible, udiv is preferred. */
10497 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10498 return false; /* All arguments must be in registers. */
10499
10500 case ROTATE:
10501 if (mode == SImode && REG_P (XEXP (x, 1)))
10502 {
10503 *cost += (COSTS_N_INSNS (1)
10504 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10505 if (speed_p)
10506 *cost += extra_cost->alu.shift_reg;
10507 return true;
10508 }
10509 /* Fall through */
10510 case ROTATERT:
10511 case ASHIFT:
10512 case LSHIFTRT:
10513 case ASHIFTRT:
10514 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10515 {
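/* A DImode shift by a constant is costed as three insns in total: the
   base insn plus the two added here.  */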
10516 *cost += (COSTS_N_INSNS (2)
10517 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10518 if (speed_p)
10519 *cost += 2 * extra_cost->alu.shift;
10520 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10521 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10522 *cost += 1;
10523 return true;
10524 }
10525 else if (mode == SImode)
10526 {
10527 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10528 /* Slightly disparage register shifts at -Os, but not by much. */
10529 if (!CONST_INT_P (XEXP (x, 1)))
10530 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10531 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10532 return true;
10533 }
10534 else if (GET_MODE_CLASS (mode) == MODE_INT
10535 && GET_MODE_SIZE (mode) < 4)
10536 {
10537 if (code == ASHIFT)
10538 {
10539 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10540 /* Slightly disparage register shifts at -Os, but not by
10541 much. */
10542 if (!CONST_INT_P (XEXP (x, 1)))
10543 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10544 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10545 }
10546 else if (code == LSHIFTRT || code == ASHIFTRT)
10547 {
10548 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10549 {
10550 /* Can use SBFX/UBFX. */
10551 if (speed_p)
10552 *cost += extra_cost->alu.bfx;
10553 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10554 }
10555 else
10556 {
10557 *cost += COSTS_N_INSNS (1);
10558 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10559 if (speed_p)
10560 {
10561 if (CONST_INT_P (XEXP (x, 1)))
10562 *cost += 2 * extra_cost->alu.shift;
10563 else
10564 *cost += (extra_cost->alu.shift
10565 + extra_cost->alu.shift_reg);
10566 }
10567 else
10568 /* Slightly disparage register shifts. */
10569 *cost += !CONST_INT_P (XEXP (x, 1));
10570 }
10571 }
10572 else /* Rotates. */
10573 {
10574 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10575 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10576 if (speed_p)
10577 {
10578 if (CONST_INT_P (XEXP (x, 1)))
10579 *cost += (2 * extra_cost->alu.shift
10580 + extra_cost->alu.log_shift);
10581 else
10582 *cost += (extra_cost->alu.shift
10583 + extra_cost->alu.shift_reg
10584 + extra_cost->alu.log_shift_reg);
10585 }
10586 }
10587 return true;
10588 }
10589
10590 *cost = LIBCALL_COST (2);
10591 return false;
10592
10593 case BSWAP:
10594 if (arm_arch6)
10595 {
10596 if (mode == SImode)
10597 {
10598 if (speed_p)
10599 *cost += extra_cost->alu.rev;
10600
10601 return false;
10602 }
10603 }
10604 else
10605 {
10606 /* No rev instruction available. Look at arm_legacy_rev
10607 and thumb_legacy_rev for the form of RTL used then. */
10608 if (TARGET_THUMB)
10609 {
10610 *cost += COSTS_N_INSNS (9);
10611
10612 if (speed_p)
10613 {
10614 *cost += 6 * extra_cost->alu.shift;
10615 *cost += 3 * extra_cost->alu.logical;
10616 }
10617 }
10618 else
10619 {
10620 *cost += COSTS_N_INSNS (4);
10621
10622 if (speed_p)
10623 {
10624 *cost += 2 * extra_cost->alu.shift;
10625 *cost += extra_cost->alu.arith_shift;
10626 *cost += 2 * extra_cost->alu.logical;
10627 }
10628 }
10629 return true;
10630 }
10631 return false;
10632
10633 case MINUS:
10634 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10635 && (mode == SFmode || !TARGET_VFP_SINGLE))
10636 {
10637 if (GET_CODE (XEXP (x, 0)) == MULT
10638 || GET_CODE (XEXP (x, 1)) == MULT)
10639 {
10640 rtx mul_op0, mul_op1, sub_op;
10641
10642 if (speed_p)
10643 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10644
10645 if (GET_CODE (XEXP (x, 0)) == MULT)
10646 {
10647 mul_op0 = XEXP (XEXP (x, 0), 0);
10648 mul_op1 = XEXP (XEXP (x, 0), 1);
10649 sub_op = XEXP (x, 1);
10650 }
10651 else
10652 {
10653 mul_op0 = XEXP (XEXP (x, 1), 0);
10654 mul_op1 = XEXP (XEXP (x, 1), 1);
10655 sub_op = XEXP (x, 0);
10656 }
10657
10658 /* The first operand of the multiply may be optionally
10659 negated. */
10660 if (GET_CODE (mul_op0) == NEG)
10661 mul_op0 = XEXP (mul_op0, 0);
10662
10663 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10664 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10665 + rtx_cost (sub_op, mode, code, 0, speed_p));
10666
10667 return true;
10668 }
10669
10670 if (speed_p)
10671 *cost += extra_cost->fp[mode != SFmode].addsub;
10672 return false;
10673 }
10674
10675 if (mode == SImode)
10676 {
10677 rtx shift_by_reg = NULL;
10678 rtx shift_op;
10679 rtx non_shift_op;
10680 rtx op0 = XEXP (x, 0);
10681 rtx op1 = XEXP (x, 1);
10682
10683 /* Factor out any borrow operation. There's more than one way
10684 of expressing this; try to recognize them all. */
10685 if (GET_CODE (op0) == MINUS)
10686 {
10687 if (arm_borrow_operation (op1, SImode))
10688 {
10689 op1 = XEXP (op0, 1);
10690 op0 = XEXP (op0, 0);
10691 }
10692 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10693 op0 = XEXP (op0, 0);
10694 }
10695 else if (GET_CODE (op1) == PLUS
10696 && arm_borrow_operation (XEXP (op1, 0), SImode))
10697 op1 = XEXP (op1, 0);
10698 else if (GET_CODE (op0) == NEG
10699 && arm_borrow_operation (op1, SImode))
10700 {
10701 /* Negate with carry-in. For Thumb2 this is done with
10702 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10703 RSC instruction that exists in Arm mode. */
10704 if (speed_p)
10705 *cost += (TARGET_THUMB2
10706 ? extra_cost->alu.arith_shift
10707 : extra_cost->alu.arith);
10708 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10709 return true;
10710 }
10711 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10712 Note we do mean ~borrow here. */
10713 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10714 {
10715 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10716 return true;
10717 }
10718
10719 shift_op = shifter_op_p (op0, &shift_by_reg);
10720 if (shift_op == NULL)
10721 {
10722 shift_op = shifter_op_p (op1, &shift_by_reg);
10723 non_shift_op = op0;
10724 }
10725 else
10726 non_shift_op = op1;
10727
10728 if (shift_op != NULL)
10729 {
10730 if (shift_by_reg != NULL)
10731 {
10732 if (speed_p)
10733 *cost += extra_cost->alu.arith_shift_reg;
10734 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10735 }
10736 else if (speed_p)
10737 *cost += extra_cost->alu.arith_shift;
10738
10739 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10740 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10741 return true;
10742 }
10743
10744 if (arm_arch_thumb2
10745 && GET_CODE (XEXP (x, 1)) == MULT)
10746 {
10747 /* MLS. */
10748 if (speed_p)
10749 *cost += extra_cost->mult[0].add;
10750 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10751 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10752 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10753 return true;
10754 }
10755
10756 if (CONST_INT_P (op0))
10757 {
10758 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10759 INTVAL (op0), NULL_RTX,
10760 NULL_RTX, 1, 0);
10761 *cost = COSTS_N_INSNS (insns);
10762 if (speed_p)
10763 *cost += insns * extra_cost->alu.arith;
10764 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10765 return true;
10766 }
10767 else if (speed_p)
10768 *cost += extra_cost->alu.arith;
10769
10770 /* Don't recurse as we don't want to cost any borrow that
10771 we've stripped. */
10772 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10773 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10774 return true;
10775 }
10776
10777 if (GET_MODE_CLASS (mode) == MODE_INT
10778 && GET_MODE_SIZE (mode) < 4)
10779 {
10780 rtx shift_op, shift_reg;
10781 shift_reg = NULL;
10782
10783 /* We check both sides of the MINUS for shifter operands since,
10784 unlike PLUS, it's not commutative. */
10785
10786 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10787 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10788
10789 /* Slightly disparage, as we might need to widen the result. */
10790 *cost += 1;
10791 if (speed_p)
10792 *cost += extra_cost->alu.arith;
10793
10794 if (CONST_INT_P (XEXP (x, 0)))
10795 {
10796 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10797 return true;
10798 }
10799
10800 return false;
10801 }
10802
10803 if (mode == DImode)
10804 {
10805 *cost += COSTS_N_INSNS (1);
10806
10807 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10808 {
10809 rtx op1 = XEXP (x, 1);
10810
10811 if (speed_p)
10812 *cost += 2 * extra_cost->alu.arith;
10813
10814 if (GET_CODE (op1) == ZERO_EXTEND)
10815 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10816 0, speed_p);
10817 else
10818 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10819 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10820 0, speed_p);
10821 return true;
10822 }
10823 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10824 {
10825 if (speed_p)
10826 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10827 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10828 0, speed_p)
10829 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10830 return true;
10831 }
10832 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10833 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10834 {
10835 if (speed_p)
10836 *cost += (extra_cost->alu.arith
10837 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10838 ? extra_cost->alu.arith
10839 : extra_cost->alu.arith_shift));
10840 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10841 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10842 GET_CODE (XEXP (x, 1)), 0, speed_p));
10843 return true;
10844 }
10845
10846 if (speed_p)
10847 *cost += 2 * extra_cost->alu.arith;
10848 return false;
10849 }
10850
10851 /* Vector mode? */
10852
10853 *cost = LIBCALL_COST (2);
10854 return false;
10855
10856 case PLUS:
10857 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10858 && (mode == SFmode || !TARGET_VFP_SINGLE))
10859 {
10860 if (GET_CODE (XEXP (x, 0)) == MULT)
10861 {
10862 rtx mul_op0, mul_op1, add_op;
10863
10864 if (speed_p)
10865 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10866
10867 mul_op0 = XEXP (XEXP (x, 0), 0);
10868 mul_op1 = XEXP (XEXP (x, 0), 1);
10869 add_op = XEXP (x, 1);
10870
10871 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10872 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10873 + rtx_cost (add_op, mode, code, 0, speed_p));
10874
10875 return true;
10876 }
10877
10878 if (speed_p)
10879 *cost += extra_cost->fp[mode != SFmode].addsub;
10880 return false;
10881 }
10882 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10883 {
10884 *cost = LIBCALL_COST (2);
10885 return false;
10886 }
10887
10888 /* Narrow modes can be synthesized in SImode, but the range
10889 of useful sub-operations is limited. Check for shift operations
10890 on one of the operands. Only left shifts can be used in the
10891 narrow modes. */
10892 if (GET_MODE_CLASS (mode) == MODE_INT
10893 && GET_MODE_SIZE (mode) < 4)
10894 {
10895 rtx shift_op, shift_reg;
10896 shift_reg = NULL;
10897
10898 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10899
10900 if (CONST_INT_P (XEXP (x, 1)))
10901 {
10902 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10903 INTVAL (XEXP (x, 1)), NULL_RTX,
10904 NULL_RTX, 1, 0);
10905 *cost = COSTS_N_INSNS (insns);
10906 if (speed_p)
10907 *cost += insns * extra_cost->alu.arith;
10908 /* Slightly penalize a narrow operation as the result may
10909 need widening. */
10910 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10911 return true;
10912 }
10913
10914 /* Slightly penalize a narrow operation as the result may
10915 need widening. */
10916 *cost += 1;
10917 if (speed_p)
10918 *cost += extra_cost->alu.arith;
10919
10920 return false;
10921 }
10922
10923 if (mode == SImode)
10924 {
10925 rtx shift_op, shift_reg;
10926
10927 if (TARGET_INT_SIMD
10928 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10929 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10930 {
10931 /* UXTA[BH] or SXTA[BH]. */
10932 if (speed_p)
10933 *cost += extra_cost->alu.extend_arith;
10934 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10935 0, speed_p)
10936 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10937 return true;
10938 }
10939
10940 rtx op0 = XEXP (x, 0);
10941 rtx op1 = XEXP (x, 1);
10942
10943 /* Handle a side effect of adding in the carry to an addition. */
10944 if (GET_CODE (op0) == PLUS
10945 && arm_carry_operation (op1, mode))
10946 {
10947 op1 = XEXP (op0, 1);
10948 op0 = XEXP (op0, 0);
10949 }
10950 else if (GET_CODE (op1) == PLUS
10951 && arm_carry_operation (op0, mode))
10952 {
10953 op0 = XEXP (op1, 0);
10954 op1 = XEXP (op1, 1);
10955 }
10956 else if (GET_CODE (op0) == PLUS)
10957 {
10958 op0 = strip_carry_operation (op0);
10959 if (swap_commutative_operands_p (op0, op1))
10960 std::swap (op0, op1);
10961 }
10962
10963 if (arm_carry_operation (op0, mode))
10964 {
10965 /* Adding the carry to a register is a canonicalization of
10966 adding 0 to the register plus the carry. */
10967 if (speed_p)
10968 *cost += extra_cost->alu.arith;
10969 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10970 return true;
10971 }
10972
10973 shift_reg = NULL;
10974 shift_op = shifter_op_p (op0, &shift_reg);
10975 if (shift_op != NULL)
10976 {
10977 if (shift_reg)
10978 {
10979 if (speed_p)
10980 *cost += extra_cost->alu.arith_shift_reg;
10981 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10982 }
10983 else if (speed_p)
10984 *cost += extra_cost->alu.arith_shift;
10985
10986 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10987 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10988 return true;
10989 }
10990
10991 if (GET_CODE (op0) == MULT)
10992 {
10993 rtx mul_op = op0;
10994
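/* For SMLA[BT][BT] each multiply operand must be a 16-bit value, either
   sign-extended from HImode or the top half selected by an arithmetic
   shift right of 16.  */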
10995 if (TARGET_DSP_MULTIPLY
10996 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10997 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10998 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10999 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11000 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
11001 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
11002 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
11003 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
11004 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
11005 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11006 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11007 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
11008 == 16))))))
11009 {
11010 /* SMLA[BT][BT]. */
11011 if (speed_p)
11012 *cost += extra_cost->mult[0].extend_add;
11013 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
11014 SIGN_EXTEND, 0, speed_p)
11015 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
11016 SIGN_EXTEND, 0, speed_p)
11017 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11018 return true;
11019 }
11020
11021 if (speed_p)
11022 *cost += extra_cost->mult[0].add;
11023 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
11024 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
11025 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11026 return true;
11027 }
11028
11029 if (CONST_INT_P (op1))
11030 {
11031 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11032 INTVAL (op1), NULL_RTX,
11033 NULL_RTX, 1, 0);
11034 *cost = COSTS_N_INSNS (insns);
11035 if (speed_p)
11036 *cost += insns * extra_cost->alu.arith;
11037 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11038 return true;
11039 }
11040
11041 if (speed_p)
11042 *cost += extra_cost->alu.arith;
11043
11044 /* Don't recurse here because we want to test the operands
11045 without any carry operation. */
11046 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11047 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11048 return true;
11049 }
11050
11051 if (mode == DImode)
11052 {
11053 if (GET_CODE (XEXP (x, 0)) == MULT
11054 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11055 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11056 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11058 {
11059 if (speed_p)
11060 *cost += extra_cost->mult[1].extend_add;
11061 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11062 ZERO_EXTEND, 0, speed_p)
11063 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11064 ZERO_EXTEND, 0, speed_p)
11065 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11066 return true;
11067 }
11068
11069 *cost += COSTS_N_INSNS (1);
11070
11071 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11072 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11073 {
11074 if (speed_p)
11075 *cost += (extra_cost->alu.arith
11076 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11077 ? extra_cost->alu.arith
11078 : extra_cost->alu.arith_shift));
11079
11080 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11081 0, speed_p)
11082 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11083 return true;
11084 }
11085
11086 if (speed_p)
11087 *cost += 2 * extra_cost->alu.arith;
11088 return false;
11089 }
11090
11091 /* Vector mode? */
11092 *cost = LIBCALL_COST (2);
11093 return false;
11094 case IOR:
11095 {
11096 rtx sub0, sub1;
11097 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11098 {
11099 if (speed_p)
11100 *cost += extra_cost->alu.rev;
11101
11102 return true;
11103 }
11104 else if (mode == SImode && arm_arch_thumb2
11105 && arm_bfi_p (x, &sub0, &sub1))
11106 {
11107 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11108 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11109 if (speed_p)
11110 *cost += extra_cost->alu.bfi;
11111
11112 return true;
11113 }
11114 }
11115
11116 /* Fall through. */
11117 case AND: case XOR:
11118 if (mode == SImode)
11119 {
11120 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11121 rtx op0 = XEXP (x, 0);
11122 rtx shift_op, shift_reg;
11123
11124 if (subcode == NOT
11125 && (code == AND
11126 || (code == IOR && TARGET_THUMB2)))
11127 op0 = XEXP (op0, 0);
11128
11129 shift_reg = NULL;
11130 shift_op = shifter_op_p (op0, &shift_reg);
11131 if (shift_op != NULL)
11132 {
11133 if (shift_reg)
11134 {
11135 if (speed_p)
11136 *cost += extra_cost->alu.log_shift_reg;
11137 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11138 }
11139 else if (speed_p)
11140 *cost += extra_cost->alu.log_shift;
11141
11142 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11143 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11144 return true;
11145 }
11146
11147 if (CONST_INT_P (XEXP (x, 1)))
11148 {
11149 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11150 INTVAL (XEXP (x, 1)), NULL_RTX,
11151 NULL_RTX, 1, 0);
11152
11153 *cost = COSTS_N_INSNS (insns);
11154 if (speed_p)
11155 *cost += insns * extra_cost->alu.logical;
11156 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11157 return true;
11158 }
11159
11160 if (speed_p)
11161 *cost += extra_cost->alu.logical;
11162 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11163 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11164 return true;
11165 }
11166
11167 if (mode == DImode)
11168 {
11169 rtx op0 = XEXP (x, 0);
11170 enum rtx_code subcode = GET_CODE (op0);
11171
11172 *cost += COSTS_N_INSNS (1);
11173
11174 if (subcode == NOT
11175 && (code == AND
11176 || (code == IOR && TARGET_THUMB2)))
11177 op0 = XEXP (op0, 0);
11178
11179 if (GET_CODE (op0) == ZERO_EXTEND)
11180 {
11181 if (speed_p)
11182 *cost += 2 * extra_cost->alu.logical;
11183
11184 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11185 0, speed_p)
11186 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11187 return true;
11188 }
11189 else if (GET_CODE (op0) == SIGN_EXTEND)
11190 {
11191 if (speed_p)
11192 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11193
11194 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11195 0, speed_p)
11196 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11197 return true;
11198 }
11199
11200 if (speed_p)
11201 *cost += 2 * extra_cost->alu.logical;
11202
11203 return true;
11204 }
11205 /* Vector mode? */
11206
11207 *cost = LIBCALL_COST (2);
11208 return false;
11209
11210 case MULT:
11211 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11212 && (mode == SFmode || !TARGET_VFP_SINGLE))
11213 {
11214 rtx op0 = XEXP (x, 0);
11215
11216 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11217 op0 = XEXP (op0, 0);
11218
11219 if (speed_p)
11220 *cost += extra_cost->fp[mode != SFmode].mult;
11221
11222 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11223 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11224 return true;
11225 }
11226 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11227 {
11228 *cost = LIBCALL_COST (2);
11229 return false;
11230 }
11231
11232 if (mode == SImode)
11233 {
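/* As for SMLA above, SMUL[TB][TB] needs both multiply operands to be
   16-bit values, via sign-extension or an arithmetic shift right of 16.  */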
11234 if (TARGET_DSP_MULTIPLY
11235 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11236 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11237 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11238 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11239 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11240 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11241 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11242 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11243 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11244 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11245 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11246 && (INTVAL (XEXP (XEXP (x, 1), 1))
11247 == 16))))))
11248 {
11249 /* SMUL[TB][TB]. */
11250 if (speed_p)
11251 *cost += extra_cost->mult[0].extend;
11252 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11253 SIGN_EXTEND, 0, speed_p);
11254 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11255 SIGN_EXTEND, 1, speed_p);
11256 return true;
11257 }
11258 if (speed_p)
11259 *cost += extra_cost->mult[0].simple;
11260 return false;
11261 }
11262
11263 if (mode == DImode)
11264 {
11265 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11266 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11267 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11268 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11269 {
11270 if (speed_p)
11271 *cost += extra_cost->mult[1].extend;
11272 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11273 ZERO_EXTEND, 0, speed_p)
11274 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11275 ZERO_EXTEND, 0, speed_p));
11276 return true;
11277 }
11278
11279 *cost = LIBCALL_COST (2);
11280 return false;
11281 }
11282
11283 /* Vector mode? */
11284 *cost = LIBCALL_COST (2);
11285 return false;
11286
11287 case NEG:
11288 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11289 && (mode == SFmode || !TARGET_VFP_SINGLE))
11290 {
11291 if (GET_CODE (XEXP (x, 0)) == MULT)
11292 {
11293 /* VNMUL. */
11294 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11295 return true;
11296 }
11297
11298 if (speed_p)
11299 *cost += extra_cost->fp[mode != SFmode].neg;
11300
11301 return false;
11302 }
11303 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11304 {
11305 *cost = LIBCALL_COST (1);
11306 return false;
11307 }
11308
11309 if (mode == SImode)
11310 {
11311 if (GET_CODE (XEXP (x, 0)) == ABS)
11312 {
11313 *cost += COSTS_N_INSNS (1);
11314 /* Assume the non-flag-changing variant. */
11315 if (speed_p)
11316 *cost += (extra_cost->alu.log_shift
11317 + extra_cost->alu.arith_shift);
11318 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11319 return true;
11320 }
11321
11322 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11323 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11324 {
11325 *cost += COSTS_N_INSNS (1);
11326 /* No extra cost for MOV imm and MVN imm. */
11327 /* If the comparison op is using the flags, there's no further
11328 cost, otherwise we need to add the cost of the comparison. */
11329 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11330 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11331 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11332 {
11333 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11334 *cost += (COSTS_N_INSNS (1)
11335 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11336 0, speed_p)
11337 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11338 1, speed_p));
11339 if (speed_p)
11340 *cost += extra_cost->alu.arith;
11341 }
11342 return true;
11343 }
11344
11345 if (speed_p)
11346 *cost += extra_cost->alu.arith;
11347 return false;
11348 }
11349
11350 if (GET_MODE_CLASS (mode) == MODE_INT
11351 && GET_MODE_SIZE (mode) < 4)
11352 {
11353 /* Slightly disparage, as we might need an extend operation. */
11354 *cost += 1;
11355 if (speed_p)
11356 *cost += extra_cost->alu.arith;
11357 return false;
11358 }
11359
11360 if (mode == DImode)
11361 {
11362 *cost += COSTS_N_INSNS (1);
11363 if (speed_p)
11364 *cost += 2 * extra_cost->alu.arith;
11365 return false;
11366 }
11367
11368 /* Vector mode? */
11369 *cost = LIBCALL_COST (1);
11370 return false;
11371
11372 case NOT:
11373 if (mode == SImode)
11374 {
11375 rtx shift_op;
11376 rtx shift_reg = NULL;
11377
11378 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11379
11380 if (shift_op)
11381 {
11382 if (shift_reg != NULL)
11383 {
11384 if (speed_p)
11385 *cost += extra_cost->alu.log_shift_reg;
11386 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11387 }
11388 else if (speed_p)
11389 *cost += extra_cost->alu.log_shift;
11390 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11391 return true;
11392 }
11393
11394 if (speed_p)
11395 *cost += extra_cost->alu.logical;
11396 return false;
11397 }
11398 if (mode == DImode)
11399 {
11400 *cost += COSTS_N_INSNS (1);
11401 return false;
11402 }
11403
11404 /* Vector mode? */
11405
11406 *cost += LIBCALL_COST (1);
11407 return false;
11408
11409 case IF_THEN_ELSE:
11410 {
11411 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11412 {
11413 *cost += COSTS_N_INSNS (3);
11414 return true;
11415 }
11416 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11417 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11418
11419 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11420 /* Assume that if one arm of the if_then_else is a register,
10421 it will be tied to the result and the conditional insn
10422 eliminated. */
11423 if (REG_P (XEXP (x, 1)))
11424 *cost += op2cost;
11425 else if (REG_P (XEXP (x, 2)))
11426 *cost += op1cost;
11427 else
11428 {
11429 if (speed_p)
11430 {
11431 if (extra_cost->alu.non_exec_costs_exec)
11432 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11433 else
11434 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11435 }
11436 else
11437 *cost += op1cost + op2cost;
11438 }
11439 }
11440 return true;
11441
11442 case COMPARE:
11443 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11444 *cost = 0;
11445 else
11446 {
11447 machine_mode op0mode;
11448 /* We'll mostly assume that the cost of a compare is the cost of the
11449 LHS. However, there are some notable exceptions. */
11450
11451 /* Floating point compares are never done as side-effects. */
11452 op0mode = GET_MODE (XEXP (x, 0));
11453 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11454 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11455 {
11456 if (speed_p)
11457 *cost += extra_cost->fp[op0mode != SFmode].compare;
11458
11459 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11460 {
11461 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11462 return true;
11463 }
11464
11465 return false;
11466 }
11467 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11468 {
11469 *cost = LIBCALL_COST (2);
11470 return false;
11471 }
11472
11473 /* DImode compares normally take two insns. */
11474 if (op0mode == DImode)
11475 {
11476 *cost += COSTS_N_INSNS (1);
11477 if (speed_p)
11478 *cost += 2 * extra_cost->alu.arith;
11479 return false;
11480 }
11481
11482 if (op0mode == SImode)
11483 {
11484 rtx shift_op;
11485 rtx shift_reg;
11486
11487 if (XEXP (x, 1) == const0_rtx
11488 && !(REG_P (XEXP (x, 0))
11489 || (GET_CODE (XEXP (x, 0)) == SUBREG
11490 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11491 {
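/* Comparison of a non-trivial operation against zero: assume the
   operation can set the flags itself, so charge only its own cost.  */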
11492 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11493
11494 /* Multiply operations that set the flags are often
11495 significantly more expensive. */
11496 if (speed_p
11497 && GET_CODE (XEXP (x, 0)) == MULT
11498 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11499 *cost += extra_cost->mult[0].flag_setting;
11500
11501 if (speed_p
11502 && GET_CODE (XEXP (x, 0)) == PLUS
11503 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11504 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11505 0), 1), mode))
11506 *cost += extra_cost->mult[0].flag_setting;
11507 return true;
11508 }
11509
11510 shift_reg = NULL;
11511 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11512 if (shift_op != NULL)
11513 {
11514 if (shift_reg != NULL)
11515 {
11516 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11517 1, speed_p);
11518 if (speed_p)
11519 *cost += extra_cost->alu.arith_shift_reg;
11520 }
11521 else if (speed_p)
11522 *cost += extra_cost->alu.arith_shift;
11523 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11524 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11525 return true;
11526 }
11527
11528 if (speed_p)
11529 *cost += extra_cost->alu.arith;
11530 if (CONST_INT_P (XEXP (x, 1))
11531 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11532 {
11533 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11534 return true;
11535 }
11536 return false;
11537 }
11538
11539 /* Vector mode? */
11540
11541 *cost = LIBCALL_COST (2);
11542 return false;
11543 }
11544 return true;
11545
11546 case EQ:
11547 case GE:
11548 case GT:
11549 case LE:
11550 case LT:
11551 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11552 vcle and vclt). */
11553 if (TARGET_NEON
11554 && TARGET_HARD_FLOAT
11555 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11556 && (XEXP (x, 1) == CONST0_RTX (mode)))
11557 {
11558 *cost = 0;
11559 return true;
11560 }
11561
11562 /* Fall through. */
11563 case NE:
11564 case LTU:
11565 case LEU:
11566 case GEU:
11567 case GTU:
11568 case ORDERED:
11569 case UNORDERED:
11570 case UNEQ:
11571 case UNLE:
11572 case UNLT:
11573 case UNGE:
11574 case UNGT:
11575 case LTGT:
11576 if (outer_code == SET)
11577 {
11578 /* Is it a store-flag operation? */
11579 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11580 && XEXP (x, 1) == const0_rtx)
11581 {
11582 /* Thumb also needs an IT insn. */
11583 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11584 return true;
11585 }
11586 if (XEXP (x, 1) == const0_rtx)
11587 {
11588 switch (code)
11589 {
11590 case LT:
11591 /* LSR Rd, Rn, #31. */
11592 if (speed_p)
11593 *cost += extra_cost->alu.shift;
11594 break;
11595
11596 case EQ:
11597 /* RSBS T1, Rn, #0
11598 ADC Rd, Rn, T1. */
11599
11600 case NE:
11601 /* SUBS T1, Rn, #1
11602 SBC Rd, Rn, T1. */
11603 *cost += COSTS_N_INSNS (1);
11604 break;
11605
11606 case LE:
11607 /* RSBS T1, Rn, Rn, LSR #31
11608 ADC Rd, Rn, T1. */
11609 *cost += COSTS_N_INSNS (1);
11610 if (speed_p)
11611 *cost += extra_cost->alu.arith_shift;
11612 break;
11613
11614 case GT:
11615 /* RSB Rd, Rn, Rn, ASR #1
11616 LSR Rd, Rd, #31. */
11617 *cost += COSTS_N_INSNS (1);
11618 if (speed_p)
11619 *cost += (extra_cost->alu.arith_shift
11620 + extra_cost->alu.shift);
11621 break;
11622
11623 case GE:
11624 /* ASR Rd, Rn, #31
11625 ADD Rd, Rn, #1. */
11626 *cost += COSTS_N_INSNS (1);
11627 if (speed_p)
11628 *cost += extra_cost->alu.shift;
11629 break;
11630
11631 default:
11632 /* Remaining cases are either meaningless or would take
11633 three insns anyway. */
11634 *cost = COSTS_N_INSNS (3);
11635 break;
11636 }
11637 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11638 return true;
11639 }
11640 else
11641 {
11642 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11643 if (CONST_INT_P (XEXP (x, 1))
11644 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11645 {
11646 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11647 return true;
11648 }
11649
11650 return false;
11651 }
11652 }
11653 /* Not directly inside a set. If it involves the condition code
11654 register it must be the condition for a branch, cond_exec or
11655 I_T_E operation. Since the comparison is performed elsewhere
11656 this is just the control part which has no additional
11657 cost. */
11658 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11659 && XEXP (x, 1) == const0_rtx)
11660 {
11661 *cost = 0;
11662 return true;
11663 }
11664 return false;
11665
11666 case ABS:
11667 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11668 && (mode == SFmode || !TARGET_VFP_SINGLE))
11669 {
11670 if (speed_p)
11671 *cost += extra_cost->fp[mode != SFmode].neg;
11672
11673 return false;
11674 }
11675 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11676 {
11677 *cost = LIBCALL_COST (1);
11678 return false;
11679 }
11680
11681 if (mode == SImode)
11682 {
11683 if (speed_p)
11684 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11685 return false;
11686 }
11687 /* Vector mode? */
11688 *cost = LIBCALL_COST (1);
11689 return false;
11690
11691 case SIGN_EXTEND:
11692 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11693 && MEM_P (XEXP (x, 0)))
11694 {
11695 if (mode == DImode)
11696 *cost += COSTS_N_INSNS (1);
11697
11698 if (!speed_p)
11699 return true;
11700
11701 if (GET_MODE (XEXP (x, 0)) == SImode)
11702 *cost += extra_cost->ldst.load;
11703 else
11704 *cost += extra_cost->ldst.load_sign_extend;
11705
11706 if (mode == DImode)
11707 *cost += extra_cost->alu.shift;
11708
11709 return true;
11710 }
11711
11712 /* Widening from less than 32 bits requires an extend operation. */
11713 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11714 {
11715 /* We have SXTB/SXTH. */
11716 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11717 if (speed_p)
11718 *cost += extra_cost->alu.extend;
11719 }
11720 else if (GET_MODE (XEXP (x, 0)) != SImode)
11721 {
11722 /* Needs two shifts. */
11723 *cost += COSTS_N_INSNS (1);
11724 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11725 if (speed_p)
11726 *cost += 2 * extra_cost->alu.shift;
11727 }
11728
11729 /* Widening beyond 32 bits requires one more insn. */
11730 if (mode == DImode)
11731 {
11732 *cost += COSTS_N_INSNS (1);
11733 if (speed_p)
11734 *cost += extra_cost->alu.shift;
11735 }
11736
11737 return true;
11738
11739 case ZERO_EXTEND:
11740 if ((arm_arch4
11741 || GET_MODE (XEXP (x, 0)) == SImode
11742 || GET_MODE (XEXP (x, 0)) == QImode)
11743 && MEM_P (XEXP (x, 0)))
11744 {
11745 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11746
11747 if (mode == DImode)
11748 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11749
11750 return true;
11751 }
11752
11753 /* Widening from less than 32 bits requires an extend operation. */
11754 if (GET_MODE (XEXP (x, 0)) == QImode)
11755 {
11756 /* UXTB can be a shorter instruction in Thumb2, but it might
11757 be slower than the AND Rd, Rn, #255 alternative. When
11758 optimizing for speed it should never be slower to use
11759 AND, and we don't really model 16-bit vs 32-bit insns
11760 here. */
11761 if (speed_p)
11762 *cost += extra_cost->alu.logical;
11763 }
11764 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11765 {
11766 /* We have UXTB/UXTH. */
11767 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11768 if (speed_p)
11769 *cost += extra_cost->alu.extend;
11770 }
11771 else if (GET_MODE (XEXP (x, 0)) != SImode)
11772 {
11773 /* Needs two shifts. It's marginally preferable to use
11774 shifts rather than two BIC instructions as the second
11775 shift may merge with a subsequent insn as a shifter
11776 op. */
11777 *cost = COSTS_N_INSNS (2);
11778 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11779 if (speed_p)
11780 *cost += 2 * extra_cost->alu.shift;
11781 }
11782
11783 /* Widening beyond 32 bits requires one more insn. */
11784 if (mode == DImode)
11785 {
11786 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11787 }
11788
11789 return true;
11790
11791 case CONST_INT:
11792 *cost = 0;
11793 /* CONST_INT has no mode, so we cannot tell for sure how many
11794 insns are really going to be needed. The best we can do is
11795 look at the value passed. If it fits in SImode, then assume
11796 that's the mode it will be used for. Otherwise assume it
11797 will be used in DImode. */
11798 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11799 mode = SImode;
11800 else
11801 mode = DImode;
11802
11803 /* Avoid blowing up in arm_gen_constant (). */
11804 if (!(outer_code == PLUS
11805 || outer_code == AND
11806 || outer_code == IOR
11807 || outer_code == XOR
11808 || outer_code == MINUS))
11809 outer_code = SET;
11810
11811 const_int_cost:
11812 if (mode == SImode)
11813 {
11814 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11815 INTVAL (x), NULL, NULL,
11816 0, 0));
11817 /* Extra costs? */
11818 }
11819 else
11820 {
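/* Cost each 32-bit half of the constant separately.  */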
11821 *cost += COSTS_N_INSNS (arm_gen_constant
11822 (outer_code, SImode, NULL,
11823 trunc_int_for_mode (INTVAL (x), SImode),
11824 NULL, NULL, 0, 0)
11825 + arm_gen_constant (outer_code, SImode, NULL,
11826 INTVAL (x) >> 32, NULL,
11827 NULL, 0, 0));
11828 /* Extra costs? */
11829 }
11830
11831 return true;
11832
11833 case CONST:
11834 case LABEL_REF:
11835 case SYMBOL_REF:
11836 if (speed_p)
11837 {
11838 if (arm_arch_thumb2 && !flag_pic)
11839 *cost += COSTS_N_INSNS (1);
11840 else
11841 *cost += extra_cost->ldst.load;
11842 }
11843 else
11844 *cost += COSTS_N_INSNS (1);
11845
11846 if (flag_pic)
11847 {
11848 *cost += COSTS_N_INSNS (1);
11849 if (speed_p)
11850 *cost += extra_cost->alu.arith;
11851 }
11852
11853 return true;
11854
11855 case CONST_FIXED:
11856 *cost = COSTS_N_INSNS (4);
11857 /* Fixme. */
11858 return true;
11859
11860 case CONST_DOUBLE:
11861 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11862 && (mode == SFmode || !TARGET_VFP_SINGLE))
11863 {
11864 if (vfp3_const_double_rtx (x))
11865 {
11866 if (speed_p)
11867 *cost += extra_cost->fp[mode == DFmode].fpconst;
11868 return true;
11869 }
11870
11871 if (speed_p)
11872 {
11873 if (mode == DFmode)
11874 *cost += extra_cost->ldst.loadd;
11875 else
11876 *cost += extra_cost->ldst.loadf;
11877 }
11878 else
11879 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11880
11881 return true;
11882 }
11883 *cost = COSTS_N_INSNS (4);
11884 return true;
11885
11886 case CONST_VECTOR:
11887 /* Fixme. */
11888 if (((TARGET_NEON && TARGET_HARD_FLOAT
11889 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11890 || TARGET_HAVE_MVE)
11891 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11892 *cost = COSTS_N_INSNS (1);
11893 else
11894 *cost = COSTS_N_INSNS (4);
11895 return true;
11896
11897 case HIGH:
11898 case LO_SUM:
11899 /* When optimizing for size, we prefer constant pool entries to
11900 MOVW/MOVT pairs, so bump the cost of these slightly. */
11901 if (!speed_p)
11902 *cost += 1;
11903 return true;
11904
11905 case CLZ:
11906 if (speed_p)
11907 *cost += extra_cost->alu.clz;
11908 return false;
11909
11910 case SMIN:
11911 if (XEXP (x, 1) == const0_rtx)
11912 {
11913 if (speed_p)
11914 *cost += extra_cost->alu.log_shift;
11915 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11916 return true;
11917 }
11918 /* Fall through. */
11919 case SMAX:
11920 case UMIN:
11921 case UMAX:
11922 *cost += COSTS_N_INSNS (1);
11923 return false;
11924
11925 case TRUNCATE:
11926 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11927 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11928 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11929 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11930 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11931 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11932 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11933 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11934 == ZERO_EXTEND))))
11935 {
11936 if (speed_p)
11937 *cost += extra_cost->mult[1].extend;
11938 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11939 ZERO_EXTEND, 0, speed_p)
11940 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11941 ZERO_EXTEND, 0, speed_p));
11942 return true;
11943 }
11944 *cost = LIBCALL_COST (1);
11945 return false;
11946
11947 case UNSPEC_VOLATILE:
11948 case UNSPEC:
11949 return arm_unspec_cost (x, outer_code, speed_p, cost);
11950
11951 case PC:
11952 /* Reading the PC is like reading any other register. Writing it
11953 is more expensive, but we take that into account elsewhere. */
11954 *cost = 0;
11955 return true;
11956
11957 case ZERO_EXTRACT:
11958 /* TODO: Simple zero_extract of bottom bits using AND. */
11959 /* Fall through. */
11960 case SIGN_EXTRACT:
11961 if (arm_arch6
11962 && mode == SImode
11963 && CONST_INT_P (XEXP (x, 1))
11964 && CONST_INT_P (XEXP (x, 2)))
11965 {
11966 if (speed_p)
11967 *cost += extra_cost->alu.bfx;
11968 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11969 return true;
11970 }
11971 /* Without UBFX/SBFX, need to resort to shift operations. */
11972 *cost += COSTS_N_INSNS (1);
11973 if (speed_p)
11974 *cost += 2 * extra_cost->alu.shift;
11975 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11976 return true;
11977
11978 case FLOAT_EXTEND:
11979 if (TARGET_HARD_FLOAT)
11980 {
11981 if (speed_p)
11982 *cost += extra_cost->fp[mode == DFmode].widen;
11983 if (!TARGET_VFP5
11984 && GET_MODE (XEXP (x, 0)) == HFmode)
11985 {
11986 /* Pre v8, widening HF->DF is a two-step process, first
11987 widening to SFmode. */
11988 *cost += COSTS_N_INSNS (1);
11989 if (speed_p)
11990 *cost += extra_cost->fp[0].widen;
11991 }
11992 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11993 return true;
11994 }
11995
11996 *cost = LIBCALL_COST (1);
11997 return false;
11998
11999 case FLOAT_TRUNCATE:
12000 if (TARGET_HARD_FLOAT)
12001 {
12002 if (speed_p)
12003 *cost += extra_cost->fp[mode == DFmode].narrow;
12004 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
12005 return true;
12006 /* Vector modes? */
12007 }
12008 *cost = LIBCALL_COST (1);
12009 return false;
12010
12011 case FMA:
12012 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
12013 {
12014 rtx op0 = XEXP (x, 0);
12015 rtx op1 = XEXP (x, 1);
12016 rtx op2 = XEXP (x, 2);
12017
12018
12019 /* vfms or vfnma. */
12020 if (GET_CODE (op0) == NEG)
12021 op0 = XEXP (op0, 0);
12022
12023 /* vfnms or vfnma. */
12024 if (GET_CODE (op2) == NEG)
12025 op2 = XEXP (op2, 0);
12026
12027 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
12028 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
12029 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
12030
12031 if (speed_p)
12032 *cost += extra_cost->fp[mode == DFmode].fma;
12033
12034 return true;
12035 }
12036
12037 *cost = LIBCALL_COST (3);
12038 return false;
12039
12040 case FIX:
12041 case UNSIGNED_FIX:
12042 if (TARGET_HARD_FLOAT)
12043 {
12044 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12045 a vcvt fixed-point conversion. */
12046 if (code == FIX && mode == SImode
12047 && GET_CODE (XEXP (x, 0)) == FIX
12048 && GET_MODE (XEXP (x, 0)) == SFmode
12049 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12050 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12051 > 0)
12052 {
12053 if (speed_p)
12054 *cost += extra_cost->fp[0].toint;
12055
12056 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12057 code, 0, speed_p);
12058 return true;
12059 }
12060
12061 if (GET_MODE_CLASS (mode) == MODE_INT)
12062 {
12063 mode = GET_MODE (XEXP (x, 0));
12064 if (speed_p)
12065 *cost += extra_cost->fp[mode == DFmode].toint;
12066 /* Strip off the 'cost' of rounding towards zero. */
12067 if (GET_CODE (XEXP (x, 0)) == FIX)
12068 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12069 0, speed_p);
12070 else
12071 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12072 /* ??? Increase the cost to deal with transferring from
12073 FP -> CORE registers? */
12074 return true;
12075 }
12076 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12077 && TARGET_VFP5)
12078 {
12079 if (speed_p)
12080 *cost += extra_cost->fp[mode == DFmode].roundint;
12081 return false;
12082 }
12083 /* Vector costs? */
12084 }
12085 *cost = LIBCALL_COST (1);
12086 return false;
12087
12088 case FLOAT:
12089 case UNSIGNED_FLOAT:
12090 if (TARGET_HARD_FLOAT)
12091 {
12092 /* ??? Increase the cost to deal with transferring from CORE
12093 -> FP registers? */
12094 if (speed_p)
12095 *cost += extra_cost->fp[mode == DFmode].fromint;
12096 return false;
12097 }
12098 *cost = LIBCALL_COST (1);
12099 return false;
12100
12101 case CALL:
12102 return true;
12103
12104 case ASM_OPERANDS:
12105 {
12106 /* Just a guess: the number of instructions in the asm template
12107 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12108 though (see PR60663). */
12109 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12110 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12111
12112 *cost = COSTS_N_INSNS (asm_length + num_operands);
12113 return true;
12114 }
12115 default:
12116 if (mode != VOIDmode)
12117 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12118 else
12119 *cost = COSTS_N_INSNS (4); /* Who knows? */
12120 return false;
12121 }
12122 }
12123
12124 #undef HANDLE_NARROW_SHIFT_ARITH
12125
12126 /* RTX costs entry point. */
12127
12128 static bool
12129 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12130 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12131 {
12132 bool result;
12133 int code = GET_CODE (x);
12134 gcc_assert (current_tune->insn_extra_cost);
12135
12136 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12137 (enum rtx_code) outer_code,
12138 current_tune->insn_extra_cost,
12139 total, speed);
12140
12141 if (dump_file && arm_verbose_cost)
12142 {
12143 print_rtl_single (dump_file, x);
12144 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12145 *total, result ? "final" : "partial");
12146 }
12147 return result;
12148 }
12149
12150 static int
12151 arm_insn_cost (rtx_insn *insn, bool speed)
12152 {
12153 int cost;
12154
12155 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12156 will likely disappear during register allocation. */
12157 if (!reload_completed
12158 && GET_CODE (PATTERN (insn)) == SET
12159 && REG_P (SET_DEST (PATTERN (insn)))
12160 && REG_P (SET_SRC (PATTERN (insn))))
12161 return 2;
12162 cost = pattern_cost (PATTERN (insn), speed);
12163 /* If the cost is zero, then it's likely a complex insn. We don't want the
12164 cost of these to be less than something we know about. */
12165 return cost ? cost : COSTS_N_INSNS (2);
12166 }
12167
12168 /* All the address computations we can do are free, but rtx_cost
12169 returns the same value for practically all of them. So we weight the
12170 different types of address here in order of preference (most preferred first):
12171 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
12172 static inline int
12173 arm_arm_address_cost (rtx x)
12174 {
12175 enum rtx_code c = GET_CODE (x);
12176
12177 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12178 return 0;
12179 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12180 return 10;
12181
12182 if (c == PLUS)
12183 {
12184 if (CONST_INT_P (XEXP (x, 1)))
12185 return 2;
12186
12187 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12188 return 3;
12189
12190 return 4;
12191 }
12192
12193 return 6;
12194 }
12195
12196 static inline int
12197 arm_thumb_address_cost (rtx x)
12198 {
12199 enum rtx_code c = GET_CODE (x);
12200
12201 if (c == REG)
12202 return 1;
12203 if (c == PLUS
12204 && REG_P (XEXP (x, 0))
12205 && CONST_INT_P (XEXP (x, 1)))
12206 return 1;
12207
12208 return 2;
12209 }
12210
12211 static int
12212 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12213 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12214 {
12215 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12216 }
12217
12218 /* Adjust cost hook for XScale. */
12219 static bool
12220 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12221 int * cost)
12222 {
12223 /* Some true dependencies can have a higher cost depending
12224 on precisely how certain input operands are used. */
12225 if (dep_type == 0
12226 && recog_memoized (insn) >= 0
12227 && recog_memoized (dep) >= 0)
12228 {
12229 int shift_opnum = get_attr_shift (insn);
12230 enum attr_type attr_type = get_attr_type (dep);
12231
12232 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12233 operand for INSN. If we have a shifted input operand and the
12234 instruction we depend on is another ALU instruction, then we may
12235 have to account for an additional stall. */
12236 if (shift_opnum != 0
12237 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12238 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12239 || attr_type == TYPE_ALUS_SHIFT_IMM
12240 || attr_type == TYPE_LOGIC_SHIFT_IMM
12241 || attr_type == TYPE_LOGICS_SHIFT_IMM
12242 || attr_type == TYPE_ALU_SHIFT_REG
12243 || attr_type == TYPE_ALUS_SHIFT_REG
12244 || attr_type == TYPE_LOGIC_SHIFT_REG
12245 || attr_type == TYPE_LOGICS_SHIFT_REG
12246 || attr_type == TYPE_MOV_SHIFT
12247 || attr_type == TYPE_MVN_SHIFT
12248 || attr_type == TYPE_MOV_SHIFT_REG
12249 || attr_type == TYPE_MVN_SHIFT_REG))
12250 {
12251 rtx shifted_operand;
12252 int opno;
12253
12254 /* Get the shifted operand. */
12255 extract_insn (insn);
12256 shifted_operand = recog_data.operand[shift_opnum];
12257
12258 /* Iterate over all the operands in DEP. If we write an operand
12259 that overlaps with SHIFTED_OPERAND, then we have to increase the
12260 cost of this dependency. */
12261 extract_insn (dep);
12262 preprocess_constraints (dep);
12263 for (opno = 0; opno < recog_data.n_operands; opno++)
12264 {
12265 /* We can ignore strict inputs. */
12266 if (recog_data.operand_type[opno] == OP_IN)
12267 continue;
12268
12269 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12270 shifted_operand))
12271 {
12272 *cost = 2;
12273 return false;
12274 }
12275 }
12276 }
12277 }
12278 return true;
12279 }
12280
12281 /* Adjust cost hook for Cortex A9. */
12282 static bool
12283 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12284 int * cost)
12285 {
12286 switch (dep_type)
12287 {
12288 case REG_DEP_ANTI:
12289 *cost = 0;
12290 return false;
12291
12292 case REG_DEP_TRUE:
12293 case REG_DEP_OUTPUT:
12294 if (recog_memoized (insn) >= 0
12295 && recog_memoized (dep) >= 0)
12296 {
12297 if (GET_CODE (PATTERN (insn)) == SET)
12298 {
12299 if (GET_MODE_CLASS
12300 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12301 || GET_MODE_CLASS
12302 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12303 {
12304 enum attr_type attr_type_insn = get_attr_type (insn);
12305 enum attr_type attr_type_dep = get_attr_type (dep);
12306
12307 /* By default all dependencies of the form
12308 s0 = s0 <op> s1
12309 s0 = s0 <op> s2
12310 have an extra latency of 1 cycle because
12311 of the input and output dependency in this
12312 case. However, this gets modeled as a true
12313 dependency, hence all these checks. */
12314 if (REG_P (SET_DEST (PATTERN (insn)))
12315 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12316 {
12317 /* FMACS is a special case where the dependent
12318 instruction can be issued 3 cycles before
12319 the normal latency in case of an output
12320 dependency. */
12321 if ((attr_type_insn == TYPE_FMACS
12322 || attr_type_insn == TYPE_FMACD)
12323 && (attr_type_dep == TYPE_FMACS
12324 || attr_type_dep == TYPE_FMACD))
12325 {
12326 if (dep_type == REG_DEP_OUTPUT)
12327 *cost = insn_default_latency (dep) - 3;
12328 else
12329 *cost = insn_default_latency (dep);
12330 return false;
12331 }
12332 else
12333 {
12334 if (dep_type == REG_DEP_OUTPUT)
12335 *cost = insn_default_latency (dep) + 1;
12336 else
12337 *cost = insn_default_latency (dep);
12338 }
12339 return false;
12340 }
12341 }
12342 }
12343 }
12344 break;
12345
12346 default:
12347 gcc_unreachable ();
12348 }
12349
12350 return true;
12351 }
12352
12353 /* Adjust cost hook for FA726TE. */
12354 static bool
12355 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12356 int * cost)
12357 {
12358 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
12359 has a penalty of 3. */
12360 if (dep_type == REG_DEP_TRUE
12361 && recog_memoized (insn) >= 0
12362 && recog_memoized (dep) >= 0
12363 && get_attr_conds (dep) == CONDS_SET)
12364 {
12365 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12366 if (get_attr_conds (insn) == CONDS_USE
12367 && get_attr_type (insn) != TYPE_BRANCH)
12368 {
12369 *cost = 3;
12370 return false;
12371 }
12372
12373 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12374 || get_attr_conds (insn) == CONDS_USE)
12375 {
12376 *cost = 0;
12377 return false;
12378 }
12379 }
12380
12381 return true;
12382 }
12383
12384 /* Implement TARGET_REGISTER_MOVE_COST.
12385
12386 Moves between VFP_REGS and GENERAL_REGS take a single insn, but
12387 that insn is typically more expensive than a single memory access. We set
12388 the cost to less than two memory accesses so that floating
12389 point to integer conversion does not go through memory. */
12390
12391 int
12392 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12393 reg_class_t from, reg_class_t to)
12394 {
12395 if (TARGET_32BIT)
12396 {
12397 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12398 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12399 return 15;
12400 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12401 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12402 return 4;
12403 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12404 return 20;
12405 else
12406 return 2;
12407 }
12408 else
12409 {
12410 if (from == HI_REGS || to == HI_REGS)
12411 return 4;
12412 else
12413 return 2;
12414 }
12415 }
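/* For instance, with the TARGET_32BIT memory move cost of 10 (see
   arm_memory_move_cost below), the value 15 used for VFP<->general moves
   sits between one memory access (10) and two (20).  */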
12416
12417 /* Implement TARGET_MEMORY_MOVE_COST. */
12418
12419 int
12420 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12421 bool in ATTRIBUTE_UNUSED)
12422 {
12423 if (TARGET_32BIT)
12424 return 10;
12425 else
12426 {
12427 if (GET_MODE_SIZE (mode) < 4)
12428 return 8;
12429 else
12430 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12431 }
12432 }
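/* Worked example for the non-TARGET_32BIT (Thumb-1) formula above: an SImode
   value (size 4) costs 2 * 4 * 1 = 8 when RCLASS is LO_REGS and 2 * 4 * 2 = 16
   otherwise, while sub-word modes (QImode, HImode) cost a flat 8.  */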
12433
12434 /* Vectorizer cost model implementation. */
12435
12436 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12437 static int
12438 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12439 tree vectype,
12440 int misalign ATTRIBUTE_UNUSED)
12441 {
12442 unsigned elements;
12443
12444 switch (type_of_cost)
12445 {
12446 case scalar_stmt:
12447 return current_tune->vec_costs->scalar_stmt_cost;
12448
12449 case scalar_load:
12450 return current_tune->vec_costs->scalar_load_cost;
12451
12452 case scalar_store:
12453 return current_tune->vec_costs->scalar_store_cost;
12454
12455 case vector_stmt:
12456 return current_tune->vec_costs->vec_stmt_cost;
12457
12458 case vector_load:
12459 return current_tune->vec_costs->vec_align_load_cost;
12460
12461 case vector_store:
12462 return current_tune->vec_costs->vec_store_cost;
12463
12464 case vec_to_scalar:
12465 return current_tune->vec_costs->vec_to_scalar_cost;
12466
12467 case scalar_to_vec:
12468 return current_tune->vec_costs->scalar_to_vec_cost;
12469
12470 case unaligned_load:
12471 case vector_gather_load:
12472 return current_tune->vec_costs->vec_unalign_load_cost;
12473
12474 case unaligned_store:
12475 case vector_scatter_store:
12476 return current_tune->vec_costs->vec_unalign_store_cost;
12477
12478 case cond_branch_taken:
12479 return current_tune->vec_costs->cond_taken_branch_cost;
12480
12481 case cond_branch_not_taken:
12482 return current_tune->vec_costs->cond_not_taken_branch_cost;
12483
12484 case vec_perm:
12485 case vec_promote_demote:
12486 return current_tune->vec_costs->vec_stmt_cost;
12487
12488 case vec_construct:
12489 elements = TYPE_VECTOR_SUBPARTS (vectype);
12490 return elements / 2 + 1;
12491
12492 default:
12493 gcc_unreachable ();
12494 }
12495 }
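/* For example, with the formula above, constructing a four-element vector
   (vec_construct) is costed as 4 / 2 + 1 = 3.  */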
12496
12497 /* Return true if and only if this insn can dual-issue only as older. */
12498 static bool
12499 cortexa7_older_only (rtx_insn *insn)
12500 {
12501 if (recog_memoized (insn) < 0)
12502 return false;
12503
12504 switch (get_attr_type (insn))
12505 {
12506 case TYPE_ALU_DSP_REG:
12507 case TYPE_ALU_SREG:
12508 case TYPE_ALUS_SREG:
12509 case TYPE_LOGIC_REG:
12510 case TYPE_LOGICS_REG:
12511 case TYPE_ADC_REG:
12512 case TYPE_ADCS_REG:
12513 case TYPE_ADR:
12514 case TYPE_BFM:
12515 case TYPE_REV:
12516 case TYPE_MVN_REG:
12517 case TYPE_SHIFT_IMM:
12518 case TYPE_SHIFT_REG:
12519 case TYPE_LOAD_BYTE:
12520 case TYPE_LOAD_4:
12521 case TYPE_STORE_4:
12522 case TYPE_FFARITHS:
12523 case TYPE_FADDS:
12524 case TYPE_FFARITHD:
12525 case TYPE_FADDD:
12526 case TYPE_FMOV:
12527 case TYPE_F_CVT:
12528 case TYPE_FCMPS:
12529 case TYPE_FCMPD:
12530 case TYPE_FCONSTS:
12531 case TYPE_FCONSTD:
12532 case TYPE_FMULS:
12533 case TYPE_FMACS:
12534 case TYPE_FMULD:
12535 case TYPE_FMACD:
12536 case TYPE_FDIVS:
12537 case TYPE_FDIVD:
12538 case TYPE_F_MRC:
12539 case TYPE_F_MRRC:
12540 case TYPE_F_FLAG:
12541 case TYPE_F_LOADS:
12542 case TYPE_F_STORES:
12543 return true;
12544 default:
12545 return false;
12546 }
12547 }
12548
12549 /* Return true if and only if this insn can dual-issue as younger. */
12550 static bool
12551 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12552 {
12553 if (recog_memoized (insn) < 0)
12554 {
12555 if (verbose > 5)
12556 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12557 return false;
12558 }
12559
12560 switch (get_attr_type (insn))
12561 {
12562 case TYPE_ALU_IMM:
12563 case TYPE_ALUS_IMM:
12564 case TYPE_LOGIC_IMM:
12565 case TYPE_LOGICS_IMM:
12566 case TYPE_EXTEND:
12567 case TYPE_MVN_IMM:
12568 case TYPE_MOV_IMM:
12569 case TYPE_MOV_REG:
12570 case TYPE_MOV_SHIFT:
12571 case TYPE_MOV_SHIFT_REG:
12572 case TYPE_BRANCH:
12573 case TYPE_CALL:
12574 return true;
12575 default:
12576 return false;
12577 }
12578 }
12579
12580
12581 /* Look for an instruction that can dual issue only as an older
12582 instruction, and move it in front of any instructions that can
12583 dual-issue as younger, while preserving the relative order of all
12584    other instructions in the ready list.  This is a heuristic to help
12585 dual-issue in later cycles, by postponing issue of more flexible
12586 instructions. This heuristic may affect dual issue opportunities
12587 in the current cycle. */
12588 static void
12589 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12590 int *n_readyp, int clock)
12591 {
12592 int i;
12593 int first_older_only = -1, first_younger = -1;
12594
12595 if (verbose > 5)
12596 fprintf (file,
12597 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12598 clock,
12599 *n_readyp);
12600
12601 /* Traverse the ready list from the head (the instruction to issue
12602      first), looking for the first instruction that can issue as
12603 younger and the first instruction that can dual-issue only as
12604 older. */
12605 for (i = *n_readyp - 1; i >= 0; i--)
12606 {
12607 rtx_insn *insn = ready[i];
12608 if (cortexa7_older_only (insn))
12609 {
12610 first_older_only = i;
12611 if (verbose > 5)
12612 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12613 break;
12614 }
12615 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12616 first_younger = i;
12617 }
12618
12619   /* Nothing to reorder: either no younger insn was found, or an insn
12620      that can dual-issue only as older appears before any insn that
12621      can dual-issue as younger.  */
12622 if (first_younger == -1)
12623 {
12624 if (verbose > 5)
12625 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12626 return;
12627 }
12628
12629 /* Nothing to reorder because no older-only insn in the ready list. */
12630 if (first_older_only == -1)
12631 {
12632 if (verbose > 5)
12633 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12634 return;
12635 }
12636
12637 /* Move first_older_only insn before first_younger. */
12638 if (verbose > 5)
12639 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12640 INSN_UID(ready [first_older_only]),
12641 INSN_UID(ready [first_younger]));
12642 rtx_insn *first_older_only_insn = ready [first_older_only];
12643 for (i = first_older_only; i < first_younger; i++)
12644 {
12645 ready[i] = ready[i+1];
12646 }
12647
12648 ready[i] = first_older_only_insn;
12649 return;
12650 }
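/* A small worked example of the reordering above: with a ready list
   { A (older-only), B, C (younger) }, where the insn to be issued first sits
   at the highest index, the traversal finds first_younger = 2 and
   first_older_only = 0, and the list becomes { B, C, A }, so A is now issued
   ahead of C.  */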
12651
12652 /* Implement TARGET_SCHED_REORDER. */
12653 static int
12654 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12655 int clock)
12656 {
12657 switch (arm_tune)
12658 {
12659 case TARGET_CPU_cortexa7:
12660 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12661 break;
12662 default:
12663 /* Do nothing for other cores. */
12664 break;
12665 }
12666
12667 return arm_issue_rate ();
12668 }
12669
12670 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12671 It corrects the value of COST based on the relationship between
12672    INSN and DEP, where DEP_TYPE gives the kind of dependence.  It returns the new
12673 value. There is a per-core adjust_cost hook to adjust scheduler costs
12674 and the per-core hook can choose to completely override the generic
12675 adjust_cost function. Only put bits of code into arm_adjust_cost that
12676 are common across all cores. */
12677 static int
12678 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12679 unsigned int)
12680 {
12681 rtx i_pat, d_pat;
12682
12683 /* When generating Thumb-1 code, we want to place flag-setting operations
12684 close to a conditional branch which depends on them, so that we can
12685 omit the comparison. */
12686 if (TARGET_THUMB1
12687 && dep_type == 0
12688 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12689 && recog_memoized (dep) >= 0
12690 && get_attr_conds (dep) == CONDS_SET)
12691 return 0;
12692
12693 if (current_tune->sched_adjust_cost != NULL)
12694 {
12695 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12696 return cost;
12697 }
12698
12699 /* XXX Is this strictly true? */
12700 if (dep_type == REG_DEP_ANTI
12701 || dep_type == REG_DEP_OUTPUT)
12702 return 0;
12703
12704 /* Call insns don't incur a stall, even if they follow a load. */
12705 if (dep_type == 0
12706 && CALL_P (insn))
12707 return 1;
12708
12709 if ((i_pat = single_set (insn)) != NULL
12710 && MEM_P (SET_SRC (i_pat))
12711 && (d_pat = single_set (dep)) != NULL
12712 && MEM_P (SET_DEST (d_pat)))
12713 {
12714 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12715       /* This is a load after a store; there is no conflict if the load reads
12716 from a cached area. Assume that loads from the stack, and from the
12717 constant pool are cached, and that others will miss. This is a
12718 hack. */
12719
12720 if ((SYMBOL_REF_P (src_mem)
12721 && CONSTANT_POOL_ADDRESS_P (src_mem))
12722 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12723 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12724 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12725 return 1;
12726 }
12727
12728 return cost;
12729 }
12730
12731 int
12732 arm_max_conditional_execute (void)
12733 {
12734 return max_insns_skipped;
12735 }
12736
12737 static int
12738 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12739 {
12740 if (TARGET_32BIT)
12741 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12742 else
12743 return (optimize > 0) ? 2 : 0;
12744 }
12745
12746 static int
12747 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12748 {
12749 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12750 }
12751
12752 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12753 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12754 sequences of non-executed instructions in IT blocks probably take the same
12755 amount of time as executed instructions (and the IT instruction itself takes
12756 space in icache). This function was experimentally determined to give good
12757 results on a popular embedded benchmark. */
12758
12759 static int
12760 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12761 {
12762 return (TARGET_32BIT && speed_p) ? 1
12763 : arm_default_branch_cost (speed_p, predictable_p);
12764 }
12765
12766 static int
12767 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12768 {
12769 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12770 }
12771
12772 static bool fp_consts_inited = false;
12773
12774 static REAL_VALUE_TYPE value_fp0;
12775
12776 static void
12777 init_fp_table (void)
12778 {
12779 REAL_VALUE_TYPE r;
12780
12781 r = REAL_VALUE_ATOF ("0", DFmode);
12782 value_fp0 = r;
12783 fp_consts_inited = true;
12784 }
12785
12786 /* Return TRUE if rtx X is a valid immediate FP constant. */
12787 int
12788 arm_const_double_rtx (rtx x)
12789 {
12790 const REAL_VALUE_TYPE *r;
12791
12792 if (!fp_consts_inited)
12793 init_fp_table ();
12794
12795 r = CONST_DOUBLE_REAL_VALUE (x);
12796 if (REAL_VALUE_MINUS_ZERO (*r))
12797 return 0;
12798
12799 if (real_equal (r, &value_fp0))
12800 return 1;
12801
12802 return 0;
12803 }
12804
12805 /* VFPv3 has a fairly wide range of representable immediates, formed from
12806 "quarter-precision" floating-point values. These can be evaluated using this
12807 formula (with ^ for exponentiation):
12808
12809 -1^s * n * 2^-r
12810
12811 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12812 16 <= n <= 31 and 0 <= r <= 7.
12813
12814 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12815
12816 - A (most-significant) is the sign bit.
12817 - BCD are the exponent (encoded as r XOR 3).
12818 - EFGH are the mantissa (encoded as n - 16).
12819 */
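/* For illustration, using the mapping above: 0.5 = +16 * 2^-5, so s = 0,
   n = 16, r = 5, giving ABCDEFGH = 0 110 0000 = 0x60; similarly
   1.0 = +16 * 2^-4 encodes as 0x70.  vfp3_const_double_index below returns
   this 8-bit index.  */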
12820
12821 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12822 fconst[sd] instruction, or -1 if X isn't suitable. */
12823 static int
12824 vfp3_const_double_index (rtx x)
12825 {
12826 REAL_VALUE_TYPE r, m;
12827 int sign, exponent;
12828 unsigned HOST_WIDE_INT mantissa, mant_hi;
12829 unsigned HOST_WIDE_INT mask;
12830 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12831 bool fail;
12832
12833 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12834 return -1;
12835
12836 r = *CONST_DOUBLE_REAL_VALUE (x);
12837
12838 /* We can't represent these things, so detect them first. */
12839 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12840 return -1;
12841
12842 /* Extract sign, exponent and mantissa. */
12843 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12844 r = real_value_abs (&r);
12845 exponent = REAL_EXP (&r);
12846 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12847 highest (sign) bit, with a fixed binary point at bit point_pos.
12848 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12849 bits for the mantissa, this may fail (low bits would be lost). */
12850 real_ldexp (&m, &r, point_pos - exponent);
12851 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12852 mantissa = w.elt (0);
12853 mant_hi = w.elt (1);
12854
12855 /* If there are bits set in the low part of the mantissa, we can't
12856 represent this value. */
12857 if (mantissa != 0)
12858 return -1;
12859
12860 /* Now make it so that mantissa contains the most-significant bits, and move
12861 the point_pos to indicate that the least-significant bits have been
12862 discarded. */
12863 point_pos -= HOST_BITS_PER_WIDE_INT;
12864 mantissa = mant_hi;
12865
12866 /* We can permit four significant bits of mantissa only, plus a high bit
12867 which is always 1. */
12868 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12869 if ((mantissa & mask) != 0)
12870 return -1;
12871
12872 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12873 mantissa >>= point_pos - 5;
12874
12875 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12876 floating-point immediate zero with Neon using an integer-zero load, but
12877 that case is handled elsewhere.) */
12878 if (mantissa == 0)
12879 return -1;
12880
12881 gcc_assert (mantissa >= 16 && mantissa <= 31);
12882
12883 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12884 normalized significands are in the range [1, 2). (Our mantissa is shifted
12885 left 4 places at this point relative to normalized IEEE754 values). GCC
12886 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12887 REAL_EXP must be altered. */
12888 exponent = 5 - exponent;
12889
12890 if (exponent < 0 || exponent > 7)
12891 return -1;
12892
12893 /* Sign, mantissa and exponent are now in the correct form to plug into the
12894 formula described in the comment above. */
12895 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12896 }
12897
12898 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12899 int
12900 vfp3_const_double_rtx (rtx x)
12901 {
12902 if (!TARGET_VFP3)
12903 return 0;
12904
12905 return vfp3_const_double_index (x) != -1;
12906 }
12907
12908 /* Recognize immediates which can be used in various Neon and MVE instructions.
12909 Legal immediates are described by the following table (for VMVN variants, the
12910 bitwise inverse of the constant shown is recognized. In either case, VMOV
12911 is output and the correct instruction to use for a given constant is chosen
12912 by the assembler). The constant shown is replicated across all elements of
12913 the destination vector.
12914
12915 insn elems variant constant (binary)
12916 ---- ----- ------- -----------------
12917 vmov i32 0 00000000 00000000 00000000 abcdefgh
12918 vmov i32 1 00000000 00000000 abcdefgh 00000000
12919 vmov i32 2 00000000 abcdefgh 00000000 00000000
12920 vmov i32 3 abcdefgh 00000000 00000000 00000000
12921 vmov i16 4 00000000 abcdefgh
12922 vmov i16 5 abcdefgh 00000000
12923 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12924 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12925 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12926 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12927 vmvn i16 10 00000000 abcdefgh
12928 vmvn i16 11 abcdefgh 00000000
12929 vmov i32 12 00000000 00000000 abcdefgh 11111111
12930 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12931 vmov i32 14 00000000 abcdefgh 11111111 11111111
12932 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12933 vmov i8 16 abcdefgh
12934 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12935 eeeeeeee ffffffff gggggggg hhhhhhhh
12936 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12937 vmov f32 19 00000000 00000000 00000000 00000000
12938
12939 For case 18, B = !b. Representable values are exactly those accepted by
12940 vfp3_const_double_index, but are output as floating-point numbers rather
12941 than indices.
12942
12943 For case 19, we will change it to vmov.i32 when assembling.
12944
12945 Variants 0-5 (inclusive) may also be used as immediates for the second
12946 operand of VORR/VBIC instructions.
12947
12948 The INVERSE argument causes the bitwise inverse of the given operand to be
12949 recognized instead (used for recognizing legal immediates for the VAND/VORN
12950 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12951 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12952 output, rather than the real insns vbic/vorr).
12953
12954 INVERSE makes no difference to the recognition of float vectors.
12955
12956 The return value is the variant of immediate as shown in the above table, or
12957 -1 if the given value doesn't match any of the listed patterns.
12958 */
12959 static int
12960 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12961 rtx *modconst, int *elementwidth)
12962 {
12963 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12964 matches = 1; \
12965 for (i = 0; i < idx; i += (STRIDE)) \
12966 if (!(TEST)) \
12967 matches = 0; \
12968 if (matches) \
12969 { \
12970 immtype = (CLASS); \
12971 elsize = (ELSIZE); \
12972 break; \
12973 }
12974
12975 unsigned int i, elsize = 0, idx = 0, n_elts;
12976 unsigned int innersize;
12977 unsigned char bytes[16] = {};
12978 int immtype = -1, matches;
12979 unsigned int invmask = inverse ? 0xff : 0;
12980 bool vector = GET_CODE (op) == CONST_VECTOR;
12981
12982 if (vector)
12983 n_elts = CONST_VECTOR_NUNITS (op);
12984 else
12985 {
12986 n_elts = 1;
12987 gcc_assert (mode != VOIDmode);
12988 }
12989
12990 innersize = GET_MODE_UNIT_SIZE (mode);
12991
12992 /* Only support 128-bit vectors for MVE. */
12993 if (TARGET_HAVE_MVE
12994 && (!vector
12995 || VALID_MVE_PRED_MODE (mode)
12996 || n_elts * innersize != 16))
12997 return -1;
12998
12999 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
13000 return -1;
13001
13002 /* Vectors of float constants. */
13003 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13004 {
13005 rtx el0 = CONST_VECTOR_ELT (op, 0);
13006
13007 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
13008 return -1;
13009
13010 /* FP16 vectors cannot be represented. */
13011 if (GET_MODE_INNER (mode) == HFmode)
13012 return -1;
13013
13014 /* All elements in the vector must be the same. Note that 0.0 and -0.0
13015 are distinct in this context. */
13016 if (!const_vec_duplicate_p (op))
13017 return -1;
13018
13019 if (modconst)
13020 *modconst = CONST_VECTOR_ELT (op, 0);
13021
13022 if (elementwidth)
13023 *elementwidth = 0;
13024
13025 if (el0 == CONST0_RTX (GET_MODE (el0)))
13026 return 19;
13027 else
13028 return 18;
13029 }
13030
13031 /* The tricks done in the code below apply for little-endian vector layout.
13032 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13033 FIXME: Implement logic for big-endian vectors. */
13034 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13035 return -1;
13036
13037 /* Splat vector constant out into a byte vector. */
13038 for (i = 0; i < n_elts; i++)
13039 {
13040 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13041 unsigned HOST_WIDE_INT elpart;
13042
13043 gcc_assert (CONST_INT_P (el));
13044 elpart = INTVAL (el);
13045
13046 for (unsigned int byte = 0; byte < innersize; byte++)
13047 {
13048 bytes[idx++] = (elpart & 0xff) ^ invmask;
13049 elpart >>= BITS_PER_UNIT;
13050 }
13051 }
13052
13053 /* Sanity check. */
13054 gcc_assert (idx == GET_MODE_SIZE (mode));
13055
13056 do
13057 {
13058 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13059 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13060
13061 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13062 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13063
13064 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13065 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13066
13067 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13068 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13069
13070 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13071
13072 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13073
13074 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13075 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13076
13077 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13078 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13079
13080 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13081 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13082
13083 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13084 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13085
13086 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13087
13088 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13089
13090 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13091 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13092
13093 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13094 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13095
13096 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13097 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13098
13099 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13100 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13101
13102 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13103
13104 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13105 && bytes[i] == bytes[(i + 8) % idx]);
13106 }
13107 while (0);
13108
13109 if (immtype == -1)
13110 return -1;
13111
13112 if (elementwidth)
13113 *elementwidth = elsize;
13114
13115 if (modconst)
13116 {
13117 unsigned HOST_WIDE_INT imm = 0;
13118
13119 /* Un-invert bytes of recognized vector, if necessary. */
13120 if (invmask != 0)
13121 for (i = 0; i < idx; i++)
13122 bytes[i] ^= invmask;
13123
13124 if (immtype == 17)
13125 {
13126 /* FIXME: Broken on 32-bit H_W_I hosts. */
13127 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13128
13129 for (i = 0; i < 8; i++)
13130 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13131 << (i * BITS_PER_UNIT);
13132
13133 *modconst = GEN_INT (imm);
13134 }
13135 else
13136 {
13137 unsigned HOST_WIDE_INT imm = 0;
13138
13139 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13140 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13141
13142 *modconst = GEN_INT (imm);
13143 }
13144 }
13145
13146 return immtype;
13147 #undef CHECK
13148 }
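/* Two illustrative inputs for simd_valid_immediate: a V4SImode vector with
   every element 0x2a splats to bytes { 2a 00 00 00 ... } and matches
   variant 0 with element width 32 and *MODCONST = 0x2a, while a V8HImode
   vector with every element 0x4200 splats to bytes { 00 42 00 42 ... } and
   matches variant 5 with element width 16 and *MODCONST = 0x4200.  */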
13149
13150 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13151 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13152 (or zero for float elements), and a modified constant (whatever should be
13153    output for a VMOV) in *MODCONST.  This function was renamed from
13154    "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" as it is
13155    used by both Neon and MVE. */
13156 int
13157 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13158 rtx *modconst, int *elementwidth)
13159 {
13160 rtx tmpconst;
13161 int tmpwidth;
13162 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13163
13164 if (retval == -1)
13165 return 0;
13166
13167 if (modconst)
13168 *modconst = tmpconst;
13169
13170 if (elementwidth)
13171 *elementwidth = tmpwidth;
13172
13173 return 1;
13174 }
13175
13176 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13177 the immediate is valid, write a constant suitable for using as an operand
13178 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13179 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13180
13181 int
13182 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13183 rtx *modconst, int *elementwidth)
13184 {
13185 rtx tmpconst;
13186 int tmpwidth;
13187 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13188
13189 if (retval < 0 || retval > 5)
13190 return 0;
13191
13192 if (modconst)
13193 *modconst = tmpconst;
13194
13195 if (elementwidth)
13196 *elementwidth = tmpwidth;
13197
13198 return 1;
13199 }
13200
13201 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13202 the immediate is valid, write a constant suitable for using as an operand
13203 to VSHR/VSHL to *MODCONST and the corresponding element width to
13204 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
13205 because they have different limitations. */
13206
13207 int
13208 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13209 rtx *modconst, int *elementwidth,
13210 bool isleftshift)
13211 {
13212 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13213 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13214 unsigned HOST_WIDE_INT last_elt = 0;
13215 unsigned HOST_WIDE_INT maxshift;
13216
13217 /* Split vector constant out into a byte vector. */
13218 for (i = 0; i < n_elts; i++)
13219 {
13220 rtx el = CONST_VECTOR_ELT (op, i);
13221 unsigned HOST_WIDE_INT elpart;
13222
13223 if (CONST_INT_P (el))
13224 elpart = INTVAL (el);
13225 else if (CONST_DOUBLE_P (el))
13226 return 0;
13227 else
13228 gcc_unreachable ();
13229
13230 if (i != 0 && elpart != last_elt)
13231 return 0;
13232
13233 last_elt = elpart;
13234 }
13235
13236 /* Shift less than element size. */
13237 maxshift = innersize * 8;
13238
13239 if (isleftshift)
13240 {
13241 /* Left shift immediate value can be from 0 to <size>-1. */
13242 if (last_elt >= maxshift)
13243 return 0;
13244 }
13245 else
13246 {
13247 /* Right shift immediate value can be from 1 to <size>. */
13248 if (last_elt == 0 || last_elt > maxshift)
13249 return 0;
13250 }
13251
13252 if (elementwidth)
13253 *elementwidth = innersize * 8;
13254
13255 if (modconst)
13256 *modconst = CONST_VECTOR_ELT (op, 0);
13257
13258 return 1;
13259 }
13260
13261 /* Return a string suitable for output of Neon immediate logic operation
13262 MNEM. */
13263
13264 char *
13265 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13266 int inverse, int quad)
13267 {
13268 int width, is_valid;
13269 static char templ[40];
13270
13271 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13272
13273 gcc_assert (is_valid != 0);
13274
13275 if (quad)
13276 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13277 else
13278 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13279
13280 return templ;
13281 }
13282
13283 /* Return a string suitable for output of Neon immediate shift operation
13284 (VSHR or VSHL) MNEM. */
13285
13286 char *
13287 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13288 machine_mode mode, int quad,
13289 bool isleftshift)
13290 {
13291 int width, is_valid;
13292 static char templ[40];
13293
13294 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13295 gcc_assert (is_valid != 0);
13296
13297 if (quad)
13298 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13299 else
13300 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13301
13302 return templ;
13303 }
13304
13305 /* Output a sequence of pairwise operations to implement a reduction.
13306 NOTE: We do "too much work" here, because pairwise operations work on two
13307 registers-worth of operands in one go. Unfortunately we can't exploit those
13308 extra calculations to do the full operation in fewer steps, I don't think.
13309 Although all vector elements of the result but the first are ignored, we
13310 actually calculate the same result in each of the elements. An alternative
13311 such as initially loading a vector with zero to use as each of the second
13312 operands would use up an additional register and take an extra instruction,
13313 for no particular gain. */
13314
13315 void
13316 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13317 rtx (*reduc) (rtx, rtx, rtx))
13318 {
13319 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13320 rtx tmpsum = op1;
13321
13322 for (i = parts / 2; i >= 1; i /= 2)
13323 {
13324 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13325 emit_insn (reduc (dest, tmpsum, tmpsum));
13326 tmpsum = dest;
13327 }
13328 }
13329
13330 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13331 loaded into a register using VDUP.
13332
13333 If this is the case, and GENERATE is set, we also generate
13334 instructions to do this and return an RTX to assign to the register. */
13335
13336 static rtx
13337 neon_vdup_constant (rtx vals, bool generate)
13338 {
13339 machine_mode mode = GET_MODE (vals);
13340 machine_mode inner_mode = GET_MODE_INNER (mode);
13341 rtx x;
13342
13343 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13344 return NULL_RTX;
13345
13346 if (!const_vec_duplicate_p (vals, &x))
13347 /* The elements are not all the same. We could handle repeating
13348 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13349 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13350 vdup.i16). */
13351 return NULL_RTX;
13352
13353 if (!generate)
13354 return x;
13355
13356 /* We can load this constant by using VDUP and a constant in a
13357 single ARM register. This will be cheaper than a vector
13358 load. */
13359
13360 x = copy_to_mode_reg (inner_mode, x);
13361 return gen_vec_duplicate (mode, x);
13362 }
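/* For example, given a V8QImode CONST_VECTOR with every element 5,
   const_vec_duplicate_p extracts the scalar 5; with GENERATE set, that
   constant is moved into a core register and the result is a vec_duplicate
   of that register, i.e. typically a single vdup from a core register.  */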
13363
13364 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13365 rtx
13366 mve_bool_vec_to_const (rtx const_vec)
13367 {
13368 machine_mode mode = GET_MODE (const_vec);
13369
13370 if (!VECTOR_MODE_P (mode))
13371 return const_vec;
13372
13373 unsigned n_elts = GET_MODE_NUNITS (mode);
13374 unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
13375 unsigned shift_c = 16 / n_elts;
13376 unsigned i;
13377 int hi_val = 0;
13378
13379 for (i = 0; i < n_elts; i++)
13380 {
13381 rtx el = CONST_VECTOR_ELT (const_vec, i);
13382 unsigned HOST_WIDE_INT elpart;
13383
13384 gcc_assert (CONST_INT_P (el));
13385 elpart = INTVAL (el) & ((1U << el_prec) - 1);
13386
13387 unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
13388
13389 hi_val |= elpart << (index * shift_c);
13390 }
13391   /* We are using a mov immediate to encode this constant, which writes 32 bits,
13392      so we need to make sure the top 16 bits are all zero; otherwise we cannot
13393      guarantee that we can actually write this immediate. */
13394 return gen_int_mode (hi_val, SImode);
13395 }
13396
13397 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13398    constants (for vec_init) or a CONST_VECTOR, can be efficiently loaded
13399 into a register.
13400
13401 If this is the case, and GENERATE is set, we also generate code to do
13402 this and return an RTX to copy into the register. */
13403
13404 rtx
13405 neon_make_constant (rtx vals, bool generate)
13406 {
13407 machine_mode mode = GET_MODE (vals);
13408 rtx target;
13409 rtx const_vec = NULL_RTX;
13410 int n_elts = GET_MODE_NUNITS (mode);
13411 int n_const = 0;
13412 int i;
13413
13414 if (GET_CODE (vals) == CONST_VECTOR)
13415 const_vec = vals;
13416 else if (GET_CODE (vals) == PARALLEL)
13417 {
13418 /* A CONST_VECTOR must contain only CONST_INTs and
13419 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13420 Only store valid constants in a CONST_VECTOR. */
13421 for (i = 0; i < n_elts; ++i)
13422 {
13423 rtx x = XVECEXP (vals, 0, i);
13424 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13425 n_const++;
13426 }
13427 if (n_const == n_elts)
13428 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13429 }
13430 else
13431 gcc_unreachable ();
13432
13433 if (const_vec != NULL
13434 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13435 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13436 return const_vec;
13437 else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode))
13438 return mve_bool_vec_to_const (const_vec);
13439 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13440 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13441 pipeline cycle; creating the constant takes one or two ARM
13442 pipeline cycles. */
13443 return target;
13444 else if (const_vec != NULL_RTX)
13445 /* Load from constant pool. On Cortex-A8 this takes two cycles
13446 (for either double or quad vectors). We cannot take advantage
13447 of single-cycle VLD1 because we need a PC-relative addressing
13448 mode. */
13449 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13450 else
13451 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13452 We cannot construct an initializer. */
13453 return NULL_RTX;
13454 }
13455
13456 /* Initialize vector TARGET to VALS. */
13457
13458 void
13459 neon_expand_vector_init (rtx target, rtx vals)
13460 {
13461 machine_mode mode = GET_MODE (target);
13462 machine_mode inner_mode = GET_MODE_INNER (mode);
13463 int n_elts = GET_MODE_NUNITS (mode);
13464 int n_var = 0, one_var = -1;
13465 bool all_same = true;
13466 rtx x, mem;
13467 int i;
13468
13469 for (i = 0; i < n_elts; ++i)
13470 {
13471 x = XVECEXP (vals, 0, i);
13472 if (!CONSTANT_P (x))
13473 ++n_var, one_var = i;
13474
13475 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13476 all_same = false;
13477 }
13478
13479 if (n_var == 0)
13480 {
13481 rtx constant = neon_make_constant (vals);
13482 if (constant != NULL_RTX)
13483 {
13484 emit_move_insn (target, constant);
13485 return;
13486 }
13487 }
13488
13489 /* Splat a single non-constant element if we can. */
13490 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13491 {
13492 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13493 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13494 return;
13495 }
13496
13497 /* One field is non-constant. Load constant then overwrite varying
13498 field. This is more efficient than using the stack. */
13499 if (n_var == 1)
13500 {
13501 rtx copy = copy_rtx (vals);
13502 rtx merge_mask = GEN_INT (1 << one_var);
13503
13504 /* Load constant part of vector, substitute neighboring value for
13505 varying element. */
13506 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13507 neon_expand_vector_init (target, copy);
13508
13509 /* Insert variable. */
13510 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13511 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13512 return;
13513 }
13514
13515 /* Construct the vector in memory one field at a time
13516 and load the whole vector. */
13517 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13518 for (i = 0; i < n_elts; i++)
13519 emit_move_insn (adjust_address_nv (mem, inner_mode,
13520 i * GET_MODE_SIZE (inner_mode)),
13521 XVECEXP (vals, 0, i));
13522 emit_move_insn (target, mem);
13523 }
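/* As an illustration of the single-variable path above: initializing a
   V4SImode vector to { x, 1, 2, 3 } with non-constant x first loads the
   constant vector { 1, 1, 2, 3 } (the varying lane 0 borrows its neighbour's
   value) and then inserts x into lane 0 using a merge mask of 1 << 0.  */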
13524
13525 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
13526    an error mentioning DESC if it doesn't.  EXP indicates the source location,
13527    which includes the inlining history for intrinsics.  */
13528
13529 static void
13530 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13531 const_tree exp, const char *desc)
13532 {
13533 HOST_WIDE_INT lane;
13534
13535 gcc_assert (CONST_INT_P (operand));
13536
13537 lane = INTVAL (operand);
13538
13539 if (lane < low || lane >= high)
13540 {
13541 if (exp)
13542 error_at (EXPR_LOCATION (exp),
13543 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13544 else
13545 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13546 }
13547 }
13548
13549 /* Bounds-check lanes. */
13550
13551 void
13552 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13553 const_tree exp)
13554 {
13555 bounds_check (operand, low, high, exp, "lane");
13556 }
13557
13558 /* Bounds-check constants. */
13559
13560 void
13561 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13562 {
13563 bounds_check (operand, low, high, NULL_TREE, "constant");
13564 }
13565
13566 HOST_WIDE_INT
13567 neon_element_bits (machine_mode mode)
13568 {
13569 return GET_MODE_UNIT_BITSIZE (mode);
13570 }
13571
13572 \f
13573 /* Predicates for `match_operand' and `match_operator'. */
13574
13575 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13576 WB level is 2 if full writeback address modes are allowed, 1
13577 if limited writeback address modes (POST_INC and PRE_DEC) are
13578 allowed and 0 if no writeback at all is supported. */
13579
13580 int
13581 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13582 {
13583 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13584 rtx ind;
13585
13586 /* Reject eliminable registers. */
13587 if (! (reload_in_progress || reload_completed || lra_in_progress)
13588 && ( reg_mentioned_p (frame_pointer_rtx, op)
13589 || reg_mentioned_p (arg_pointer_rtx, op)
13590 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13591 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13592 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13593 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13594 return FALSE;
13595
13596 /* Constants are converted into offsets from labels. */
13597 if (!MEM_P (op))
13598 return FALSE;
13599
13600 ind = XEXP (op, 0);
13601
13602 if (reload_completed
13603 && (LABEL_REF_P (ind)
13604 || (GET_CODE (ind) == CONST
13605 && GET_CODE (XEXP (ind, 0)) == PLUS
13606 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13607 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13608 return TRUE;
13609
13610 /* Match: (mem (reg)). */
13611 if (REG_P (ind))
13612 return arm_address_register_rtx_p (ind, 0);
13613
13614   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
13615      acceptable whenever at least restricted writeback is allowed
13616      (subject to verification by arm_address_register_rtx_p), while
13617      PRE_INC and POST_DEC additionally require full writeback
13618      support.  */
13619 if (wb_level > 0
13620 && (GET_CODE (ind) == POST_INC
13621 || GET_CODE (ind) == PRE_DEC
13622 || (wb_level > 1
13623 && (GET_CODE (ind) == PRE_INC
13624 || GET_CODE (ind) == POST_DEC))))
13625 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13626
13627 if (wb_level > 1
13628 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13629 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13630 && GET_CODE (XEXP (ind, 1)) == PLUS
13631 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13632 ind = XEXP (ind, 1);
13633
13634 /* Match:
13635 (plus (reg)
13636 (const))
13637
13638 The encoded immediate for 16-bit modes is multiplied by 2,
13639 while the encoded immediate for 32-bit and 64-bit modes is
13640 multiplied by 4. */
13641 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13642 if (GET_CODE (ind) == PLUS
13643 && REG_P (XEXP (ind, 0))
13644 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13645 && CONST_INT_P (XEXP (ind, 1))
13646 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13647 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13648 return TRUE;
13649
13650 return FALSE;
13651 }
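/* Concretely, the offset check above accepts, e.g., multiples of 2 in
   [-510, 510] for HImode (factor 2) and multiples of 4 in [-1020, 1020]
   for SImode, DImode or DFmode (factor 4).  */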
13652
13653 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13654 WB is true if full writeback address modes are allowed and is false
13655 if limited writeback address modes (POST_INC and PRE_DEC) are
13656 allowed. */
13657
13658 int arm_coproc_mem_operand (rtx op, bool wb)
13659 {
13660 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13661 }
13662
13663 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13664 context in which no writeback address modes are allowed. */
13665
13666 int
13667 arm_coproc_mem_operand_no_writeback (rtx op)
13668 {
13669 return arm_coproc_mem_operand_wb (op, 0);
13670 }
13671
13672 /* In non-STRICT mode, return the register number of OP; in STRICT mode return
13673    the hard regno, or the replacement hard regno if the pseudo got one rather
13674    than being spilled to memory.  Otherwise, return the original pseudo number. */
13675 static int
13676 arm_effective_regno (rtx op, bool strict)
13677 {
13678 gcc_assert (REG_P (op));
13679 if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
13680 || !reg_renumber || reg_renumber[REGNO (op)] < 0)
13681 return REGNO (op);
13682 return reg_renumber[REGNO (op)];
13683 }
13684
13685 /* This function returns TRUE on matching mode and op.
13686 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13687    2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13688 int
13689 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13690 {
13691 enum rtx_code code;
13692 int val, reg_no;
13693
13694 /* Match: (mem (reg)). */
13695 if (REG_P (op))
13696 {
13697 reg_no = arm_effective_regno (op, strict);
13698 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13699 ? reg_no <= LAST_LO_REGNUM
13700 : reg_no < LAST_ARM_REGNUM)
13701 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13702 }
13703 code = GET_CODE (op);
13704
13705 if ((code == POST_INC
13706 || code == PRE_DEC
13707 || code == PRE_INC
13708 || code == POST_DEC)
13709 && REG_P (XEXP (op, 0)))
13710 {
13711 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13712 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13713 ? reg_no <= LAST_LO_REGNUM
13714 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13715 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13716 }
13717 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13718 && GET_CODE (XEXP (op, 1)) == PLUS
13719 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13720 && REG_P (XEXP (op, 0))
13721 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13722 /* Make sure to only accept PLUS after reload_completed, otherwise
13723 this will interfere with auto_inc's pattern detection. */
13724 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13725 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13726 {
13727 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13728 if (code == PLUS)
13729 val = INTVAL (XEXP (op, 1));
13730 else
13731 val = INTVAL (XEXP(XEXP (op, 1), 1));
13732
13733 switch (mode)
13734 {
13735 case E_V16QImode:
13736 case E_V8QImode:
13737 case E_V4QImode:
13738 if (abs (val) > 127)
13739 return FALSE;
13740 break;
13741 case E_V8HImode:
13742 case E_V8HFmode:
13743 case E_V4HImode:
13744 case E_V4HFmode:
13745 if (val % 2 != 0 || abs (val) > 254)
13746 return FALSE;
13747 break;
13748 case E_V4SImode:
13749 case E_V4SFmode:
13750 if (val % 4 != 0 || abs (val) > 508)
13751 return FALSE;
13752 break;
13753 default:
13754 return FALSE;
13755 }
13756 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13757 || (MVE_STN_LDW_MODE (mode)
13758 ? reg_no <= LAST_LO_REGNUM
13759 : (reg_no < LAST_ARM_REGNUM
13760 && (code == PLUS || reg_no != SP_REGNUM))));
13761 }
13762 return FALSE;
13763 }
13764
13765 /* Return TRUE if OP is a memory operand which we can load or store a vector
13766 to/from. TYPE is one of the following values:
13767    0 - Vector load/store (vldr)
13768 1 - Core registers (ldm)
13769 2 - Element/structure loads (vld1)
13770 */
13771 int
13772 neon_vector_mem_operand (rtx op, int type, bool strict)
13773 {
13774 rtx ind;
13775
13776 /* Reject eliminable registers. */
13777 if (strict && ! (reload_in_progress || reload_completed)
13778 && (reg_mentioned_p (frame_pointer_rtx, op)
13779 || reg_mentioned_p (arg_pointer_rtx, op)
13780 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13781 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13782 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13783 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13784 return FALSE;
13785
13786 /* Constants are converted into offsets from labels. */
13787 if (!MEM_P (op))
13788 return FALSE;
13789
13790 ind = XEXP (op, 0);
13791
13792 if (reload_completed
13793 && (LABEL_REF_P (ind)
13794 || (GET_CODE (ind) == CONST
13795 && GET_CODE (XEXP (ind, 0)) == PLUS
13796 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13797 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13798 return TRUE;
13799
13800 /* Match: (mem (reg)). */
13801 if (REG_P (ind))
13802 return arm_address_register_rtx_p (ind, 0);
13803
13804 /* Allow post-increment with Neon registers. */
13805 if ((type != 1 && GET_CODE (ind) == POST_INC)
13806 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13807 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13808
13809 /* Allow post-increment by register for VLDn */
13810 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13811 && GET_CODE (XEXP (ind, 1)) == PLUS
13812 && REG_P (XEXP (XEXP (ind, 1), 1))
13813 && REG_P (XEXP (ind, 0))
13814 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13815 return true;
13816
13817 /* Match:
13818 (plus (reg)
13819 (const)). */
13820 if (type == 0
13821 && GET_CODE (ind) == PLUS
13822 && REG_P (XEXP (ind, 0))
13823 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13824 && CONST_INT_P (XEXP (ind, 1))
13825 && INTVAL (XEXP (ind, 1)) > -1024
13826 /* For quad modes, we restrict the constant offset to be slightly less
13827 than what the instruction format permits. We have no such constraint
13828 on double mode offsets. (This must match arm_legitimate_index_p.) */
13829 && (INTVAL (XEXP (ind, 1))
13830 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13831 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13832 return TRUE;
13833
13834 return FALSE;
13835 }
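/* For type 0, the (reg + const) form above accepts word-aligned offsets from
   -1020 up to 1020 for double-word modes, and only up to 1012 for quad-word
   modes (matching arm_legitimate_index_p).  */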
13836
13837 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13838 type. */
13839 int
13840 mve_struct_mem_operand (rtx op)
13841 {
13842 rtx ind = XEXP (op, 0);
13843
13844 /* Match: (mem (reg)). */
13845 if (REG_P (ind))
13846 return arm_address_register_rtx_p (ind, 0);
13847
13848 /* Allow only post-increment by the mode size. */
13849 if (GET_CODE (ind) == POST_INC)
13850 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13851
13852 return FALSE;
13853 }
13854
13855 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13856 type. */
13857 int
13858 neon_struct_mem_operand (rtx op)
13859 {
13860 rtx ind;
13861
13862 /* Reject eliminable registers. */
13863 if (! (reload_in_progress || reload_completed)
13864 && ( reg_mentioned_p (frame_pointer_rtx, op)
13865 || reg_mentioned_p (arg_pointer_rtx, op)
13866 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13867 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13868 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13869 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13870 return FALSE;
13871
13872 /* Constants are converted into offsets from labels. */
13873 if (!MEM_P (op))
13874 return FALSE;
13875
13876 ind = XEXP (op, 0);
13877
13878 if (reload_completed
13879 && (LABEL_REF_P (ind)
13880 || (GET_CODE (ind) == CONST
13881 && GET_CODE (XEXP (ind, 0)) == PLUS
13882 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13883 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13884 return TRUE;
13885
13886 /* Match: (mem (reg)). */
13887 if (REG_P (ind))
13888 return arm_address_register_rtx_p (ind, 0);
13889
13890 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13891 if (GET_CODE (ind) == POST_INC
13892 || GET_CODE (ind) == PRE_DEC)
13893 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13894
13895 return FALSE;
13896 }
13897
13898 /* Prepares the operands for the VCMLA by lane instruction such that the right
13899 register number is selected. This instruction is special in that it always
13900    requires a D register; however, there is a choice to be made between Dn[0],
13901 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13902
13903 The VCMLA by lane function always selects two values. For instance given D0
13904 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13905 used by the instruction. However given V4SF then index 0 and 1 are valid as
13906 D0[0] or D1[0] are both valid.
13907
13908 This function centralizes that information based on OPERANDS, OPERANDS[3]
13909 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13910 updated to contain the right index. */
13911
13912 rtx *
13913 neon_vcmla_lane_prepare_operands (rtx *operands)
13914 {
13915 int lane = INTVAL (operands[4]);
13916 machine_mode constmode = SImode;
13917 machine_mode mode = GET_MODE (operands[3]);
13918 int regno = REGNO (operands[3]);
13919 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13920 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13921 {
13922 operands[3] = gen_int_mode (regno + 1, constmode);
13923 operands[4]
13924 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13925 }
13926 else
13927 {
13928 operands[3] = gen_int_mode (regno, constmode);
13929 operands[4] = gen_int_mode (lane, constmode);
13930 }
13931 return operands;
13932 }
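/* Worked example: for a V4SF operand (GET_MODE_NUNITS / 4 == 1) occupying a
   D-register pair, lane 0 stays as lane 0 of the first D register, while
   lane 1 is rewritten as lane 0 of the following D register (regno + 1), as
   described in the comment above.  */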
13933
13934
13935 /* Return true if X is a register that will be eliminated later on. */
13936 int
13937 arm_eliminable_register (rtx x)
13938 {
13939 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13940 || REGNO (x) == ARG_POINTER_REGNUM
13941 || VIRTUAL_REGISTER_P (x));
13942 }
13943
13944 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13945 coprocessor registers. Otherwise return NO_REGS. */
13946
13947 enum reg_class
13948 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13949 {
13950 if (mode == HFmode)
13951 {
13952 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13953 return GENERAL_REGS;
13954 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13955 return NO_REGS;
13956 return GENERAL_REGS;
13957 }
13958
13959 /* The neon move patterns handle all legitimate vector and struct
13960 addresses. */
13961 if (TARGET_NEON
13962 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13963 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13964 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13965 || VALID_NEON_STRUCT_MODE (mode)))
13966 return NO_REGS;
13967
13968 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13969 return NO_REGS;
13970
13971 return GENERAL_REGS;
13972 }
13973
13974 /* Values which must be returned in the most-significant end of the return
13975 register. */
13976
13977 static bool
13978 arm_return_in_msb (const_tree valtype)
13979 {
13980 return (TARGET_AAPCS_BASED
13981 && BYTES_BIG_ENDIAN
13982 && (AGGREGATE_TYPE_P (valtype)
13983 || TREE_CODE (valtype) == COMPLEX_TYPE
13984 || FIXED_POINT_TYPE_P (valtype)));
13985 }
13986
13987 /* Return TRUE if X references a SYMBOL_REF. */
13988 int
13989 symbol_mentioned_p (rtx x)
13990 {
13991 const char * fmt;
13992 int i;
13993
13994 if (SYMBOL_REF_P (x))
13995 return 1;
13996
13997 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13998 are constant offsets, not symbols. */
13999 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14000 return 0;
14001
14002 fmt = GET_RTX_FORMAT (GET_CODE (x));
14003
14004 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14005 {
14006 if (fmt[i] == 'E')
14007 {
14008 int j;
14009
14010 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14011 if (symbol_mentioned_p (XVECEXP (x, i, j)))
14012 return 1;
14013 }
14014 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
14015 return 1;
14016 }
14017
14018 return 0;
14019 }
14020
14021 /* Return TRUE if X references a LABEL_REF. */
14022 int
14023 label_mentioned_p (rtx x)
14024 {
14025 const char * fmt;
14026 int i;
14027
14028 if (LABEL_REF_P (x))
14029 return 1;
14030
14031 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14032 instruction, but they are constant offsets, not symbols. */
14033 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14034 return 0;
14035
14036 fmt = GET_RTX_FORMAT (GET_CODE (x));
14037 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14038 {
14039 if (fmt[i] == 'E')
14040 {
14041 int j;
14042
14043 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14044 if (label_mentioned_p (XVECEXP (x, i, j)))
14045 return 1;
14046 }
14047 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
14048 return 1;
14049 }
14050
14051 return 0;
14052 }
14053
14054 int
14055 tls_mentioned_p (rtx x)
14056 {
14057 switch (GET_CODE (x))
14058 {
14059 case CONST:
14060 return tls_mentioned_p (XEXP (x, 0));
14061
14062 case UNSPEC:
14063 if (XINT (x, 1) == UNSPEC_TLS)
14064 return 1;
14065
14066 /* Fall through. */
14067 default:
14068 return 0;
14069 }
14070 }
14071
14072 /* Must not copy any rtx that uses a pc-relative address.
14073 Also, disallow copying of load-exclusive instructions that
14074 may appear after splitting of compare-and-swap-style operations
14075 so as to prevent those loops from being transformed away from their
14076 canonical forms (see PR 69904). */
14077
14078 static bool
14079 arm_cannot_copy_insn_p (rtx_insn *insn)
14080 {
14081 /* The tls call insn cannot be copied, as it is paired with a data
14082 word. */
14083 if (recog_memoized (insn) == CODE_FOR_tlscall)
14084 return true;
14085
14086 subrtx_iterator::array_type array;
14087 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14088 {
14089 const_rtx x = *iter;
14090 if (GET_CODE (x) == UNSPEC
14091 && (XINT (x, 1) == UNSPEC_PIC_BASE
14092 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14093 return true;
14094 }
14095
14096 rtx set = single_set (insn);
14097 if (set)
14098 {
14099 rtx src = SET_SRC (set);
14100 if (GET_CODE (src) == ZERO_EXTEND)
14101 src = XEXP (src, 0);
14102
14103 /* Catch the load-exclusive and load-acquire operations. */
14104 if (GET_CODE (src) == UNSPEC_VOLATILE
14105 && (XINT (src, 1) == VUNSPEC_LL
14106 || XINT (src, 1) == VUNSPEC_LAX))
14107 return true;
14108 }
14109 return false;
14110 }
14111
14112 enum rtx_code
14113 minmax_code (rtx x)
14114 {
14115 enum rtx_code code = GET_CODE (x);
14116
14117 switch (code)
14118 {
14119 case SMAX:
14120 return GE;
14121 case SMIN:
14122 return LE;
14123 case UMIN:
14124 return LEU;
14125 case UMAX:
14126 return GEU;
14127 default:
14128 gcc_unreachable ();
14129 }
14130 }
14131
14132 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14133
14134 bool
14135 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14136 int *mask, bool *signed_sat)
14137 {
14138 /* The high bound must be a power of two minus one. */
14139 int log = exact_log2 (INTVAL (hi_bound) + 1);
14140 if (log == -1)
14141 return false;
14142
14143 /* The low bound is either zero (for usat) or one less than the
14144 negation of the high bound (for ssat). */
14145 if (INTVAL (lo_bound) == 0)
14146 {
14147 if (mask)
14148 *mask = log;
14149 if (signed_sat)
14150 *signed_sat = false;
14151
14152 return true;
14153 }
14154
14155 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14156 {
14157 if (mask)
14158 *mask = log + 1;
14159 if (signed_sat)
14160 *signed_sat = true;
14161
14162 return true;
14163 }
14164
14165 return false;
14166 }
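/* Two examples: bounds [0, 255] give log = 8 and match usat with *MASK = 8,
   while bounds [-128, 127] give log = 7 and, since -128 == -127 - 1, match
   ssat with *MASK = 8.  */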
14167
14168 /* Return 1 if memory locations are adjacent. */
14169 int
14170 adjacent_mem_locations (rtx a, rtx b)
14171 {
14172 /* We don't guarantee to preserve the order of these memory refs. */
14173 if (volatile_refs_p (a) || volatile_refs_p (b))
14174 return 0;
14175
14176 if ((REG_P (XEXP (a, 0))
14177 || (GET_CODE (XEXP (a, 0)) == PLUS
14178 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14179 && (REG_P (XEXP (b, 0))
14180 || (GET_CODE (XEXP (b, 0)) == PLUS
14181 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14182 {
14183 HOST_WIDE_INT val0 = 0, val1 = 0;
14184 rtx reg0, reg1;
14185 int val_diff;
14186
14187 if (GET_CODE (XEXP (a, 0)) == PLUS)
14188 {
14189 reg0 = XEXP (XEXP (a, 0), 0);
14190 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14191 }
14192 else
14193 reg0 = XEXP (a, 0);
14194
14195 if (GET_CODE (XEXP (b, 0)) == PLUS)
14196 {
14197 reg1 = XEXP (XEXP (b, 0), 0);
14198 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14199 }
14200 else
14201 reg1 = XEXP (b, 0);
14202
14203 /* Don't accept any offset that will require multiple
14204 instructions to handle, since this would cause the
14205 arith_adjacentmem pattern to output an overlong sequence. */
14206 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14207 return 0;
14208
14209 /* Don't allow an eliminable register: register elimination can make
14210 the offset too large. */
14211 if (arm_eliminable_register (reg0))
14212 return 0;
14213
14214 val_diff = val1 - val0;
14215
14216 if (arm_ld_sched)
14217 {
14218 /* If the target has load delay slots, then there's no benefit
14219 to using an ldm instruction unless the offset is zero and
14220 we are optimizing for size. */
14221 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14222 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14223 && (val_diff == 4 || val_diff == -4));
14224 }
14225
14226 return ((REGNO (reg0) == REGNO (reg1))
14227 && (val_diff == 4 || val_diff == -4));
14228 }
14229
14230 return 0;
14231 }
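/* For example (assuming a core without load delay slots, i.e. with
   arm_ld_sched clear): (mem:SI (plus:SI (reg:SI r4) (const_int 4))) and
   (mem:SI (plus:SI (reg:SI r4) (const_int 8))) share the base r4 and their
   offsets differ by exactly 4, so they count as adjacent; the same two
   references with different base registers, or 8 bytes apart, do not.  */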
14232
14233 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14234 for load operations, false for store operations. CONSECUTIVE is true
14235 if the register numbers in the operation must be consecutive in the register
14236 bank. RETURN_PC is true if value is to be loaded in PC.
14237 The pattern we are trying to match for load is:
14238 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14239 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14240 :
14241 :
14242 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14243 ]
14244 where
14245 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14246 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14247 3. If consecutive is TRUE, then for kth register being loaded,
14248 REGNO (R_dk) = REGNO (R_d0) + k.
14249 The pattern for store is similar. */
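/* For instance, a three-register pop with write-back ("ldmia sp!, {r4-r6}",
   assuming SImode and no return_pc) is expected to look like:

     (parallel
      [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
       (set (reg:SI r4) (mem:SI (reg:SI sp)))
       (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
       (set (reg:SI r6) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])

   where the write-back element adds (count - 1) * <reg_increment> == 12
   to the base.  */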
14250 bool
14251 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14252 bool consecutive, bool return_pc)
14253 {
14254 HOST_WIDE_INT count = XVECLEN (op, 0);
14255 rtx reg, mem, addr;
14256 unsigned regno;
14257 unsigned first_regno;
14258 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14259 rtx elt;
14260 bool addr_reg_in_reglist = false;
14261 bool update = false;
14262 int reg_increment;
14263 int offset_adj;
14264 int regs_per_val;
14265
14266 /* If not in SImode, then registers must be consecutive
14267 (e.g., VLDM instructions for DFmode). */
14268 gcc_assert ((mode == SImode) || consecutive);
14269 /* Setting return_pc for stores is illegal. */
14270 gcc_assert (!return_pc || load);
14271
14272 /* Set up the increments and the regs per val based on the mode. */
14273 reg_increment = GET_MODE_SIZE (mode);
14274 regs_per_val = reg_increment / 4;
14275 offset_adj = return_pc ? 1 : 0;
14276
14277 if (count <= 1
14278 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14279 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14280 return false;
14281
14282 /* Check if this is a write-back. */
14283 elt = XVECEXP (op, 0, offset_adj);
14284 if (GET_CODE (SET_SRC (elt)) == PLUS)
14285 {
14286 i++;
14287 base = 1;
14288 update = true;
14289
14290 /* The offset adjustment must be the number of registers being
14291 popped times the size of a single register. */
14292 if (!REG_P (SET_DEST (elt))
14293 || !REG_P (XEXP (SET_SRC (elt), 0))
14294 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14295 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14296 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14297 ((count - 1 - offset_adj) * reg_increment))
14298 return false;
14299 }
14300
14301 i = i + offset_adj;
14302 base = base + offset_adj;
14303 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14304 success depends on the type: VLDM can do just one reg,
14305 LDM must do at least two. */
14306 if ((count <= i) && (mode == SImode))
14307 return false;
14308
14309 elt = XVECEXP (op, 0, i - 1);
14310 if (GET_CODE (elt) != SET)
14311 return false;
14312
14313 if (load)
14314 {
14315 reg = SET_DEST (elt);
14316 mem = SET_SRC (elt);
14317 }
14318 else
14319 {
14320 reg = SET_SRC (elt);
14321 mem = SET_DEST (elt);
14322 }
14323
14324 if (!REG_P (reg) || !MEM_P (mem))
14325 return false;
14326
14327 regno = REGNO (reg);
14328 first_regno = regno;
14329 addr = XEXP (mem, 0);
14330 if (GET_CODE (addr) == PLUS)
14331 {
14332 if (!CONST_INT_P (XEXP (addr, 1)))
14333 return false;
14334
14335 offset = INTVAL (XEXP (addr, 1));
14336 addr = XEXP (addr, 0);
14337 }
14338
14339 if (!REG_P (addr))
14340 return false;
14341
14342 /* Don't allow SP to be loaded unless it is also the base register. It
14343 guarantees that SP is reset correctly when an LDM instruction
14344 is interrupted. Otherwise, we might end up with a corrupt stack. */
14345 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14346 return false;
14347
14348 if (regno == REGNO (addr))
14349 addr_reg_in_reglist = true;
14350
14351 for (; i < count; i++)
14352 {
14353 elt = XVECEXP (op, 0, i);
14354 if (GET_CODE (elt) != SET)
14355 return false;
14356
14357 if (load)
14358 {
14359 reg = SET_DEST (elt);
14360 mem = SET_SRC (elt);
14361 }
14362 else
14363 {
14364 reg = SET_SRC (elt);
14365 mem = SET_DEST (elt);
14366 }
14367
14368 if (!REG_P (reg)
14369 || GET_MODE (reg) != mode
14370 || REGNO (reg) <= regno
14371 || (consecutive
14372 && (REGNO (reg) !=
14373 (unsigned int) (first_regno + regs_per_val * (i - base))))
14374 /* Don't allow SP to be loaded unless it is also the base register. It
14375 guarantees that SP is reset correctly when an LDM instruction
14376 is interrupted. Otherwise, we might end up with a corrupt stack. */
14377 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14378 || !MEM_P (mem)
14379 || GET_MODE (mem) != mode
14380 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14381 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14382 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14383 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14384 offset + (i - base) * reg_increment))
14385 && (!REG_P (XEXP (mem, 0))
14386 || offset + (i - base) * reg_increment != 0)))
14387 return false;
14388
14389 regno = REGNO (reg);
14390 if (regno == REGNO (addr))
14391 addr_reg_in_reglist = true;
14392 }
14393
14394 if (load)
14395 {
14396 if (update && addr_reg_in_reglist)
14397 return false;
14398
14399 /* For Thumb-1, the address register is always modified, either by write-back
14400 or by explicit load. If the pattern does not describe an update,
14401 then the address register must be in the list of loaded registers. */
14402 if (TARGET_THUMB1)
14403 return update || addr_reg_in_reglist;
14404 }
14405
14406 return true;
14407 }
14408
14409 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14410 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14411 following form:
14412
14413 [(set (reg:SI <N>) (const_int 0))
14414 (set (reg:SI <M>) (const_int 0))
14415 ...
14416 (unspec_volatile [(const_int 0)]
14417 VUNSPEC_CLRM_APSR)
14418 (clobber (reg:CC CC_REGNUM))
14419 ]
14420
14421 Any number (including 0) of set expressions is valid, and the volatile unspec
14422 is optional. All registers but SP and PC are allowed, and registers must be
14423 in strictly increasing order.
14424
14425 To be a valid VSCCLRM pattern, OP must have the following form:
14426
14427 [(unspec_volatile [(const_int 0)]
14428 VUNSPEC_VSCCLRM_VPR)
14429 (set (reg:SF <N>) (const_int 0))
14430 (set (reg:SF <M>) (const_int 0))
14431 ...
14432 ]
14433
14434 As with CLRM, any number (including 0) of set expressions is valid; however,
14435 the volatile unspec is mandatory here. Any VFP single-precision register is
14436 accepted but all registers must be consecutive and in increasing order. */
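/* Illustrative examples: a CLRM clearing r1, r3 and r4 is a parallel of
   three (set (reg:SI <n>) (const_int 0)) expressions with strictly
   increasing register numbers, optionally followed by the VUNSPEC_CLRM_APSR
   unspec_volatile as the second-to-last element and the CC clobber last.
   A VSCCLRM clearing s4-s6 starts with the VUNSPEC_VSCCLRM_VPR
   unspec_volatile and then clears s4, s5 and s6; a non-consecutive list
   such as s4, s6 is rejected.  */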
14437
14438 bool
14439 clear_operation_p (rtx op, bool vfp)
14440 {
14441 unsigned regno;
14442 unsigned last_regno = INVALID_REGNUM;
14443 rtx elt, reg, zero;
14444 int count = XVECLEN (op, 0);
14445 int first_set = vfp ? 1 : 0;
14446 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14447
14448 for (int i = first_set; i < count; i++)
14449 {
14450 elt = XVECEXP (op, 0, i);
14451
14452 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14453 {
14454 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14455 || XVECLEN (elt, 0) != 1
14456 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14457 || i != count - 2)
14458 return false;
14459
14460 continue;
14461 }
14462
14463 if (GET_CODE (elt) == CLOBBER)
14464 continue;
14465
14466 if (GET_CODE (elt) != SET)
14467 return false;
14468
14469 reg = SET_DEST (elt);
14470 zero = SET_SRC (elt);
14471
14472 if (!REG_P (reg)
14473 || GET_MODE (reg) != expected_mode
14474 || zero != CONST0_RTX (SImode))
14475 return false;
14476
14477 regno = REGNO (reg);
14478
14479 if (vfp)
14480 {
14481 if (i != first_set && regno != last_regno + 1)
14482 return false;
14483 }
14484 else
14485 {
14486 if (regno == SP_REGNUM || regno == PC_REGNUM)
14487 return false;
14488 if (i != first_set && regno <= last_regno)
14489 return false;
14490 }
14491
14492 last_regno = regno;
14493 }
14494
14495 return true;
14496 }
14497
14498 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14499 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14500 instruction. ADD_OFFSET is nonzero if the base address register needs
14501 to be modified with an add instruction before we can use it. */
14502
14503 static bool
14504 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14505 int nops, HOST_WIDE_INT add_offset)
14506 {
14507 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14508 if the offset isn't small enough. The reason 2 ldrs are faster
14509 is because these ARMs are able to do more than one cache access
14510 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14511 whilst the ARM8 has a double bandwidth cache. This means that
14512 these cores can do both an instruction fetch and a data fetch in
14513 a single cycle, so the trick of calculating the address into a
14514 scratch register (one of the result regs) and then doing a load
14515 multiple actually becomes slower (and no smaller in code size).
14516 That is the transformation
14517
14518 ldr rd1, [rbase + offset]
14519 ldr rd2, [rbase + offset + 4]
14520
14521 to
14522
14523 add rd1, rbase, offset
14524 ldmia rd1, {rd1, rd2}
14525
14526 produces worse code -- '3 cycles + any stalls on rd2' instead of
14527 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14528 access per cycle, the first sequence could never complete in less
14529 than 6 cycles, whereas the ldm sequence would only take 5 and
14530 would make better use of sequential accesses if not hitting the
14531 cache.
14532
14533 We cheat here and test 'arm_ld_sched' which we currently know to
14534 only be true for the ARM8, ARM9 and StrongARM. If this ever
14535 changes, then the test below needs to be reworked. */
14536 if (nops == 2 && arm_ld_sched && add_offset != 0)
14537 return false;
14538
14539 /* XScale has load-store double instructions, but they have stricter
14540 alignment requirements than load-store multiple, so we cannot
14541 use them.
14542
14543 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14544 the pipeline until completion.
14545
14546 NREGS CYCLES
14547 1 3
14548 2 4
14549 3 5
14550 4 6
14551
14552 An ldr instruction takes 1-3 cycles, but does not block the
14553 pipeline.
14554
14555 NREGS CYCLES
14556 1 1-3
14557 2 2-6
14558 3 3-9
14559 4 4-12
14560
14561 Best case ldr will always win. However, the more ldr instructions
14562 we issue, the less likely we are to be able to schedule them well.
14563 Using ldr instructions also increases code size.
14564
14565 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14566 for counts of 3 or 4 regs. */
14567 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14568 return false;
14569 return true;
14570 }
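/* Illustrative outcomes of the heuristic above: combining just two loads is
   rejected when tuning for XScale and not optimizing for size, and likewise
   on cores with load scheduling when the base address first needs a
   separate add; groups of three or four loads are accepted.  */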
14571
14572 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14573 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14574 an array ORDER which describes the sequence to use when accessing the
14575 offsets that produces an ascending order. In this sequence, each
14576 offset must be larger by exactly 4 than the previous one. ORDER[0]
14577 must have been filled in with the lowest offset by the caller.
14578 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14579 we use to verify that ORDER produces an ascending order of registers.
14580 Return true if it was possible to construct such an order, false if
14581 not. */
14582
14583 static bool
14584 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14585 int *unsorted_regs)
14586 {
14587 int i;
14588 for (i = 1; i < nops; i++)
14589 {
14590 int j;
14591
14592 order[i] = order[i - 1];
14593 for (j = 0; j < nops; j++)
14594 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14595 {
14596 /* We must find exactly one offset that is higher than the
14597 previous one by 4. */
14598 if (order[i] != order[i - 1])
14599 return false;
14600 order[i] = j;
14601 }
14602 if (order[i] == order[i - 1])
14603 return false;
14604 /* The register numbers must be ascending. */
14605 if (unsorted_regs != NULL
14606 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14607 return false;
14608 }
14609 return true;
14610 }
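/* Worked example: with UNSORTED_OFFSETS == {8, 0, 4, 12} the caller sets
   ORDER[0] = 1 (the index of the lowest offset, 0); the loop then finds 4
   at index 2, 8 at index 0 and 12 at index 3, giving ORDER == {1, 2, 0, 3}.
   An array such as {0, 4, 4, 8} fails because two offsets are 4 greater
   than the previous one, and {0, 8, 12, 16} fails because no offset is
   exactly 4 greater than 0.  */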
14611
14612 /* Used to determine in a peephole whether a sequence of load
14613 instructions can be changed into a load-multiple instruction.
14614 NOPS is the number of separate load instructions we are examining. The
14615 first NOPS entries in OPERANDS are the destination registers, the
14616 next NOPS entries are memory operands. If this function is
14617 successful, *BASE is set to the common base register of the memory
14618 accesses; *LOAD_OFFSET is set to the first memory location's offset
14619 from that base register.
14620 REGS is an array filled in with the destination register numbers.
14621 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
14622 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14623 the sequence of registers in REGS matches the loads from ascending memory
14624 locations, and the function verifies that the register numbers are
14625 themselves ascending. If CHECK_REGS is false, the register numbers
14626 are stored in the order they are found in the operands. */
14627 static int
14628 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14629 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14630 {
14631 int unsorted_regs[MAX_LDM_STM_OPS];
14632 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14633 int order[MAX_LDM_STM_OPS];
14634 int base_reg = -1;
14635 int i, ldm_case;
14636
14637 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14638 easily extended if required. */
14639 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14640
14641 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14642
14643 /* Loop over the operands and check that the memory references are
14644 suitable (i.e. immediate offsets from the same base register). At
14645 the same time, extract the target register, and the memory
14646 offsets. */
14647 for (i = 0; i < nops; i++)
14648 {
14649 rtx reg;
14650 rtx offset;
14651
14652 /* Convert a subreg of a mem into the mem itself. */
14653 if (GET_CODE (operands[nops + i]) == SUBREG)
14654 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14655
14656 gcc_assert (MEM_P (operands[nops + i]));
14657
14658 /* Don't reorder volatile memory references; it doesn't seem worth
14659 looking for the case where the order is ok anyway. */
14660 if (MEM_VOLATILE_P (operands[nops + i]))
14661 return 0;
14662
14663 offset = const0_rtx;
14664
14665 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14666 || (SUBREG_P (reg)
14667 && REG_P (reg = SUBREG_REG (reg))))
14668 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14669 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14670 || (SUBREG_P (reg)
14671 && REG_P (reg = SUBREG_REG (reg))))
14672 && (CONST_INT_P (offset
14673 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14674 {
14675 if (i == 0)
14676 {
14677 base_reg = REGNO (reg);
14678 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14679 return 0;
14680 }
14681 else if (base_reg != (int) REGNO (reg))
14682 /* Not addressed from the same base register. */
14683 return 0;
14684
14685 unsorted_regs[i] = (REG_P (operands[i])
14686 ? REGNO (operands[i])
14687 : REGNO (SUBREG_REG (operands[i])));
14688
14689 /* If it isn't an integer register, or if it overwrites the
14690 base register but isn't the last insn in the list, then
14691 we can't do this. */
14692 if (unsorted_regs[i] < 0
14693 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14694 || unsorted_regs[i] > 14
14695 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14696 return 0;
14697
14698 /* Don't allow SP to be loaded unless it is also the base
14699 register. It guarantees that SP is reset correctly when
14700 an LDM instruction is interrupted. Otherwise, we might
14701 end up with a corrupt stack. */
14702 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14703 return 0;
14704
14705 unsorted_offsets[i] = INTVAL (offset);
14706 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14707 order[0] = i;
14708 }
14709 else
14710 /* Not a suitable memory address. */
14711 return 0;
14712 }
14713
14714 /* All the useful information has now been extracted from the
14715 operands into unsorted_regs and unsorted_offsets; additionally,
14716 order[0] has been set to the lowest offset in the list. Sort
14717 the offsets into order, verifying that they are adjacent, and
14718 check that the register numbers are ascending. */
14719 if (!compute_offset_order (nops, unsorted_offsets, order,
14720 check_regs ? unsorted_regs : NULL))
14721 return 0;
14722
14723 if (saved_order)
14724 memcpy (saved_order, order, sizeof order);
14725
14726 if (base)
14727 {
14728 *base = base_reg;
14729
14730 for (i = 0; i < nops; i++)
14731 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14732
14733 *load_offset = unsorted_offsets[order[0]];
14734 }
14735
14736 if (unsorted_offsets[order[0]] == 0)
14737 ldm_case = 1; /* ldmia */
14738 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14739 ldm_case = 2; /* ldmib */
14740 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14741 ldm_case = 3; /* ldmda */
14742 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14743 ldm_case = 4; /* ldmdb */
14744 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14745 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14746 ldm_case = 5;
14747 else
14748 return 0;
14749
14750 if (!multiple_operation_profitable_p (false, nops,
14751 ldm_case == 5
14752 ? unsorted_offsets[order[0]] : 0))
14753 return 0;
14754
14755 return ldm_case;
14756 }
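/* Examples of the classification above: sorted offsets {0, 4, 8} give
   case 1 (ldmia); {4, 8, 12} give case 2 (ldmib, ARM state only);
   {-8, -4, 0} give case 3 (ldmda, ARM state only); {-12, -8, -4} give
   case 4 (ldmdb); any other run whose lowest offset (or its negation) is a
   valid add immediate gives case 5, which the callers handle as an add
   followed by an ldmia.  */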
14757
14758 /* Used to determine in a peephole whether a sequence of store instructions can
14759 be changed into a store-multiple instruction.
14760 NOPS is the number of separate store instructions we are examining.
14761 NOPS_TOTAL is the total number of instructions recognized by the peephole
14762 pattern.
14763 The first NOPS entries in OPERANDS are the source registers, the next
14764 NOPS entries are memory operands. If this function is successful, *BASE is
14765 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14766 to the first memory location's offset from that base register. REGS is an
14767 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14768 likewise filled with the corresponding rtx's.
14769 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
14770 numbers to an ascending order of stores.
14771 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14772 from ascending memory locations, and the function verifies that the register
14773 numbers are themselves ascending. If CHECK_REGS is false, the register
14774 numbers are stored in the order they are found in the operands. */
14775 static int
14776 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14777 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14778 HOST_WIDE_INT *load_offset, bool check_regs)
14779 {
14780 int unsorted_regs[MAX_LDM_STM_OPS];
14781 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14782 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14783 int order[MAX_LDM_STM_OPS];
14784 int base_reg = -1;
14785 rtx base_reg_rtx = NULL;
14786 int i, stm_case;
14787
14788 /* Write-back of the base register is currently only supported for Thumb-1. */
14789 int base_writeback = TARGET_THUMB1;
14790
14791 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14792 easily extended if required. */
14793 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14794
14795 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14796
14797 /* Loop over the operands and check that the memory references are
14798 suitable (i.e. immediate offsets from the same base register). At
14799 the same time, extract the target register, and the memory
14800 offsets. */
14801 for (i = 0; i < nops; i++)
14802 {
14803 rtx reg;
14804 rtx offset;
14805
14806 /* Convert a subreg of a mem into the mem itself. */
14807 if (GET_CODE (operands[nops + i]) == SUBREG)
14808 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14809
14810 gcc_assert (MEM_P (operands[nops + i]));
14811
14812 /* Don't reorder volatile memory references; it doesn't seem worth
14813 looking for the case where the order is ok anyway. */
14814 if (MEM_VOLATILE_P (operands[nops + i]))
14815 return 0;
14816
14817 offset = const0_rtx;
14818
14819 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14820 || (SUBREG_P (reg)
14821 && REG_P (reg = SUBREG_REG (reg))))
14822 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14823 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14824 || (SUBREG_P (reg)
14825 && REG_P (reg = SUBREG_REG (reg))))
14826 && (CONST_INT_P (offset
14827 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14828 {
14829 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14830 ? operands[i] : SUBREG_REG (operands[i]));
14831 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14832
14833 if (i == 0)
14834 {
14835 base_reg = REGNO (reg);
14836 base_reg_rtx = reg;
14837 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14838 return 0;
14839 }
14840 else if (base_reg != (int) REGNO (reg))
14841 /* Not addressed from the same base register. */
14842 return 0;
14843
14844 /* If it isn't an integer register, then we can't do this. */
14845 if (unsorted_regs[i] < 0
14846 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14847 /* The effects are unpredictable if the base register is
14848 both updated and stored. */
14849 || (base_writeback && unsorted_regs[i] == base_reg)
14850 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14851 || unsorted_regs[i] > 14)
14852 return 0;
14853
14854 unsorted_offsets[i] = INTVAL (offset);
14855 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14856 order[0] = i;
14857 }
14858 else
14859 /* Not a suitable memory address. */
14860 return 0;
14861 }
14862
14863 /* All the useful information has now been extracted from the
14864 operands into unsorted_regs and unsorted_offsets; additionally,
14865 order[0] has been set to the lowest offset in the list. Sort
14866 the offsets into order, verifying that they are adjacent, and
14867 check that the register numbers are ascending. */
14868 if (!compute_offset_order (nops, unsorted_offsets, order,
14869 check_regs ? unsorted_regs : NULL))
14870 return 0;
14871
14872 if (saved_order)
14873 memcpy (saved_order, order, sizeof order);
14874
14875 if (base)
14876 {
14877 *base = base_reg;
14878
14879 for (i = 0; i < nops; i++)
14880 {
14881 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14882 if (reg_rtxs)
14883 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14884 }
14885
14886 *load_offset = unsorted_offsets[order[0]];
14887 }
14888
14889 if (TARGET_THUMB1
14890 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14891 return 0;
14892
14893 if (unsorted_offsets[order[0]] == 0)
14894 stm_case = 1; /* stmia */
14895 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14896 stm_case = 2; /* stmib */
14897 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14898 stm_case = 3; /* stmda */
14899 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14900 stm_case = 4; /* stmdb */
14901 else
14902 return 0;
14903
14904 if (!multiple_operation_profitable_p (false, nops, 0))
14905 return 0;
14906
14907 return stm_case;
14908 }
14909 \f
14910 /* Routines for use in generating RTL. */
14911
14912 /* Generate a load-multiple instruction. COUNT is the number of loads in
14913 the instruction; REGS and MEMS are arrays containing the operands.
14914 BASEREG is the base register to be used in addressing the memory operands.
14915 WBACK_OFFSET is nonzero if the instruction should update the base
14916 register. */
14917
14918 static rtx
14919 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14920 HOST_WIDE_INT wback_offset)
14921 {
14922 int i = 0, j;
14923 rtx result;
14924
14925 if (!multiple_operation_profitable_p (false, count, 0))
14926 {
14927 rtx seq;
14928
14929 start_sequence ();
14930
14931 for (i = 0; i < count; i++)
14932 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14933
14934 if (wback_offset != 0)
14935 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14936
14937 seq = get_insns ();
14938 end_sequence ();
14939
14940 return seq;
14941 }
14942
14943 result = gen_rtx_PARALLEL (VOIDmode,
14944 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14945 if (wback_offset != 0)
14946 {
14947 XVECEXP (result, 0, 0)
14948 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14949 i = 1;
14950 count++;
14951 }
14952
14953 for (j = 0; i < count; i++, j++)
14954 XVECEXP (result, 0, i)
14955 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14956
14957 return result;
14958 }
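/* For example, with COUNT == 2, REGS == {4, 5}, BASEREG == r6 and
   WBACK_OFFSET == 8 (and assuming the operation is judged profitable), the
   result is roughly

     (parallel
      [(set (reg:SI r6) (plus:SI (reg:SI r6) (const_int 8)))
       (set (reg:SI r4) MEMS[0])
       (set (reg:SI r5) MEMS[1])])

   i.e. "ldmia r6!, {r4, r5}" when MEMS address r6 and r6 + 4; in the
   unprofitable case a plain sequence of single loads, plus an add of
   WBACK_OFFSET, is returned instead.  */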
14959
14960 /* Generate a store-multiple instruction. COUNT is the number of stores in
14961 the instruction; REGS and MEMS are arrays containing the operands.
14962 BASEREG is the base register to be used in addressing the memory operands.
14963 WBACK_OFFSET is nonzero if the instruction should update the base
14964 register. */
14965
14966 static rtx
14967 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14968 HOST_WIDE_INT wback_offset)
14969 {
14970 int i = 0, j;
14971 rtx result;
14972
14973 if (GET_CODE (basereg) == PLUS)
14974 basereg = XEXP (basereg, 0);
14975
14976 if (!multiple_operation_profitable_p (false, count, 0))
14977 {
14978 rtx seq;
14979
14980 start_sequence ();
14981
14982 for (i = 0; i < count; i++)
14983 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14984
14985 if (wback_offset != 0)
14986 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14987
14988 seq = get_insns ();
14989 end_sequence ();
14990
14991 return seq;
14992 }
14993
14994 result = gen_rtx_PARALLEL (VOIDmode,
14995 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14996 if (wback_offset != 0)
14997 {
14998 XVECEXP (result, 0, 0)
14999 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
15000 i = 1;
15001 count++;
15002 }
15003
15004 for (j = 0; i < count; i++, j++)
15005 XVECEXP (result, 0, i)
15006 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
15007
15008 return result;
15009 }
15010
15011 /* Generate either a load-multiple or a store-multiple instruction. This
15012 function can be used in situations where we can start with a single MEM
15013 rtx and adjust its address upwards.
15014 COUNT is the number of operations in the instruction, not counting a
15015 possible update of the base register. REGS is an array containing the
15016 register operands.
15017 BASEREG is the base register to be used in addressing the memory operands,
15018 which are constructed from BASEMEM.
15019 WRITE_BACK specifies whether the generated instruction should include an
15020 update of the base register.
15021 OFFSETP is used to pass an offset to and from this function; this offset
15022 is not used when constructing the address (instead BASEMEM should have an
15023 appropriate offset in its address); it is used only for setting
15024 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
15025
15026 static rtx
15027 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
15028 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
15029 {
15030 rtx mems[MAX_LDM_STM_OPS];
15031 HOST_WIDE_INT offset = *offsetp;
15032 int i;
15033
15034 gcc_assert (count <= MAX_LDM_STM_OPS);
15035
15036 if (GET_CODE (basereg) == PLUS)
15037 basereg = XEXP (basereg, 0);
15038
15039 for (i = 0; i < count; i++)
15040 {
15041 rtx addr = plus_constant (Pmode, basereg, i * 4);
15042 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
15043 offset += 4;
15044 }
15045
15046 if (write_back)
15047 *offsetp = offset;
15048
15049 if (is_load)
15050 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15051 write_back ? 4 * count : 0);
15052 else
15053 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15054 write_back ? 4 * count : 0);
15055 }
15056
15057 rtx
15058 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15059 rtx basemem, HOST_WIDE_INT *offsetp)
15060 {
15061 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15062 offsetp);
15063 }
15064
15065 rtx
15066 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15067 rtx basemem, HOST_WIDE_INT *offsetp)
15068 {
15069 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15070 offsetp);
15071 }
15072
15073 /* Called from a peephole2 expander to turn a sequence of loads into an
15074 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15075 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15076 is true if we can reorder the registers because their subsequent uses are
15077 commutative.
15078 Returns true iff we could generate a new instruction. */
15079
15080 bool
15081 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15082 {
15083 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15084 rtx mems[MAX_LDM_STM_OPS];
15085 int i, j, base_reg;
15086 rtx base_reg_rtx;
15087 HOST_WIDE_INT offset;
15088 int write_back = FALSE;
15089 int ldm_case;
15090 rtx addr;
15091
15092 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15093 &base_reg, &offset, !sort_regs);
15094
15095 if (ldm_case == 0)
15096 return false;
15097
15098 if (sort_regs)
15099 for (i = 0; i < nops - 1; i++)
15100 for (j = i + 1; j < nops; j++)
15101 if (regs[i] > regs[j])
15102 {
15103 int t = regs[i];
15104 regs[i] = regs[j];
15105 regs[j] = t;
15106 }
15107 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15108
15109 if (TARGET_THUMB1)
15110 {
15111 gcc_assert (ldm_case == 1 || ldm_case == 5);
15112
15113 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15114 write_back = true;
15115 for (i = 0; i < nops; i++)
15116 if (base_reg == regs[i])
15117 write_back = false;
15118
15119 /* Ensure the base is dead if it is updated. */
15120 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15121 return false;
15122 }
15123
15124 if (ldm_case == 5)
15125 {
15126 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15127 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15128 offset = 0;
15129 base_reg_rtx = newbase;
15130 }
15131
15132 for (i = 0; i < nops; i++)
15133 {
15134 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15135 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15136 SImode, addr, 0);
15137 }
15138 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15139 write_back ? offset + i * 4 : 0));
15140 return true;
15141 }
15142
15143 /* Called from a peephole2 expander to turn a sequence of stores into an
15144 STM instruction. OPERANDS are the operands found by the peephole matcher;
15145 NOPS indicates how many separate stores we are trying to combine.
15146 Returns true iff we could generate a new instruction. */
15147
15148 bool
15149 gen_stm_seq (rtx *operands, int nops)
15150 {
15151 int i;
15152 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15153 rtx mems[MAX_LDM_STM_OPS];
15154 int base_reg;
15155 rtx base_reg_rtx;
15156 HOST_WIDE_INT offset;
15157 int write_back = FALSE;
15158 int stm_case;
15159 rtx addr;
15160 bool base_reg_dies;
15161
15162 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15163 mem_order, &base_reg, &offset, true);
15164
15165 if (stm_case == 0)
15166 return false;
15167
15168 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15169
15170 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15171 if (TARGET_THUMB1)
15172 {
15173 gcc_assert (base_reg_dies);
15174 write_back = TRUE;
15175 }
15176
15177 if (stm_case == 5)
15178 {
15179 gcc_assert (base_reg_dies);
15180 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15181 offset = 0;
15182 }
15183
15184 addr = plus_constant (Pmode, base_reg_rtx, offset);
15185
15186 for (i = 0; i < nops; i++)
15187 {
15188 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15189 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15190 SImode, addr, 0);
15191 }
15192 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15193 write_back ? offset + i * 4 : 0));
15194 return true;
15195 }
15196
15197 /* Called from a peephole2 expander to turn a sequence of stores that are
15198 preceded by constant loads into an STM instruction. OPERANDS are the
15199 operands found by the peephole matcher; NOPS indicates how many
15200 separate stores we are trying to combine; there are 2 * NOPS
15201 instructions in the peephole.
15202 Returns true iff we could generate a new instruction. */
15203
15204 bool
15205 gen_const_stm_seq (rtx *operands, int nops)
15206 {
15207 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15208 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15209 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15210 rtx mems[MAX_LDM_STM_OPS];
15211 int base_reg;
15212 rtx base_reg_rtx;
15213 HOST_WIDE_INT offset;
15214 int write_back = FALSE;
15215 int stm_case;
15216 rtx addr;
15217 bool base_reg_dies;
15218 int i, j;
15219 HARD_REG_SET allocated;
15220
15221 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15222 mem_order, &base_reg, &offset, false);
15223
15224 if (stm_case == 0)
15225 return false;
15226
15227 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15228
15229 /* If the same register is used more than once, try to find a free
15230 register. */
15231 CLEAR_HARD_REG_SET (allocated);
15232 for (i = 0; i < nops; i++)
15233 {
15234 for (j = i + 1; j < nops; j++)
15235 if (regs[i] == regs[j])
15236 {
15237 rtx t = peep2_find_free_register (0, nops * 2,
15238 TARGET_THUMB1 ? "l" : "r",
15239 SImode, &allocated);
15240 if (t == NULL_RTX)
15241 return false;
15242 reg_rtxs[i] = t;
15243 regs[i] = REGNO (t);
15244 }
15245 }
15246
15247 /* Compute an ordering that maps the register numbers to an ascending
15248 sequence. */
15249 reg_order[0] = 0;
15250 for (i = 0; i < nops; i++)
15251 if (regs[i] < regs[reg_order[0]])
15252 reg_order[0] = i;
15253
15254 for (i = 1; i < nops; i++)
15255 {
15256 int this_order = reg_order[i - 1];
15257 for (j = 0; j < nops; j++)
15258 if (regs[j] > regs[reg_order[i - 1]]
15259 && (this_order == reg_order[i - 1]
15260 || regs[j] < regs[this_order]))
15261 this_order = j;
15262 reg_order[i] = this_order;
15263 }
15264
15265 /* Ensure that registers that must be live after the instruction end
15266 up with the correct value. */
15267 for (i = 0; i < nops; i++)
15268 {
15269 int this_order = reg_order[i];
15270 if ((this_order != mem_order[i]
15271 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15272 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15273 return false;
15274 }
15275
15276 /* Load the constants. */
15277 for (i = 0; i < nops; i++)
15278 {
15279 rtx op = operands[2 * nops + mem_order[i]];
15280 sorted_regs[i] = regs[reg_order[i]];
15281 emit_move_insn (reg_rtxs[reg_order[i]], op);
15282 }
15283
15284 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15285
15286 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15287 if (TARGET_THUMB1)
15288 {
15289 gcc_assert (base_reg_dies);
15290 write_back = TRUE;
15291 }
15292
15293 if (stm_case == 5)
15294 {
15295 gcc_assert (base_reg_dies);
15296 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15297 offset = 0;
15298 }
15299
15300 addr = plus_constant (Pmode, base_reg_rtx, offset);
15301
15302 for (i = 0; i < nops; i++)
15303 {
15304 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15305 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15306 SImode, addr, 0);
15307 }
15308 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15309 write_back ? offset + i * 4 : 0));
15310 return true;
15311 }
15312
15313 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15314 unaligned copies on processors which support unaligned semantics for those
15315 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15316 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15317 An interleave factor of 1 (the minimum) will perform no interleaving.
15318 Load/store multiple are used for aligned addresses where possible. */
15319
15320 static void
15321 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15322 HOST_WIDE_INT length,
15323 unsigned int interleave_factor)
15324 {
15325 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15326 int *regnos = XALLOCAVEC (int, interleave_factor);
15327 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15328 HOST_WIDE_INT i, j;
15329 HOST_WIDE_INT remaining = length, words;
15330 rtx halfword_tmp = NULL, byte_tmp = NULL;
15331 rtx dst, src;
15332 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15333 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15334 HOST_WIDE_INT srcoffset, dstoffset;
15335 HOST_WIDE_INT src_autoinc, dst_autoinc;
15336 rtx mem, addr;
15337
15338 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15339
15340 /* Use hard registers if we have aligned source or destination so we can use
15341 load/store multiple with contiguous registers. */
15342 if (dst_aligned || src_aligned)
15343 for (i = 0; i < interleave_factor; i++)
15344 regs[i] = gen_rtx_REG (SImode, i);
15345 else
15346 for (i = 0; i < interleave_factor; i++)
15347 regs[i] = gen_reg_rtx (SImode);
15348
15349 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15350 src = copy_addr_to_reg (XEXP (srcbase, 0));
15351
15352 srcoffset = dstoffset = 0;
15353
15354 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15355 For copying the last bytes we want to subtract this offset again. */
15356 src_autoinc = dst_autoinc = 0;
15357
15358 for (i = 0; i < interleave_factor; i++)
15359 regnos[i] = i;
15360
15361 /* Copy BLOCK_SIZE_BYTES chunks. */
15362
15363 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15364 {
15365 /* Load words. */
15366 if (src_aligned && interleave_factor > 1)
15367 {
15368 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15369 TRUE, srcbase, &srcoffset));
15370 src_autoinc += UNITS_PER_WORD * interleave_factor;
15371 }
15372 else
15373 {
15374 for (j = 0; j < interleave_factor; j++)
15375 {
15376 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15377 - src_autoinc));
15378 mem = adjust_automodify_address (srcbase, SImode, addr,
15379 srcoffset + j * UNITS_PER_WORD);
15380 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15381 }
15382 srcoffset += block_size_bytes;
15383 }
15384
15385 /* Store words. */
15386 if (dst_aligned && interleave_factor > 1)
15387 {
15388 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15389 TRUE, dstbase, &dstoffset));
15390 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15391 }
15392 else
15393 {
15394 for (j = 0; j < interleave_factor; j++)
15395 {
15396 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15397 - dst_autoinc));
15398 mem = adjust_automodify_address (dstbase, SImode, addr,
15399 dstoffset + j * UNITS_PER_WORD);
15400 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15401 }
15402 dstoffset += block_size_bytes;
15403 }
15404
15405 remaining -= block_size_bytes;
15406 }
15407
15408 /* Copy any whole words left (note these aren't interleaved with any
15409 subsequent halfword/byte load/stores in the interests of simplicity). */
15410
15411 words = remaining / UNITS_PER_WORD;
15412
15413 gcc_assert (words < interleave_factor);
15414
15415 if (src_aligned && words > 1)
15416 {
15417 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15418 &srcoffset));
15419 src_autoinc += UNITS_PER_WORD * words;
15420 }
15421 else
15422 {
15423 for (j = 0; j < words; j++)
15424 {
15425 addr = plus_constant (Pmode, src,
15426 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15427 mem = adjust_automodify_address (srcbase, SImode, addr,
15428 srcoffset + j * UNITS_PER_WORD);
15429 if (src_aligned)
15430 emit_move_insn (regs[j], mem);
15431 else
15432 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15433 }
15434 srcoffset += words * UNITS_PER_WORD;
15435 }
15436
15437 if (dst_aligned && words > 1)
15438 {
15439 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15440 &dstoffset));
15441 dst_autoinc += words * UNITS_PER_WORD;
15442 }
15443 else
15444 {
15445 for (j = 0; j < words; j++)
15446 {
15447 addr = plus_constant (Pmode, dst,
15448 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15449 mem = adjust_automodify_address (dstbase, SImode, addr,
15450 dstoffset + j * UNITS_PER_WORD);
15451 if (dst_aligned)
15452 emit_move_insn (mem, regs[j]);
15453 else
15454 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15455 }
15456 dstoffset += words * UNITS_PER_WORD;
15457 }
15458
15459 remaining -= words * UNITS_PER_WORD;
15460
15461 gcc_assert (remaining < 4);
15462
15463 /* Copy a halfword if necessary. */
15464
15465 if (remaining >= 2)
15466 {
15467 halfword_tmp = gen_reg_rtx (SImode);
15468
15469 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15470 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15471 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15472
15473 /* Either write out immediately, or delay until we've loaded the last
15474 byte, depending on interleave factor. */
15475 if (interleave_factor == 1)
15476 {
15477 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15478 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15479 emit_insn (gen_unaligned_storehi (mem,
15480 gen_lowpart (HImode, halfword_tmp)));
15481 halfword_tmp = NULL;
15482 dstoffset += 2;
15483 }
15484
15485 remaining -= 2;
15486 srcoffset += 2;
15487 }
15488
15489 gcc_assert (remaining < 2);
15490
15491 /* Copy last byte. */
15492
15493 if ((remaining & 1) != 0)
15494 {
15495 byte_tmp = gen_reg_rtx (SImode);
15496
15497 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15498 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15499 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15500
15501 if (interleave_factor == 1)
15502 {
15503 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15504 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15505 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15506 byte_tmp = NULL;
15507 dstoffset++;
15508 }
15509
15510 remaining--;
15511 srcoffset++;
15512 }
15513
15514 /* Store last halfword if we haven't done so already. */
15515
15516 if (halfword_tmp)
15517 {
15518 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15519 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15520 emit_insn (gen_unaligned_storehi (mem,
15521 gen_lowpart (HImode, halfword_tmp)));
15522 dstoffset += 2;
15523 }
15524
15525 /* Likewise for last byte. */
15526
15527 if (byte_tmp)
15528 {
15529 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15530 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15531 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15532 dstoffset++;
15533 }
15534
15535 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15536 }
15537
15538 /* From mips_adjust_block_mem:
15539
15540 Helper function for doing a loop-based block operation on memory
15541 reference MEM. Each iteration of the loop will operate on LENGTH
15542 bytes of MEM.
15543
15544 Create a new base register for use within the loop and point it to
15545 the start of MEM. Create a new memory reference that uses this
15546 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15547
15548 static void
15549 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15550 rtx *loop_mem)
15551 {
15552 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15553
15554 /* Although the new mem does not refer to a known location,
15555 it does keep up to LENGTH bytes of alignment. */
15556 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15557 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15558 }
15559
15560 /* From mips_block_move_loop:
15561
15562 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15563 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15564 the memory regions do not overlap. */
15565
15566 static void
15567 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15568 unsigned int interleave_factor,
15569 HOST_WIDE_INT bytes_per_iter)
15570 {
15571 rtx src_reg, dest_reg, final_src, test;
15572 HOST_WIDE_INT leftover;
15573
15574 leftover = length % bytes_per_iter;
15575 length -= leftover;
15576
15577 /* Create registers and memory references for use within the loop. */
15578 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15579 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15580
15581 /* Calculate the value that SRC_REG should have after the last iteration of
15582 the loop. */
15583 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15584 0, 0, OPTAB_WIDEN);
15585
15586 /* Emit the start of the loop. */
15587 rtx_code_label *label = gen_label_rtx ();
15588 emit_label (label);
15589
15590 /* Emit the loop body. */
15591 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15592 interleave_factor);
15593
15594 /* Move on to the next block. */
15595 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15596 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15597
15598 /* Emit the loop condition. */
15599 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15600 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15601
15602 /* Mop up any left-over bytes. */
15603 if (leftover)
15604 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15605 }
15606
15607 /* Emit a block move when either the source or destination is unaligned (not
15608 aligned to a four-byte boundary). This may need further tuning depending on
15609 core type, optimize_size setting, etc. */
15610
15611 static int
15612 arm_cpymemqi_unaligned (rtx *operands)
15613 {
15614 HOST_WIDE_INT length = INTVAL (operands[2]);
15615
15616 if (optimize_size)
15617 {
15618 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15619 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15620 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15621 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15622 or dst_aligned though: allow more interleaving in those cases since the
15623 resulting code can be smaller. */
15624 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15625 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15626
15627 if (length > 12)
15628 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15629 interleave_factor, bytes_per_iter);
15630 else
15631 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15632 interleave_factor);
15633 }
15634 else
15635 {
15636 /* Note that the loop created by arm_block_move_unaligned_loop may be
15637 subject to loop unrolling, which makes tuning this condition a little
15638 redundant. */
15639 if (length > 32)
15640 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15641 else
15642 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15643 }
15644
15645 return 1;
15646 }
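/* For instance, when optimizing for size, a 40-byte copy with a
   word-aligned destination uses the loop with an interleave factor of 2
   and 8 bytes per iteration (five iterations, no leftover), while a
   12-byte copy is expanded straight-line.  When optimizing for speed,
   copies of more than 32 bytes use a 16-bytes-per-iteration loop with an
   interleave factor of 4.  */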
15647
15648 int
15649 arm_gen_cpymemqi (rtx *operands)
15650 {
15651 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15652 HOST_WIDE_INT srcoffset, dstoffset;
15653 rtx src, dst, srcbase, dstbase;
15654 rtx part_bytes_reg = NULL;
15655 rtx mem;
15656
15657 if (!CONST_INT_P (operands[2])
15658 || !CONST_INT_P (operands[3])
15659 || INTVAL (operands[2]) > 64)
15660 return 0;
15661
15662 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15663 return arm_cpymemqi_unaligned (operands);
15664
15665 if (INTVAL (operands[3]) & 3)
15666 return 0;
15667
15668 dstbase = operands[0];
15669 srcbase = operands[1];
15670
15671 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15672 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15673
15674 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15675 out_words_to_go = INTVAL (operands[2]) / 4;
15676 last_bytes = INTVAL (operands[2]) & 3;
15677 dstoffset = srcoffset = 0;
15678
15679 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15680 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15681
15682 while (in_words_to_go >= 2)
15683 {
15684 if (in_words_to_go > 4)
15685 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15686 TRUE, srcbase, &srcoffset));
15687 else
15688 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15689 src, FALSE, srcbase,
15690 &srcoffset));
15691
15692 if (out_words_to_go)
15693 {
15694 if (out_words_to_go > 4)
15695 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15696 TRUE, dstbase, &dstoffset));
15697 else if (out_words_to_go != 1)
15698 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15699 out_words_to_go, dst,
15700 (last_bytes == 0
15701 ? FALSE : TRUE),
15702 dstbase, &dstoffset));
15703 else
15704 {
15705 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15706 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15707 if (last_bytes != 0)
15708 {
15709 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15710 dstoffset += 4;
15711 }
15712 }
15713 }
15714
15715 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15716 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15717 }
15718
15719 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15720 if (out_words_to_go)
15721 {
15722 rtx sreg;
15723
15724 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15725 sreg = copy_to_reg (mem);
15726
15727 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15728 emit_move_insn (mem, sreg);
15729 in_words_to_go--;
15730
15731 gcc_assert (!in_words_to_go); /* Sanity check */
15732 }
15733
15734 if (in_words_to_go)
15735 {
15736 gcc_assert (in_words_to_go > 0);
15737
15738 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15739 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15740 }
15741
15742 gcc_assert (!last_bytes || part_bytes_reg);
15743
15744 if (BYTES_BIG_ENDIAN && last_bytes)
15745 {
15746 rtx tmp = gen_reg_rtx (SImode);
15747
15748 /* The bytes we want are in the top end of the word. */
15749 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15750 GEN_INT (8 * (4 - last_bytes))));
15751 part_bytes_reg = tmp;
15752
15753 while (last_bytes)
15754 {
15755 mem = adjust_automodify_address (dstbase, QImode,
15756 plus_constant (Pmode, dst,
15757 last_bytes - 1),
15758 dstoffset + last_bytes - 1);
15759 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15760
15761 if (--last_bytes)
15762 {
15763 tmp = gen_reg_rtx (SImode);
15764 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15765 part_bytes_reg = tmp;
15766 }
15767 }
15768
15769 }
15770 else
15771 {
15772 if (last_bytes > 1)
15773 {
15774 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15775 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15776 last_bytes -= 2;
15777 if (last_bytes)
15778 {
15779 rtx tmp = gen_reg_rtx (SImode);
15780 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15781 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15782 part_bytes_reg = tmp;
15783 dstoffset += 2;
15784 }
15785 }
15786
15787 if (last_bytes)
15788 {
15789 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15790 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15791 }
15792 }
15793
15794 return 1;
15795 }
15796
15797 /* Helper for gen_cpymem_ldrd_strd. Increase the address of the memory rtx
15798 by its mode size. */
15799 inline static rtx
15800 next_consecutive_mem (rtx mem)
15801 {
15802 machine_mode mode = GET_MODE (mem);
15803 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15804 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15805
15806 return adjust_automodify_address (mem, mode, addr, offset);
15807 }
15808
15809 /* Copy using LDRD/STRD instructions whenever possible.
15810 Returns true upon success. */
15811 bool
15812 gen_cpymem_ldrd_strd (rtx *operands)
15813 {
15814 unsigned HOST_WIDE_INT len;
15815 HOST_WIDE_INT align;
15816 rtx src, dst, base;
15817 rtx reg0;
15818 bool src_aligned, dst_aligned;
15819 bool src_volatile, dst_volatile;
15820
15821 gcc_assert (CONST_INT_P (operands[2]));
15822 gcc_assert (CONST_INT_P (operands[3]));
15823
15824 len = UINTVAL (operands[2]);
15825 if (len > 64)
15826 return false;
15827
15828 /* Maximum alignment we can assume for both src and dst buffers. */
15829 align = INTVAL (operands[3]);
15830
15831 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15832 return false;
15833
15834 /* Place src and dst addresses in registers
15835 and update the corresponding mem rtx. */
15836 dst = operands[0];
15837 dst_volatile = MEM_VOLATILE_P (dst);
15838 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15839 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15840 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15841
15842 src = operands[1];
15843 src_volatile = MEM_VOLATILE_P (src);
15844 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15845 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15846 src = adjust_automodify_address (src, VOIDmode, base, 0);
15847
15848 if (!unaligned_access && !(src_aligned && dst_aligned))
15849 return false;
15850
15851 if (src_volatile || dst_volatile)
15852 return false;
15853
15854 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15855 if (!(dst_aligned || src_aligned))
15856 return arm_gen_cpymemqi (operands);
15857
15858 /* If either the src or dst is unaligned, we'll be accessing it as pairs
15859 of unaligned SImode accesses. Otherwise we can generate DImode
15860 ldrd/strd instructions. */
15861 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15862 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15863
15864 while (len >= 8)
15865 {
15866 len -= 8;
15867 reg0 = gen_reg_rtx (DImode);
15868 rtx first_reg = NULL_RTX;
15869 rtx second_reg = NULL_RTX;
15870
15871 if (!src_aligned || !dst_aligned)
15872 {
15873 if (BYTES_BIG_ENDIAN)
15874 {
15875 second_reg = gen_lowpart (SImode, reg0);
15876 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15877 }
15878 else
15879 {
15880 first_reg = gen_lowpart (SImode, reg0);
15881 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15882 }
15883 }
15884 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15885 emit_move_insn (reg0, src);
15886 else if (src_aligned)
15887 emit_insn (gen_unaligned_loaddi (reg0, src));
15888 else
15889 {
15890 emit_insn (gen_unaligned_loadsi (first_reg, src));
15891 src = next_consecutive_mem (src);
15892 emit_insn (gen_unaligned_loadsi (second_reg, src));
15893 }
15894
15895 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15896 emit_move_insn (dst, reg0);
15897 else if (dst_aligned)
15898 emit_insn (gen_unaligned_storedi (dst, reg0));
15899 else
15900 {
15901 emit_insn (gen_unaligned_storesi (dst, first_reg));
15902 dst = next_consecutive_mem (dst);
15903 emit_insn (gen_unaligned_storesi (dst, second_reg));
15904 }
15905
15906 src = next_consecutive_mem (src);
15907 dst = next_consecutive_mem (dst);
15908 }
15909
15910 gcc_assert (len < 8);
15911 if (len >= 4)
15912 {
15913 /* At least a word but less than a double-word left to copy. Copy a word. */
15914 reg0 = gen_reg_rtx (SImode);
15915 src = adjust_address (src, SImode, 0);
15916 dst = adjust_address (dst, SImode, 0);
15917 if (src_aligned)
15918 emit_move_insn (reg0, src);
15919 else
15920 emit_insn (gen_unaligned_loadsi (reg0, src));
15921
15922 if (dst_aligned)
15923 emit_move_insn (dst, reg0);
15924 else
15925 emit_insn (gen_unaligned_storesi (dst, reg0));
15926
15927 src = next_consecutive_mem (src);
15928 dst = next_consecutive_mem (dst);
15929 len -= 4;
15930 }
15931
15932 if (len == 0)
15933 return true;
15934
15935 /* Copy the remaining bytes. */
15936 if (len >= 2)
15937 {
15938 dst = adjust_address (dst, HImode, 0);
15939 src = adjust_address (src, HImode, 0);
15940 reg0 = gen_reg_rtx (SImode);
15941 if (src_aligned)
15942 emit_insn (gen_zero_extendhisi2 (reg0, src));
15943 else
15944 emit_insn (gen_unaligned_loadhiu (reg0, src));
15945
15946 if (dst_aligned)
15947 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15948 else
15949 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15950
15951 src = next_consecutive_mem (src);
15952 dst = next_consecutive_mem (dst);
15953 if (len == 2)
15954 return true;
15955 }
15956
15957 dst = adjust_address (dst, QImode, 0);
15958 src = adjust_address (src, QImode, 0);
15959 reg0 = gen_reg_rtx (QImode);
15960 emit_move_insn (reg0, src);
15961 emit_move_insn (dst, reg0);
15962 return true;
15963 }
15964
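/* Editor's note: standalone sketch of the length decomposition performed
   by gen_cpymem_ldrd_strd above -- peel 8-byte chunks (LDRD/STRD or two
   unaligned word accesses), then a 4-, 2- and final 1-byte tail.  The
   function name and the use of __builtin_memcpy are illustrative only;
   this is not part of the expander.  */

static void
cpymem_tail_sketch (unsigned char *dst, const unsigned char *src,
		    unsigned long len)
{
  while (len >= 8)		/* Doubleword chunks.  */
    {
      __builtin_memcpy (dst, src, 8);
      dst += 8, src += 8, len -= 8;
    }
  if (len >= 4)			/* One word.  */
    {
      __builtin_memcpy (dst, src, 4);
      dst += 4, src += 4, len -= 4;
    }
  if (len >= 2)			/* One halfword.  */
    {
      __builtin_memcpy (dst, src, 2);
      dst += 2, src += 2, len -= 2;
    }
  if (len)			/* Final byte.  */
    *dst = *src;
}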
15965 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15966 into its component 32-bit subregs. OP2 may be an immediate
15967 constant and we want to simplify it in that case. */
15968 void
15969 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15970 rtx *lo_op2, rtx *hi_op2)
15971 {
15972 *lo_op1 = gen_lowpart (SImode, op1);
15973 *hi_op1 = gen_highpart (SImode, op1);
15974 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15975 subreg_lowpart_offset (SImode, DImode));
15976 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15977 subreg_highpart_offset (SImode, DImode));
15978 }
15979
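/* Editor's note: for a constant OP2 the lowpart/highpart split above is
   simply the value's least- and most-significant 32 bits.  Standalone
   sketch (hypothetical name, not part of arm.cc):  */

static inline void
di_split_sketch (unsigned long long v, unsigned int *lo, unsigned int *hi)
{
  *lo = (unsigned int) (v & 0xffffffffu);   /* subreg_lowpart_offset half.  */
  *hi = (unsigned int) (v >> 32);           /* subreg_highpart_offset half.  */
}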
15980 /* Select a dominance comparison mode if possible for a test of the general
15981 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15982 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15983 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15984 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15985 In all cases OP will be either EQ or NE, but we don't need to know which
15986 here. If we are unable to support a dominance comparison we return
15987 CC mode. This will then fail to match for the RTL expressions that
15988 generate this call. */
15989 machine_mode
15990 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15991 {
15992 enum rtx_code cond1, cond2;
15993 int swapped = 0;
15994
15995 /* Currently we will probably get the wrong result if the individual
15996 comparisons are not simple. This also ensures that it is safe to
15997 reverse a comparison if necessary. */
15998 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15999 != CCmode)
16000 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
16001 != CCmode))
16002 return CCmode;
16003
16004 /* The if_then_else variant of this tests the second condition if the
16005 first passes, but is true if the first fails. Reverse the first
16006 condition to get a true "inclusive-or" expression. */
16007 if (cond_or == DOM_CC_NX_OR_Y)
16008 cond1 = reverse_condition (cond1);
16009
16010 /* If the comparisons are not equal, and one doesn't dominate the other,
16011 then we can't do this. */
16012 if (cond1 != cond2
16013 && !comparison_dominates_p (cond1, cond2)
16014 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
16015 return CCmode;
16016
16017 if (swapped)
16018 std::swap (cond1, cond2);
16019
16020 switch (cond1)
16021 {
16022 case EQ:
16023 if (cond_or == DOM_CC_X_AND_Y)
16024 return CC_DEQmode;
16025
16026 switch (cond2)
16027 {
16028 case EQ: return CC_DEQmode;
16029 case LE: return CC_DLEmode;
16030 case LEU: return CC_DLEUmode;
16031 case GE: return CC_DGEmode;
16032 case GEU: return CC_DGEUmode;
16033 default: gcc_unreachable ();
16034 }
16035
16036 case LT:
16037 if (cond_or == DOM_CC_X_AND_Y)
16038 return CC_DLTmode;
16039
16040 switch (cond2)
16041 {
16042 case LT:
16043 return CC_DLTmode;
16044 case LE:
16045 return CC_DLEmode;
16046 case NE:
16047 return CC_DNEmode;
16048 default:
16049 gcc_unreachable ();
16050 }
16051
16052 case GT:
16053 if (cond_or == DOM_CC_X_AND_Y)
16054 return CC_DGTmode;
16055
16056 switch (cond2)
16057 {
16058 case GT:
16059 return CC_DGTmode;
16060 case GE:
16061 return CC_DGEmode;
16062 case NE:
16063 return CC_DNEmode;
16064 default:
16065 gcc_unreachable ();
16066 }
16067
16068 case LTU:
16069 if (cond_or == DOM_CC_X_AND_Y)
16070 return CC_DLTUmode;
16071
16072 switch (cond2)
16073 {
16074 case LTU:
16075 return CC_DLTUmode;
16076 case LEU:
16077 return CC_DLEUmode;
16078 case NE:
16079 return CC_DNEmode;
16080 default:
16081 gcc_unreachable ();
16082 }
16083
16084 case GTU:
16085 if (cond_or == DOM_CC_X_AND_Y)
16086 return CC_DGTUmode;
16087
16088 switch (cond2)
16089 {
16090 case GTU:
16091 return CC_DGTUmode;
16092 case GEU:
16093 return CC_DGEUmode;
16094 case NE:
16095 return CC_DNEmode;
16096 default:
16097 gcc_unreachable ();
16098 }
16099
16100 /* The remaining cases only occur when both comparisons are the
16101 same. */
16102 case NE:
16103 gcc_assert (cond1 == cond2);
16104 return CC_DNEmode;
16105
16106 case LE:
16107 gcc_assert (cond1 == cond2);
16108 return CC_DLEmode;
16109
16110 case GE:
16111 gcc_assert (cond1 == cond2);
16112 return CC_DGEmode;
16113
16114 case LEU:
16115 gcc_assert (cond1 == cond2);
16116 return CC_DLEUmode;
16117
16118 case GEU:
16119 gcc_assert (cond1 == cond2);
16120 return CC_DGEUmode;
16121
16122 default:
16123 gcc_unreachable ();
16124 }
16125 }
16126
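/* Editor's note: the "dominance" relation used above is implication
   between two comparisons: cond1 dominates cond2 when cond1 being true
   guarantees that cond2 is also true (EQ dominates LE/GE/LEU/GEU, LT
   dominates LE, and so on).  That implication is what allows the pair of
   comparisons in a COND_OR/COND_AND to be evaluated by a conditional
   compare sequence whose final test is a single condition.  A trivial
   standalone check of one such implication (hypothetical name):  */

static inline int
dominance_sketch (int a, int b)
{
  /* EQ dominates LE: whenever a == b holds, a <= b holds as well,
     so this always returns 1.  */
  return (a == b) ? (a <= b) : 1;
}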
16127 machine_mode
16128 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16129 {
16130 /* All floating point compares return CCFP if it is an equality
16131 comparison, and CCFPE otherwise. */
16132 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16133 {
16134 switch (op)
16135 {
16136 case EQ:
16137 case NE:
16138 case UNORDERED:
16139 case ORDERED:
16140 case UNLT:
16141 case UNLE:
16142 case UNGT:
16143 case UNGE:
16144 case UNEQ:
16145 case LTGT:
16146 return CCFPmode;
16147
16148 case LT:
16149 case LE:
16150 case GT:
16151 case GE:
16152 return CCFPEmode;
16153
16154 default:
16155 gcc_unreachable ();
16156 }
16157 }
16158
16159 /* A compare with a shifted operand. Because of canonicalization, the
16160 comparison will have to be swapped when we emit the assembler. */
16161 if (GET_MODE (y) == SImode
16162 && (REG_P (y) || (SUBREG_P (y)))
16163 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16164 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16165 || GET_CODE (x) == ROTATERT))
16166 return CC_SWPmode;
16167
16168 /* A widened compare of the sum of a value plus a carry against a
16169 constant. This is a representation of RSC. We want to swap the
16170 result of the comparison at output. Not valid if the Z bit is
16171 needed. */
16172 if (GET_MODE (x) == DImode
16173 && GET_CODE (x) == PLUS
16174 && arm_borrow_operation (XEXP (x, 1), DImode)
16175 && CONST_INT_P (y)
16176 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16177 && (op == LE || op == GT))
16178 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16179 && (op == LEU || op == GTU))))
16180 return CC_SWPmode;
16181
16182 /* If X is a constant we want to use CC_RSBmode. This is
16183 non-canonical, but arm_gen_compare_reg uses this to generate the
16184 correct canonical form. */
16185 if (GET_MODE (y) == SImode
16186 && (REG_P (y) || SUBREG_P (y))
16187 && CONST_INT_P (x))
16188 return CC_RSBmode;
16189
16190 /* This operation is performed swapped, but since we only rely on the Z
16191 flag we don't need an additional mode. */
16192 if (GET_MODE (y) == SImode
16193 && (REG_P (y) || (SUBREG_P (y)))
16194 && GET_CODE (x) == NEG
16195 && (op == EQ || op == NE))
16196 return CC_Zmode;
16197
16198 /* This is a special case that is used by combine to allow a
16199 comparison of a shifted byte load to be split into a zero-extend
16200 followed by a comparison of the shifted integer (only valid for
16201 equalities and unsigned inequalities). */
16202 if (GET_MODE (x) == SImode
16203 && GET_CODE (x) == ASHIFT
16204 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16205 && GET_CODE (XEXP (x, 0)) == SUBREG
16206 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16207 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16208 && (op == EQ || op == NE
16209 || op == GEU || op == GTU || op == LTU || op == LEU)
16210 && CONST_INT_P (y))
16211 return CC_Zmode;
16212
16213 /* A construct for a conditional compare, if the false arm contains
16214 0, then both conditions must be true, otherwise either condition
16215 must be true. Not all conditions are possible, so CCmode is
16216 returned if it can't be done. */
16217 if (GET_CODE (x) == IF_THEN_ELSE
16218 && (XEXP (x, 2) == const0_rtx
16219 || XEXP (x, 2) == const1_rtx)
16220 && COMPARISON_P (XEXP (x, 0))
16221 && COMPARISON_P (XEXP (x, 1)))
16222 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16223 INTVAL (XEXP (x, 2)));
16224
16225 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16226 if (GET_CODE (x) == AND
16227 && (op == EQ || op == NE)
16228 && COMPARISON_P (XEXP (x, 0))
16229 && COMPARISON_P (XEXP (x, 1)))
16230 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16231 DOM_CC_X_AND_Y);
16232
16233 if (GET_CODE (x) == IOR
16234 && (op == EQ || op == NE)
16235 && COMPARISON_P (XEXP (x, 0))
16236 && COMPARISON_P (XEXP (x, 1)))
16237 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16238 DOM_CC_X_OR_Y);
16239
16240 /* An operation (on Thumb) where we want to test for a single bit.
16241 This is done by shifting that bit up into the top bit of a
16242 scratch register; we can then branch on the sign bit. */
16243 if (TARGET_THUMB1
16244 && GET_MODE (x) == SImode
16245 && (op == EQ || op == NE)
16246 && GET_CODE (x) == ZERO_EXTRACT
16247 && XEXP (x, 1) == const1_rtx)
16248 return CC_Nmode;
16249
16250 /* An operation that sets the condition codes as a side-effect, the
16251 V flag is not set correctly, so we can only use comparisons where
16252 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16253 instead.) */
16254 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16255 if (GET_MODE (x) == SImode
16256 && y == const0_rtx
16257 && (op == EQ || op == NE || op == LT || op == GE)
16258 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16259 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16260 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16261 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16262 || GET_CODE (x) == LSHIFTRT
16263 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16264 || GET_CODE (x) == ROTATERT
16265 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16266 return CC_NZmode;
16267
16268 /* A comparison of ~reg with a const is really a special
16269 canonicalization of compare (~const, reg), which is a reverse
16270 subtract operation. We may not get here if CONST is 0, but that
16271 doesn't matter because ~0 isn't a valid immediate for RSB. */
16272 if (GET_MODE (x) == SImode
16273 && GET_CODE (x) == NOT
16274 && CONST_INT_P (y))
16275 return CC_RSBmode;
16276
16277 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16278 return CC_Zmode;
16279
16280 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16281 && GET_CODE (x) == PLUS
16282 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16283 return CC_Cmode;
16284
16285 if (GET_MODE (x) == DImode
16286 && GET_CODE (x) == PLUS
16287 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16288 && CONST_INT_P (y)
16289 && UINTVAL (y) == 0x800000000
16290 && (op == GEU || op == LTU))
16291 return CC_ADCmode;
16292
16293 if (GET_MODE (x) == DImode
16294 && (op == GE || op == LT)
16295 && GET_CODE (x) == SIGN_EXTEND
16296 && ((GET_CODE (y) == PLUS
16297 && arm_borrow_operation (XEXP (y, 0), DImode))
16298 || arm_borrow_operation (y, DImode)))
16299 return CC_NVmode;
16300
16301 if (GET_MODE (x) == DImode
16302 && (op == GEU || op == LTU)
16303 && GET_CODE (x) == ZERO_EXTEND
16304 && ((GET_CODE (y) == PLUS
16305 && arm_borrow_operation (XEXP (y, 0), DImode))
16306 || arm_borrow_operation (y, DImode)))
16307 return CC_Bmode;
16308
16309 if (GET_MODE (x) == DImode
16310 && (op == EQ || op == NE)
16311 && (GET_CODE (x) == PLUS
16312 || GET_CODE (x) == MINUS)
16313 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16314 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16315 && GET_CODE (y) == SIGN_EXTEND
16316 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16317 return CC_Vmode;
16318
16319 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16320 return GET_MODE (x);
16321
16322 return CCmode;
16323 }
16324
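/* Editor's note: the CC_Cmode case above (an LTU/GEU test of a PLUS
   against one of its own operands) is the usual carry-out idiom for
   unsigned addition.  Standalone sketch (hypothetical name):  */

static inline unsigned int
add_carry_out_sketch (unsigned int a, unsigned int b)
{
  unsigned int sum = a + b;
  return sum < a;		/* 1 iff the 32-bit addition wrapped.  */
}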
16325 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16326 the sequence of instructions needed to generate a suitable condition
16327 code register. Return the CC register result. */
16328 static rtx
16329 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16330 {
16331 machine_mode mode;
16332 rtx cc_reg;
16333
16334 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16335 gcc_assert (TARGET_32BIT);
16336 gcc_assert (!CONST_INT_P (x));
16337
16338 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16339 subreg_lowpart_offset (SImode, DImode));
16340 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16341 subreg_highpart_offset (SImode, DImode));
16342 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16343 subreg_lowpart_offset (SImode, DImode));
16344 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16345 subreg_highpart_offset (SImode, DImode));
16346 switch (code)
16347 {
16348 case EQ:
16349 case NE:
16350 {
16351 if (y_lo == const0_rtx || y_hi == const0_rtx)
16352 {
16353 if (y_lo != const0_rtx)
16354 {
16355 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16356
16357 gcc_assert (y_hi == const0_rtx);
16358 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16359 if (!arm_add_operand (y_lo, SImode))
16360 y_lo = force_reg (SImode, y_lo);
16361 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16362 x_lo = scratch2;
16363 }
16364 else if (y_hi != const0_rtx)
16365 {
16366 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16367
16368 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16369 if (!arm_add_operand (y_hi, SImode))
16370 y_hi = force_reg (SImode, y_hi);
16371 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16372 x_hi = scratch2;
16373 }
16374
16375 if (!scratch)
16376 {
16377 gcc_assert (!reload_completed);
16378 scratch = gen_rtx_SCRATCH (SImode);
16379 }
16380
16381 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16382 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16383
16384 rtx set
16385 = gen_rtx_SET (cc_reg,
16386 gen_rtx_COMPARE (CC_NZmode,
16387 gen_rtx_IOR (SImode, x_lo, x_hi),
16388 const0_rtx));
16389 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16390 clobber)));
16391 return cc_reg;
16392 }
16393
16394 if (!arm_add_operand (y_lo, SImode))
16395 y_lo = force_reg (SImode, y_lo);
16396
16397 if (!arm_add_operand (y_hi, SImode))
16398 y_hi = force_reg (SImode, y_hi);
16399
16400 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16401 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16402 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16403 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16404 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16405
16406 emit_insn (gen_rtx_SET (cc_reg,
16407 gen_rtx_COMPARE (mode, conjunction,
16408 const0_rtx)));
16409 return cc_reg;
16410 }
16411
16412 case LT:
16413 case GE:
16414 {
16415 if (y_lo == const0_rtx)
16416 {
16417 /* If the low word of y is 0, then this is simply a normal
16418 compare of the upper words. */
16419 if (!arm_add_operand (y_hi, SImode))
16420 y_hi = force_reg (SImode, y_hi);
16421
16422 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16423 }
16424
16425 if (!arm_add_operand (y_lo, SImode))
16426 y_lo = force_reg (SImode, y_lo);
16427
16428 rtx cmp1
16429 = gen_rtx_LTU (DImode,
16430 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16431 const0_rtx);
16432
16433 if (!scratch)
16434 scratch = gen_rtx_SCRATCH (SImode);
16435
16436 if (!arm_not_operand (y_hi, SImode))
16437 y_hi = force_reg (SImode, y_hi);
16438
16439 rtx_insn *insn;
16440 if (y_hi == const0_rtx)
16441 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16442 cmp1));
16443 else if (CONST_INT_P (y_hi))
16444 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16445 y_hi, cmp1));
16446 else
16447 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16448 cmp1));
16449 return SET_DEST (single_set (insn));
16450 }
16451
16452 case LE:
16453 case GT:
16454 {
16455 /* During expansion, we only expect to get here if y is a
16456 constant that we want to handle, otherwise we should have
16457 swapped the operands already. */
16458 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16459
16460 if (!const_ok_for_arm (INTVAL (y_lo)))
16461 y_lo = force_reg (SImode, y_lo);
16462
16463 /* Perform a reverse subtract and compare. */
16464 rtx cmp1
16465 = gen_rtx_LTU (DImode,
16466 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16467 const0_rtx);
16468 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16469 x_hi, cmp1));
16470 return SET_DEST (single_set (insn));
16471 }
16472
16473 case LTU:
16474 case GEU:
16475 {
16476 if (y_lo == const0_rtx)
16477 {
16478 /* If the low word of y is 0, then this is simply a normal
16479 compare of the upper words. */
16480 if (!arm_add_operand (y_hi, SImode))
16481 y_hi = force_reg (SImode, y_hi);
16482
16483 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16484 }
16485
16486 if (!arm_add_operand (y_lo, SImode))
16487 y_lo = force_reg (SImode, y_lo);
16488
16489 rtx cmp1
16490 = gen_rtx_LTU (DImode,
16491 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16492 const0_rtx);
16493
16494 if (!scratch)
16495 scratch = gen_rtx_SCRATCH (SImode);
16496 if (!arm_not_operand (y_hi, SImode))
16497 y_hi = force_reg (SImode, y_hi);
16498
16499 rtx_insn *insn;
16500 if (y_hi == const0_rtx)
16501 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16502 cmp1));
16503 else if (CONST_INT_P (y_hi))
16504 {
16505 /* Constant is viewed as unsigned when zero-extended. */
16506 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16507 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16508 y_hi, cmp1));
16509 }
16510 else
16511 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16512 cmp1));
16513 return SET_DEST (single_set (insn));
16514 }
16515
16516 case LEU:
16517 case GTU:
16518 {
16519 /* During expansion, we only expect to get here if y is a
16520 constant that we want to handle, otherwise we should have
16521 swapped the operands already. */
16522 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16523
16524 if (!const_ok_for_arm (INTVAL (y_lo)))
16525 y_lo = force_reg (SImode, y_lo);
16526
16527 /* Perform a reverse subtract and compare. */
16528 rtx cmp1
16529 = gen_rtx_LTU (DImode,
16530 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16531 const0_rtx);
16532 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16533 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16534 x_hi, cmp1));
16535 return SET_DEST (single_set (insn));
16536 }
16537
16538 default:
16539 gcc_unreachable ();
16540 }
16541 }
16542
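/* Editor's note: two standalone sketches of the decompositions used by
   arm_gen_dicompare_reg above (hypothetical names, host arithmetic only).
   An EQ/NE test against zero ORs the two halves together; an unsigned
   LTU/GEU folds the borrow from the low-word compare into the high-word
   compare (CMP followed by a carry-in compare on the target).  */

static inline int
di_eq0_sketch (unsigned int x_lo, unsigned int x_hi)
{
  return (x_lo | x_hi) == 0;
}

static inline int
di_ltu_sketch (unsigned int x_lo, unsigned int x_hi,
	       unsigned int y_lo, unsigned int y_hi)
{
  unsigned int borrow = x_lo < y_lo;
  return x_hi < y_hi || (x_hi == y_hi && borrow);
}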
16543 /* X and Y are two things to compare using CODE. Emit the compare insn and
16544 return the rtx for register 0 in the proper mode. */
16545 rtx
16546 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16547 {
16548 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16549 return arm_gen_dicompare_reg (code, x, y, scratch);
16550
16551 machine_mode mode = SELECT_CC_MODE (code, x, y);
16552 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16553 if (mode == CC_RSBmode)
16554 {
16555 if (!scratch)
16556 scratch = gen_rtx_SCRATCH (SImode);
16557 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16558 GEN_INT (~UINTVAL (x)), y));
16559 }
16560 else
16561 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16562
16563 return cc_reg;
16564 }
16565
16566 /* Generate a sequence of insns that will generate the correct return
16567 address mask depending on the physical architecture that the program
16568 is running on. */
16569 rtx
16570 arm_gen_return_addr_mask (void)
16571 {
16572 rtx reg = gen_reg_rtx (Pmode);
16573
16574 emit_insn (gen_return_addr_mask (reg));
16575 return reg;
16576 }
16577
16578 void
16579 arm_reload_in_hi (rtx *operands)
16580 {
16581 rtx ref = operands[1];
16582 rtx base, scratch;
16583 HOST_WIDE_INT offset = 0;
16584
16585 if (SUBREG_P (ref))
16586 {
16587 offset = SUBREG_BYTE (ref);
16588 ref = SUBREG_REG (ref);
16589 }
16590
16591 if (REG_P (ref))
16592 {
16593 /* We have a pseudo which has been spilt onto the stack; there
16594 are two cases here: the first where there is a simple
16595 stack-slot replacement and a second where the stack-slot is
16596 out of range, or is used as a subreg. */
16597 if (reg_equiv_mem (REGNO (ref)))
16598 {
16599 ref = reg_equiv_mem (REGNO (ref));
16600 base = find_replacement (&XEXP (ref, 0));
16601 }
16602 else
16603 /* The slot is out of range, or was dressed up in a SUBREG. */
16604 base = reg_equiv_address (REGNO (ref));
16605
16606 /* PR 62554: If there is no equivalent memory location then just move
16607 the value as an SImode register move. This happens when the target
16608 architecture variant does not have an HImode register move. */
16609 if (base == NULL)
16610 {
16611 gcc_assert (REG_P (operands[0]));
16612 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16613 gen_rtx_SUBREG (SImode, ref, 0)));
16614 return;
16615 }
16616 }
16617 else
16618 base = find_replacement (&XEXP (ref, 0));
16619
16620 /* Handle the case where the address is too complex to be offset by 1. */
16621 if (GET_CODE (base) == MINUS
16622 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16623 {
16624 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16625
16626 emit_set_insn (base_plus, base);
16627 base = base_plus;
16628 }
16629 else if (GET_CODE (base) == PLUS)
16630 {
16631 /* The addend must be CONST_INT, or we would have dealt with it above. */
16632 HOST_WIDE_INT hi, lo;
16633
16634 offset += INTVAL (XEXP (base, 1));
16635 base = XEXP (base, 0);
16636
16637 /* Rework the address into a legal sequence of insns. */
16638 /* Valid range for lo is -4095 -> 4095 */
16639 lo = (offset >= 0
16640 ? (offset & 0xfff)
16641 : -((-offset) & 0xfff));
16642
16643 /* Corner case, if lo is the max offset then we would be out of range
16644 once we have added the additional 1 below, so bump the msb into the
16645 pre-loading insn(s). */
16646 if (lo == 4095)
16647 lo &= 0x7ff;
16648
16649 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16650 ^ (HOST_WIDE_INT) 0x80000000)
16651 - (HOST_WIDE_INT) 0x80000000);
16652
16653 gcc_assert (hi + lo == offset);
16654
16655 if (hi != 0)
16656 {
16657 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16658
16659 /* Get the base address; addsi3 knows how to handle constants
16660 that require more than one insn. */
16661 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16662 base = base_plus;
16663 offset = lo;
16664 }
16665 }
16666
16667 /* Operands[2] may overlap operands[0] (though it won't overlap
16668 operands[1]), that's why we asked for a DImode reg -- so we can
16669 use the bit that does not overlap. */
16670 if (REGNO (operands[2]) == REGNO (operands[0]))
16671 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16672 else
16673 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16674
16675 emit_insn (gen_zero_extendqisi2 (scratch,
16676 gen_rtx_MEM (QImode,
16677 plus_constant (Pmode, base,
16678 offset))));
16679 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16680 gen_rtx_MEM (QImode,
16681 plus_constant (Pmode, base,
16682 offset + 1))));
16683 if (!BYTES_BIG_ENDIAN)
16684 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16685 gen_rtx_IOR (SImode,
16686 gen_rtx_ASHIFT
16687 (SImode,
16688 gen_rtx_SUBREG (SImode, operands[0], 0),
16689 GEN_INT (8)),
16690 scratch));
16691 else
16692 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16693 gen_rtx_IOR (SImode,
16694 gen_rtx_ASHIFT (SImode, scratch,
16695 GEN_INT (8)),
16696 gen_rtx_SUBREG (SImode, operands[0], 0)));
16697 }
16698
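/* Editor's note: the two byte loads emitted above are merged with a
   shift and an IOR; which byte ends up in the high half of the halfword
   depends on endianness.  Standalone sketch (hypothetical name):  */

static inline unsigned int
reload_hi_combine_sketch (const unsigned char *p, int big_endian)
{
  unsigned int b0 = p[0], b1 = p[1];
  return big_endian ? (b0 << 8) | b1 : b0 | (b1 << 8);
}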
16699 /* Handle storing a half-word to memory during reload by synthesizing as two
16700 byte stores. Take care not to clobber the input values until after we
16701 have moved them somewhere safe. This code assumes that if the DImode
16702 scratch in operands[2] overlaps either the input value or output address
16703 in some way, then that value must die in this insn (we absolutely need
16704 two scratch registers for some corner cases). */
16705 void
16706 arm_reload_out_hi (rtx *operands)
16707 {
16708 rtx ref = operands[0];
16709 rtx outval = operands[1];
16710 rtx base, scratch;
16711 HOST_WIDE_INT offset = 0;
16712
16713 if (SUBREG_P (ref))
16714 {
16715 offset = SUBREG_BYTE (ref);
16716 ref = SUBREG_REG (ref);
16717 }
16718
16719 if (REG_P (ref))
16720 {
16721 /* We have a pseudo which has been spilt onto the stack; there
16722 are two cases here: the first where there is a simple
16723 stack-slot replacement and a second where the stack-slot is
16724 out of range, or is used as a subreg. */
16725 if (reg_equiv_mem (REGNO (ref)))
16726 {
16727 ref = reg_equiv_mem (REGNO (ref));
16728 base = find_replacement (&XEXP (ref, 0));
16729 }
16730 else
16731 /* The slot is out of range, or was dressed up in a SUBREG. */
16732 base = reg_equiv_address (REGNO (ref));
16733
16734 /* PR 62254: If there is no equivalent memory location then just move
16735 the value as an SImode register move. This happens when the target
16736 architecture variant does not have an HImode register move. */
16737 if (base == NULL)
16738 {
16739 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16740
16741 if (REG_P (outval))
16742 {
16743 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16744 gen_rtx_SUBREG (SImode, outval, 0)));
16745 }
16746 else /* SUBREG_P (outval) */
16747 {
16748 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16749 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16750 SUBREG_REG (outval)));
16751 else
16752 /* FIXME: Handle other cases ? */
16753 gcc_unreachable ();
16754 }
16755 return;
16756 }
16757 }
16758 else
16759 base = find_replacement (&XEXP (ref, 0));
16760
16761 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16762
16763 /* Handle the case where the address is too complex to be offset by 1. */
16764 if (GET_CODE (base) == MINUS
16765 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16766 {
16767 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16768
16769 /* Be careful not to destroy OUTVAL. */
16770 if (reg_overlap_mentioned_p (base_plus, outval))
16771 {
16772 /* Updating base_plus might destroy outval, see if we can
16773 swap the scratch and base_plus. */
16774 if (!reg_overlap_mentioned_p (scratch, outval))
16775 std::swap (scratch, base_plus);
16776 else
16777 {
16778 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16779
16780 /* Be conservative and copy OUTVAL into the scratch now,
16781 this should only be necessary if outval is a subreg
16782 of something larger than a word. */
16783 /* XXX Might this clobber base? I can't see how it can,
16784 since scratch is known to overlap with OUTVAL, and
16785 must be wider than a word. */
16786 emit_insn (gen_movhi (scratch_hi, outval));
16787 outval = scratch_hi;
16788 }
16789 }
16790
16791 emit_set_insn (base_plus, base);
16792 base = base_plus;
16793 }
16794 else if (GET_CODE (base) == PLUS)
16795 {
16796 /* The addend must be CONST_INT, or we would have dealt with it above. */
16797 HOST_WIDE_INT hi, lo;
16798
16799 offset += INTVAL (XEXP (base, 1));
16800 base = XEXP (base, 0);
16801
16802 /* Rework the address into a legal sequence of insns. */
16803 /* Valid range for lo is -4095 -> 4095 */
16804 lo = (offset >= 0
16805 ? (offset & 0xfff)
16806 : -((-offset) & 0xfff));
16807
16808 /* Corner case, if lo is the max offset then we would be out of range
16809 once we have added the additional 1 below, so bump the msb into the
16810 pre-loading insn(s). */
16811 if (lo == 4095)
16812 lo &= 0x7ff;
16813
16814 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16815 ^ (HOST_WIDE_INT) 0x80000000)
16816 - (HOST_WIDE_INT) 0x80000000);
16817
16818 gcc_assert (hi + lo == offset);
16819
16820 if (hi != 0)
16821 {
16822 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16823
16824 /* Be careful not to destroy OUTVAL. */
16825 if (reg_overlap_mentioned_p (base_plus, outval))
16826 {
16827 /* Updating base_plus might destroy outval, see if we
16828 can swap the scratch and base_plus. */
16829 if (!reg_overlap_mentioned_p (scratch, outval))
16830 std::swap (scratch, base_plus);
16831 else
16832 {
16833 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16834
16835 /* Be conservative and copy outval into scratch now,
16836 this should only be necessary if outval is a
16837 subreg of something larger than a word. */
16838 /* XXX Might this clobber base? I can't see how it
16839 can, since scratch is known to overlap with
16840 outval. */
16841 emit_insn (gen_movhi (scratch_hi, outval));
16842 outval = scratch_hi;
16843 }
16844 }
16845
16846 /* Get the base address; addsi3 knows how to handle constants
16847 that require more than one insn. */
16848 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16849 base = base_plus;
16850 offset = lo;
16851 }
16852 }
16853
16854 if (BYTES_BIG_ENDIAN)
16855 {
16856 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16857 plus_constant (Pmode, base,
16858 offset + 1)),
16859 gen_lowpart (QImode, outval)));
16860 emit_insn (gen_lshrsi3 (scratch,
16861 gen_rtx_SUBREG (SImode, outval, 0),
16862 GEN_INT (8)));
16863 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16864 offset)),
16865 gen_lowpart (QImode, scratch)));
16866 }
16867 else
16868 {
16869 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16870 offset)),
16871 gen_lowpart (QImode, outval)));
16872 emit_insn (gen_lshrsi3 (scratch,
16873 gen_rtx_SUBREG (SImode, outval, 0),
16874 GEN_INT (8)));
16875 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16876 plus_constant (Pmode, base,
16877 offset + 1)),
16878 gen_lowpart (QImode, scratch)));
16879 }
16880 }
16881
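/* Editor's note: standalone sketch of the hi/lo offset split used by
   both reload helpers above.  LO keeps an immediate in [-4095, 4095]
   (reduced in the 4095 corner case so that LO + 1 stays in range), and
   HI absorbs the remainder, sign-extended to 32 bits with the
   "^ 0x80000000 - 0x80000000" trick, to be materialised via addsi3.
   For example, offset 0x1234 splits into hi = 0x1000, lo = 0x234.  */

static inline void
split_hi_lo_sketch (long long offset, long long *hi, long long *lo)
{
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
  if (*lo == 4095)
    *lo &= 0x7ff;
  *hi = (((offset - *lo) & 0xffffffffll) ^ 0x80000000ll) - 0x80000000ll;
}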
16882 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16883 (padded to the size of a word) should be passed in a register. */
16884
16885 static bool
16886 arm_must_pass_in_stack (const function_arg_info &arg)
16887 {
16888 if (TARGET_AAPCS_BASED)
16889 return must_pass_in_stack_var_size (arg);
16890 else
16891 return must_pass_in_stack_var_size_or_pad (arg);
16892 }
16893
16894
16895 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16896 byte of a stack argument has useful data. For legacy APCS ABIs we use
16897 the default. For AAPCS based ABIs small aggregate types are placed
16898 in the lowest memory address. */
16899
16900 static pad_direction
16901 arm_function_arg_padding (machine_mode mode, const_tree type)
16902 {
16903 if (!TARGET_AAPCS_BASED)
16904 return default_function_arg_padding (mode, type);
16905
16906 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16907 return PAD_DOWNWARD;
16908
16909 return PAD_UPWARD;
16910 }
16911
16912
16913 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16914 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16915 register has useful data, and return the opposite if the most
16916 significant byte does. */
16917
16918 bool
16919 arm_pad_reg_upward (machine_mode mode,
16920 tree type, int first ATTRIBUTE_UNUSED)
16921 {
16922 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16923 {
16924 /* For AAPCS, small aggregates, small fixed-point types,
16925 and small complex types are always padded upwards. */
16926 if (type)
16927 {
16928 if ((AGGREGATE_TYPE_P (type)
16929 || TREE_CODE (type) == COMPLEX_TYPE
16930 || FIXED_POINT_TYPE_P (type))
16931 && int_size_in_bytes (type) <= 4)
16932 return true;
16933 }
16934 else
16935 {
16936 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16937 && GET_MODE_SIZE (mode) <= 4)
16938 return true;
16939 }
16940 }
16941
16942 /* Otherwise, use default padding. */
16943 return !BYTES_BIG_ENDIAN;
16944 }
16945
16946 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16947 assuming that the address in the base register is word aligned. */
16948 bool
16949 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16950 {
16951 HOST_WIDE_INT max_offset;
16952
16953 /* Offset must be a multiple of 4 in Thumb-2 mode. */
16954 if (TARGET_THUMB2 && ((offset & 3) != 0))
16955 return false;
16956
16957 if (TARGET_THUMB2)
16958 max_offset = 1020;
16959 else if (TARGET_ARM)
16960 max_offset = 255;
16961 else
16962 return false;
16963
16964 return ((offset <= max_offset) && (offset >= -max_offset));
16965 }
16966
16967 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16968 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16969 Assumes that the address in the base register RN is word aligned. Pattern
16970 guarantees that both memory accesses use the same base register,
16971 the offsets are constants within range, and the gap between the offsets is 4.
16972 If reload is complete, check that the registers are legal. WBACK indicates whether
16973 the address is updated. LOAD indicates whether the memory access is a load or a store. */
16974 bool
16975 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16976 bool wback, bool load)
16977 {
16978 unsigned int t, t2, n;
16979
16980 if (!reload_completed)
16981 return true;
16982
16983 if (!offset_ok_for_ldrd_strd (offset))
16984 return false;
16985
16986 t = REGNO (rt);
16987 t2 = REGNO (rt2);
16988 n = REGNO (rn);
16989
16990 if ((TARGET_THUMB2)
16991 && ((wback && (n == t || n == t2))
16992 || (t == SP_REGNUM)
16993 || (t == PC_REGNUM)
16994 || (t2 == SP_REGNUM)
16995 || (t2 == PC_REGNUM)
16996 || (!load && (n == PC_REGNUM))
16997 || (load && (t == t2))
16998 /* Triggers Cortex-M3 LDRD errata. */
16999 || (!wback && load && fix_cm3_ldrd && (n == t))))
17000 return false;
17001
17002 if ((TARGET_ARM)
17003 && ((wback && (n == t || n == t2))
17004 || (t2 == PC_REGNUM)
17005 || (t % 2 != 0) /* First destination register is not even. */
17006 || (t2 != t + 1)
17007 /* PC can be used as base register (for offset addressing only),
17008 but it is deprecated. */
17009 || (n == PC_REGNUM)))
17010 return false;
17011
17012 return true;
17013 }
17014
17015 /* Return true if a 64-bit access with alignment ALIGN and with a
17016 constant offset OFFSET from the base pointer is permitted on this
17017 architecture. */
17018 static bool
17019 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
17020 {
17021 return (unaligned_access
17022 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
17023 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
17024 }
17025
17026 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
17027 operand MEM's address contains an immediate offset from the base
17028 register and has no side effects, in which case it sets BASE,
17029 OFFSET and ALIGN accordingly. */
17030 static bool
17031 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
17032 {
17033 rtx addr;
17034
17035 gcc_assert (base != NULL && offset != NULL);
17036
17037 /* TODO: Handle more general memory operand patterns, such as
17038 PRE_DEC and PRE_INC. */
17039
17040 if (side_effects_p (mem))
17041 return false;
17042
17043 /* Can't deal with subregs. */
17044 if (SUBREG_P (mem))
17045 return false;
17046
17047 gcc_assert (MEM_P (mem));
17048
17049 *offset = const0_rtx;
17050 *align = MEM_ALIGN (mem);
17051
17052 addr = XEXP (mem, 0);
17053
17054 /* If addr isn't valid for DImode, then we can't handle it. */
17055 if (!arm_legitimate_address_p (DImode, addr,
17056 reload_in_progress || reload_completed))
17057 return false;
17058
17059 if (REG_P (addr))
17060 {
17061 *base = addr;
17062 return true;
17063 }
17064 else if (GET_CODE (addr) == PLUS)
17065 {
17066 *base = XEXP (addr, 0);
17067 *offset = XEXP (addr, 1);
17068 return (REG_P (*base) && CONST_INT_P (*offset));
17069 }
17070
17071 return false;
17072 }
17073
17074 /* Called from a peephole2 to replace two word-size accesses with a
17075 single LDRD/STRD instruction. Returns true iff we can generate a
17076 new instruction sequence. That is, both accesses use the same base
17077 register and the gap between constant offsets is 4. This function
17078 may reorder its operands to match ldrd/strd RTL templates.
17079 OPERANDS are the operands found by the peephole matcher;
17080 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17081 corresponding memory operands. LOAD indicates whether the access
17082 is a load or a store. CONST_STORE indicates a store of constant
17083 integer values held in OPERANDS[4,5] and assumes that the pattern
17084 is 4 insns long, for the purpose of checking dead registers.
17085 COMMUTE indicates that register operands may be reordered. */
17086 bool
17087 gen_operands_ldrd_strd (rtx *operands, bool load,
17088 bool const_store, bool commute)
17089 {
17090 int nops = 2;
17091 HOST_WIDE_INT offsets[2], offset, align[2];
17092 rtx base = NULL_RTX;
17093 rtx cur_base, cur_offset, tmp;
17094 int i, gap;
17095 HARD_REG_SET regset;
17096
17097 gcc_assert (!const_store || !load);
17098 /* Check that the memory references are immediate offsets from the
17099 same base register. Extract the base register, the destination
17100 registers, and the corresponding memory offsets. */
17101 for (i = 0; i < nops; i++)
17102 {
17103 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17104 &align[i]))
17105 return false;
17106
17107 if (i == 0)
17108 base = cur_base;
17109 else if (REGNO (base) != REGNO (cur_base))
17110 return false;
17111
17112 offsets[i] = INTVAL (cur_offset);
17113 if (GET_CODE (operands[i]) == SUBREG)
17114 {
17115 tmp = SUBREG_REG (operands[i]);
17116 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17117 operands[i] = tmp;
17118 }
17119 }
17120
17121 /* Make sure there is no dependency between the individual loads. */
17122 if (load && REGNO (operands[0]) == REGNO (base))
17123 return false; /* RAW */
17124
17125 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17126 return false; /* WAW */
17127
17128 /* If the same input register is used in both stores
17129 when storing different constants, try to find a free register.
17130 For example, the code
17131 mov r0, 0
17132 str r0, [r2]
17133 mov r0, 1
17134 str r0, [r2, #4]
17135 can be transformed into
17136 mov r1, 0
17137 mov r0, 1
17138 strd r1, r0, [r2]
17139 in Thumb mode assuming that r1 is free.
17140 For ARM mode do the same but only if the starting register
17141 can be made to be even. */
17142 if (const_store
17143 && REGNO (operands[0]) == REGNO (operands[1])
17144 && INTVAL (operands[4]) != INTVAL (operands[5]))
17145 {
17146 if (TARGET_THUMB2)
17147 {
17148 CLEAR_HARD_REG_SET (regset);
17149 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17150 if (tmp == NULL_RTX)
17151 return false;
17152
17153 /* Use the new register in the first load to ensure that
17154 if the original input register is not dead after peephole,
17155 then it will have the correct constant value. */
17156 operands[0] = tmp;
17157 }
17158 else if (TARGET_ARM)
17159 {
17160 int regno = REGNO (operands[0]);
17161 if (!peep2_reg_dead_p (4, operands[0]))
17162 {
17163 /* When the input register is even and is not dead after the
17164 pattern, it has to hold the second constant but we cannot
17165 form a legal STRD in ARM mode with this register as the second
17166 register. */
17167 if (regno % 2 == 0)
17168 return false;
17169
17170 /* Is regno-1 free? */
17171 SET_HARD_REG_SET (regset);
17172 CLEAR_HARD_REG_BIT(regset, regno - 1);
17173 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17174 if (tmp == NULL_RTX)
17175 return false;
17176
17177 operands[0] = tmp;
17178 }
17179 else
17180 {
17181 /* Find a DImode register. */
17182 CLEAR_HARD_REG_SET (regset);
17183 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17184 if (tmp != NULL_RTX)
17185 {
17186 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17187 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17188 }
17189 else
17190 {
17191 /* Can we use the input register to form a DI register? */
17192 SET_HARD_REG_SET (regset);
17193 CLEAR_HARD_REG_BIT(regset,
17194 regno % 2 == 0 ? regno + 1 : regno - 1);
17195 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17196 if (tmp == NULL_RTX)
17197 return false;
17198 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17199 }
17200 }
17201
17202 gcc_assert (operands[0] != NULL_RTX);
17203 gcc_assert (operands[1] != NULL_RTX);
17204 gcc_assert (REGNO (operands[0]) % 2 == 0);
17205 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17206 }
17207 }
17208
17209 /* Make sure the instructions are ordered with lower memory access first. */
17210 if (offsets[0] > offsets[1])
17211 {
17212 gap = offsets[0] - offsets[1];
17213 offset = offsets[1];
17214
17215 /* Swap the instructions such that lower memory is accessed first. */
17216 std::swap (operands[0], operands[1]);
17217 std::swap (operands[2], operands[3]);
17218 std::swap (align[0], align[1]);
17219 if (const_store)
17220 std::swap (operands[4], operands[5]);
17221 }
17222 else
17223 {
17224 gap = offsets[1] - offsets[0];
17225 offset = offsets[0];
17226 }
17227
17228 /* Make sure accesses are to consecutive memory locations. */
17229 if (gap != GET_MODE_SIZE (SImode))
17230 return false;
17231
17232 if (!align_ok_ldrd_strd (align[0], offset))
17233 return false;
17234
17235 /* Make sure we generate legal instructions. */
17236 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17237 false, load))
17238 return true;
17239
17240 /* In Thumb state, where registers are almost unconstrained, there
17241 is little hope of fixing it. */
17242 if (TARGET_THUMB2)
17243 return false;
17244
17245 if (load && commute)
17246 {
17247 /* Try reordering registers. */
17248 std::swap (operands[0], operands[1]);
17249 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17250 false, load))
17251 return true;
17252 }
17253
17254 if (const_store)
17255 {
17256 /* If input registers are dead after this pattern, they can be
17257 reordered or replaced by other registers that are free in the
17258 current pattern. */
17259 if (!peep2_reg_dead_p (4, operands[0])
17260 || !peep2_reg_dead_p (4, operands[1]))
17261 return false;
17262
17263 /* Try to reorder the input registers. */
17264 /* For example, the code
17265 mov r0, 0
17266 mov r1, 1
17267 str r1, [r2]
17268 str r0, [r2, #4]
17269 can be transformed into
17270 mov r1, 0
17271 mov r0, 1
17272 strd r0, [r2]
17273 */
17274 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17275 false, false))
17276 {
17277 std::swap (operands[0], operands[1]);
17278 return true;
17279 }
17280
17281 /* Try to find a free DI register. */
17282 CLEAR_HARD_REG_SET (regset);
17283 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17284 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17285 while (true)
17286 {
17287 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17288 if (tmp == NULL_RTX)
17289 return false;
17290
17291 /* DREG must be an even-numbered register in DImode.
17292 Split it into SI registers. */
17293 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17294 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17295 gcc_assert (operands[0] != NULL_RTX);
17296 gcc_assert (operands[1] != NULL_RTX);
17297 gcc_assert (REGNO (operands[0]) % 2 == 0);
17298 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17299
17300 return (operands_ok_ldrd_strd (operands[0], operands[1],
17301 base, offset,
17302 false, load));
17303 }
17304 }
17305
17306 return false;
17307 }
17308
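/* Editor's note: the peephole above only fuses the two word accesses when
   they are adjacent -- after ordering them by offset, the gap must be
   exactly 4 bytes.  Trivial standalone sketch of that check (hypothetical
   name):  */

static inline int
consecutive_words_sketch (long long off0, long long off1)
{
  long long lo = off0 < off1 ? off0 : off1;
  long long hi = off0 < off1 ? off1 : off0;
  return hi - lo == 4;
}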
17309
17310 /* Return true if parallel execution of the two word-size accesses provided
17311 could be satisfied with a single LDRD/STRD instruction. Two word-size
17312 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17313 register operands and OPERANDS[2,3] are the corresponding memory operands.
17314 */
17315 bool
17316 valid_operands_ldrd_strd (rtx *operands, bool load)
17317 {
17318 int nops = 2;
17319 HOST_WIDE_INT offsets[2], offset, align[2];
17320 rtx base = NULL_RTX;
17321 rtx cur_base, cur_offset;
17322 int i, gap;
17323
17324 /* Check that the memory references are immediate offsets from the
17325 same base register. Extract the base register, the destination
17326 registers, and the corresponding memory offsets. */
17327 for (i = 0; i < nops; i++)
17328 {
17329 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17330 &align[i]))
17331 return false;
17332
17333 if (i == 0)
17334 base = cur_base;
17335 else if (REGNO (base) != REGNO (cur_base))
17336 return false;
17337
17338 offsets[i] = INTVAL (cur_offset);
17339 if (GET_CODE (operands[i]) == SUBREG)
17340 return false;
17341 }
17342
17343 if (offsets[0] > offsets[1])
17344 return false;
17345
17346 gap = offsets[1] - offsets[0];
17347 offset = offsets[0];
17348
17349 /* Make sure accesses are to consecutive memory locations. */
17350 if (gap != GET_MODE_SIZE (SImode))
17351 return false;
17352
17353 if (!align_ok_ldrd_strd (align[0], offset))
17354 return false;
17355
17356 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17357 false, load);
17358 }
17359
17360 \f
17361 /* Print a symbolic form of X to the debug file, F. */
17362 static void
17363 arm_print_value (FILE *f, rtx x)
17364 {
17365 switch (GET_CODE (x))
17366 {
17367 case CONST_INT:
17368 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17369 return;
17370
17371 case CONST_DOUBLE:
17372 {
17373 char fpstr[20];
17374 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17375 sizeof (fpstr), 0, 1);
17376 fputs (fpstr, f);
17377 }
17378 return;
17379
17380 case CONST_VECTOR:
17381 {
17382 int i;
17383
17384 fprintf (f, "<");
17385 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17386 {
17387 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17388 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17389 fputc (',', f);
17390 }
17391 fprintf (f, ">");
17392 }
17393 return;
17394
17395 case CONST_STRING:
17396 fprintf (f, "\"%s\"", XSTR (x, 0));
17397 return;
17398
17399 case SYMBOL_REF:
17400 fprintf (f, "`%s'", XSTR (x, 0));
17401 return;
17402
17403 case LABEL_REF:
17404 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17405 return;
17406
17407 case CONST:
17408 arm_print_value (f, XEXP (x, 0));
17409 return;
17410
17411 case PLUS:
17412 arm_print_value (f, XEXP (x, 0));
17413 fprintf (f, "+");
17414 arm_print_value (f, XEXP (x, 1));
17415 return;
17416
17417 case PC:
17418 fprintf (f, "pc");
17419 return;
17420
17421 default:
17422 fprintf (f, "????");
17423 return;
17424 }
17425 }
17426 \f
17427 /* Routines for manipulation of the constant pool. */
17428
17429 /* Arm instructions cannot load a large constant directly into a
17430 register; they have to come from a pc relative load. The constant
17431 must therefore be placed in the addressable range of the pc
17432 relative load. Depending on the precise pc relative load
17433 instruction the range is somewhere between 256 bytes and 4k. This
17434 means that we often have to dump a constant inside a function, and
17435 generate code to branch around it.
17436
17437 It is important to minimize this, since the branches will slow
17438 things down and make the code larger.
17439
17440 Normally we can hide the table after an existing unconditional
17441 branch so that there is no interruption of the flow, but in the
17442 worst case the code looks like this:
17443
17444 ldr rn, L1
17445 ...
17446 b L2
17447 align
17448 L1: .long value
17449 L2:
17450 ...
17451
17452 ldr rn, L3
17453 ...
17454 b L4
17455 align
17456 L3: .long value
17457 L4:
17458 ...
17459
17460 We fix this by performing a scan after scheduling, which notices
17461 which instructions need to have their operands fetched from the
17462 constant table and builds the table.
17463
17464 The algorithm starts by building a table of all the constants that
17465 need fixing up and all the natural barriers in the function (places
17466 where a constant table can be dropped without breaking the flow).
17467 For each fixup we note how far the pc-relative replacement will be
17468 able to reach and the offset of the instruction into the function.
17469
17470 Having built the table we then group the fixes together to form
17471 tables that are as large as possible (subject to addressing
17472 constraints) and emit each table of constants after the last
17473 barrier that is within range of all the instructions in the group.
17474 If a group does not contain a barrier, then we forcibly create one
17475 by inserting a jump instruction into the flow. Once the table has
17476 been inserted, the insns are then modified to reference the
17477 relevant entry in the pool.
17478
17479 Possible enhancements to the algorithm (not implemented) are:
17480
17481 1) For some processors and object formats, there may be benefit in
17482 aligning the pools to the start of cache lines; this alignment
17483 would need to be taken into account when calculating addressability
17484 of a pool. */
17485
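/* Editor's note: a simplified standalone sketch of the placement
   constraint described above.  Each fix can only reach forward a limited
   distance, so a pool serving a whole group must be emitted no later than
   the smallest (address + reach) over the group; the real code tracks
   this per entry as max_address and also accounts for pool padding and
   each entry's offset within the pool, which this sketch ignores.  */

static inline long long
pool_deadline_sketch (const long long *fix_address,
		      const long long *fix_reach, int nfixes)
{
  long long deadline = fix_address[0] + fix_reach[0];
  for (int i = 1; i < nfixes; i++)
    {
      long long limit = fix_address[i] + fix_reach[i];
      if (limit < deadline)
	deadline = limit;
    }
  return deadline;
}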
17486 /* These typedefs are located at the start of this file, so that
17487 they can be used in the prototypes there. This comment is to
17488 remind readers of that fact so that the following structures
17489 can be understood more easily.
17490
17491 typedef struct minipool_node Mnode;
17492 typedef struct minipool_fixup Mfix; */
17493
17494 struct minipool_node
17495 {
17496 /* Doubly linked chain of entries. */
17497 Mnode * next;
17498 Mnode * prev;
17499 /* The maximum offset into the code at which this entry can be placed. While
17500 pushing fixes for forward references, all entries are sorted in order
17501 of increasing max_address. */
17502 HOST_WIDE_INT max_address;
17503 /* Similarly for an entry inserted for a backwards ref. */
17504 HOST_WIDE_INT min_address;
17505 /* The number of fixes referencing this entry. This can become zero
17506 if we "unpush" an entry. In this case we ignore the entry when we
17507 come to emit the code. */
17508 int refcount;
17509 /* The offset from the start of the minipool. */
17510 HOST_WIDE_INT offset;
17511 /* The value in table. */
17512 rtx value;
17513 /* The mode of value. */
17514 machine_mode mode;
17515 /* The size of the value. With iWMMXt enabled
17516 sizes > 4 also imply an alignment of 8-bytes. */
17517 int fix_size;
17518 };
17519
17520 struct minipool_fixup
17521 {
17522 Mfix * next;
17523 rtx_insn * insn;
17524 HOST_WIDE_INT address;
17525 rtx * loc;
17526 machine_mode mode;
17527 int fix_size;
17528 rtx value;
17529 Mnode * minipool;
17530 HOST_WIDE_INT forwards;
17531 HOST_WIDE_INT backwards;
17532 };
17533
17534 /* Fixes less than a word need padding out to a word boundary. */
17535 #define MINIPOOL_FIX_SIZE(mode) \
17536 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17537
17538 static Mnode * minipool_vector_head;
17539 static Mnode * minipool_vector_tail;
17540 static rtx_code_label *minipool_vector_label;
17541 static int minipool_pad;
17542
17543 /* The linked list of all minipool fixes required for this function. */
17544 Mfix * minipool_fix_head;
17545 Mfix * minipool_fix_tail;
17546 /* The fix entry for the current minipool, once it has been placed. */
17547 Mfix * minipool_barrier;
17548
17549 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17550 #define JUMP_TABLES_IN_TEXT_SECTION 0
17551 #endif
17552
17553 static HOST_WIDE_INT
17554 get_jump_table_size (rtx_jump_table_data *insn)
17555 {
17556 /* ADDR_VECs only take room if read-only data goes into the text
17557 section. */
17558 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17559 {
17560 rtx body = PATTERN (insn);
17561 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17562 HOST_WIDE_INT size;
17563 HOST_WIDE_INT modesize;
17564
17565 modesize = GET_MODE_SIZE (GET_MODE (body));
17566 size = modesize * XVECLEN (body, elt);
17567 switch (modesize)
17568 {
17569 case 1:
17570 /* Round up size of TBB table to a halfword boundary. */
17571 size = (size + 1) & ~HOST_WIDE_INT_1;
17572 break;
17573 case 2:
17574 /* No padding necessary for TBH. */
17575 break;
17576 case 4:
17577 /* Add two bytes for alignment on Thumb. */
17578 if (TARGET_THUMB)
17579 size += 2;
17580 break;
17581 default:
17582 gcc_unreachable ();
17583 }
17584 return size;
17585 }
17586
17587 return 0;
17588 }
17589
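/* Editor's note: standalone restatement of the arithmetic above
   (hypothetical name).  Byte tables (TBB) are rounded up to a halfword,
   halfword tables (TBH) need no padding, and word tables get two bytes
   of alignment padding on Thumb.  */

static inline long long
jump_table_size_sketch (int modesize, int nelts, int thumb)
{
  long long size = (long long) modesize * nelts;
  if (modesize == 1)
    size = (size + 1) & ~1ll;
  else if (modesize == 4 && thumb)
    size += 2;
  return size;
}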
17590 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17591 function descriptor) into a register and the GOT address into the
17592 FDPIC register, returning an rtx for the register holding the
17593 function address. */
17594
17595 rtx
17596 arm_load_function_descriptor (rtx funcdesc)
17597 {
17598 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17599 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17600 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17601 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17602
17603 emit_move_insn (fnaddr_reg, fnaddr);
17604
17605 /* The ABI requires the entry point address to be loaded first, but
17606 since we cannot support lazy binding for lack of atomic load of
17607 two 32-bits values, we do not need to bother to prevent the
17608 previous load from being moved after that of the GOT address. */
17609 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17610
17611 return fnaddr_reg;
17612 }
17613
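/* Editor's note: the FDPIC function descriptor read above is a pair of
   words in memory -- the entry-point address followed, at offset 4, by
   the GOT address that is reloaded into the FDPIC register.  Illustrative
   layout only (hypothetical type name):  */

struct fdpic_descriptor_sketch
{
  unsigned int entry_point;	/* Loaded into the returned register.  */
  unsigned int got_address;	/* Loaded into FDPIC_REGNUM.  */
};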
17614 /* Return the maximum amount of padding that will be inserted before
17615 label LABEL. */
17616 static HOST_WIDE_INT
17617 get_label_padding (rtx label)
17618 {
17619 HOST_WIDE_INT align, min_insn_size;
17620
17621 align = 1 << label_to_alignment (label).levels[0].log;
17622 min_insn_size = TARGET_THUMB ? 2 : 4;
17623 return align > min_insn_size ? align - min_insn_size : 0;
17624 }
17625
17626 /* Move a minipool fix MP from its current location to before MAX_MP.
17627 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17628 constraints may need updating. */
17629 static Mnode *
17630 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17631 HOST_WIDE_INT max_address)
17632 {
17633 /* The code below assumes these are different. */
17634 gcc_assert (mp != max_mp);
17635
17636 if (max_mp == NULL)
17637 {
17638 if (max_address < mp->max_address)
17639 mp->max_address = max_address;
17640 }
17641 else
17642 {
17643 if (max_address > max_mp->max_address - mp->fix_size)
17644 mp->max_address = max_mp->max_address - mp->fix_size;
17645 else
17646 mp->max_address = max_address;
17647
17648 /* Unlink MP from its current position. Since max_mp is non-null,
17649 mp->prev must be non-null. */
17650 mp->prev->next = mp->next;
17651 if (mp->next != NULL)
17652 mp->next->prev = mp->prev;
17653 else
17654 minipool_vector_tail = mp->prev;
17655
17656 /* Re-insert it before MAX_MP. */
17657 mp->next = max_mp;
17658 mp->prev = max_mp->prev;
17659 max_mp->prev = mp;
17660
17661 if (mp->prev != NULL)
17662 mp->prev->next = mp;
17663 else
17664 minipool_vector_head = mp;
17665 }
17666
17667 /* Save the new entry. */
17668 max_mp = mp;
17669
17670 /* Scan over the preceding entries and adjust their addresses as
17671 required. */
17672 while (mp->prev != NULL
17673 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17674 {
17675 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17676 mp = mp->prev;
17677 }
17678
17679 return max_mp;
17680 }
17681
17682 /* Add a constant to the minipool for a forward reference. Returns the
17683 node added or NULL if the constant will not fit in this pool. */
17684 static Mnode *
17685 add_minipool_forward_ref (Mfix *fix)
17686 {
17687 /* If set, max_mp is the first pool_entry that has a lower
17688 constraint than the one we are trying to add. */
17689 Mnode * max_mp = NULL;
17690 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17691 Mnode * mp;
17692
17693 /* If the minipool starts before the end of FIX->INSN then this FIX
17694 cannot be placed into the current pool. Furthermore, adding the
17695 new constant pool entry may cause the pool to start FIX_SIZE bytes
17696 earlier. */
17697 if (minipool_vector_head &&
17698 (fix->address + get_attr_length (fix->insn)
17699 >= minipool_vector_head->max_address - fix->fix_size))
17700 return NULL;
17701
17702 /* Scan the pool to see if a constant with the same value has
17703 already been added. While we are doing this, also note the
17704 location where we must insert the constant if it doesn't already
17705 exist. */
17706 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17707 {
17708 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17709 && fix->mode == mp->mode
17710 && (!LABEL_P (fix->value)
17711 || (CODE_LABEL_NUMBER (fix->value)
17712 == CODE_LABEL_NUMBER (mp->value)))
17713 && rtx_equal_p (fix->value, mp->value))
17714 {
17715 /* More than one fix references this entry. */
17716 mp->refcount++;
17717 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17718 }
17719
17720 /* Note the insertion point if necessary. */
17721 if (max_mp == NULL
17722 && mp->max_address > max_address)
17723 max_mp = mp;
17724
17725 /* If we are inserting an 8-byte aligned quantity and
17726 we have not already found an insertion point, then
17727 make sure that all such 8-byte aligned quantities are
17728 placed at the start of the pool. */
17729 if (ARM_DOUBLEWORD_ALIGN
17730 && max_mp == NULL
17731 && fix->fix_size >= 8
17732 && mp->fix_size < 8)
17733 {
17734 max_mp = mp;
17735 max_address = mp->max_address;
17736 }
17737 }
17738
17739 /* The value is not currently in the minipool, so we need to create
17740 a new entry for it. If MAX_MP is NULL, the entry will be put on
17741 the end of the list since the placement is less constrained than
17742 any existing entry. Otherwise, we insert the new fix before
17743 MAX_MP and, if necessary, adjust the constraints on the other
17744 entries. */
17745 mp = XNEW (Mnode);
17746 mp->fix_size = fix->fix_size;
17747 mp->mode = fix->mode;
17748 mp->value = fix->value;
17749 mp->refcount = 1;
17750 /* Not yet required for a backwards ref. */
17751 mp->min_address = -65536;
17752
17753 if (max_mp == NULL)
17754 {
17755 mp->max_address = max_address;
17756 mp->next = NULL;
17757 mp->prev = minipool_vector_tail;
17758
17759 if (mp->prev == NULL)
17760 {
17761 minipool_vector_head = mp;
17762 minipool_vector_label = gen_label_rtx ();
17763 }
17764 else
17765 mp->prev->next = mp;
17766
17767 minipool_vector_tail = mp;
17768 }
17769 else
17770 {
17771 if (max_address > max_mp->max_address - mp->fix_size)
17772 mp->max_address = max_mp->max_address - mp->fix_size;
17773 else
17774 mp->max_address = max_address;
17775
17776 mp->next = max_mp;
17777 mp->prev = max_mp->prev;
17778 max_mp->prev = mp;
17779 if (mp->prev != NULL)
17780 mp->prev->next = mp;
17781 else
17782 minipool_vector_head = mp;
17783 }
17784
17785 /* Save the new entry. */
17786 max_mp = mp;
17787
17788 /* Scan over the preceding entries and adjust their addresses as
17789 required. */
17790 while (mp->prev != NULL
17791 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17792 {
17793 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17794 mp = mp->prev;
17795 }
17796
17797 return max_mp;
17798 }
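
/* Editorial example: a fix whose insn sits at address 1000 with a forward
   pool range of 4096 gets max_address = 1000 + 4096 - minipool_pad, i.e. its
   constant must be emitted before that address.  A more tightly constrained
   entry is therefore inserted nearer the head of the pool, and the loop
   above propagates the reduced max_address to the entries before it.  */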
17799
17800 static Mnode *
17801 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17802 HOST_WIDE_INT min_address)
17803 {
17804 HOST_WIDE_INT offset;
17805
17806 /* The code below assumes these are different. */
17807 gcc_assert (mp != min_mp);
17808
17809 if (min_mp == NULL)
17810 {
17811 if (min_address > mp->min_address)
17812 mp->min_address = min_address;
17813 }
17814 else
17815 {
17816 /* We will adjust this below if it is too loose. */
17817 mp->min_address = min_address;
17818
17819 /* Unlink MP from its current position. Since min_mp is non-null,
17820 mp->next must be non-null. */
17821 mp->next->prev = mp->prev;
17822 if (mp->prev != NULL)
17823 mp->prev->next = mp->next;
17824 else
17825 minipool_vector_head = mp->next;
17826
17827 /* Reinsert it after MIN_MP. */
17828 mp->prev = min_mp;
17829 mp->next = min_mp->next;
17830 min_mp->next = mp;
17831 if (mp->next != NULL)
17832 mp->next->prev = mp;
17833 else
17834 minipool_vector_tail = mp;
17835 }
17836
17837 min_mp = mp;
17838
17839 offset = 0;
17840 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17841 {
17842 mp->offset = offset;
17843 if (mp->refcount > 0)
17844 offset += mp->fix_size;
17845
17846 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17847 mp->next->min_address = mp->min_address + mp->fix_size;
17848 }
17849
17850 return min_mp;
17851 }
17852
17853 /* Add a constant to the minipool for a backward reference. Returns the
17854 node added or NULL if the constant will not fit in this pool.
17855
17856 Note that the code for inserting a backwards reference can be
17857 somewhat confusing because the calculated offsets for each fix do
17858 not take into account the size of the pool (which is still under
17859 construction). */
17860 static Mnode *
17861 add_minipool_backward_ref (Mfix *fix)
17862 {
17863 /* If set, min_mp is the last pool_entry that has a lower constraint
17864 than the one we are trying to add. */
17865 Mnode *min_mp = NULL;
17866 /* This can be negative, since it is only a constraint. */
17867 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17868 Mnode *mp;
17869
17870 /* If we can't reach the current pool from this insn, or if we can't
17871 insert this entry at the end of the pool without pushing other
17872 fixes out of range, then we don't try. This ensures that we
17873 can't fail later on. */
17874 if (min_address >= minipool_barrier->address
17875 || (minipool_vector_tail->min_address + fix->fix_size
17876 >= minipool_barrier->address))
17877 return NULL;
17878
17879 /* Scan the pool to see if a constant with the same value has
17880 already been added. While we are doing this, also note the
17881 location where we must insert the constant if it doesn't already
17882 exist. */
17883 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17884 {
17885 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17886 && fix->mode == mp->mode
17887 && (!LABEL_P (fix->value)
17888 || (CODE_LABEL_NUMBER (fix->value)
17889 == CODE_LABEL_NUMBER (mp->value)))
17890 && rtx_equal_p (fix->value, mp->value)
17891 /* Check that there is enough slack to move this entry to the
17892 end of the table (this is conservative). */
17893 && (mp->max_address
17894 > (minipool_barrier->address
17895 + minipool_vector_tail->offset
17896 + minipool_vector_tail->fix_size)))
17897 {
17898 mp->refcount++;
17899 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17900 }
17901
17902 if (min_mp != NULL)
17903 mp->min_address += fix->fix_size;
17904 else
17905 {
17906 /* Note the insertion point if necessary. */
17907 if (mp->min_address < min_address)
17908 {
17909 /* For now, we do not allow the insertion of 8-byte alignment
17910 requiring nodes anywhere but at the start of the pool. */
17911 if (ARM_DOUBLEWORD_ALIGN
17912 && fix->fix_size >= 8 && mp->fix_size < 8)
17913 return NULL;
17914 else
17915 min_mp = mp;
17916 }
17917 else if (mp->max_address
17918 < minipool_barrier->address + mp->offset + fix->fix_size)
17919 {
17920 /* Inserting before this entry would push the fix beyond
17921 its maximum address (which can happen if we have
17922 re-located a forwards fix); force the new fix to come
17923 after it. */
17924 if (ARM_DOUBLEWORD_ALIGN
17925 && fix->fix_size >= 8 && mp->fix_size < 8)
17926 return NULL;
17927 else
17928 {
17929 min_mp = mp;
17930 min_address = mp->min_address + fix->fix_size;
17931 }
17932 }
17933 /* Do not insert a non-8-byte aligned quantity before 8-byte
17934 aligned quantities. */
17935 else if (ARM_DOUBLEWORD_ALIGN
17936 && fix->fix_size < 8
17937 && mp->fix_size >= 8)
17938 {
17939 min_mp = mp;
17940 min_address = mp->min_address + fix->fix_size;
17941 }
17942 }
17943 }
17944
17945 /* We need to create a new entry. */
17946 mp = XNEW (Mnode);
17947 mp->fix_size = fix->fix_size;
17948 mp->mode = fix->mode;
17949 mp->value = fix->value;
17950 mp->refcount = 1;
17951 mp->max_address = minipool_barrier->address + 65536;
17952
17953 mp->min_address = min_address;
17954
17955 if (min_mp == NULL)
17956 {
17957 mp->prev = NULL;
17958 mp->next = minipool_vector_head;
17959
17960 if (mp->next == NULL)
17961 {
17962 minipool_vector_tail = mp;
17963 minipool_vector_label = gen_label_rtx ();
17964 }
17965 else
17966 mp->next->prev = mp;
17967
17968 minipool_vector_head = mp;
17969 }
17970 else
17971 {
17972 mp->next = min_mp->next;
17973 mp->prev = min_mp;
17974 min_mp->next = mp;
17975
17976 if (mp->next != NULL)
17977 mp->next->prev = mp;
17978 else
17979 minipool_vector_tail = mp;
17980 }
17981
17982 /* Save the new entry. */
17983 min_mp = mp;
17984
17985 if (mp->prev)
17986 mp = mp->prev;
17987 else
17988 mp->offset = 0;
17989
17990 /* Scan over the following entries and adjust their offsets. */
17991 while (mp->next != NULL)
17992 {
17993 if (mp->next->min_address < mp->min_address + mp->fix_size)
17994 mp->next->min_address = mp->min_address + mp->fix_size;
17995
17996 if (mp->refcount)
17997 mp->next->offset = mp->offset + mp->fix_size;
17998 else
17999 mp->next->offset = mp->offset;
18000
18001 mp = mp->next;
18002 }
18003
18004 return min_mp;
18005 }
18006
18007 static void
18008 assign_minipool_offsets (Mfix *barrier)
18009 {
18010 HOST_WIDE_INT offset = 0;
18011 Mnode *mp;
18012
18013 minipool_barrier = barrier;
18014
18015 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18016 {
18017 mp->offset = offset;
18018
18019 if (mp->refcount > 0)
18020 offset += mp->fix_size;
18021 }
18022 }
18023
18024 /* Output the literal table. */
18025 static void
18026 dump_minipool (rtx_insn *scan)
18027 {
18028 Mnode * mp;
18029 Mnode * nmp;
18030 int align64 = 0;
18031
18032 if (ARM_DOUBLEWORD_ALIGN)
18033 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18034 if (mp->refcount > 0 && mp->fix_size >= 8)
18035 {
18036 align64 = 1;
18037 break;
18038 }
18039
18040 if (dump_file)
18041 fprintf (dump_file,
18042 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18043 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
18044
18045 scan = emit_label_after (gen_label_rtx (), scan);
18046 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
18047 scan = emit_label_after (minipool_vector_label, scan);
18048
18049 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
18050 {
18051 if (mp->refcount > 0)
18052 {
18053 if (dump_file)
18054 {
18055 fprintf (dump_file,
18056 ";; Offset %u, min %ld, max %ld ",
18057 (unsigned) mp->offset, (unsigned long) mp->min_address,
18058 (unsigned long) mp->max_address);
18059 arm_print_value (dump_file, mp->value);
18060 fputc ('\n', dump_file);
18061 }
18062
18063 rtx val = copy_rtx (mp->value);
18064
18065 switch (GET_MODE_SIZE (mp->mode))
18066 {
18067 #ifdef HAVE_consttable_1
18068 case 1:
18069 scan = emit_insn_after (gen_consttable_1 (val), scan);
18070 break;
18071
18072 #endif
18073 #ifdef HAVE_consttable_2
18074 case 2:
18075 scan = emit_insn_after (gen_consttable_2 (val), scan);
18076 break;
18077
18078 #endif
18079 #ifdef HAVE_consttable_4
18080 case 4:
18081 scan = emit_insn_after (gen_consttable_4 (val), scan);
18082 break;
18083
18084 #endif
18085 #ifdef HAVE_consttable_8
18086 case 8:
18087 scan = emit_insn_after (gen_consttable_8 (val), scan);
18088 break;
18089
18090 #endif
18091 #ifdef HAVE_consttable_16
18092 case 16:
18093 scan = emit_insn_after (gen_consttable_16 (val), scan);
18094 break;
18095
18096 #endif
18097 default:
18098 gcc_unreachable ();
18099 }
18100 }
18101
18102 nmp = mp->next;
18103 free (mp);
18104 }
18105
18106 minipool_vector_head = minipool_vector_tail = NULL;
18107 scan = emit_insn_after (gen_consttable_end (), scan);
18108 scan = emit_barrier_after (scan);
18109 }
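
/* Editorial note: the emitted pool looks roughly like
       .Lpool_anchor:
         .align 2              @ or an 8-byte alignment when a doubleword
       .Lminipool:             @   entry is present and ARM_DOUBLEWORD_ALIGN
         .word  0x12345678
         .word  some_symbol
   followed by a barrier; entries whose refcount dropped to zero are freed
   without being emitted.  */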
18110
18111 /* Return the cost of forcibly inserting a barrier after INSN. */
18112 static int
18113 arm_barrier_cost (rtx_insn *insn)
18114 {
18115 /* Basing the location of the pool on the loop depth is preferable,
18116 but at the moment, the basic block information seems to be
18117 corrupted by this stage of the compilation. */
18118 int base_cost = 50;
18119 rtx_insn *next = next_nonnote_insn (insn);
18120
18121 if (next != NULL && LABEL_P (next))
18122 base_cost -= 20;
18123
18124 switch (GET_CODE (insn))
18125 {
18126 case CODE_LABEL:
18127 /* It will always be better to place the table before the label, rather
18128 than after it. */
18129 return 50;
18130
18131 case INSN:
18132 case CALL_INSN:
18133 return base_cost;
18134
18135 case JUMP_INSN:
18136 return base_cost - 10;
18137
18138 default:
18139 return base_cost + 10;
18140 }
18141 }
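
/* Editorial example: with the costs above, a JUMP_INSN immediately followed
   by a label is the cheapest split point (50 - 20 - 10 = 20), an ordinary
   INSN followed by a label costs 30, and a CODE_LABEL itself always costs 50
   because the pool should go before the label rather than after it.  */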
18142
18143 /* Find the best place in the insn stream in the range
18144 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18145 Create the barrier by inserting a jump and add a new fix entry for
18146 it. */
18147 static Mfix *
18148 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18149 {
18150 HOST_WIDE_INT count = 0;
18151 rtx_barrier *barrier;
18152 rtx_insn *from = fix->insn;
18153 /* The instruction after which we will insert the jump. */
18154 rtx_insn *selected = NULL;
18155 int selected_cost;
18156 /* The address at which the jump instruction will be placed. */
18157 HOST_WIDE_INT selected_address;
18158 Mfix * new_fix;
18159 HOST_WIDE_INT max_count = max_address - fix->address;
18160 rtx_code_label *label = gen_label_rtx ();
18161
18162 selected_cost = arm_barrier_cost (from);
18163 selected_address = fix->address;
18164
18165 while (from && count < max_count)
18166 {
18167 rtx_jump_table_data *tmp;
18168 int new_cost;
18169
18170 /* This code shouldn't have been called if there was a natural barrier
18171 within range. */
18172 gcc_assert (!BARRIER_P (from));
18173
18174 /* Count the length of this insn. This must stay in sync with the
18175 code that pushes minipool fixes. */
18176 if (LABEL_P (from))
18177 count += get_label_padding (from);
18178 else
18179 count += get_attr_length (from);
18180
18181 /* If there is a jump table, add its length. */
18182 if (tablejump_p (from, NULL, &tmp))
18183 {
18184 count += get_jump_table_size (tmp);
18185
18186 /* Jump tables aren't in a basic block, so base the cost on
18187 the dispatch insn. If we select this location, we will
18188 still put the pool after the table. */
18189 new_cost = arm_barrier_cost (from);
18190
18191 if (count < max_count
18192 && (!selected || new_cost <= selected_cost))
18193 {
18194 selected = tmp;
18195 selected_cost = new_cost;
18196 selected_address = fix->address + count;
18197 }
18198
18199 /* Continue after the dispatch table. */
18200 from = NEXT_INSN (tmp);
18201 continue;
18202 }
18203
18204 new_cost = arm_barrier_cost (from);
18205
18206 if (count < max_count
18207 && (!selected || new_cost <= selected_cost))
18208 {
18209 selected = from;
18210 selected_cost = new_cost;
18211 selected_address = fix->address + count;
18212 }
18213
18214 from = NEXT_INSN (from);
18215 }
18216
18217 /* Make sure that we found a place to insert the jump. */
18218 gcc_assert (selected);
18219
18220 /* Create a new JUMP_INSN that branches around a barrier. */
18221 from = emit_jump_insn_after (gen_jump (label), selected);
18222 JUMP_LABEL (from) = label;
18223 barrier = emit_barrier_after (from);
18224 emit_label_after (label, barrier);
18225
18226 /* Create a minipool barrier entry for the new barrier. */
18227 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18228 new_fix->insn = barrier;
18229 new_fix->address = selected_address;
18230 new_fix->next = fix->next;
18231 fix->next = new_fix;
18232
18233 return new_fix;
18234 }
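
/* Editorial note: the manufactured barrier amounts to
         b     .Lskip
       (barrier -- the minipool is dumped here)
       .Lskip:
   so execution simply branches over the constants that will be placed at
   the barrier.  */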
18235
18236 /* Record that there is a natural barrier in the insn stream at
18237 ADDRESS. */
18238 static void
18239 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18240 {
18241 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18242
18243 fix->insn = insn;
18244 fix->address = address;
18245
18246 fix->next = NULL;
18247 if (minipool_fix_head != NULL)
18248 minipool_fix_tail->next = fix;
18249 else
18250 minipool_fix_head = fix;
18251
18252 minipool_fix_tail = fix;
18253 }
18254
18255 /* Record INSN, which will need fixing up to load a value from the
18256 minipool. ADDRESS is the offset of the insn since the start of the
18257 function; LOC is a pointer to the part of the insn which requires
18258 fixing; VALUE is the constant that must be loaded, which is of type
18259 MODE. */
18260 static void
18261 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18262 machine_mode mode, rtx value)
18263 {
18264 gcc_assert (!arm_disable_literal_pool);
18265 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18266
18267 fix->insn = insn;
18268 fix->address = address;
18269 fix->loc = loc;
18270 fix->mode = mode;
18271 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18272 fix->value = value;
18273 fix->forwards = get_attr_pool_range (insn);
18274 fix->backwards = get_attr_neg_pool_range (insn);
18275 fix->minipool = NULL;
18276
18277 /* If an insn doesn't have a range defined for it, then it isn't
18278 expecting to be reworked by this code. Better to stop now than
18279 to generate duff assembly code. */
18280 gcc_assert (fix->forwards || fix->backwards);
18281
18282 /* If an entry requires 8-byte alignment then assume all constant pools
18283 require 4 bytes of padding. Trying to do this later on a per-pool
18284 basis is awkward because existing pool entries have to be modified. */
18285 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18286 minipool_pad = 4;
18287
18288 if (dump_file)
18289 {
18290 fprintf (dump_file,
18291 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18292 GET_MODE_NAME (mode),
18293 INSN_UID (insn), (unsigned long) address,
18294 -1 * (long)fix->backwards, (long)fix->forwards);
18295 arm_print_value (dump_file, fix->value);
18296 fprintf (dump_file, "\n");
18297 }
18298
18299 /* Add it to the chain of fixes. */
18300 fix->next = NULL;
18301
18302 if (minipool_fix_head != NULL)
18303 minipool_fix_tail->next = fix;
18304 else
18305 minipool_fix_head = fix;
18306
18307 minipool_fix_tail = fix;
18308 }
18309
18310 /* Return the maximum allowed cost (in insns) of synthesizing a 64-bit
18311 constant inline; constants whose synthesis cost exceeds this limit are
18312 loaded from the literal pool instead. */
18313 int
18314 arm_max_const_double_inline_cost ()
18315 {
18316 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18317 }
18318
18319 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18320 Returns the number of insns needed, or 99 if we don't know how to
18321 do it. */
18322 int
18323 arm_const_double_inline_cost (rtx val)
18324 {
18325 rtx lowpart, highpart;
18326 machine_mode mode;
18327
18328 mode = GET_MODE (val);
18329
18330 if (mode == VOIDmode)
18331 mode = DImode;
18332
18333 gcc_assert (GET_MODE_SIZE (mode) == 8);
18334
18335 lowpart = gen_lowpart (SImode, val);
18336 highpart = gen_highpart_mode (SImode, mode, val);
18337
18338 gcc_assert (CONST_INT_P (lowpart));
18339 gcc_assert (CONST_INT_P (highpart));
18340
18341 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18342 NULL_RTX, NULL_RTX, 0, 0)
18343 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18344 NULL_RTX, NULL_RTX, 0, 0));
18345 }
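
/* Editorial example: for the 64-bit constant 0x0000000100000005 both the low
   part (5) and the high part (1) are single-instruction immediates, so the
   inline cost is 2, which is within the limit returned by
   arm_max_const_double_inline_cost; a value whose halves each need several
   MOV/ORR steps would instead stay in the literal pool.  */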
18346
18347 /* Cost of loading a SImode constant. */
18348 static inline int
18349 arm_const_inline_cost (enum rtx_code code, rtx val)
18350 {
18351 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18352 NULL_RTX, NULL_RTX, 1, 0);
18353 }
18354
18355 /* Return true if it is worthwhile to split a 64-bit constant into two
18356 32-bit operations. This is the case if optimizing for size, or
18357 if we have load delay slots, or if one 32-bit part can be done with
18358 a single data operation. */
18359 bool
18360 arm_const_double_by_parts (rtx val)
18361 {
18362 machine_mode mode = GET_MODE (val);
18363 rtx part;
18364
18365 if (optimize_size || arm_ld_sched)
18366 return true;
18367
18368 if (mode == VOIDmode)
18369 mode = DImode;
18370
18371 part = gen_highpart_mode (SImode, mode, val);
18372
18373 gcc_assert (CONST_INT_P (part));
18374
18375 if (const_ok_for_arm (INTVAL (part))
18376 || const_ok_for_arm (~INTVAL (part)))
18377 return true;
18378
18379 part = gen_lowpart (SImode, val);
18380
18381 gcc_assert (CONST_INT_P (part));
18382
18383 if (const_ok_for_arm (INTVAL (part))
18384 || const_ok_for_arm (~INTVAL (part)))
18385 return true;
18386
18387 return false;
18388 }
18389
18390 /* Return true if it is possible to inline both the high and low parts
18391 of a 64-bit constant into 32-bit data processing instructions. */
18392 bool
18393 arm_const_double_by_immediates (rtx val)
18394 {
18395 machine_mode mode = GET_MODE (val);
18396 rtx part;
18397
18398 if (mode == VOIDmode)
18399 mode = DImode;
18400
18401 part = gen_highpart_mode (SImode, mode, val);
18402
18403 gcc_assert (CONST_INT_P (part));
18404
18405 if (!const_ok_for_arm (INTVAL (part)))
18406 return false;
18407
18408 part = gen_lowpart (SImode, val);
18409
18410 gcc_assert (CONST_INT_P (part));
18411
18412 if (!const_ok_for_arm (INTVAL (part)))
18413 return false;
18414
18415 return true;
18416 }
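
/* Editorial example: 0x0000000500000001 satisfies the test above because
   both 5 and 1 are valid ARM immediates, while 0x123456789abcdef0 does not,
   since neither 32-bit half fits the 8-bit-rotated immediate encoding.  */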
18417
18418 /* Scan INSN and note any of its operands that need fixing.
18419 If DO_PUSHES is false we do not actually push any of the fixups
18420 needed. */
18421 static void
18422 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18423 {
18424 int opno;
18425
18426 extract_constrain_insn (insn);
18427
18428 if (recog_data.n_alternatives == 0)
18429 return;
18430
18431 /* Fill in recog_op_alt with information about the constraints of
18432 this insn. */
18433 preprocess_constraints (insn);
18434
18435 const operand_alternative *op_alt = which_op_alt ();
18436 for (opno = 0; opno < recog_data.n_operands; opno++)
18437 {
18438 /* Things we need to fix can only occur in inputs. */
18439 if (recog_data.operand_type[opno] != OP_IN)
18440 continue;
18441
18442 /* If this alternative is a memory reference, then any mention
18443 of constants in this alternative is really to fool reload
18444 into allowing us to accept one there. We need to fix them up
18445 now so that we output the right code. */
18446 if (op_alt[opno].memory_ok)
18447 {
18448 rtx op = recog_data.operand[opno];
18449
18450 if (CONSTANT_P (op))
18451 {
18452 if (do_pushes)
18453 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18454 recog_data.operand_mode[opno], op);
18455 }
18456 else if (MEM_P (op)
18457 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18458 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18459 {
18460 if (do_pushes)
18461 {
18462 rtx cop = avoid_constant_pool_reference (op);
18463
18464 /* Casting the address of something to a mode narrower
18465 than a word can cause avoid_constant_pool_reference()
18466 to return the pool reference itself. That's no good to
18467 us here. Let's just hope that we can use the
18468 constant pool value directly. */
18469 if (op == cop)
18470 cop = get_pool_constant (XEXP (op, 0));
18471
18472 push_minipool_fix (insn, address,
18473 recog_data.operand_loc[opno],
18474 recog_data.operand_mode[opno], cop);
18475 }
18476
18477 }
18478 }
18479 }
18480
18481 return;
18482 }
18483
18484 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18485 and unions in the context of ARMv8-M Security Extensions. It is used as a
18486 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18487 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
18488 or four masks, depending on whether it is being computed for a
18489 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18490 respectively. The tree for the type of the argument or a field within an
18491 argument is passed in ARG_TYPE, the current register this argument or field
18492 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18493 argument or field starts at is passed in STARTING_BIT and the last used bit
18494 is kept in LAST_USED_BIT which is also updated accordingly. */
18495
18496 static unsigned HOST_WIDE_INT
18497 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18498 uint32_t * padding_bits_to_clear,
18499 unsigned starting_bit, int * last_used_bit)
18500
18501 {
18502 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18503
18504 if (TREE_CODE (arg_type) == RECORD_TYPE)
18505 {
18506 unsigned current_bit = starting_bit;
18507 tree field;
18508 long int offset, size;
18509
18510
18511 field = TYPE_FIELDS (arg_type);
18512 while (field)
18513 {
18514 /* The offset within a structure is always an offset from
18515 the start of that structure. Make sure we take that into account in
18516 the calculation of the register-based offset that we use here. */
18517 offset = starting_bit;
18518 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18519 offset %= 32;
18520
18521 /* This is the actual size of the field, for bitfields this is the
18522 bitfield width and not the container size. */
18523 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18524
18525 if (*last_used_bit != offset)
18526 {
18527 if (offset < *last_used_bit)
18528 {
18529 /* This field's offset is before the 'last_used_bit', that
18530 means this field goes on the next register. So we need to
18531 pad the rest of the current register and increase the
18532 register number. */
18533 uint32_t mask;
18534 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18535 mask++;
18536
18537 padding_bits_to_clear[*regno] |= mask;
18538 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18539 (*regno)++;
18540 }
18541 else
18542 {
18543 /* Otherwise we pad the bits between the last field's end and
18544 the start of the new field. */
18545 uint32_t mask;
18546
18547 mask = ((uint32_t)-1) >> (32 - offset);
18548 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18549 padding_bits_to_clear[*regno] |= mask;
18550 }
18551 current_bit = offset;
18552 }
18553
18554 /* Calculate further padding bits for inner structs/unions too. */
18555 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18556 {
18557 *last_used_bit = current_bit;
18558 not_to_clear_reg_mask
18559 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18560 padding_bits_to_clear, offset,
18561 last_used_bit);
18562 }
18563 else
18564 {
18565 /* Update 'current_bit' with this field's size. If the
18566 'current_bit' lies in a subsequent register, update 'regno' and
18567 reset 'current_bit' to point to the current bit in that new
18568 register. */
18569 current_bit += size;
18570 while (current_bit >= 32)
18571 {
18572 current_bit-=32;
18573 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18574 (*regno)++;
18575 }
18576 *last_used_bit = current_bit;
18577 }
18578
18579 field = TREE_CHAIN (field);
18580 }
18581 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18582 }
18583 else if (TREE_CODE (arg_type) == UNION_TYPE)
18584 {
18585 tree field, field_t;
18586 int i, regno_t, field_size;
18587 int max_reg = -1;
18588 int max_bit = -1;
18589 uint32_t mask;
18590 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18591 = {-1, -1, -1, -1};
18592
18593 /* To compute the padding bits in a union we only consider bits as
18594 padding bits if they are always either a padding bit or fall outside a
18595 field's size for all fields in the union. */
18596 field = TYPE_FIELDS (arg_type);
18597 while (field)
18598 {
18599 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18600 = {0U, 0U, 0U, 0U};
18601 int last_used_bit_t = *last_used_bit;
18602 regno_t = *regno;
18603 field_t = TREE_TYPE (field);
18604
18605 /* If the field's type is either a record or a union make sure to
18606 compute their padding bits too. */
18607 if (RECORD_OR_UNION_TYPE_P (field_t))
18608 not_to_clear_reg_mask
18609 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18610 &padding_bits_to_clear_t[0],
18611 starting_bit, &last_used_bit_t);
18612 else
18613 {
18614 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18615 regno_t = (field_size / 32) + *regno;
18616 last_used_bit_t = (starting_bit + field_size) % 32;
18617 }
18618
18619 for (i = *regno; i < regno_t; i++)
18620 {
18621 /* For all but the last register used by this field only keep the
18622 padding bits that were padding bits in this field. */
18623 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18624 }
18625
18626 /* For the last register, keep all padding bits that were padding
18627 bits in this field and any padding bits that are still valid
18628 as padding bits but fall outside of this field's size. */
18629 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18630 padding_bits_to_clear_res[regno_t]
18631 &= padding_bits_to_clear_t[regno_t] | mask;
18632
18633 /* Update the maximum size of the fields in terms of registers used
18634 ('max_reg') and the 'last_used_bit' in said register. */
18635 if (max_reg < regno_t)
18636 {
18637 max_reg = regno_t;
18638 max_bit = last_used_bit_t;
18639 }
18640 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18641 max_bit = last_used_bit_t;
18642
18643 field = TREE_CHAIN (field);
18644 }
18645
18646 /* Update the current padding_bits_to_clear using the intersection of the
18647 padding bits of all the fields. */
18648 for (i=*regno; i < max_reg; i++)
18649 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18650
18651 /* Do not keep trailing padding bits, we do not know yet whether this
18652 is the end of the argument. */
18653 mask = ((uint32_t) 1 << max_bit) - 1;
18654 padding_bits_to_clear[max_reg]
18655 |= padding_bits_to_clear_res[max_reg] & mask;
18656
18657 *regno = max_reg;
18658 *last_used_bit = max_bit;
18659 }
18660 else
18661 /* This function should only be used for structs and unions. */
18662 gcc_unreachable ();
18663
18664 return not_to_clear_reg_mask;
18665 }
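
/* Editorial example, assuming the usual AAPCS layout: for
     struct { uint8_t a; uint16_t b; }
   passed in r0, field a uses bits 0-7 and field b starts at bit 16, so the
   gap 8-15 is padding: mask = (0xffffffff >> (32 - 16)) - ((1 << 8) - 1)
   = 0x0000ff00 is ORed into padding_bits_to_clear[0], and, after the
   trailing-bit handling in compute_not_to_clear_mask below, only r0 remains
   in the not-to-clear mask.  */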
18666
18667 /* In the context of ARMv8-M Security Extensions, this function is used for both
18668 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18669 registers are used when returning or passing arguments, which is then
18670 returned as a mask. It will also compute a mask to indicate padding/unused
18671 bits for each of these registers, and passes this through the
18672 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18673 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18674 the starting register used to pass this argument or return value is passed
18675 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18676 for struct and union types. */
18677
18678 static unsigned HOST_WIDE_INT
18679 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18680 uint32_t * padding_bits_to_clear)
18681
18682 {
18683 int last_used_bit = 0;
18684 unsigned HOST_WIDE_INT not_to_clear_mask;
18685
18686 if (RECORD_OR_UNION_TYPE_P (arg_type))
18687 {
18688 not_to_clear_mask
18689 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18690 padding_bits_to_clear, 0,
18691 &last_used_bit);
18692
18693
18694 /* If the 'last_used_bit' is not zero, that means we are still using a
18695 part of the last 'regno'. In such cases we must clear the trailing
18696 bits. Otherwise we are not using regno and we should mark it as
18697 needing to be cleared. */
18698 if (last_used_bit != 0)
18699 padding_bits_to_clear[regno]
18700 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18701 else
18702 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18703 }
18704 else
18705 {
18706 not_to_clear_mask = 0;
18707 /* We are not dealing with structs nor unions. So these arguments may be
18708 passed in floating point registers too. In some cases a BLKmode is
18709 used when returning or passing arguments in multiple VFP registers. */
18710 if (GET_MODE (arg_rtx) == BLKmode)
18711 {
18712 int i, arg_regs;
18713 rtx reg;
18714
18715 /* This should really only occur when dealing with the hard-float
18716 ABI. */
18717 gcc_assert (TARGET_HARD_FLOAT_ABI);
18718
18719 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18720 {
18721 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18722 gcc_assert (REG_P (reg));
18723
18724 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18725
18726 /* If we are dealing with DF mode, make sure we don't
18727 clear either of the registers it addresses. */
18728 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18729 if (arg_regs > 1)
18730 {
18731 unsigned HOST_WIDE_INT mask;
18732 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18733 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18734 not_to_clear_mask |= mask;
18735 }
18736 }
18737 }
18738 else
18739 {
18740 /* Otherwise we can rely on the MODE to determine how many registers
18741 are being used by this argument. */
18742 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18743 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18744 if (arg_regs > 1)
18745 {
18746 unsigned HOST_WIDE_INT
18747 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18748 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18749 not_to_clear_mask |= mask;
18750 }
18751 }
18752 }
18753
18754 return not_to_clear_mask;
18755 }
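
/* Editorial example: a double passed in d0 under the hard-float ABI has
   ARM_NUM_REGS (DFmode) == 2, so the mask returned above keeps both s0 and
   s1 (the two single-precision registers overlapping d0) from being
   cleared.  */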
18756
18757 /* Clear secret register contents before doing a cmse_nonsecure_call or returning from
18758 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18759 are to be fully cleared, using the value in register CLEARING_REG if more
18760 efficient. The PADDING_BITS_LEN-entry array PADDING_BITS_TO_CLEAR gives
18761 the bits that need to be cleared in caller-saved core registers, with
18762 SCRATCH_REG used as a scratch register for that clearing.
18763
18764 NOTE: one of the three following conditions must hold:
18765 - SCRATCH_REG is a low register
18766 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18767 in TO_CLEAR_BITMAP)
18768 - CLEARING_REG is a low register. */
18769
18770 static void
18771 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18772 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18773 {
18774 bool saved_clearing = false;
18775 rtx saved_clearing_reg = NULL_RTX;
18776 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18777
18778 gcc_assert (arm_arch_cmse);
18779
18780 if (!bitmap_empty_p (to_clear_bitmap))
18781 {
18782 minregno = bitmap_first_set_bit (to_clear_bitmap);
18783 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18784 }
18785 clearing_regno = REGNO (clearing_reg);
18786
18787 /* Clear padding bits. */
18788 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18789 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18790 {
18791 uint64_t mask;
18792 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18793
18794 if (padding_bits_to_clear[i] == 0)
18795 continue;
18796
18797 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18798 CLEARING_REG as scratch. */
18799 if (TARGET_THUMB1
18800 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18801 {
18802 /* clearing_reg is not to be cleared; copy its value into scratch_reg
18803 such that we can use clearing_reg to clear the unused bits in the
18804 arguments. */
18805 if ((clearing_regno > maxregno
18806 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18807 && !saved_clearing)
18808 {
18809 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18810 emit_move_insn (scratch_reg, clearing_reg);
18811 saved_clearing = true;
18812 saved_clearing_reg = scratch_reg;
18813 }
18814 scratch_reg = clearing_reg;
18815 }
18816
18817 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18818 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18819 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18820
18821 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18822 mask = (~padding_bits_to_clear[i]) >> 16;
18823 rtx16 = gen_int_mode (16, SImode);
18824 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18825 if (mask)
18826 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18827
18828 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18829 }
18830 if (saved_clearing)
18831 emit_move_insn (clearing_reg, saved_clearing_reg);
18832
18833
18834 /* Clear full registers. */
18835
18836 if (TARGET_HAVE_FPCXT_CMSE)
18837 {
18838 rtvec vunspec_vec;
18839 int i, j, k, nb_regs;
18840 rtx use_seq, par, reg, set, vunspec;
18841 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18842 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18843 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18844
18845 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18846 {
18847 /* Find next register to clear and exit if none. */
18848 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18849 if (i > maxregno)
18850 break;
18851
18852 /* Compute number of consecutive registers to clear. */
18853 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18854 j++);
18855 nb_regs = j - i;
18856
18857 /* Create VSCCLRM RTX pattern. */
18858 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18859 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18860 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18861 VUNSPEC_VSCCLRM_VPR);
18862 XVECEXP (par, 0, 0) = vunspec;
18863
18864 /* Insert VFP register clearing RTX in the pattern. */
18865 start_sequence ();
18866 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18867 {
18868 if (!bitmap_bit_p (to_clear_bitmap, j))
18869 continue;
18870
18871 reg = gen_rtx_REG (SFmode, j);
18872 set = gen_rtx_SET (reg, const0_rtx);
18873 XVECEXP (par, 0, k++) = set;
18874 emit_use (reg);
18875 }
18876 use_seq = get_insns ();
18877 end_sequence ();
18878
18879 emit_insn_after (use_seq, emit_insn (par));
18880 }
18881
18882 /* Get set of core registers to clear. */
18883 bitmap_clear (core_regs_bitmap);
18884 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18885 IP_REGNUM - R0_REGNUM + 1);
18886 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18887 core_regs_bitmap);
18888 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18889
18890 if (bitmap_empty_p (to_clear_core_bitmap))
18891 return;
18892
18893 /* Create clrm RTX pattern. */
18894 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18895 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18896
18897 /* Insert core register clearing RTX in the pattern. */
18898 start_sequence ();
18899 for (j = 0, i = minregno; j < nb_regs; i++)
18900 {
18901 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18902 continue;
18903
18904 reg = gen_rtx_REG (SImode, i);
18905 set = gen_rtx_SET (reg, const0_rtx);
18906 XVECEXP (par, 0, j++) = set;
18907 emit_use (reg);
18908 }
18909
18910 /* Insert APSR register clearing RTX in the pattern
18911 * along with clobbering CC. */
18912 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18913 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18914 VUNSPEC_CLRM_APSR);
18915
18916 XVECEXP (par, 0, j++) = vunspec;
18917
18918 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18919 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18920 XVECEXP (par, 0, j) = clobber;
18921
18922 use_seq = get_insns ();
18923 end_sequence ();
18924
18925 emit_insn_after (use_seq, emit_insn (par));
18926 }
18927 else
18928 {
18929 /* If not marked for clearing, clearing_reg already does not contain
18930 any secret. */
18931 if (clearing_regno <= maxregno
18932 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18933 {
18934 emit_move_insn (clearing_reg, const0_rtx);
18935 emit_use (clearing_reg);
18936 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18937 }
18938
18939 for (regno = minregno; regno <= maxregno; regno++)
18940 {
18941 if (!bitmap_bit_p (to_clear_bitmap, regno))
18942 continue;
18943
18944 if (IS_VFP_REGNUM (regno))
18945 {
18946 /* If regno is an even vfp register and its successor is also to
18947 be cleared, use vmov. */
18948 if (TARGET_VFP_DOUBLE
18949 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18950 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18951 {
18952 emit_move_insn (gen_rtx_REG (DFmode, regno),
18953 CONST1_RTX (DFmode));
18954 emit_use (gen_rtx_REG (DFmode, regno));
18955 regno++;
18956 }
18957 else
18958 {
18959 emit_move_insn (gen_rtx_REG (SFmode, regno),
18960 CONST1_RTX (SFmode));
18961 emit_use (gen_rtx_REG (SFmode, regno));
18962 }
18963 }
18964 else
18965 {
18966 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18967 emit_use (gen_rtx_REG (SImode, regno));
18968 }
18969 }
18970 }
18971 }
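
/* Editorial note: the padding-bit clearing above builds
   ~padding_bits_to_clear in SCRATCH_REG in two 16-bit halves and then ANDs
   it into the argument register.  A stand-alone sketch of the mask
   arithmetic, using standard C types only (an illustration, not part of the
   build):  */
#if 0
#include <stdint.h>

static uint32_t
clear_padding_sketch (uint32_t reg_value, uint32_t padding_bits_to_clear)
{
  uint32_t lo = (~padding_bits_to_clear) & 0xffff;	/* lower half  */
  uint32_t hi = (~padding_bits_to_clear) >> 16;		/* upper half  */
  uint32_t scratch = (hi << 16) | lo;
  return reg_value & scratch;		/* ands rN, rN, scratch  */
}

/* With padding_bits_to_clear == 0x0000ff00 this clears exactly bits 8-15 of
   the register, leaving the argument bits intact.  */
#endif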
18972
18973 /* Clear core and caller-saved VFP registers not used to pass arguments before
18974 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18975 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18976 libgcc/config/arm/cmse_nonsecure_call.S. */
18977
18978 static void
18979 cmse_nonsecure_call_inline_register_clear (void)
18980 {
18981 basic_block bb;
18982
18983 FOR_EACH_BB_FN (bb, cfun)
18984 {
18985 rtx_insn *insn;
18986
18987 FOR_BB_INSNS (bb, insn)
18988 {
18989 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18990 /* frame = VFP regs + FPSCR + VPR. */
18991 unsigned lazy_store_stack_frame_size
18992 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18993 unsigned long callee_saved_mask
18994 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18995 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18996 unsigned address_regnum, regno;
18997 unsigned max_int_regno
18998 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18999 unsigned max_fp_regno
19000 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
19001 unsigned maxregno
19002 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
19003 auto_sbitmap to_clear_bitmap (maxregno + 1);
19004 rtx_insn *seq;
19005 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
19006 rtx address;
19007 CUMULATIVE_ARGS args_so_far_v;
19008 cumulative_args_t args_so_far;
19009 tree arg_type, fntype;
19010 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
19011 function_args_iterator args_iter;
19012 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
19013
19014 if (!NONDEBUG_INSN_P (insn))
19015 continue;
19016
19017 if (!CALL_P (insn))
19018 continue;
19019
19020 pat = PATTERN (insn);
19021 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
19022 call = XVECEXP (pat, 0, 0);
19023
19024 /* Get the real call RTX if the insn sets a value, ie. returns. */
19025 if (GET_CODE (call) == SET)
19026 call = SET_SRC (call);
19027
19028 /* Check if it is a cmse_nonsecure_call. */
19029 unspec = XEXP (call, 0);
19030 if (GET_CODE (unspec) != UNSPEC
19031 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
19032 continue;
19033
19034 /* Mark registers that need to be cleared. Those that hold a
19035 parameter are removed from the set further below. */
19036 bitmap_clear (to_clear_bitmap);
19037 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
19038 max_int_regno - R0_REGNUM + 1);
19039
19040 /* Only look at the caller-saved floating point registers in case of
19041 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19042 lazy store and loads which clear both caller- and callee-saved
19043 registers. */
19044 if (!lazy_fpclear)
19045 {
19046 auto_sbitmap float_bitmap (maxregno + 1);
19047
19048 bitmap_clear (float_bitmap);
19049 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
19050 max_fp_regno - FIRST_VFP_REGNUM + 1);
19051 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
19052 }
19053
19054 /* Make sure the register used to hold the function address is not
19055 cleared. */
19056 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19057 gcc_assert (MEM_P (address));
19058 gcc_assert (REG_P (XEXP (address, 0)));
19059 address_regnum = REGNO (XEXP (address, 0));
19060 if (address_regnum <= max_int_regno)
19061 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19062
19063 /* Set basic block of call insn so that df rescan is performed on
19064 insns inserted here. */
19065 set_block_for_insn (insn, bb);
19066 df_set_flags (DF_DEFER_INSN_RESCAN);
19067 start_sequence ();
19068
19069 /* Make sure the scheduler doesn't schedule other insns beyond
19070 here. */
19071 emit_insn (gen_blockage ());
19072
19073 /* Walk through all arguments and clear registers
19074 appropriately. */
19075 fntype = TREE_TYPE (MEM_EXPR (address));
19076 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19077 NULL_TREE);
19078 args_so_far = pack_cumulative_args (&args_so_far_v);
19079 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19080 {
19081 rtx arg_rtx;
19082 uint64_t to_clear_args_mask;
19083
19084 if (VOID_TYPE_P (arg_type))
19085 continue;
19086
19087 function_arg_info arg (arg_type, /*named=*/true);
19088 if (!first_param)
19089 /* ??? We should advance after processing the argument and pass
19090 the argument we're advancing past. */
19091 arm_function_arg_advance (args_so_far, arg);
19092
19093 arg_rtx = arm_function_arg (args_so_far, arg);
19094 gcc_assert (REG_P (arg_rtx));
19095 to_clear_args_mask
19096 = compute_not_to_clear_mask (arg_type, arg_rtx,
19097 REGNO (arg_rtx),
19098 &padding_bits_to_clear[0]);
19099 if (to_clear_args_mask)
19100 {
19101 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19102 {
19103 if (to_clear_args_mask & (1ULL << regno))
19104 bitmap_clear_bit (to_clear_bitmap, regno);
19105 }
19106 }
19107
19108 first_param = false;
19109 }
19110
19111 /* We use right shift and left shift to clear the LSB of the address
19112 we jump to instead of using bic, to avoid having to use an extra
19113 register on Thumb-1. */
19114 clearing_reg = XEXP (address, 0);
19115 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19116 emit_insn (gen_rtx_SET (clearing_reg, shift));
19117 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19118 emit_insn (gen_rtx_SET (clearing_reg, shift));
19119
19120 if (clear_callee_saved)
19121 {
19122 rtx push_insn =
19123 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19124 /* Disable frame debug info in push because it needs to be
19125 disabled for pop (see below). */
19126 RTX_FRAME_RELATED_P (push_insn) = 0;
19127
19128 /* Lazy store multiple. */
19129 if (lazy_fpclear)
19130 {
19131 rtx imm;
19132 rtx_insn *add_insn;
19133
19134 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19135 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19136 stack_pointer_rtx, imm));
19137 /* If we have the frame pointer, then it will be the
19138 CFA reg. Otherwise, the stack pointer is the CFA
19139 reg, so we need to emit a CFA adjust. */
19140 if (!frame_pointer_needed)
19141 arm_add_cfa_adjust_cfa_note (add_insn,
19142 - lazy_store_stack_frame_size,
19143 stack_pointer_rtx,
19144 stack_pointer_rtx);
19145 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19146 }
19147 /* Save VFP callee-saved registers. */
19148 else
19149 {
19150 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19151 (max_fp_regno - D7_VFP_REGNUM) / 2);
19152 /* Disable frame debug info in push because it needs to be
19153 disabled for vpop (see below). */
19154 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19155 }
19156 }
19157
19158 /* Clear caller-saved registers that leak before doing a non-secure
19159 call. */
19160 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19161 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19162 NUM_ARG_REGS, ip_reg, clearing_reg);
19163
19164 seq = get_insns ();
19165 end_sequence ();
19166 emit_insn_before (seq, insn);
19167
19168 if (TARGET_HAVE_FPCXT_CMSE)
19169 {
19170 rtx_insn *last, *pop_insn, *after = insn;
19171
19172 start_sequence ();
19173
19174 /* Lazy load multiple done as part of libcall in Armv8-M. */
19175 if (lazy_fpclear)
19176 {
19177 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19178 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19179 rtx_insn *add_insn =
19180 emit_insn (gen_addsi3 (stack_pointer_rtx,
19181 stack_pointer_rtx, imm));
19182 if (!frame_pointer_needed)
19183 arm_add_cfa_adjust_cfa_note (add_insn,
19184 lazy_store_stack_frame_size,
19185 stack_pointer_rtx,
19186 stack_pointer_rtx);
19187 }
19188 /* Restore VFP callee-saved registers. */
19189 else
19190 {
19191 int nb_callee_saved_vfp_regs =
19192 (max_fp_regno - D7_VFP_REGNUM) / 2;
19193 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19194 nb_callee_saved_vfp_regs,
19195 stack_pointer_rtx);
19196 /* Disable frame debug info in vpop because the SP adjustment
19197 is made using a CFA adjustment note while the CFA register used is
19198 sometimes R7. This then causes an assert failure in the
19199 CFI note creation code. */
19200 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19201 }
19202
19203 arm_emit_multi_reg_pop (callee_saved_mask);
19204 pop_insn = get_last_insn ();
19205
19206 /* Disable frame debug info in pop because the restore notes reset the state
19207 of popped registers to what it was at the beginning of the
19208 function, before the prologue. This leads to incorrect state
19209 when doing the pop after the nonsecure call for registers that
19210 are pushed both in prologue and before the nonsecure call.
19211
19212 It also occasionally triggers an assert failure in CFI note
19213 creation code when there are two codepaths to the epilogue,
19214 one of which does not go through the nonsecure call.
19215 Obviously this means that debugging between the push and pop is
19216 not reliable. */
19217 RTX_FRAME_RELATED_P (pop_insn) = 0;
19218
19219 seq = get_insns ();
19220 last = get_last_insn ();
19221 end_sequence ();
19222
19223 emit_insn_after (seq, after);
19224
19225 /* Skip the pop we have just inserted after the nonsecure call; we know
19226 it does not contain a nonsecure call. */
19227 insn = last;
19228 }
19229 }
19230 }
19231 }
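
/* Editorial note: at the source level, the calls instrumented above are
   calls through pointers declared with the cmse_nonsecure_call attribute
   when compiling with -mcmse, e.g. (an illustrative sketch, not part of the
   build):  */
#if 0
typedef int __attribute__ ((cmse_nonsecure_call)) ns_fn_t (int);

int
call_nonsecure (ns_fn_t *fn, int x)
{
  /* Before the transition to the non-secure function, argument registers
     that do not carry parameters (and, depending on the FP ABI, the
     caller-saved VFP registers) are cleared by the code above.  */
  return fn (x);
}
#endif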
19232
19233 /* Rewrite move insn into subtract of 0 if the condition codes will
19234 be useful in the next conditional jump insn. */
19235
19236 static void
19237 thumb1_reorg (void)
19238 {
19239 basic_block bb;
19240
19241 FOR_EACH_BB_FN (bb, cfun)
19242 {
19243 rtx dest, src;
19244 rtx cmp, op0, op1, set = NULL;
19245 rtx_insn *prev, *insn = BB_END (bb);
19246 bool insn_clobbered = false;
19247
19248 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19249 insn = PREV_INSN (insn);
19250
19251 /* Find the last cbranchsi4_insn in basic block BB. */
19252 if (insn == BB_HEAD (bb)
19253 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19254 continue;
19255
19256 /* Get the register with which we are comparing. */
19257 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19258 op0 = XEXP (cmp, 0);
19259 op1 = XEXP (cmp, 1);
19260
19261 /* Check that comparison is against ZERO. */
19262 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19263 continue;
19264
19265 /* Find the first flag setting insn before INSN in basic block BB. */
19266 gcc_assert (insn != BB_HEAD (bb));
19267 for (prev = PREV_INSN (insn);
19268 (!insn_clobbered
19269 && prev != BB_HEAD (bb)
19270 && (NOTE_P (prev)
19271 || DEBUG_INSN_P (prev)
19272 || ((set = single_set (prev)) != NULL
19273 && get_attr_conds (prev) == CONDS_NOCOND)));
19274 prev = PREV_INSN (prev))
19275 {
19276 if (reg_set_p (op0, prev))
19277 insn_clobbered = true;
19278 }
19279
19280 /* Skip if op0 is clobbered by insn other than prev. */
19281 if (insn_clobbered)
19282 continue;
19283
19284 if (!set)
19285 continue;
19286
19287 dest = SET_DEST (set);
19288 src = SET_SRC (set);
19289 if (!low_register_operand (dest, SImode)
19290 || !low_register_operand (src, SImode))
19291 continue;
19292
19293 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19294 in INSN. Both src and dest of the move insn are checked. */
19295 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19296 {
19297 dest = copy_rtx (dest);
19298 src = copy_rtx (src);
19299 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19300 PATTERN (prev) = gen_rtx_SET (dest, src);
19301 INSN_CODE (prev) = -1;
19302 /* Set test register in INSN to dest. */
19303 XEXP (cmp, 0) = copy_rtx (dest);
19304 INSN_CODE (insn) = -1;
19305 }
19306 }
19307 }
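
/* Editorial example: for a Thumb-1 sequence along the lines of
         movs  r2, r1
         ...
         cmp   r2, #0
         beq   .L1
   the transformation above rewrites the move as a subtract of zero
   (e.g. "subs r2, r1, #0"), whose condition codes make the separate
   comparison against zero removable by later passes.  */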
19308
19309 /* Convert instructions to their cc-clobbering variant if possible, since
19310 that allows us to use smaller encodings. */
19311
19312 static void
19313 thumb2_reorg (void)
19314 {
19315 basic_block bb;
19316 regset_head live;
19317
19318 INIT_REG_SET (&live);
19319
19320 /* We are freeing block_for_insn in the toplev to keep compatibility
19321 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19322 compute_bb_for_insn ();
19323 df_analyze ();
19324
19325 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19326
19327 FOR_EACH_BB_FN (bb, cfun)
19328 {
19329 if ((current_tune->disparage_flag_setting_t16_encodings
19330 == tune_params::DISPARAGE_FLAGS_ALL)
19331 && optimize_bb_for_speed_p (bb))
19332 continue;
19333
19334 rtx_insn *insn;
19335 Convert_Action action = SKIP;
19336 Convert_Action action_for_partial_flag_setting
19337 = ((current_tune->disparage_flag_setting_t16_encodings
19338 != tune_params::DISPARAGE_FLAGS_NEITHER)
19339 && optimize_bb_for_speed_p (bb))
19340 ? SKIP : CONV;
19341
19342 COPY_REG_SET (&live, DF_LR_OUT (bb));
19343 df_simulate_initialize_backwards (bb, &live);
19344 FOR_BB_INSNS_REVERSE (bb, insn)
19345 {
19346 if (NONJUMP_INSN_P (insn)
19347 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19348 && GET_CODE (PATTERN (insn)) == SET)
19349 {
19350 action = SKIP;
19351 rtx pat = PATTERN (insn);
19352 rtx dst = XEXP (pat, 0);
19353 rtx src = XEXP (pat, 1);
19354 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19355
19356 if (UNARY_P (src) || BINARY_P (src))
19357 op0 = XEXP (src, 0);
19358
19359 if (BINARY_P (src))
19360 op1 = XEXP (src, 1);
19361
19362 if (low_register_operand (dst, SImode))
19363 {
19364 switch (GET_CODE (src))
19365 {
19366 case PLUS:
19367 /* Adding two registers and storing the result
19368 in the first source is already a 16-bit
19369 operation. */
19370 if (rtx_equal_p (dst, op0)
19371 && register_operand (op1, SImode))
19372 break;
19373
19374 if (low_register_operand (op0, SImode))
19375 {
19376 /* ADDS <Rd>,<Rn>,<Rm> */
19377 if (low_register_operand (op1, SImode))
19378 action = CONV;
19379 /* ADDS <Rdn>,#<imm8> */
19380 /* SUBS <Rdn>,#<imm8> */
19381 else if (rtx_equal_p (dst, op0)
19382 && CONST_INT_P (op1)
19383 && IN_RANGE (INTVAL (op1), -255, 255))
19384 action = CONV;
19385 /* ADDS <Rd>,<Rn>,#<imm3> */
19386 /* SUBS <Rd>,<Rn>,#<imm3> */
19387 else if (CONST_INT_P (op1)
19388 && IN_RANGE (INTVAL (op1), -7, 7))
19389 action = CONV;
19390 }
19391 /* ADCS <Rd>, <Rn> */
19392 else if (GET_CODE (XEXP (src, 0)) == PLUS
19393 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19394 && low_register_operand (XEXP (XEXP (src, 0), 1),
19395 SImode)
19396 && COMPARISON_P (op1)
19397 && cc_register (XEXP (op1, 0), VOIDmode)
19398 && maybe_get_arm_condition_code (op1) == ARM_CS
19399 && XEXP (op1, 1) == const0_rtx)
19400 action = CONV;
19401 break;
19402
19403 case MINUS:
19404 /* RSBS <Rd>,<Rn>,#0
19405 Not handled here: see NEG below. */
19406 /* SUBS <Rd>,<Rn>,#<imm3>
19407 SUBS <Rdn>,#<imm8>
19408 Not handled here: see PLUS above. */
19409 /* SUBS <Rd>,<Rn>,<Rm> */
19410 if (low_register_operand (op0, SImode)
19411 && low_register_operand (op1, SImode))
19412 action = CONV;
19413 break;
19414
19415 case MULT:
19416 /* MULS <Rdm>,<Rn>,<Rdm>
19417 As an exception to the rule, this is only used
19418 when optimizing for size since MULS is slow on all
19419 known implementations. We do not even want to use
19420 MULS in cold code, if optimizing for speed, so we
19421 test the global flag here. */
19422 if (!optimize_size)
19423 break;
19424 /* Fall through. */
19425 case AND:
19426 case IOR:
19427 case XOR:
19428 /* ANDS <Rdn>,<Rm> */
19429 if (rtx_equal_p (dst, op0)
19430 && low_register_operand (op1, SImode))
19431 action = action_for_partial_flag_setting;
19432 else if (rtx_equal_p (dst, op1)
19433 && low_register_operand (op0, SImode))
19434 action = action_for_partial_flag_setting == SKIP
19435 ? SKIP : SWAP_CONV;
19436 break;
19437
19438 case ASHIFTRT:
19439 case ASHIFT:
19440 case LSHIFTRT:
19441 /* ASRS <Rdn>,<Rm> */
19442 /* LSRS <Rdn>,<Rm> */
19443 /* LSLS <Rdn>,<Rm> */
19444 if (rtx_equal_p (dst, op0)
19445 && low_register_operand (op1, SImode))
19446 action = action_for_partial_flag_setting;
19447 /* ASRS <Rd>,<Rm>,#<imm5> */
19448 /* LSRS <Rd>,<Rm>,#<imm5> */
19449 /* LSLS <Rd>,<Rm>,#<imm5> */
19450 else if (low_register_operand (op0, SImode)
19451 && CONST_INT_P (op1)
19452 && IN_RANGE (INTVAL (op1), 0, 31))
19453 action = action_for_partial_flag_setting;
19454 break;
19455
19456 case ROTATERT:
19457 /* RORS <Rdn>,<Rm> */
19458 if (rtx_equal_p (dst, op0)
19459 && low_register_operand (op1, SImode))
19460 action = action_for_partial_flag_setting;
19461 break;
19462
19463 case NOT:
19464 /* MVNS <Rd>,<Rm> */
19465 if (low_register_operand (op0, SImode))
19466 action = action_for_partial_flag_setting;
19467 break;
19468
19469 case NEG:
19470 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19471 if (low_register_operand (op0, SImode))
19472 action = CONV;
19473 break;
19474
19475 case CONST_INT:
19476 /* MOVS <Rd>,#<imm8> */
19477 if (CONST_INT_P (src)
19478 && IN_RANGE (INTVAL (src), 0, 255))
19479 action = action_for_partial_flag_setting;
19480 break;
19481
19482 case REG:
19483 /* MOVS and MOV<c> with registers have different
19484 encodings, so are not relevant here. */
19485 break;
19486
19487 default:
19488 break;
19489 }
19490 }
19491
19492 if (action != SKIP)
19493 {
19494 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19495 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19496 rtvec vec;
19497
19498 if (action == SWAP_CONV)
19499 {
19500 src = copy_rtx (src);
19501 XEXP (src, 0) = op1;
19502 XEXP (src, 1) = op0;
19503 pat = gen_rtx_SET (dst, src);
19504 vec = gen_rtvec (2, pat, clobber);
19505 }
19506 else /* action == CONV */
19507 vec = gen_rtvec (2, pat, clobber);
19508
19509 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19510 INSN_CODE (insn) = -1;
19511 }
19512 }
19513
19514 if (NONDEBUG_INSN_P (insn))
19515 df_simulate_one_insn_backwards (bb, insn, &live);
19516 }
19517 }
19518
19519 CLEAR_REG_SET (&live);
19520 }
19521
19522 /* GCC puts the pool in the wrong place for ARM, since pc-relative
19523 loads can only reach a limited distance either side of the pc. We do
19524 some special munging to move the constant pool values to the correct
19525 point in the code. */
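/* For example, a load of an arbitrary 32-bit constant is emitted as a
   pc-relative "ldr rN, .LCx", with .LCx placed in a nearby minipool; such
   a literal load only reaches roughly +/-4KB in ARM state and about 1KB
   (forwards only) for the 16-bit Thumb encoding, so pools must be dropped
   into the instruction stream close to their uses, branching around them
   where necessary.  */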
19526 static void
19527 arm_reorg (void)
19528 {
19529 rtx_insn *insn;
19530 HOST_WIDE_INT address = 0;
19531 Mfix * fix;
19532
19533 if (use_cmse)
19534 cmse_nonsecure_call_inline_register_clear ();
19535
19536 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19537 if (cfun->is_thunk)
19538 ;
19539 else if (TARGET_THUMB1)
19540 thumb1_reorg ();
19541 else if (TARGET_THUMB2)
19542 thumb2_reorg ();
19543
19544 /* Ensure all insns that must be split have been split at this point.
19545 Otherwise, the pool placement code below may compute incorrect
19546 insn lengths. Note that when optimizing, all insns have already
19547 been split at this point. */
19548 if (!optimize)
19549 split_all_insns_noflow ();
19550
19551 /* When literal pools are disabled it should no longer be necessary to
19552 create any; make sure we do not even attempt to create one. */
19553 if (arm_disable_literal_pool)
19554 return;
19555
19556 minipool_fix_head = minipool_fix_tail = NULL;
19557
19558 /* The first insn must always be a note, or the code below won't
19559 scan it properly. */
19560 insn = get_insns ();
19561 gcc_assert (NOTE_P (insn));
19562 minipool_pad = 0;
19563
19564 /* Scan all the insns and record the operands that will need fixing. */
19565 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19566 {
19567 if (BARRIER_P (insn))
19568 push_minipool_barrier (insn, address);
19569 else if (INSN_P (insn))
19570 {
19571 rtx_jump_table_data *table;
19572
19573 note_invalid_constants (insn, address, true);
19574 address += get_attr_length (insn);
19575
19576 /* If the insn is a vector jump, add the size of the table
19577 and skip the table. */
19578 if (tablejump_p (insn, NULL, &table))
19579 {
19580 address += get_jump_table_size (table);
19581 insn = table;
19582 }
19583 }
19584 else if (LABEL_P (insn))
19585 /* Add the worst-case padding due to alignment. We don't add
19586 the _current_ padding because the minipool insertions
19587 themselves might change it. */
19588 address += get_label_padding (insn);
19589 }
19590
19591 fix = minipool_fix_head;
19592
19593 /* Now scan the fixups and perform the required changes. */
19594 while (fix)
19595 {
19596 Mfix * ftmp;
19597 Mfix * fdel;
19598 Mfix * last_added_fix;
19599 Mfix * last_barrier = NULL;
19600 Mfix * this_fix;
19601
19602 /* Skip any further barriers before the next fix. */
19603 while (fix && BARRIER_P (fix->insn))
19604 fix = fix->next;
19605
19606 /* No more fixes. */
19607 if (fix == NULL)
19608 break;
19609
19610 last_added_fix = NULL;
19611
19612 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19613 {
19614 if (BARRIER_P (ftmp->insn))
19615 {
19616 if (ftmp->address >= minipool_vector_head->max_address)
19617 break;
19618
19619 last_barrier = ftmp;
19620 }
19621 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19622 break;
19623
19624 last_added_fix = ftmp; /* Keep track of the last fix added. */
19625 }
19626
19627 /* If we found a barrier, drop back to that; any fixes that we
19628 could have reached but come after the barrier will now go in
19629 the next mini-pool. */
19630 if (last_barrier != NULL)
19631 {
19632 /* Reduce the refcount for those fixes that won't go into this
19633 pool after all. */
19634 for (fdel = last_barrier->next;
19635 fdel && fdel != ftmp;
19636 fdel = fdel->next)
19637 {
19638 fdel->minipool->refcount--;
19639 fdel->minipool = NULL;
19640 }
19641
19642 ftmp = last_barrier;
19643 }
19644 else
19645 {
19646 /* ftmp is the first fix that we can't fit into this pool and
19647 there are no natural barriers that we could use. Insert a
19648 new barrier in the code somewhere between the previous
19649 fix and this one, and arrange to jump around it. */
19650 HOST_WIDE_INT max_address;
19651
19652 /* The last item on the list of fixes must be a barrier, so
19653 we can never run off the end of the list of fixes without
19654 last_barrier being set. */
19655 gcc_assert (ftmp);
19656
19657 max_address = minipool_vector_head->max_address;
19658 /* Check that there isn't another fix that is in range that
19659 we couldn't fit into this pool because the pool was
19660 already too large: we need to put the pool before such an
19661 instruction. The pool itself may come just after the
19662 fix because create_fix_barrier also allows space for a
19663 jump instruction. */
19664 if (ftmp->address < max_address)
19665 max_address = ftmp->address + 1;
19666
19667 last_barrier = create_fix_barrier (last_added_fix, max_address);
19668 }
19669
19670 assign_minipool_offsets (last_barrier);
19671
19672 while (ftmp)
19673 {
19674 if (!BARRIER_P (ftmp->insn)
19675 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19676 == NULL))
19677 break;
19678
19679 ftmp = ftmp->next;
19680 }
19681
19682 /* Scan over the fixes we have identified for this pool, fixing them
19683 up and adding the constants to the pool itself. */
19684 for (this_fix = fix; this_fix && ftmp != this_fix;
19685 this_fix = this_fix->next)
19686 if (!BARRIER_P (this_fix->insn))
19687 {
19688 rtx addr
19689 = plus_constant (Pmode,
19690 gen_rtx_LABEL_REF (VOIDmode,
19691 minipool_vector_label),
19692 this_fix->minipool->offset);
19693 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19694 }
19695
19696 dump_minipool (last_barrier->insn);
19697 fix = ftmp;
19698 }
19699
19700 /* From now on we must synthesize any constants that we can't handle
19701 directly. This can happen if the RTL gets split during final
19702 instruction generation. */
19703 cfun->machine->after_arm_reorg = 1;
19704
19705 /* Free the minipool memory. */
19706 obstack_free (&minipool_obstack, minipool_startobj);
19707 }
19708 \f
19709 /* Routines to output assembly language. */
19710
19711 /* Return string representation of passed in real value. */
19712 static const char *
19713 fp_const_from_val (REAL_VALUE_TYPE *r)
19714 {
19715 if (!fp_consts_inited)
19716 init_fp_table ();
19717
19718 gcc_assert (real_equal (r, &value_fp0));
19719 return "0";
19720 }
19721
19722 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19723 OPERANDS[1] is the base register, RETURN_PC is true iff the return
19724 insn is in the list, and UPDATE is true iff the list contains an
19725 explicit update of the base register. */
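/* For example, popping {r4, r5, pc} through SP with writeback is printed
   as "pop {r4, r5, pc}", whereas the same operation when returning from an
   interrupt handler is printed as "ldmfd sp!, {r4, r5, pc}^", where the
   trailing "^" restores SPSR into CPSR.  */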
19726 void
19727 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19728 bool update)
19729 {
19730 int i;
19731 char pattern[100];
19732 int offset;
19733 const char *conditional;
19734 int num_saves = XVECLEN (operands[0], 0);
19735 unsigned int regno;
19736 unsigned int regno_base = REGNO (operands[1]);
19737 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19738
19739 offset = 0;
19740 offset += update ? 1 : 0;
19741 offset += return_pc ? 1 : 0;
19742
19743 /* Is the base register in the list? */
19744 for (i = offset; i < num_saves; i++)
19745 {
19746 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19747 /* If SP is in the list, then the base register must be SP. */
19748 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19749 /* If base register is in the list, there must be no explicit update. */
19750 if (regno == regno_base)
19751 gcc_assert (!update);
19752 }
19753
19754 conditional = reverse ? "%?%D0" : "%?%d0";
19755 /* Can't use POP if returning from an interrupt. */
19756 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19757 sprintf (pattern, "pop%s\t{", conditional);
19758 else
19759 {
19760 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19761 It's just a convention; their semantics are identical. */
19762 if (regno_base == SP_REGNUM)
19763 sprintf (pattern, "ldmfd%s\t", conditional);
19764 else if (update)
19765 sprintf (pattern, "ldmia%s\t", conditional);
19766 else
19767 sprintf (pattern, "ldm%s\t", conditional);
19768
19769 strcat (pattern, reg_names[regno_base]);
19770 if (update)
19771 strcat (pattern, "!, {");
19772 else
19773 strcat (pattern, ", {");
19774 }
19775
19776 /* Output the first destination register. */
19777 strcat (pattern,
19778 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19779
19780 /* Output the rest of the destination registers. */
19781 for (i = offset + 1; i < num_saves; i++)
19782 {
19783 strcat (pattern, ", ");
19784 strcat (pattern,
19785 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19786 }
19787
19788 strcat (pattern, "}");
19789
19790 if (interrupt_p && return_pc)
19791 strcat (pattern, "^");
19792
19793 output_asm_insn (pattern, &cond);
19794 }
19795
19796
19797 /* Output the assembly for a store multiple. */
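/* For example, a store of two consecutive double registers starting at d8
   through the stack pointer comes out as something like "vpush.64 {d8, d9}",
   while the same store through another base register uses the
   "vstmdb rN!, {...}" form.  */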
19798
19799 const char *
19800 vfp_output_vstmd (rtx * operands)
19801 {
19802 char pattern[100];
19803 int p;
19804 int base;
19805 int i;
19806 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19807 ? XEXP (operands[0], 0)
19808 : XEXP (XEXP (operands[0], 0), 0);
19809 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19810
19811 if (push_p)
19812 strcpy (pattern, "vpush%?.64\t{%P1");
19813 else
19814 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19815
19816 p = strlen (pattern);
19817
19818 gcc_assert (REG_P (operands[1]));
19819
19820 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19821 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19822 {
19823 p += sprintf (&pattern[p], ", d%d", base + i);
19824 }
19825 strcpy (&pattern[p], "}");
19826
19827 output_asm_insn (pattern, operands);
19828 return "";
19829 }
19830
19831
19832 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19833 number of bytes pushed. */
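/* For example, saving d8-d15 is a single call with COUNT == 8 that returns
   64; the attached REG_FRAME_RELATED_EXPR note describes the stack
   adjustment and each individual double-word store so that the DWARF
   unwinder sees the effect of the combined push.  */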
19834
19835 static int
19836 vfp_emit_fstmd (int base_reg, int count)
19837 {
19838 rtx par;
19839 rtx dwarf;
19840 rtx tmp, reg;
19841 int i;
19842
19843 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19844 register pairs are stored by a store multiple insn. We avoid this
19845 by pushing an extra pair. */
19846 if (count == 2 && !arm_arch6)
19847 {
19848 if (base_reg == LAST_VFP_REGNUM - 3)
19849 base_reg -= 2;
19850 count++;
19851 }
19852
19853 /* FSTMD may not store more than 16 doubleword registers at once. Split
19854 larger stores into multiple parts (up to a maximum of two, in
19855 practice). */
19856 if (count > 16)
19857 {
19858 int saved;
19859 /* NOTE: base_reg is an internal register number, so each D register
19860 counts as 2. */
19861 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19862 saved += vfp_emit_fstmd (base_reg, 16);
19863 return saved;
19864 }
19865
19866 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19867 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19868
19869 reg = gen_rtx_REG (DFmode, base_reg);
19870 base_reg += 2;
19871
19872 XVECEXP (par, 0, 0)
19873 = gen_rtx_SET (gen_frame_mem
19874 (BLKmode,
19875 gen_rtx_PRE_MODIFY (Pmode,
19876 stack_pointer_rtx,
19877 plus_constant
19878 (Pmode, stack_pointer_rtx,
19879 - (count * 8)))
19880 ),
19881 gen_rtx_UNSPEC (BLKmode,
19882 gen_rtvec (1, reg),
19883 UNSPEC_PUSH_MULT));
19884
19885 tmp = gen_rtx_SET (stack_pointer_rtx,
19886 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19887 RTX_FRAME_RELATED_P (tmp) = 1;
19888 XVECEXP (dwarf, 0, 0) = tmp;
19889
19890 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19891 RTX_FRAME_RELATED_P (tmp) = 1;
19892 XVECEXP (dwarf, 0, 1) = tmp;
19893
19894 for (i = 1; i < count; i++)
19895 {
19896 reg = gen_rtx_REG (DFmode, base_reg);
19897 base_reg += 2;
19898 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19899
19900 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19901 plus_constant (Pmode,
19902 stack_pointer_rtx,
19903 i * 8)),
19904 reg);
19905 RTX_FRAME_RELATED_P (tmp) = 1;
19906 XVECEXP (dwarf, 0, i + 1) = tmp;
19907 }
19908
19909 par = emit_insn (par);
19910 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19911 RTX_FRAME_RELATED_P (par) = 1;
19912
19913 return count * 8;
19914 }
19915
19916 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
19917 has the cmse_nonsecure_call attribute; return false otherwise. */
19918
19919 bool
19920 detect_cmse_nonsecure_call (tree addr)
19921 {
19922 if (!addr)
19923 return false;
19924
19925 tree fntype = TREE_TYPE (addr);
19926 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19927 TYPE_ATTRIBUTES (fntype)))
19928 return true;
19929 return false;
19930 }
19931
19932
19933 /* Emit a call instruction with pattern PAT. ADDR is the address of
19934 the call target. */
19935
19936 void
19937 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19938 {
19939 rtx insn;
19940
19941 insn = emit_call_insn (pat);
19942
19943 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19944 If the call might use such an entry, add a use of the PIC register
19945 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19946 if (TARGET_VXWORKS_RTP
19947 && flag_pic
19948 && !sibcall
19949 && SYMBOL_REF_P (addr)
19950 && (SYMBOL_REF_DECL (addr)
19951 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19952 : !SYMBOL_REF_LOCAL_P (addr)))
19953 {
19954 require_pic_register (NULL_RTX, false /*compute_now*/);
19955 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19956 }
19957
19958 if (TARGET_FDPIC)
19959 {
19960 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19961 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19962 }
19963
19964 if (TARGET_AAPCS_BASED)
19965 {
19966 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19967 linker. We need to add an IP clobber to allow setting
19968 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19969 is not needed since it's a fixed register. */
19970 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19971 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19972 }
19973 }
19974
19975 /* Output a 'call' insn. */
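/* For example, an indirect call through r2 on an ARMv4T or interworking
   target is printed as "mov lr, pc" followed by "bx r2"; on older cores
   without bx the final instruction is "mov pc, r2" instead.  */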
19976 const char *
19977 output_call (rtx *operands)
19978 {
19979 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19980
19981 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19982 if (REGNO (operands[0]) == LR_REGNUM)
19983 {
19984 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19985 output_asm_insn ("mov%?\t%0, %|lr", operands);
19986 }
19987
19988 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19989
19990 if (TARGET_INTERWORK || arm_arch4t)
19991 output_asm_insn ("bx%?\t%0", operands);
19992 else
19993 output_asm_insn ("mov%?\t%|pc, %0", operands);
19994
19995 return "";
19996 }
19997
19998 /* Output a move from ARM registers to ARM registers of a long double.
19999 OPERANDS[0] is the destination.
20000 OPERANDS[1] is the source. */
20001 const char *
20002 output_mov_long_double_arm_from_arm (rtx *operands)
20003 {
20004 /* We have to be careful here because the two might overlap. */
20005 int dest_start = REGNO (operands[0]);
20006 int src_start = REGNO (operands[1]);
20007 rtx ops[2];
20008 int i;
20009
20010 if (dest_start < src_start)
20011 {
20012 for (i = 0; i < 3; i++)
20013 {
20014 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20015 ops[1] = gen_rtx_REG (SImode, src_start + i);
20016 output_asm_insn ("mov%?\t%0, %1", ops);
20017 }
20018 }
20019 else
20020 {
20021 for (i = 2; i >= 0; i--)
20022 {
20023 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20024 ops[1] = gen_rtx_REG (SImode, src_start + i);
20025 output_asm_insn ("mov%?\t%0, %1", ops);
20026 }
20027 }
20028
20029 return "";
20030 }
20031
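/* Load SRC into DEST using a pair of insns.  A constant is split into a
   SET of the low 16 bits followed, when the upper half is nonzero, by a
   ZERO_EXTRACT SET of the high 16 bits (typically matching the movw/movt
   patterns); anything else is emitted as a HIGH/LO_SUM pair.  A REG_EQUAL
   note on the last insn records the original value where appropriate.  */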
20032 void
20033 arm_emit_movpair (rtx dest, rtx src)
20034 {
20035 /* If the src is an immediate, simplify it. */
20036 if (CONST_INT_P (src))
20037 {
20038 HOST_WIDE_INT val = INTVAL (src);
20039 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
20040 if ((val >> 16) & 0x0000ffff)
20041 {
20042 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
20043 GEN_INT (16)),
20044 GEN_INT ((val >> 16) & 0x0000ffff));
20045 rtx_insn *insn = get_last_insn ();
20046 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20047 }
20048 return;
20049 }
20050 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
20051 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
20052 rtx_insn *insn = get_last_insn ();
20053 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20054 }
20055
20056 /* Output a move between double words. It must be REG<-MEM
20057 or MEM<-REG. */
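/* For example, a DImode load of {r0, r1} from [r2] is normally printed as
   "ldrd r0, [r2]" when LDRD is available (and, in ARM state, r0 is even),
   and as "ldmia r2, {r0, r1}" otherwise; the more complex addressing modes
   below may need a pair of single-word loads or stores instead.  */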
20058 const char *
20059 output_move_double (rtx *operands, bool emit, int *count)
20060 {
20061 enum rtx_code code0 = GET_CODE (operands[0]);
20062 enum rtx_code code1 = GET_CODE (operands[1]);
20063 rtx otherops[3];
20064 if (count)
20065 *count = 1;
20066
20067 /* The only case when this might happen is when
20068 you are looking at the length of a DImode instruction
20069 that has an invalid constant in it. */
20070 if (code0 == REG && code1 != MEM)
20071 {
20072 gcc_assert (!emit);
20073 *count = 2;
20074 return "";
20075 }
20076
20077 if (code0 == REG)
20078 {
20079 unsigned int reg0 = REGNO (operands[0]);
20080 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20081
20082 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20083
20084 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20085
20086 switch (GET_CODE (XEXP (operands[1], 0)))
20087 {
20088 case REG:
20089
20090 if (emit)
20091 {
20092 if (can_ldrd
20093 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
20094 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20095 else
20096 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20097 }
20098 break;
20099
20100 case PRE_INC:
20101 gcc_assert (can_ldrd);
20102 if (emit)
20103 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20104 break;
20105
20106 case PRE_DEC:
20107 if (emit)
20108 {
20109 if (can_ldrd)
20110 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20111 else
20112 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20113 }
20114 break;
20115
20116 case POST_INC:
20117 if (emit)
20118 {
20119 if (can_ldrd)
20120 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20121 else
20122 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20123 }
20124 break;
20125
20126 case POST_DEC:
20127 gcc_assert (can_ldrd);
20128 if (emit)
20129 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20130 break;
20131
20132 case PRE_MODIFY:
20133 case POST_MODIFY:
20134 /* Autoincrement addressing modes should never have overlapping
20135 base and destination registers, and overlapping index registers
20136 are already prohibited, so this doesn't need to worry about
20137 fix_cm3_ldrd. */
20138 otherops[0] = operands[0];
20139 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20140 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20141
20142 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20143 {
20144 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20145 {
20146 /* Registers overlap so split out the increment. */
20147 if (emit)
20148 {
20149 gcc_assert (can_ldrd);
20150 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20151 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20152 }
20153 if (count)
20154 *count = 2;
20155 }
20156 else
20157 {
20158 /* Use a single insn if we can.
20159 FIXME: IWMMXT allows offsets larger than ldrd can
20160 handle, fix these up with a pair of ldr. */
20161 if (can_ldrd
20162 && (TARGET_THUMB2
20163 || !CONST_INT_P (otherops[2])
20164 || (INTVAL (otherops[2]) > -256
20165 && INTVAL (otherops[2]) < 256)))
20166 {
20167 if (emit)
20168 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20169 }
20170 else
20171 {
20172 if (emit)
20173 {
20174 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20175 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20176 }
20177 if (count)
20178 *count = 2;
20179
20180 }
20181 }
20182 }
20183 else
20184 {
20185 /* Use a single insn if we can.
20186 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20187 fix these up with a pair of ldr. */
20188 if (can_ldrd
20189 && (TARGET_THUMB2
20190 || !CONST_INT_P (otherops[2])
20191 || (INTVAL (otherops[2]) > -256
20192 && INTVAL (otherops[2]) < 256)))
20193 {
20194 if (emit)
20195 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20196 }
20197 else
20198 {
20199 if (emit)
20200 {
20201 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20202 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20203 }
20204 if (count)
20205 *count = 2;
20206 }
20207 }
20208 break;
20209
20210 case LABEL_REF:
20211 case CONST:
20212 /* We might be able to use ldrd %0, %1 here. However, the range is
20213 different from that of ldr/adr, and it is broken on some ARMv7-M
20214 implementations. */
20215 /* Use the second register of the pair to avoid problematic
20216 overlap. */
20217 otherops[1] = operands[1];
20218 if (emit)
20219 output_asm_insn ("adr%?\t%0, %1", otherops);
20220 operands[1] = otherops[0];
20221 if (emit)
20222 {
20223 if (can_ldrd)
20224 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20225 else
20226 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20227 }
20228
20229 if (count)
20230 *count = 2;
20231 break;
20232
20233 /* ??? This needs checking for thumb2. */
20234 default:
20235 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20236 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20237 {
20238 otherops[0] = operands[0];
20239 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20240 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20241
20242 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20243 {
20244 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20245 {
20246 switch ((int) INTVAL (otherops[2]))
20247 {
20248 case -8:
20249 if (emit)
20250 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20251 return "";
20252 case -4:
20253 if (TARGET_THUMB2)
20254 break;
20255 if (emit)
20256 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20257 return "";
20258 case 4:
20259 if (TARGET_THUMB2)
20260 break;
20261 if (emit)
20262 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20263 return "";
20264 }
20265 }
20266 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
20267 operands[1] = otherops[0];
20268 if (can_ldrd
20269 && (REG_P (otherops[2])
20270 || TARGET_THUMB2
20271 || (CONST_INT_P (otherops[2])
20272 && INTVAL (otherops[2]) > -256
20273 && INTVAL (otherops[2]) < 256)))
20274 {
20275 if (reg_overlap_mentioned_p (operands[0],
20276 otherops[2]))
20277 {
20278 /* Swap base and index registers over to
20279 avoid a conflict. */
20280 std::swap (otherops[1], otherops[2]);
20281 }
20282 /* If both registers conflict, it will usually
20283 have been fixed by a splitter. */
20284 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20285 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20286 {
20287 if (emit)
20288 {
20289 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20290 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20291 }
20292 if (count)
20293 *count = 2;
20294 }
20295 else
20296 {
20297 otherops[0] = operands[0];
20298 if (emit)
20299 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20300 }
20301 return "";
20302 }
20303
20304 if (CONST_INT_P (otherops[2]))
20305 {
20306 if (emit)
20307 {
20308 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20309 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20310 else
20311 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20312 }
20313 }
20314 else
20315 {
20316 if (emit)
20317 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20318 }
20319 }
20320 else
20321 {
20322 if (emit)
20323 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20324 }
20325
20326 if (count)
20327 *count = 2;
20328
20329 if (can_ldrd)
20330 return "ldrd%?\t%0, [%1]";
20331
20332 return "ldmia%?\t%1, %M0";
20333 }
20334 else
20335 {
20336 otherops[1] = adjust_address (operands[1], SImode, 4);
20337 /* Take care of overlapping base/data reg. */
20338 if (reg_mentioned_p (operands[0], operands[1]))
20339 {
20340 if (emit)
20341 {
20342 output_asm_insn ("ldr%?\t%0, %1", otherops);
20343 output_asm_insn ("ldr%?\t%0, %1", operands);
20344 }
20345 if (count)
20346 *count = 2;
20347
20348 }
20349 else
20350 {
20351 if (emit)
20352 {
20353 output_asm_insn ("ldr%?\t%0, %1", operands);
20354 output_asm_insn ("ldr%?\t%0, %1", otherops);
20355 }
20356 if (count)
20357 *count = 2;
20358 }
20359 }
20360 }
20361 }
20362 else
20363 {
20364 /* Constraints should ensure this. */
20365 gcc_assert (code0 == MEM && code1 == REG);
20366 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20367 || (TARGET_ARM && TARGET_LDRD));
20368
20369 /* For TARGET_ARM the first source register of an STRD
20370 must be even. This is usually the case for double-word
20371 values but user assembly constraints can force an odd
20372 starting register. */
20373 bool allow_strd = TARGET_LDRD
20374 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20375 switch (GET_CODE (XEXP (operands[0], 0)))
20376 {
20377 case REG:
20378 if (emit)
20379 {
20380 if (allow_strd)
20381 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20382 else
20383 output_asm_insn ("stm%?\t%m0, %M1", operands);
20384 }
20385 break;
20386
20387 case PRE_INC:
20388 gcc_assert (allow_strd);
20389 if (emit)
20390 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20391 break;
20392
20393 case PRE_DEC:
20394 if (emit)
20395 {
20396 if (allow_strd)
20397 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20398 else
20399 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20400 }
20401 break;
20402
20403 case POST_INC:
20404 if (emit)
20405 {
20406 if (allow_strd)
20407 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20408 else
20409 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20410 }
20411 break;
20412
20413 case POST_DEC:
20414 gcc_assert (allow_strd);
20415 if (emit)
20416 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20417 break;
20418
20419 case PRE_MODIFY:
20420 case POST_MODIFY:
20421 otherops[0] = operands[1];
20422 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20423 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20424
20425 /* IWMMXT allows offsets larger than strd can handle,
20426 fix these up with a pair of str. */
20427 if (!TARGET_THUMB2
20428 && CONST_INT_P (otherops[2])
20429 && (INTVAL (otherops[2]) <= -256
20430 || INTVAL (otherops[2]) >= 256))
20431 {
20432 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20433 {
20434 if (emit)
20435 {
20436 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20437 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20438 }
20439 if (count)
20440 *count = 2;
20441 }
20442 else
20443 {
20444 if (emit)
20445 {
20446 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20447 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20448 }
20449 if (count)
20450 *count = 2;
20451 }
20452 }
20453 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20454 {
20455 if (emit)
20456 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20457 }
20458 else
20459 {
20460 if (emit)
20461 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20462 }
20463 break;
20464
20465 case PLUS:
20466 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20467 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20468 {
20469 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20470 {
20471 case -8:
20472 if (emit)
20473 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20474 return "";
20475
20476 case -4:
20477 if (TARGET_THUMB2)
20478 break;
20479 if (emit)
20480 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20481 return "";
20482
20483 case 4:
20484 if (TARGET_THUMB2)
20485 break;
20486 if (emit)
20487 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20488 return "";
20489 }
20490 }
20491 if (allow_strd
20492 && (REG_P (otherops[2])
20493 || TARGET_THUMB2
20494 || (CONST_INT_P (otherops[2])
20495 && INTVAL (otherops[2]) > -256
20496 && INTVAL (otherops[2]) < 256)))
20497 {
20498 otherops[0] = operands[1];
20499 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20500 if (emit)
20501 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20502 return "";
20503 }
20504 /* Fall through */
20505
20506 default:
20507 otherops[0] = adjust_address (operands[0], SImode, 4);
20508 otherops[1] = operands[1];
20509 if (emit)
20510 {
20511 output_asm_insn ("str%?\t%1, %0", operands);
20512 output_asm_insn ("str%?\t%H1, %0", otherops);
20513 }
20514 if (count)
20515 *count = 2;
20516 }
20517 }
20518
20519 return "";
20520 }
20521
20522 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20523 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20524
20525 const char *
20526 output_move_quad (rtx *operands)
20527 {
20528 if (REG_P (operands[0]))
20529 {
20530 /* Load, or reg->reg move. */
20531
20532 if (MEM_P (operands[1]))
20533 {
20534 switch (GET_CODE (XEXP (operands[1], 0)))
20535 {
20536 case REG:
20537 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20538 break;
20539
20540 case LABEL_REF:
20541 case CONST:
20542 output_asm_insn ("adr%?\t%0, %1", operands);
20543 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20544 break;
20545
20546 default:
20547 gcc_unreachable ();
20548 }
20549 }
20550 else
20551 {
20552 rtx ops[2];
20553 int dest, src, i;
20554
20555 gcc_assert (REG_P (operands[1]));
20556
20557 dest = REGNO (operands[0]);
20558 src = REGNO (operands[1]);
20559
20560 /* This seems pretty dumb, but hopefully GCC won't try to do it
20561 very often. */
20562 if (dest < src)
20563 for (i = 0; i < 4; i++)
20564 {
20565 ops[0] = gen_rtx_REG (SImode, dest + i);
20566 ops[1] = gen_rtx_REG (SImode, src + i);
20567 output_asm_insn ("mov%?\t%0, %1", ops);
20568 }
20569 else
20570 for (i = 3; i >= 0; i--)
20571 {
20572 ops[0] = gen_rtx_REG (SImode, dest + i);
20573 ops[1] = gen_rtx_REG (SImode, src + i);
20574 output_asm_insn ("mov%?\t%0, %1", ops);
20575 }
20576 }
20577 }
20578 else
20579 {
20580 gcc_assert (MEM_P (operands[0]));
20581 gcc_assert (REG_P (operands[1]));
20582 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20583
20584 switch (GET_CODE (XEXP (operands[0], 0)))
20585 {
20586 case REG:
20587 output_asm_insn ("stm%?\t%m0, %M1", operands);
20588 break;
20589
20590 default:
20591 gcc_unreachable ();
20592 }
20593 }
20594
20595 return "";
20596 }
20597
20598 /* Output a VFP load or store instruction. */
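/* For example, an SFmode load from [r3] is printed as "vldr.32 sN, [r3]"
   and a DFmode load as "vldr.64 dN, [r3]", while the pre-decrement and
   post-increment address forms use the vldmdb/vldmia (or vstmdb/vstmia)
   encodings with base-register writeback.  */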
20599
20600 const char *
20601 output_move_vfp (rtx *operands)
20602 {
20603 rtx reg, mem, addr, ops[2];
20604 int load = REG_P (operands[0]);
20605 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20606 int sp = (!TARGET_VFP_FP16INST
20607 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20608 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20609 const char *templ;
20610 char buff[50];
20611 machine_mode mode;
20612
20613 reg = operands[!load];
20614 mem = operands[load];
20615
20616 mode = GET_MODE (reg);
20617
20618 gcc_assert (REG_P (reg));
20619 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20620 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20621 || mode == SFmode
20622 || mode == DFmode
20623 || mode == HImode
20624 || mode == SImode
20625 || mode == DImode
20626 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20627 gcc_assert (MEM_P (mem));
20628
20629 addr = XEXP (mem, 0);
20630
20631 switch (GET_CODE (addr))
20632 {
20633 case PRE_DEC:
20634 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20635 ops[0] = XEXP (addr, 0);
20636 ops[1] = reg;
20637 break;
20638
20639 case POST_INC:
20640 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20641 ops[0] = XEXP (addr, 0);
20642 ops[1] = reg;
20643 break;
20644
20645 default:
20646 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20647 ops[0] = reg;
20648 ops[1] = mem;
20649 break;
20650 }
20651
20652 sprintf (buff, templ,
20653 load ? "ld" : "st",
20654 dp ? "64" : sp ? "32" : "16",
20655 dp ? "P" : "",
20656 integer_p ? "\t%@ int" : "");
20657 output_asm_insn (buff, ops);
20658
20659 return "";
20660 }
20661
20662 /* Output a Neon double-word or quad-word load or store, or a load
20663 or store for larger structure modes.
20664
20665 WARNING: The ordering of elements is weird in big-endian mode,
20666 because the EABI requires that vectors stored in memory appear
20667 as though they were stored by a VSTM instruction.
20668 GCC RTL defines element ordering based on in-memory order.
20669 This can be different from the architectural ordering of elements
20670 within a NEON register. The intrinsics defined in arm_neon.h use the
20671 NEON register element ordering, not the GCC RTL element ordering.
20672
20673 For example, the in-memory ordering of a big-endian quadword
20674 vector with 16-bit elements when stored from register pair {d0,d1}
20675 will be (lowest address first, d0[N] is NEON register element N):
20676
20677 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20678
20679 When necessary, quadword registers (dN, dN+1) are moved to ARM
20680 registers from rN in the order:
20681
20682 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20683
20684 So that STM/LDM can be used on vectors in ARM registers, and the
20685 same memory layout will result as if VSTM/VLDM were used.
20686
20687 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20688 possible, which allows use of appropriate alignment tags.
20689 Note that the choice of "64" is independent of the actual vector
20690 element size; this size simply ensures that the behavior is
20691 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20692
20693 Due to limitations of those instructions, use of VST1.64/VLD1.64
20694 is not possible if:
20695 - the address contains PRE_DEC, or
20696 - the mode refers to more than 4 double-word registers
20697
20698 In those cases, it would be possible to replace VSTM/VLDM by a
20699 sequence of instructions; this is not currently implemented since
20700 this is not certain to actually improve performance. */
20701
20702 const char *
20703 output_move_neon (rtx *operands)
20704 {
20705 rtx reg, mem, addr, ops[2];
20706 int regno, nregs, load = REG_P (operands[0]);
20707 const char *templ;
20708 char buff[50];
20709 machine_mode mode;
20710
20711 reg = operands[!load];
20712 mem = operands[load];
20713
20714 mode = GET_MODE (reg);
20715
20716 gcc_assert (REG_P (reg));
20717 regno = REGNO (reg);
20718 nregs = REG_NREGS (reg) / 2;
20719 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20720 || NEON_REGNO_OK_FOR_QUAD (regno));
20721 gcc_assert (VALID_NEON_DREG_MODE (mode)
20722 || VALID_NEON_QREG_MODE (mode)
20723 || VALID_NEON_STRUCT_MODE (mode));
20724 gcc_assert (MEM_P (mem));
20725
20726 addr = XEXP (mem, 0);
20727
20728 /* Strip off const from addresses like (const (plus (...))). */
20729 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20730 addr = XEXP (addr, 0);
20731
20732 switch (GET_CODE (addr))
20733 {
20734 case POST_INC:
20735 /* We have to use vldm / vstm for too-large modes. */
20736 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20737 {
20738 templ = "v%smia%%?\t%%0!, %%h1";
20739 ops[0] = XEXP (addr, 0);
20740 }
20741 else
20742 {
20743 templ = "v%s1.64\t%%h1, %%A0";
20744 ops[0] = mem;
20745 }
20746 ops[1] = reg;
20747 break;
20748
20749 case PRE_DEC:
20750 /* We have to use vldm / vstm in this case, since there is no
20751 pre-decrement form of the vld1 / vst1 instructions. */
20752 templ = "v%smdb%%?\t%%0!, %%h1";
20753 ops[0] = XEXP (addr, 0);
20754 ops[1] = reg;
20755 break;
20756
20757 case POST_MODIFY:
20758 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20759 gcc_unreachable ();
20760
20761 case REG:
20762 /* We have to use vldm / vstm for too-large modes. */
20763 if (nregs > 1)
20764 {
20765 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20766 templ = "v%smia%%?\t%%m0, %%h1";
20767 else
20768 templ = "v%s1.64\t%%h1, %%A0";
20769
20770 ops[0] = mem;
20771 ops[1] = reg;
20772 break;
20773 }
20774 /* Fall through. */
20775 case PLUS:
20776 if (GET_CODE (addr) == PLUS)
20777 addr = XEXP (addr, 0);
20778 /* Fall through. */
20779 case LABEL_REF:
20780 {
20781 int i;
20782 int overlap = -1;
20783 for (i = 0; i < nregs; i++)
20784 {
20785 /* We're only using DImode here because it's a convenient
20786 size. */
20787 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20788 ops[1] = adjust_address (mem, DImode, 8 * i);
20789 if (reg_overlap_mentioned_p (ops[0], mem))
20790 {
20791 gcc_assert (overlap == -1);
20792 overlap = i;
20793 }
20794 else
20795 {
20796 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20797 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20798 else
20799 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20800 output_asm_insn (buff, ops);
20801 }
20802 }
20803 if (overlap != -1)
20804 {
20805 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20806 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20807 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20808 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20809 else
20810 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20811 output_asm_insn (buff, ops);
20812 }
20813
20814 return "";
20815 }
20816
20817 default:
20818 gcc_unreachable ();
20819 }
20820
20821 sprintf (buff, templ, load ? "ld" : "st");
20822 output_asm_insn (buff, ops);
20823
20824 return "";
20825 }
20826
20827 /* Compute and return the length of neon_mov<mode>, where <mode> is
20828 one of VSTRUCT modes: EI, OI, CI or XI. */
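/* For example, a register-to-register move of an OImode value is 8 bytes
   (two instructions) and an XImode move is 16, while a load or store whose
   address is a label or reg+offset takes 4 bytes per D-register pair.  */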
20829 int
20830 arm_attr_length_move_neon (rtx_insn *insn)
20831 {
20832 rtx reg, mem, addr;
20833 int load;
20834 machine_mode mode;
20835
20836 extract_insn_cached (insn);
20837
20838 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20839 {
20840 mode = GET_MODE (recog_data.operand[0]);
20841 switch (mode)
20842 {
20843 case E_EImode:
20844 case E_OImode:
20845 return 8;
20846 case E_CImode:
20847 return 12;
20848 case E_XImode:
20849 return 16;
20850 default:
20851 gcc_unreachable ();
20852 }
20853 }
20854
20855 load = REG_P (recog_data.operand[0]);
20856 reg = recog_data.operand[!load];
20857 mem = recog_data.operand[load];
20858
20859 gcc_assert (MEM_P (mem));
20860
20861 addr = XEXP (mem, 0);
20862
20863 /* Strip off const from addresses like (const (plus (...))). */
20864 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20865 addr = XEXP (addr, 0);
20866
20867 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20868 {
20869 int insns = REG_NREGS (reg) / 2;
20870 return insns * 4;
20871 }
20872 else
20873 return 4;
20874 }
20875
20876 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20877 return zero. */
20878
20879 int
20880 arm_address_offset_is_imm (rtx_insn *insn)
20881 {
20882 rtx mem, addr;
20883
20884 extract_insn_cached (insn);
20885
20886 if (REG_P (recog_data.operand[0]))
20887 return 0;
20888
20889 mem = recog_data.operand[0];
20890
20891 gcc_assert (MEM_P (mem));
20892
20893 addr = XEXP (mem, 0);
20894
20895 if (REG_P (addr)
20896 || (GET_CODE (addr) == PLUS
20897 && REG_P (XEXP (addr, 0))
20898 && CONST_INT_P (XEXP (addr, 1))))
20899 return 1;
20900 else
20901 return 0;
20902 }
20903
20904 /* Output an ADD r, s, #n where n may be too big for one instruction.
20905 If the addend is zero and the source and destination are the same register, output nothing. */
20906 const char *
20907 output_add_immediate (rtx *operands)
20908 {
20909 HOST_WIDE_INT n = INTVAL (operands[2]);
20910
20911 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20912 {
20913 if (n < 0)
20914 output_multi_immediate (operands,
20915 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20916 -n);
20917 else
20918 output_multi_immediate (operands,
20919 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20920 n);
20921 }
20922
20923 return "";
20924 }
20925
20926 /* Output a multiple immediate operation.
20927 OPERANDS is the vector of operands referred to in the output patterns.
20928 INSTR1 is the output pattern to use for the first constant.
20929 INSTR2 is the output pattern to use for subsequent constants.
20930 IMMED_OP is the index of the constant slot in OPERANDS.
20931 N is the constant value. */
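/* For example, adding the constant 0x10001 cannot be done with a single
   ARM immediate, so output_add_immediate splits it into
   "add rD, rS, #1" followed by "add rD, rD, #65536"; each chunk
   N & (255 << i) is an 8-bit value at an even bit position and therefore
   always a valid rotated ARM immediate.  */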
20932 static const char *
20933 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20934 int immed_op, HOST_WIDE_INT n)
20935 {
20936 #if HOST_BITS_PER_WIDE_INT > 32
20937 n &= 0xffffffff;
20938 #endif
20939
20940 if (n == 0)
20941 {
20942 /* Quick and easy output. */
20943 operands[immed_op] = const0_rtx;
20944 output_asm_insn (instr1, operands);
20945 }
20946 else
20947 {
20948 int i;
20949 const char * instr = instr1;
20950
20951 /* Note that n is never zero here (which would give no output). */
20952 for (i = 0; i < 32; i += 2)
20953 {
20954 if (n & (3 << i))
20955 {
20956 operands[immed_op] = GEN_INT (n & (255 << i));
20957 output_asm_insn (instr, operands);
20958 instr = instr2;
20959 i += 6;
20960 }
20961 }
20962 }
20963
20964 return "";
20965 }
20966
20967 /* Return the name of a shifter operation. */
20968 static const char *
20969 arm_shift_nmem (enum rtx_code code)
20970 {
20971 switch (code)
20972 {
20973 case ASHIFT:
20974 return ARM_LSL_NAME;
20975
20976 case ASHIFTRT:
20977 return "asr";
20978
20979 case LSHIFTRT:
20980 return "lsr";
20981
20982 case ROTATERT:
20983 return "ror";
20984
20985 default:
20986 abort ();
20987 }
20988 }
20989
20990 /* Return the appropriate ARM instruction for the operation code.
20991 The returned result should not be overwritten. OP is the rtx of the
20992 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20993 was shifted. */
20994 const char *
20995 arithmetic_instr (rtx op, int shift_first_arg)
20996 {
20997 switch (GET_CODE (op))
20998 {
20999 case PLUS:
21000 return "add";
21001
21002 case MINUS:
21003 return shift_first_arg ? "rsb" : "sub";
21004
21005 case IOR:
21006 return "orr";
21007
21008 case XOR:
21009 return "eor";
21010
21011 case AND:
21012 return "and";
21013
21014 case ASHIFT:
21015 case ASHIFTRT:
21016 case LSHIFTRT:
21017 case ROTATERT:
21018 return arm_shift_nmem (GET_CODE (op));
21019
21020 default:
21021 gcc_unreachable ();
21022 }
21023 }
21024
21025 /* Ensure valid constant shifts and return the appropriate shift mnemonic
21026 for the operation code. The returned result should not be overwritten.
21027 OP is the rtx of the shift.
21028 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
21029 constant shift amount otherwise. */
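/* For example, (ashift x 5) returns the lsl mnemonic with *AMOUNTP set to
   5, (rotate x 10) is converted into a rotate right by 22, and (mult x 8)
   is printed as a left shift by 3.  */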
21030 static const char *
21031 shift_op (rtx op, HOST_WIDE_INT *amountp)
21032 {
21033 const char * mnem;
21034 enum rtx_code code = GET_CODE (op);
21035
21036 switch (code)
21037 {
21038 case ROTATE:
21039 if (!CONST_INT_P (XEXP (op, 1)))
21040 {
21041 output_operand_lossage ("invalid shift operand");
21042 return NULL;
21043 }
21044
21045 code = ROTATERT;
21046 *amountp = 32 - INTVAL (XEXP (op, 1));
21047 mnem = "ror";
21048 break;
21049
21050 case ASHIFT:
21051 case ASHIFTRT:
21052 case LSHIFTRT:
21053 case ROTATERT:
21054 mnem = arm_shift_nmem (code);
21055 if (CONST_INT_P (XEXP (op, 1)))
21056 {
21057 *amountp = INTVAL (XEXP (op, 1));
21058 }
21059 else if (REG_P (XEXP (op, 1)))
21060 {
21061 *amountp = -1;
21062 return mnem;
21063 }
21064 else
21065 {
21066 output_operand_lossage ("invalid shift operand");
21067 return NULL;
21068 }
21069 break;
21070
21071 case MULT:
21072 /* We never have to worry about the amount being other than a
21073 power of 2, since this case can never be reloaded from a reg. */
21074 if (!CONST_INT_P (XEXP (op, 1)))
21075 {
21076 output_operand_lossage ("invalid shift operand");
21077 return NULL;
21078 }
21079
21080 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21081
21082 /* Amount must be a power of two. */
21083 if (*amountp & (*amountp - 1))
21084 {
21085 output_operand_lossage ("invalid shift operand");
21086 return NULL;
21087 }
21088
21089 *amountp = exact_log2 (*amountp);
21090 gcc_assert (IN_RANGE (*amountp, 0, 31));
21091 return ARM_LSL_NAME;
21092
21093 default:
21094 output_operand_lossage ("invalid shift operand");
21095 return NULL;
21096 }
21097
21098 /* This is not 100% correct, but follows from the desire to merge
21099 multiplication by a power of 2 with the recognizer for a
21100 shift. >=32 is not a valid shift for "lsl", so we must try to
21101 output a shift that produces the correct arithmetical result.
21102 Using lsr #32 is identical except that the carry bit
21103 is not set correctly if we set the flags; but we never use the
21104 carry bit from such an operation, so we can ignore that. */
21105 if (code == ROTATERT)
21106 /* Rotate is just modulo 32. */
21107 *amountp &= 31;
21108 else if (*amountp != (*amountp & 31))
21109 {
21110 if (code == ASHIFT)
21111 mnem = "lsr";
21112 *amountp = 32;
21113 }
21114
21115 /* Shifts of 0 are no-ops. */
21116 if (*amountp == 0)
21117 return NULL;
21118
21119 return mnem;
21120 }
21121
21122 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21123 because /bin/as is horribly restrictive. The judgement about
21124 whether each character is 'printable' (and can be output as
21125 is) or not (and must be printed with an octal escape) must be made
21126 with reference to the *host* character set -- the situation is
21127 similar to that discussed in the comments above pp_c_char in
21128 c-pretty-print.cc. */
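/* For example, the bytes 'a', '"', 'b', '\n' are emitted as
   .ascii "a\"b\012"
   and a fresh .ascii directive is started whenever the current one reaches
   MAX_ASCII_LEN characters.  */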
21129
21130 #define MAX_ASCII_LEN 51
21131
21132 void
21133 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21134 {
21135 int i;
21136 int len_so_far = 0;
21137
21138 fputs ("\t.ascii\t\"", stream);
21139
21140 for (i = 0; i < len; i++)
21141 {
21142 int c = p[i];
21143
21144 if (len_so_far >= MAX_ASCII_LEN)
21145 {
21146 fputs ("\"\n\t.ascii\t\"", stream);
21147 len_so_far = 0;
21148 }
21149
21150 if (ISPRINT (c))
21151 {
21152 if (c == '\\' || c == '\"')
21153 {
21154 putc ('\\', stream);
21155 len_so_far++;
21156 }
21157 putc (c, stream);
21158 len_so_far++;
21159 }
21160 else
21161 {
21162 fprintf (stream, "\\%03o", c);
21163 len_so_far += 4;
21164 }
21165 }
21166
21167 fputs ("\"\n", stream);
21168 }
21169 \f
21170
21171 /* Compute the register save mask for registers 0 through 12
21172 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21173
21174 static unsigned long
21175 arm_compute_save_reg0_reg12_mask (void)
21176 {
21177 unsigned long func_type = arm_current_func_type ();
21178 unsigned long save_reg_mask = 0;
21179 unsigned int reg;
21180
21181 if (IS_INTERRUPT (func_type))
21182 {
21183 unsigned int max_reg;
21184 /* Interrupt functions must not corrupt any registers,
21185 even call clobbered ones. If this is a leaf function
21186 we can just examine the registers used by the RTL, but
21187 otherwise we have to assume that whatever function is
21188 called might clobber anything, and so we have to save
21189 all the call-clobbered registers as well. */
21190 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21191 /* FIQ handlers have registers r8 - r12 banked, so
21192 we only need to check r0 - r7; normal ISRs only
21193 bank r14 and r15, so we must check up to r12.
21194 r13 is the stack pointer, which is always preserved,
21195 so we do not need to consider it here. */
21196 max_reg = 7;
21197 else
21198 max_reg = 12;
21199
21200 for (reg = 0; reg <= max_reg; reg++)
21201 if (reg_needs_saving_p (reg))
21202 save_reg_mask |= (1 << reg);
21203
21204 /* Also save the pic base register if necessary. */
21205 if (PIC_REGISTER_MAY_NEED_SAVING
21206 && crtl->uses_pic_offset_table)
21207 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21208 }
21209 else if (IS_VOLATILE (func_type))
21210 {
21211 /* For noreturn functions we historically omitted register saves
21212 altogether. However, this really messes up debugging. As a
21213 compromise save just the frame pointers. Combined with the link
21214 register saved elsewhere this should be sufficient to get
21215 a backtrace. */
21216 if (frame_pointer_needed)
21217 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21218 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21219 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21220 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21221 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21222 }
21223 else
21224 {
21225 /* In the normal case we only need to save those registers
21226 which are call saved and which are used by this function. */
21227 for (reg = 0; reg <= 11; reg++)
21228 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21229 save_reg_mask |= (1 << reg);
21230
21231 /* Handle the frame pointer as a special case. */
21232 if (frame_pointer_needed)
21233 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21234
21235 /* If we aren't loading the PIC register,
21236 don't stack it even though it may be live. */
21237 if (PIC_REGISTER_MAY_NEED_SAVING
21238 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21239 || crtl->uses_pic_offset_table))
21240 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21241
21242 /* The prologue will copy SP into R0, so save it. */
21243 if (IS_STACKALIGN (func_type))
21244 save_reg_mask |= 1;
21245 }
21246
21247 /* Save registers so the exception handler can modify them. */
21248 if (crtl->calls_eh_return)
21249 {
21250 unsigned int i;
21251
21252 for (i = 0; ; i++)
21253 {
21254 reg = EH_RETURN_DATA_REGNO (i);
21255 if (reg == INVALID_REGNUM)
21256 break;
21257 save_reg_mask |= 1 << reg;
21258 }
21259 }
21260
21261 return save_reg_mask;
21262 }
21263
21264 /* Return true if r3 is live at the start of the function. */
21265
21266 static bool
21267 arm_r3_live_at_start_p (void)
21268 {
21269 /* Just look at cfg info, which is still close enough to correct at this
21270 point. This gives false positives for broken functions that might use
21271 uninitialized data that happens to be allocated in r3, but who cares? */
21272 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21273 }
21274
21275 /* Compute the number of bytes used to store the static chain register on the
21276 stack, above the stack frame. We need to know this accurately to get the
21277 alignment of the rest of the stack frame correct. */
21278
21279 static int
21280 arm_compute_static_chain_stack_bytes (void)
21281 {
21282 /* Once the value is updated from the init value of -1, do not
21283 re-compute. */
21284 if (cfun->machine->static_chain_stack_bytes != -1)
21285 return cfun->machine->static_chain_stack_bytes;
21286
21287 /* See the defining assertion in arm_expand_prologue. */
21288 if (IS_NESTED (arm_current_func_type ())
21289 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21290 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21291 || flag_stack_clash_protection)
21292 && !df_regs_ever_live_p (LR_REGNUM)))
21293 && arm_r3_live_at_start_p ()
21294 && crtl->args.pretend_args_size == 0)
21295 return 4;
21296
21297 return 0;
21298 }
21299
21300 /* Compute a bit mask of which core registers need to be
21301 saved on the stack for the current function.
21302 This is used by arm_compute_frame_layout, which may add extra registers. */
21303
21304 static unsigned long
21305 arm_compute_save_core_reg_mask (void)
21306 {
21307 unsigned int save_reg_mask = 0;
21308 unsigned long func_type = arm_current_func_type ();
21309 unsigned int reg;
21310
21311 if (IS_NAKED (func_type))
21312 /* This should never really happen. */
21313 return 0;
21314
21315 /* If we are creating a stack frame, then we must save the frame pointer,
21316 IP (which will hold the old stack pointer), LR and the PC. */
21317 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21318 save_reg_mask |=
21319 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21320 | (1 << IP_REGNUM)
21321 | (1 << LR_REGNUM)
21322 | (1 << PC_REGNUM);
21323
21324 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21325
21326 if (arm_current_function_pac_enabled_p ())
21327 save_reg_mask |= 1 << IP_REGNUM;
21328
21329 /* Decide if we need to save the link register.
21330 Interrupt routines have their own banked link register,
21331 so they never need to save it.
21332 Otherwise, if we do not use the link register, we do not need to save
21333 it. If we are pushing other registers onto the stack, however, we
21334 can save an instruction in the epilogue by pushing the link register
21335 now and then popping it back into the PC. This incurs extra memory
21336 accesses though, so we only do it when optimizing for size, and only
21337 if we know that we will not need a fancy return sequence. */
21338 if (df_regs_ever_live_p (LR_REGNUM)
21339 || (save_reg_mask
21340 && optimize_size
21341 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21342 && !crtl->tail_call_emit
21343 && !crtl->calls_eh_return))
21344 save_reg_mask |= 1 << LR_REGNUM;
21345
21346 if (cfun->machine->lr_save_eliminated)
21347 save_reg_mask &= ~ (1 << LR_REGNUM);
21348
21349 if (TARGET_REALLY_IWMMXT
21350 && ((bit_count (save_reg_mask)
21351 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21352 arm_compute_static_chain_stack_bytes ()
21353 ) % 2) != 0)
21354 {
21355 /* The total number of registers that are going to be pushed
21356 onto the stack is odd. We need to ensure that the stack
21357 is 64-bit aligned before we start to save iWMMXt registers,
21358 and also before we start to create locals. (A local variable
21359 might be a double or long long which we will load/store using
21360 an iWMMXt instruction). Therefore we need to push another
21361 ARM register, so that the stack will be 64-bit aligned. We
21362 try to avoid using the arg registers (r0 - r3) as they might be
21363 used to pass values in a tail call. */
21364 for (reg = 4; reg <= 12; reg++)
21365 if ((save_reg_mask & (1 << reg)) == 0)
21366 break;
21367
21368 if (reg <= 12)
21369 save_reg_mask |= (1 << reg);
21370 else
21371 {
21372 cfun->machine->sibcall_blocked = 1;
21373 save_reg_mask |= (1 << 3);
21374 }
21375 }
21376
21377 /* We may need to push an additional register for use initializing the
21378 PIC base register. */
21379 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21380 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21381 {
21382 reg = thumb_find_work_register (1 << 4);
21383 if (!call_used_or_fixed_reg_p (reg))
21384 save_reg_mask |= (1 << reg);
21385 }
21386
21387 return save_reg_mask;
21388 }
21389
21390 /* Compute a bit mask of which core registers need to be
21391 saved on the stack for the current function. */
21392 static unsigned long
21393 thumb1_compute_save_core_reg_mask (void)
21394 {
21395 unsigned long mask;
21396 unsigned reg;
21397
21398 mask = 0;
21399 for (reg = 0; reg < 12; reg ++)
21400 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21401 mask |= 1 << reg;
21402
21403 /* Handle the frame pointer as a special case. */
21404 if (frame_pointer_needed)
21405 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21406
21407 if (flag_pic
21408 && !TARGET_SINGLE_PIC_BASE
21409 && arm_pic_register != INVALID_REGNUM
21410 && crtl->uses_pic_offset_table)
21411 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21412
21413 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21414 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21415 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21416
21417 /* LR will also be pushed if any lo regs are pushed. */
21418 if (mask & 0xff || thumb_force_lr_save ())
21419 mask |= (1 << LR_REGNUM);
21420
21421 bool call_clobbered_scratch
21422 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21423 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21424
21425 /* Make sure we have a low work register if we need one. We will
21426 need one if we are going to push a high register, but we are not
21427 currently intending to push a low register. However if both the
21428 prologue and epilogue have a spare call-clobbered low register,
21429 then we won't need to find an additional work register. It does
21430 not need to be the same register in the prologue and
21431 epilogue. */
21432 if ((mask & 0xff) == 0
21433 && !call_clobbered_scratch
21434 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21435 {
21436 /* Use thumb_find_work_register to choose which register
21437 we will use. If the register is live then we will
21438 have to push it. Use LAST_LO_REGNUM as our fallback
21439 choice for the register to select. */
21440 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21441 /* Make sure the register returned by thumb_find_work_register is
21442 not part of the return value. */
21443 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21444 reg = LAST_LO_REGNUM;
21445
21446 if (callee_saved_reg_p (reg))
21447 mask |= 1 << reg;
21448 }
21449
21450 /* The 504 below is 8 bytes less than 512 because there are two possible
21451 alignment words. We can't tell here if they will be present or not so we
21452 have to play it safe and assume that they are. */
21453 if ((CALLER_INTERWORKING_SLOT_SIZE +
21454 ROUND_UP_WORD (get_frame_size ()) +
21455 crtl->outgoing_args_size) >= 504)
21456 {
21457 /* This is the same as the code in thumb1_expand_prologue() which
21458 determines which register to use for stack decrement. */
21459 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21460 if (mask & (1 << reg))
21461 break;
21462
21463 if (reg > LAST_LO_REGNUM)
21464 {
21465 /* Make sure we have a register available for stack decrement. */
21466 mask |= 1 << LAST_LO_REGNUM;
21467 }
21468 }
21469
21470 return mask;
21471 }
21472
21473 /* Return the number of bytes required to save VFP registers. */
21474 static int
21475 arm_get_vfp_saved_size (void)
21476 {
21477 unsigned int regno;
21478 int count;
21479 int saved;
21480
21481 saved = 0;
21482 /* Space for saved VFP registers. */
21483 if (TARGET_VFP_BASE)
21484 {
21485 count = 0;
21486 for (regno = FIRST_VFP_REGNUM;
21487 regno < LAST_VFP_REGNUM;
21488 regno += 2)
21489 {
21490 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21491 {
21492 if (count > 0)
21493 {
21494 /* Workaround ARM10 VFPr1 bug. */
21495 if (count == 2 && !arm_arch6)
21496 count++;
21497 saved += count * 8;
21498 }
21499 count = 0;
21500 }
21501 else
21502 count++;
21503 }
21504 if (count > 0)
21505 {
21506 if (count == 2 && !arm_arch6)
21507 count++;
21508 saved += count * 8;
21509 }
21510 }
21511 return saved;
21512 }
21513
21514
21515 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21516 everything bar the final return instruction. If simple_return is true,
21517 then do not output epilogue, because it has already been emitted in RTL.
21518
21519 Note: do not forget to update length attribute of corresponding insn pattern
21520 when changing assembly output (eg. length attribute of
21521 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21522 register clearing sequences). */
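/* As a purely illustrative example, for a typical non-interworking ARM
   function whose saved_regs_mask is {r4, r5, lr} this emits a single
   "pop {r4, r5, pc}"; interrupt handlers and CMSE entry functions take
   the more involved paths below.  */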
21523 const char *
21524 output_return_instruction (rtx operand, bool really_return, bool reverse,
21525 bool simple_return)
21526 {
21527 char conditional[10];
21528 char instr[100];
21529 unsigned reg;
21530 unsigned long live_regs_mask;
21531 unsigned long func_type;
21532 arm_stack_offsets *offsets;
21533
21534 func_type = arm_current_func_type ();
21535
21536 if (IS_NAKED (func_type))
21537 return "";
21538
21539 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21540 {
21541 /* If this function was declared non-returning, and we have
21542 found a tail call, then we have to trust that the called
21543 function won't return. */
21544 if (really_return)
21545 {
21546 rtx ops[2];
21547
21548 /* Otherwise, trap an attempted return by aborting. */
21549 ops[0] = operand;
21550 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21551 : "abort");
21552 assemble_external_libcall (ops[1]);
21553 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21554 }
21555
21556 return "";
21557 }
21558
21559 gcc_assert (!cfun->calls_alloca || really_return);
21560
21561 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21562
21563 cfun->machine->return_used_this_function = 1;
21564
21565 offsets = arm_get_frame_offsets ();
21566 live_regs_mask = offsets->saved_regs_mask;
21567
21568 if (!simple_return && live_regs_mask)
21569 {
21570 const char * return_reg;
21571
21572 /* If we do not have any special requirements for function exit
21573 (e.g. interworking) then we can load the return address
21574 directly into the PC. Otherwise we must load it into LR. */
21575 if (really_return
21576 && !IS_CMSE_ENTRY (func_type)
21577 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21578 return_reg = reg_names[PC_REGNUM];
21579 else
21580 return_reg = reg_names[LR_REGNUM];
21581
21582 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21583 {
21584 /* There are three possible reasons for the IP register
21585 being saved. 1) a stack frame was created, in which case
21586 IP contains the old stack pointer, or 2) an ISR routine
21587 corrupted it, or 3) it was saved to align the stack on
21588 iWMMXt. In case 1, restore IP into SP, otherwise just
21589 restore IP. */
21590 if (frame_pointer_needed)
21591 {
21592 live_regs_mask &= ~ (1 << IP_REGNUM);
21593 live_regs_mask |= (1 << SP_REGNUM);
21594 }
21595 else
21596 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21597 }
21598
21599 /* On some ARM architectures it is faster to use LDR rather than
21600 LDM to load a single register. On other architectures, the
21601 cost is the same. In 26 bit mode, or for exception handlers,
21602 we have to use LDM to load the PC so that the CPSR is also
21603 restored. */
21604 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21605 if (live_regs_mask == (1U << reg))
21606 break;
21607
21608 if (reg <= LAST_ARM_REGNUM
21609 && (reg != LR_REGNUM
21610 || ! really_return
21611 || ! IS_INTERRUPT (func_type)))
21612 {
21613 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21614 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21615 }
21616 else
21617 {
21618 char *p;
21619 int first = 1;
21620
21621 /* Generate the load multiple instruction to restore the
21622 registers. Note we can get here, even if
21623 frame_pointer_needed is true, but only if sp already
21624 points to the base of the saved core registers. */
21625 if (live_regs_mask & (1 << SP_REGNUM))
21626 {
21627 unsigned HOST_WIDE_INT stack_adjust;
21628
21629 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21630 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21631
21632 if (stack_adjust && arm_arch5t && TARGET_ARM)
21633 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21634 else
21635 {
21636 /* If we can't use ldmib (SA110 bug),
21637 then try to pop r3 instead. */
21638 if (stack_adjust)
21639 live_regs_mask |= 1 << 3;
21640
21641 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21642 }
21643 }
21644 /* For interrupt returns we have to use an LDM rather than
21645 a POP so that we can use the exception return variant. */
21646 else if (IS_INTERRUPT (func_type))
21647 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21648 else
21649 sprintf (instr, "pop%s\t{", conditional);
21650
21651 p = instr + strlen (instr);
21652
21653 for (reg = 0; reg <= SP_REGNUM; reg++)
21654 if (live_regs_mask & (1 << reg))
21655 {
21656 int l = strlen (reg_names[reg]);
21657
21658 if (first)
21659 first = 0;
21660 else
21661 {
21662 memcpy (p, ", ", 2);
21663 p += 2;
21664 }
21665
21666 memcpy (p, "%|", 2);
21667 memcpy (p + 2, reg_names[reg], l);
21668 p += l + 2;
21669 }
21670
21671 if (live_regs_mask & (1 << LR_REGNUM))
21672 {
21673 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21674 /* If returning from an interrupt, restore the CPSR. */
21675 if (IS_INTERRUPT (func_type))
21676 strcat (p, "^");
21677 }
21678 else
21679 strcpy (p, "}");
21680 }
21681
21682 output_asm_insn (instr, & operand);
21683
21684 /* See if we need to generate an extra instruction to
21685 perform the actual function return. */
21686 if (really_return
21687 && func_type != ARM_FT_INTERWORKED
21688 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21689 {
21690 /* The return has already been handled
21691 by loading the LR into the PC. */
21692 return "";
21693 }
21694 }
21695
21696 if (really_return)
21697 {
21698 switch ((int) ARM_FUNC_TYPE (func_type))
21699 {
21700 case ARM_FT_ISR:
21701 case ARM_FT_FIQ:
21702 /* ??? This is wrong for unified assembly syntax. */
21703 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21704 break;
21705
21706 case ARM_FT_INTERWORKED:
21707 gcc_assert (arm_arch5t || arm_arch4t);
21708 sprintf (instr, "bx%s\t%%|lr", conditional);
21709 break;
21710
21711 case ARM_FT_EXCEPTION:
21712 /* ??? This is wrong for unified assembly syntax. */
21713 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21714 break;
21715
21716 default:
21717 if (IS_CMSE_ENTRY (func_type))
21718 {
21719 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21720 emitted by cmse_nonsecure_entry_clear_before_return () and the
21721 VSTR/VLDR instructions in the prologue and epilogue. */
21722 if (!TARGET_HAVE_FPCXT_CMSE)
21723 {
21724 /* Check if we have to clear the 'GE bits' which is only used if
21725 parallel add and subtraction instructions are available. */
21726 if (TARGET_INT_SIMD)
21727 snprintf (instr, sizeof (instr),
21728 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21729 else
21730 snprintf (instr, sizeof (instr),
21731 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21732
21733 output_asm_insn (instr, & operand);
21734 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21735 care of it. */
21736 if (TARGET_HARD_FLOAT)
21737 {
21738 /* Clear the cumulative exception-status bits (0-4,7) and
21739 the condition code bits (28-31) of the FPSCR. We need
21740 to remember to clear the first scratch register used
21741 (IP) and save and restore the second (r4).
21742
21743 Important note: the length of the
21744 thumb2_cmse_entry_return insn pattern must account for
21745 the size of the below instructions. */
21746 output_asm_insn ("push\t{%|r4}", & operand);
21747 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21748 output_asm_insn ("movw\t%|r4, #65376", & operand);
21749 output_asm_insn ("movt\t%|r4, #4095", & operand);
21750 output_asm_insn ("and\t%|ip, %|r4", & operand);
21751 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21752 output_asm_insn ("pop\t{%|r4}", & operand);
21753 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21754 }
21755 }
21756 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21757 }
21758 /* Use bx if it's available. */
21759 else if (arm_arch5t || arm_arch4t)
21760 sprintf (instr, "bx%s\t%%|lr", conditional);
21761 else
21762 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21763 break;
21764 }
21765
21766 output_asm_insn (instr, & operand);
21767 }
21768
21769 return "";
21770 }
21771
21772 /* Output in FILE asm statements needed to declare the NAME of the function
21773 defined by its DECL node. */
21774
21775 void
21776 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21777 {
21778 size_t cmse_name_len;
21779 char *cmse_name = 0;
21780 char cmse_prefix[] = "__acle_se_";
21781
21782 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21783 extra function label for each function with the 'cmse_nonsecure_entry'
21784 attribute. This extra function label should be prepended with
21785 '__acle_se_', telling the linker that it needs to create secure gateway
21786 veneers for this function. */
21787 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21788 DECL_ATTRIBUTES (decl)))
21789 {
21790 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21791 cmse_name = XALLOCAVEC (char, cmse_name_len);
21792 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21793 targetm.asm_out.globalize_label (file, cmse_name);
21794
21795 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21796 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21797 }
21798
21799 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21800 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21801 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21802 ASM_OUTPUT_LABEL (file, name);
21803
21804 if (cmse_name)
21805 ASM_OUTPUT_LABEL (file, cmse_name);
21806
21807 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21808 }
21809
21810 /* Write the function name into the code section, directly preceding
21811 the function prologue.
21812
21813 Code will be output similar to this:
21814 t0
21815 .ascii "arm_poke_function_name", 0
21816 .align
21817 t1
21818 .word 0xff000000 + (t1 - t0)
21819 arm_poke_function_name
21820 mov ip, sp
21821 stmfd sp!, {fp, ip, lr, pc}
21822 sub fp, ip, #4
21823
21824 When performing a stack backtrace, code can inspect the value
21825 of 'pc' stored at 'fp' + 0. If the trace function then looks
21826 at location pc - 12 and the top 8 bits are set, then we know
21827 that there is a function name embedded immediately preceding this
21828 location, whose length is given by ((pc[-3]) & ~0xff000000).
21829
21830 We assume that pc is declared as a pointer to an unsigned long.
21831
21832 It is of no benefit to output the function name if we are assembling
21833 a leaf function. These function types will not contain a stack
21834 backtrace structure, therefore it is not possible to determine the
21835 function name. */
21836 void
21837 arm_poke_function_name (FILE *stream, const char *name)
21838 {
21839 unsigned long alignlength;
21840 unsigned long length;
21841 rtx x;
21842
21843 length = strlen (name) + 1;
21844 alignlength = ROUND_UP_WORD (length);
21845
21846 ASM_OUTPUT_ASCII (stream, name, length);
21847 ASM_OUTPUT_ALIGN (stream, 2);
21848 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21849 assemble_aligned_integer (UNITS_PER_WORD, x);
21850 }
21851
21852 /* Place some comments into the assembler stream
21853 describing the current function. */
21854 static void
21855 arm_output_function_prologue (FILE *f)
21856 {
21857 unsigned long func_type;
21858
21859 /* Sanity check. */
21860 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21861
21862 func_type = arm_current_func_type ();
21863
21864 switch ((int) ARM_FUNC_TYPE (func_type))
21865 {
21866 default:
21867 case ARM_FT_NORMAL:
21868 break;
21869 case ARM_FT_INTERWORKED:
21870 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21871 break;
21872 case ARM_FT_ISR:
21873 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21874 break;
21875 case ARM_FT_FIQ:
21876 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21877 break;
21878 case ARM_FT_EXCEPTION:
21879 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21880 break;
21881 }
21882
21883 if (IS_NAKED (func_type))
21884 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21885
21886 if (IS_VOLATILE (func_type))
21887 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21888
21889 if (IS_NESTED (func_type))
21890 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21891 if (IS_STACKALIGN (func_type))
21892 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21893 if (IS_CMSE_ENTRY (func_type))
21894 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21895
21896 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21897 (HOST_WIDE_INT) crtl->args.size,
21898 crtl->args.pretend_args_size,
21899 (HOST_WIDE_INT) get_frame_size ());
21900
21901 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21902 frame_pointer_needed,
21903 cfun->machine->uses_anonymous_args);
21904
21905 if (cfun->machine->lr_save_eliminated)
21906 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21907
21908 if (crtl->calls_eh_return)
21909 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21910
21911 }
21912
21913 static void
21914 arm_output_function_epilogue (FILE *)
21915 {
21916 arm_stack_offsets *offsets;
21917
21918 if (TARGET_THUMB1)
21919 {
21920 int regno;
21921
21922 /* Emit any call-via-reg trampolines that are needed for v4t support
21923 of call_reg and call_value_reg type insns. */
21924 for (regno = 0; regno < LR_REGNUM; regno++)
21925 {
21926 rtx label = cfun->machine->call_via[regno];
21927
21928 if (label != NULL)
21929 {
21930 switch_to_section (function_section (current_function_decl));
21931 targetm.asm_out.internal_label (asm_out_file, "L",
21932 CODE_LABEL_NUMBER (label));
21933 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21934 }
21935 }
21936
21937 /* ??? Probably not safe to set this here, since it assumes that a
21938 function will be emitted as assembly immediately after we generate
21939 RTL for it. This does not happen for inline functions. */
21940 cfun->machine->return_used_this_function = 0;
21941 }
21942 else /* TARGET_32BIT */
21943 {
21944 /* We need to take into account any stack-frame rounding. */
21945 offsets = arm_get_frame_offsets ();
21946
21947 gcc_assert (!use_return_insn (FALSE, NULL)
21948 || (cfun->machine->return_used_this_function != 0)
21949 || offsets->saved_regs == offsets->outgoing_args
21950 || frame_pointer_needed);
21951 }
21952 }
21953
21954 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21955 STR and STRD. If an even number of registers is being pushed, an
21956 STRD pattern is created for each register pair. If an
21957 odd number of registers is pushed, emit an initial STR followed by
21958 as many STRD instructions as are needed. This works best when the
21959 stack is initially 64-bit aligned (the normal case), since it
21960 ensures that each STRD is also 64-bit aligned. */
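/* Illustrative example only (the exact assembly depends on which patterns
   match the generated RTL): a mask of {r4, r5, r6, r7} becomes roughly
	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]
   while an odd count such as {r4, r5, r6} starts with a single store:
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]  */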
21961 static void
21962 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21963 {
21964 int num_regs = 0;
21965 int i;
21966 int regno;
21967 rtx par = NULL_RTX;
21968 rtx dwarf = NULL_RTX;
21969 rtx tmp;
21970 bool first = true;
21971
21972 num_regs = bit_count (saved_regs_mask);
21973
21974 /* Must be at least one register to save, and can't save SP or PC. */
21975 gcc_assert (num_regs > 0 && num_regs <= 14);
21976 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21977 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21978
21979 /* Create sequence for DWARF info. All the frame-related data for
21980 debugging is held in this wrapper. */
21981 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21982
21983 /* Describe the stack adjustment. */
21984 tmp = gen_rtx_SET (stack_pointer_rtx,
21985 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21986 RTX_FRAME_RELATED_P (tmp) = 1;
21987 XVECEXP (dwarf, 0, 0) = tmp;
21988
21989 /* Find the first register. */
21990 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21991 ;
21992
21993 i = 0;
21994
21995 /* If there's an odd number of registers to push, start off by
21996 pushing a single register. This ensures that subsequent strd
21997 operations are dword aligned (assuming that SP was originally
21998 64-bit aligned). */
21999 if ((num_regs & 1) != 0)
22000 {
22001 rtx reg, mem, insn;
22002
22003 reg = gen_rtx_REG (SImode, regno);
22004 if (num_regs == 1)
22005 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
22006 stack_pointer_rtx));
22007 else
22008 mem = gen_frame_mem (Pmode,
22009 gen_rtx_PRE_MODIFY
22010 (Pmode, stack_pointer_rtx,
22011 plus_constant (Pmode, stack_pointer_rtx,
22012 -4 * num_regs)));
22013
22014 tmp = gen_rtx_SET (mem, reg);
22015 RTX_FRAME_RELATED_P (tmp) = 1;
22016 insn = emit_insn (tmp);
22017 RTX_FRAME_RELATED_P (insn) = 1;
22018 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22019 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
22020 RTX_FRAME_RELATED_P (tmp) = 1;
22021 i++;
22022 regno++;
22023 XVECEXP (dwarf, 0, i) = tmp;
22024 first = false;
22025 }
22026
22027 while (i < num_regs)
22028 if (saved_regs_mask & (1 << regno))
22029 {
22030 rtx reg1, reg2, mem1, mem2;
22031 rtx tmp0, tmp1, tmp2;
22032 int regno2;
22033
22034 /* Find the register to pair with this one. */
22035 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
22036 regno2++)
22037 ;
22038
22039 reg1 = gen_rtx_REG (SImode, regno);
22040 reg2 = gen_rtx_REG (SImode, regno2);
22041
22042 if (first)
22043 {
22044 rtx insn;
22045
22046 first = false;
22047 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22048 stack_pointer_rtx,
22049 -4 * num_regs));
22050 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22051 stack_pointer_rtx,
22052 -4 * (num_regs - 1)));
22053 tmp0 = gen_rtx_SET (stack_pointer_rtx,
22054 plus_constant (Pmode, stack_pointer_rtx,
22055 -4 * (num_regs)));
22056 tmp1 = gen_rtx_SET (mem1, reg1);
22057 tmp2 = gen_rtx_SET (mem2, reg2);
22058 RTX_FRAME_RELATED_P (tmp0) = 1;
22059 RTX_FRAME_RELATED_P (tmp1) = 1;
22060 RTX_FRAME_RELATED_P (tmp2) = 1;
22061 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22062 XVECEXP (par, 0, 0) = tmp0;
22063 XVECEXP (par, 0, 1) = tmp1;
22064 XVECEXP (par, 0, 2) = tmp2;
22065 insn = emit_insn (par);
22066 RTX_FRAME_RELATED_P (insn) = 1;
22067 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22068 }
22069 else
22070 {
22071 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22072 stack_pointer_rtx,
22073 4 * i));
22074 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22075 stack_pointer_rtx,
22076 4 * (i + 1)));
22077 tmp1 = gen_rtx_SET (mem1, reg1);
22078 tmp2 = gen_rtx_SET (mem2, reg2);
22079 RTX_FRAME_RELATED_P (tmp1) = 1;
22080 RTX_FRAME_RELATED_P (tmp2) = 1;
22081 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22082 XVECEXP (par, 0, 0) = tmp1;
22083 XVECEXP (par, 0, 1) = tmp2;
22084 emit_insn (par);
22085 }
22086
22087 /* Create unwind information. This is an approximation. */
22088 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22089 plus_constant (Pmode,
22090 stack_pointer_rtx,
22091 4 * i)),
22092 reg1);
22093 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22094 plus_constant (Pmode,
22095 stack_pointer_rtx,
22096 4 * (i + 1))),
22097 reg2);
22098
22099 RTX_FRAME_RELATED_P (tmp1) = 1;
22100 RTX_FRAME_RELATED_P (tmp2) = 1;
22101 XVECEXP (dwarf, 0, i + 1) = tmp1;
22102 XVECEXP (dwarf, 0, i + 2) = tmp2;
22103 i += 2;
22104 regno = regno2 + 1;
22105 }
22106 else
22107 regno++;
22108
22109 return;
22110 }
22111
22112 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22113 whenever possible, otherwise it emits single-word stores. The first store
22114 also allocates stack space for all saved registers, using writeback with
22115 pre-decrement addressing. All other stores use offset addressing. If no STRD
22116 can be emitted, this function emits a sequence of single-word stores,
22117 and not an STM as before, because single-word stores provide more
22118 scheduling freedom and can be turned into an STM by peephole optimizations. */
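/* Illustrative example only (assuming the RTL is matched as STRD/STR): a
   mask of {r4, r5, r6, r7, r8} becomes roughly
	strd	r4, r5, [sp, #-20]!
	strd	r6, r7, [sp, #8]
	str	r8, [sp, #16]  */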
22119 static void
22120 arm_emit_strd_push (unsigned long saved_regs_mask)
22121 {
22122 int num_regs = 0;
22123 int i, j, dwarf_index = 0;
22124 int offset = 0;
22125 rtx dwarf = NULL_RTX;
22126 rtx insn = NULL_RTX;
22127 rtx tmp, mem;
22128
22129 /* TODO: More efficient code can be emitted by changing the
22130 layout, e.g., first push all pairs that can use STRD to keep the
22131 stack aligned, and then push all other registers. */
22132 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22133 if (saved_regs_mask & (1 << i))
22134 num_regs++;
22135
22136 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22137 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22138 gcc_assert (num_regs > 0);
22139
22140 /* Create sequence for DWARF info. */
22141 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22142
22143 /* For dwarf info, we generate explicit stack update. */
22144 tmp = gen_rtx_SET (stack_pointer_rtx,
22145 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22146 RTX_FRAME_RELATED_P (tmp) = 1;
22147 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22148
22149 /* Save registers. */
22150 offset = - 4 * num_regs;
22151 j = 0;
22152 while (j <= LAST_ARM_REGNUM)
22153 if (saved_regs_mask & (1 << j))
22154 {
22155 if ((j % 2 == 0)
22156 && (saved_regs_mask & (1 << (j + 1))))
22157 {
22158 /* The current register and the next register form a register pair
22159 for which STRD can be generated. */
22160 if (offset < 0)
22161 {
22162 /* Allocate stack space for all saved registers. */
22163 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22164 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22165 mem = gen_frame_mem (DImode, tmp);
22166 offset = 0;
22167 }
22168 else if (offset > 0)
22169 mem = gen_frame_mem (DImode,
22170 plus_constant (Pmode,
22171 stack_pointer_rtx,
22172 offset));
22173 else
22174 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22175
22176 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22177 RTX_FRAME_RELATED_P (tmp) = 1;
22178 tmp = emit_insn (tmp);
22179
22180 /* Record the first store insn. */
22181 if (dwarf_index == 1)
22182 insn = tmp;
22183
22184 /* Generate dwarf info. */
22185 mem = gen_frame_mem (SImode,
22186 plus_constant (Pmode,
22187 stack_pointer_rtx,
22188 offset));
22189 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22190 RTX_FRAME_RELATED_P (tmp) = 1;
22191 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22192
22193 mem = gen_frame_mem (SImode,
22194 plus_constant (Pmode,
22195 stack_pointer_rtx,
22196 offset + 4));
22197 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22198 RTX_FRAME_RELATED_P (tmp) = 1;
22199 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22200
22201 offset += 8;
22202 j += 2;
22203 }
22204 else
22205 {
22206 /* Emit a single word store. */
22207 if (offset < 0)
22208 {
22209 /* Allocate stack space for all saved registers. */
22210 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22211 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22212 mem = gen_frame_mem (SImode, tmp);
22213 offset = 0;
22214 }
22215 else if (offset > 0)
22216 mem = gen_frame_mem (SImode,
22217 plus_constant (Pmode,
22218 stack_pointer_rtx,
22219 offset));
22220 else
22221 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22222
22223 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22224 RTX_FRAME_RELATED_P (tmp) = 1;
22225 tmp = emit_insn (tmp);
22226
22227 /* Record the first store insn. */
22228 if (dwarf_index == 1)
22229 insn = tmp;
22230
22231 /* Generate dwarf info. */
22232 mem = gen_frame_mem (SImode,
22233 plus_constant(Pmode,
22234 stack_pointer_rtx,
22235 offset));
22236 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22237 RTX_FRAME_RELATED_P (tmp) = 1;
22238 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22239
22240 offset += 4;
22241 j += 1;
22242 }
22243 }
22244 else
22245 j++;
22246
22247 /* Attach dwarf info to the first insn we generate. */
22248 gcc_assert (insn != NULL_RTX);
22249 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22250 RTX_FRAME_RELATED_P (insn) = 1;
22251 }
22252
22253 /* Generate and emit an insn that we will recognize as a push_multi.
22254 Unfortunately, since this insn does not reflect very well the actual
22255 semantics of the operation, we need to annotate the insn for the benefit
22256 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22257 MASK for registers that should be annotated for DWARF2 frame unwind
22258 information. */
22259 static rtx
22260 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22261 {
22262 int num_regs = 0;
22263 int num_dwarf_regs = 0;
22264 int i, j;
22265 rtx par;
22266 rtx dwarf;
22267 int dwarf_par_index;
22268 rtx tmp, reg;
22269
22270 /* We don't record the PC in the dwarf frame information. */
22271 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22272
22273 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22274 {
22275 if (mask & (1 << i))
22276 num_regs++;
22277 if (dwarf_regs_mask & (1 << i))
22278 num_dwarf_regs++;
22279 }
22280
22281 gcc_assert (num_regs && num_regs <= 16);
22282 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22283
22284 /* For the body of the insn we are going to generate an UNSPEC in
22285 parallel with several USEs. This allows the insn to be recognized
22286 by the push_multi pattern in the arm.md file.
22287
22288 The body of the insn looks something like this:
22289
22290 (parallel [
22291 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22292 (const_int:SI <num>)))
22293 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22294 (use (reg:SI XX))
22295 (use (reg:SI YY))
22296 ...
22297 ])
22298
22299 For the frame note however, we try to be more explicit and actually
22300 show each register being stored into the stack frame, plus a (single)
22301 decrement of the stack pointer. We do it this way in order to be
22302 friendly to the stack unwinding code, which only wants to see a single
22303 stack decrement per instruction. The RTL we generate for the note looks
22304 something like this:
22305
22306 (sequence [
22307 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22308 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22309 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22310 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22311 ...
22312 ])
22313
22314 FIXME: In an ideal world the PRE_MODIFY would not exist and
22315 instead we'd have a parallel expression detailing all
22316 the stores to the various memory addresses so that debug
22317 information is more up-to-date. Remember however while writing
22318 this to take care of the constraints with the push instruction.
22319
22320 Note also that this has to be taken care of for the VFP registers.
22321
22322 For more see PR43399. */
22323
22324 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22325 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22326 dwarf_par_index = 1;
22327
22328 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22329 {
22330 if (mask & (1 << i))
22331 {
22332 /* NOTE: The DWARF code emitter handles reg-reg copies correctly; in the
22333 following example the reg-reg copy of SP to the IP register is handled
22334 through the .cfi_def_cfa_register directive, and the .cfi_offset
22335 directive for the IP register is skipped by the DWARF code emitter.
22336 Example:
22337 mov ip, sp
22338 .cfi_def_cfa_register 12
22339 push {fp, ip, lr, pc}
22340 .cfi_offset 11, -16
22341 .cfi_offset 13, -12
22342 .cfi_offset 14, -8
22343
22344 Whereas the Arm-specific .save directive handling differs from that
22345 of the DWARF code emitter and does not consider reg-reg copies while
22346 updating the register list. When PACBTI is enabled we manually
22347 update the .save directive register list to use "ra_auth_code"
22348 (pseudo register 143) instead of the IP register, as shown in the
22349 following pseudo code.
22350 Example:
22351 pacbti ip, lr, sp
22352 .cfi_register 143, 12
22353 push {r3, r7, ip, lr}
22354 .save {r3, r7, ra_auth_code, lr}
22355 */
22356 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22357 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22358 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22359
22360 XVECEXP (par, 0, 0)
22361 = gen_rtx_SET (gen_frame_mem
22362 (BLKmode,
22363 gen_rtx_PRE_MODIFY (Pmode,
22364 stack_pointer_rtx,
22365 plus_constant
22366 (Pmode, stack_pointer_rtx,
22367 -4 * num_regs))
22368 ),
22369 gen_rtx_UNSPEC (BLKmode,
22370 gen_rtvec (1, reg),
22371 UNSPEC_PUSH_MULT));
22372
22373 if (dwarf_regs_mask & (1 << i))
22374 {
22375 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22376 dwarf_reg);
22377 RTX_FRAME_RELATED_P (tmp) = 1;
22378 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22379 }
22380
22381 break;
22382 }
22383 }
22384
22385 for (j = 1, i++; j < num_regs; i++)
22386 {
22387 if (mask & (1 << i))
22388 {
22389 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22390 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22391 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22392
22393 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22394
22395 if (dwarf_regs_mask & (1 << i))
22396 {
22397 tmp
22398 = gen_rtx_SET (gen_frame_mem
22399 (SImode,
22400 plus_constant (Pmode, stack_pointer_rtx,
22401 4 * j)),
22402 dwarf_reg);
22403 RTX_FRAME_RELATED_P (tmp) = 1;
22404 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22405 }
22406
22407 j++;
22408 }
22409 }
22410
22411 par = emit_insn (par);
22412
22413 tmp = gen_rtx_SET (stack_pointer_rtx,
22414 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22415 RTX_FRAME_RELATED_P (tmp) = 1;
22416 XVECEXP (dwarf, 0, 0) = tmp;
22417
22418 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22419
22420 return par;
22421 }
22422
22423 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22424 SIZE is the offset to be adjusted.
22425 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22426 static void
22427 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22428 {
22429 rtx dwarf;
22430
22431 RTX_FRAME_RELATED_P (insn) = 1;
22432 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22433 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22434 }
22435
22436 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22437 SAVED_REGS_MASK shows which registers need to be restored.
22438
22439 Unfortunately, since this insn does not reflect very well the actual
22440 semantics of the operation, we need to annotate the insn for the benefit
22441 of DWARF2 frame unwind information. */
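/* For instance, a SAVED_REGS_MASK of {r4, r5, r6, pc} produces a parallel
   that is matched as a pop with return, i.e. roughly "pop {r4, r5, r6, pc}"
   (or the equivalent LDMFD), with the stack update folded into the same
   instruction.  */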
22442 static void
22443 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22444 {
22445 int num_regs = 0;
22446 int i, j;
22447 rtx par;
22448 rtx dwarf = NULL_RTX;
22449 rtx tmp, reg;
22450 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22451 int offset_adj;
22452 int emit_update;
22453
22454 offset_adj = return_in_pc ? 1 : 0;
22455 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22456 if (saved_regs_mask & (1 << i))
22457 num_regs++;
22458
22459 gcc_assert (num_regs && num_regs <= 16);
22460
22461 /* If SP is in reglist, then we don't emit SP update insn. */
22462 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22463
22464 /* The parallel needs to hold num_regs SETs
22465 and one SET for the stack update. */
22466 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22467
22468 if (return_in_pc)
22469 XVECEXP (par, 0, 0) = ret_rtx;
22470
22471 if (emit_update)
22472 {
22473 /* Increment the stack pointer, based on there being
22474 num_regs 4-byte registers to restore. */
22475 tmp = gen_rtx_SET (stack_pointer_rtx,
22476 plus_constant (Pmode,
22477 stack_pointer_rtx,
22478 4 * num_regs));
22479 RTX_FRAME_RELATED_P (tmp) = 1;
22480 XVECEXP (par, 0, offset_adj) = tmp;
22481 }
22482
22483 /* Now restore every reg, which may include PC. */
22484 for (j = 0, i = 0; j < num_regs; i++)
22485 if (saved_regs_mask & (1 << i))
22486 {
22487 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22488 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22489 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22490 if ((num_regs == 1) && emit_update && !return_in_pc)
22491 {
22492 /* Emit single load with writeback. */
22493 tmp = gen_frame_mem (SImode,
22494 gen_rtx_POST_INC (Pmode,
22495 stack_pointer_rtx));
22496 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22497 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
22498 dwarf);
22499 return;
22500 }
22501
22502 tmp = gen_rtx_SET (reg,
22503 gen_frame_mem
22504 (SImode,
22505 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22506 RTX_FRAME_RELATED_P (tmp) = 1;
22507 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22508
22509 /* We need to maintain a sequence for DWARF info too. As dwarf info
22510 should not have PC, skip PC. */
22511 if (i != PC_REGNUM)
22512 dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
22513
22514 j++;
22515 }
22516
22517 if (return_in_pc)
22518 par = emit_jump_insn (par);
22519 else
22520 par = emit_insn (par);
22521
22522 REG_NOTES (par) = dwarf;
22523 if (!return_in_pc)
22524 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22525 stack_pointer_rtx, stack_pointer_rtx);
22526 }
22527
22528 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22529 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22530
22531 Unfortunately, since this insn does not reflect very well the actual
22532 semantics of the operation, we need to annotate the insn for the benefit
22533 of DWARF2 frame unwind information. */
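/* For example, restoring d8-d11 relative to the stack pointer produces a
   parallel that is typically emitted as "vldm sp!, {d8-d11}", with the
   base register update included in the same instruction.  */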
22534 static void
22535 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22536 {
22537 int i, j;
22538 rtx par;
22539 rtx dwarf = NULL_RTX;
22540 rtx tmp, reg;
22541
22542 gcc_assert (num_regs && num_regs <= 32);
22543
22544 /* Workaround ARM10 VFPr1 bug. */
22545 if (num_regs == 2 && !arm_arch6)
22546 {
22547 if (first_reg == 15)
22548 first_reg--;
22549
22550 num_regs++;
22551 }
22552
22553 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22554 there could be up to 32 D-registers to restore.
22555 If there are more than 16 D-registers, make two recursive calls,
22556 each of which emits one pop_multi instruction. */
22557 if (num_regs > 16)
22558 {
22559 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22560 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22561 return;
22562 }
22563
22564 /* The parallel needs to hold num_regs SETs
22565 and one SET for the stack update. */
22566 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22567
22568 /* Increment the stack pointer, based on there being
22569 num_regs 8-byte registers to restore. */
22570 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22571 RTX_FRAME_RELATED_P (tmp) = 1;
22572 XVECEXP (par, 0, 0) = tmp;
22573
22574 /* Now show every reg that will be restored, using a SET for each. */
22575 for (j = 0, i=first_reg; j < num_regs; i += 2)
22576 {
22577 reg = gen_rtx_REG (DFmode, i);
22578
22579 tmp = gen_rtx_SET (reg,
22580 gen_frame_mem
22581 (DFmode,
22582 plus_constant (Pmode, base_reg, 8 * j)));
22583 RTX_FRAME_RELATED_P (tmp) = 1;
22584 XVECEXP (par, 0, j + 1) = tmp;
22585
22586 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22587
22588 j++;
22589 }
22590
22591 par = emit_insn (par);
22592 REG_NOTES (par) = dwarf;
22593
22594 /* Make sure the CFA doesn't stay on IP_REGNUM, to allow unwinding from FP. */
22595 if (REGNO (base_reg) == IP_REGNUM)
22596 {
22597 RTX_FRAME_RELATED_P (par) = 1;
22598 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22599 }
22600 else
22601 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22602 base_reg, base_reg);
22603 }
22604
22605 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
22606 an even number of registers is being popped, multiple LDRD patterns are
22607 created for all register pairs. If an odd number of registers is popped,
22608 the last register is loaded using an LDR pattern. */
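/* Illustrative example: a SAVED_REGS_MASK of {r4, r5, r6, r7} results in
   roughly
	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16
   with the PC, when present, handled by the final LDR after the stack
   update.  */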
22609 static void
22610 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22611 {
22612 int num_regs = 0;
22613 int i, j;
22614 rtx par = NULL_RTX;
22615 rtx dwarf = NULL_RTX;
22616 rtx tmp, reg, tmp1;
22617 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22618
22619 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22620 if (saved_regs_mask & (1 << i))
22621 num_regs++;
22622
22623 gcc_assert (num_regs && num_regs <= 16);
22624
22625 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
22626 popped. If num_regs was even it now becomes odd and we can generate
22627 a pop with PC; if it was odd it becomes even and an LDR with return
22628 can be generated for PC. */
22629 if (return_in_pc)
22630 num_regs--;
22631
22632 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22633
22634 /* Var j iterates over all the registers in saved_regs_mask. Var i gives
22635 the index of the saved registers in the stack frame. A PARALLEL RTX of
22636 a register pair is created here, so that the pattern for LDRD can be
22637 matched. As PC is always the last register to be popped, and we have
22638 already decremented num_regs if PC is in the mask, we don't have to
22639 worry about PC in this loop. */
22640 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22641 if (saved_regs_mask & (1 << j))
22642 {
22643 /* Create RTX for memory load. */
22644 reg = gen_rtx_REG (SImode, j);
22645 tmp = gen_rtx_SET (reg,
22646 gen_frame_mem (SImode,
22647 plus_constant (Pmode,
22648 stack_pointer_rtx, 4 * i)));
22649 RTX_FRAME_RELATED_P (tmp) = 1;
22650
22651 if (i % 2 == 0)
22652 {
22653 /* When saved-register index (i) is even, the RTX to be emitted is
22654 yet to be created. Hence create it first. The LDRD pattern we
22655 are generating is :
22656 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22657 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22658 where target registers need not be consecutive. */
22659 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22660 dwarf = NULL_RTX;
22661 }
22662
22663 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22664 added as 0th element and if i is odd, reg_i is added as 1st element
22665 of LDRD pattern shown above. */
22666 XVECEXP (par, 0, (i % 2)) = tmp;
22667 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22668
22669 if ((i % 2) == 1)
22670 {
22671 /* When saved-register index (i) is odd, RTXs for both the registers
22672 to be loaded are generated in above given LDRD pattern, and the
22673 pattern can be emitted now. */
22674 par = emit_insn (par);
22675 REG_NOTES (par) = dwarf;
22676 RTX_FRAME_RELATED_P (par) = 1;
22677 }
22678
22679 i++;
22680 }
22681
22682 /* If the number of registers pushed is odd and return_in_pc is false, or
22683 the number of registers is even and return_in_pc is true, the last
22684 register is popped using LDR. It can be PC as well. Hence, adjust the
22685 stack first and then use LDR with post-increment. */
22686
22687 /* Increment the stack pointer, based on there being
22688 num_regs 4-byte registers to restore. */
22689 tmp = gen_rtx_SET (stack_pointer_rtx,
22690 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22691 RTX_FRAME_RELATED_P (tmp) = 1;
22692 tmp = emit_insn (tmp);
22693 if (!return_in_pc)
22694 {
22695 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22696 stack_pointer_rtx, stack_pointer_rtx);
22697 }
22698
22699 dwarf = NULL_RTX;
22700
22701 if (((num_regs % 2) == 1 && !return_in_pc)
22702 || ((num_regs % 2) == 0 && return_in_pc))
22703 {
22704 /* Scan for the single register to be popped. Skip until the saved
22705 register is found. */
22706 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22707
22708 /* Gen LDR with post increment here. */
22709 tmp1 = gen_rtx_MEM (SImode,
22710 gen_rtx_POST_INC (SImode,
22711 stack_pointer_rtx));
22712 set_mem_alias_set (tmp1, get_frame_alias_set ());
22713
22714 reg = gen_rtx_REG (SImode, j);
22715 tmp = gen_rtx_SET (reg, tmp1);
22716 RTX_FRAME_RELATED_P (tmp) = 1;
22717 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22718
22719 if (return_in_pc)
22720 {
22721 /* If return_in_pc, j must be PC_REGNUM. */
22722 gcc_assert (j == PC_REGNUM);
22723 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22724 XVECEXP (par, 0, 0) = ret_rtx;
22725 XVECEXP (par, 0, 1) = tmp;
22726 par = emit_jump_insn (par);
22727 }
22728 else
22729 {
22730 par = emit_insn (tmp);
22731 REG_NOTES (par) = dwarf;
22732 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22733 stack_pointer_rtx, stack_pointer_rtx);
22734 }
22735
22736 }
22737 else if ((num_regs % 2) == 1 && return_in_pc)
22738 {
22739 /* There are 2 registers to be popped. So, generate the pattern
22740 pop_multiple_with_stack_update_and_return to pop in PC. */
22741 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22742 }
22743
22744 return;
22745 }
22746
22747 /* LDRD in ARM mode needs consecutive registers as operands. This function
22748 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22749 offset addressing and then generates one separate stack update. This provides
22750 more scheduling freedom, compared to writeback on every load. However,
22751 if the function returns using load into PC directly
22752 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22753 before the last load. TODO: Add a peephole optimization to recognize
22754 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22755 peephole optimization to merge the load at stack-offset zero
22756 with the stack update instruction using load with writeback
22757 in post-index addressing mode. */
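/* Illustrative example: a SAVED_REGS_MASK of {r4, r5, r6} results in
   roughly
	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
   and, if PC is also in the mask, a final "ldr pc, [sp], #4" performs the
   return.  */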
22758 static void
22759 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22760 {
22761 int j = 0;
22762 int offset = 0;
22763 rtx par = NULL_RTX;
22764 rtx dwarf = NULL_RTX;
22765 rtx tmp, mem;
22766
22767 /* Restore saved registers. */
22768 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22769 j = 0;
22770 while (j <= LAST_ARM_REGNUM)
22771 if (saved_regs_mask & (1 << j))
22772 {
22773 if ((j % 2) == 0
22774 && (saved_regs_mask & (1 << (j + 1)))
22775 && (j + 1) != PC_REGNUM)
22776 {
22777 /* Current register and next register form register pair for which
22778 LDRD can be generated. PC is always the last register popped, and
22779 we handle it separately. */
22780 if (offset > 0)
22781 mem = gen_frame_mem (DImode,
22782 plus_constant (Pmode,
22783 stack_pointer_rtx,
22784 offset));
22785 else
22786 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22787
22788 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22789 tmp = emit_insn (tmp);
22790 RTX_FRAME_RELATED_P (tmp) = 1;
22791
22792 /* Generate dwarf info. */
22793
22794 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22795 gen_rtx_REG (SImode, j),
22796 NULL_RTX);
22797 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22798 gen_rtx_REG (SImode, j + 1),
22799 dwarf);
22800
22801 REG_NOTES (tmp) = dwarf;
22802
22803 offset += 8;
22804 j += 2;
22805 }
22806 else if (j != PC_REGNUM)
22807 {
22808 /* Emit a single word load. */
22809 if (offset > 0)
22810 mem = gen_frame_mem (SImode,
22811 plus_constant (Pmode,
22812 stack_pointer_rtx,
22813 offset));
22814 else
22815 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22816
22817 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22818 tmp = emit_insn (tmp);
22819 RTX_FRAME_RELATED_P (tmp) = 1;
22820
22821 /* Generate dwarf info. */
22822 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22823 gen_rtx_REG (SImode, j),
22824 NULL_RTX);
22825
22826 offset += 4;
22827 j += 1;
22828 }
22829 else /* j == PC_REGNUM */
22830 j++;
22831 }
22832 else
22833 j++;
22834
22835 /* Update the stack. */
22836 if (offset > 0)
22837 {
22838 tmp = gen_rtx_SET (stack_pointer_rtx,
22839 plus_constant (Pmode,
22840 stack_pointer_rtx,
22841 offset));
22842 tmp = emit_insn (tmp);
22843 arm_add_cfa_adjust_cfa_note (tmp, offset,
22844 stack_pointer_rtx, stack_pointer_rtx);
22845 offset = 0;
22846 }
22847
22848 if (saved_regs_mask & (1 << PC_REGNUM))
22849 {
22850 /* Only PC is to be popped. */
22851 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22852 XVECEXP (par, 0, 0) = ret_rtx;
22853 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22854 gen_frame_mem (SImode,
22855 gen_rtx_POST_INC (SImode,
22856 stack_pointer_rtx)));
22857 RTX_FRAME_RELATED_P (tmp) = 1;
22858 XVECEXP (par, 0, 1) = tmp;
22859 par = emit_jump_insn (par);
22860
22861 /* Generate dwarf info. */
22862 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22863 gen_rtx_REG (SImode, PC_REGNUM),
22864 NULL_RTX);
22865 REG_NOTES (par) = dwarf;
22866 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22867 stack_pointer_rtx, stack_pointer_rtx);
22868 }
22869 }
22870
22871 /* Calculate the size of the return value that is passed in registers. */
22872 static unsigned
22873 arm_size_return_regs (void)
22874 {
22875 machine_mode mode;
22876
22877 if (crtl->return_rtx != 0)
22878 mode = GET_MODE (crtl->return_rtx);
22879 else
22880 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22881
22882 return GET_MODE_SIZE (mode);
22883 }
22884
22885 /* Return true if the current function needs to save/restore LR. */
22886 static bool
22887 thumb_force_lr_save (void)
22888 {
22889 return !cfun->machine->lr_save_eliminated
22890 && (!crtl->is_leaf
22891 || thumb_far_jump_used_p ()
22892 || df_regs_ever_live_p (LR_REGNUM));
22893 }
22894
22895 /* We do not know whether r3 will be available, because an
22896 indirect tail call is happening in this particular
22897 case. */
22898 static bool
22899 is_indirect_tailcall_p (rtx call)
22900 {
22901 rtx pat = PATTERN (call);
22902
22903 /* Indirect tail call. */
22904 pat = XVECEXP (pat, 0, 0);
22905 if (GET_CODE (pat) == SET)
22906 pat = SET_SRC (pat);
22907
22908 pat = XEXP (XEXP (pat, 0), 0);
22909 return REG_P (pat);
22910 }
22911
22912 /* Return true if r3 is used by any of the tail call insns in the
22913 current function. */
22914 static bool
22915 any_sibcall_could_use_r3 (void)
22916 {
22917 edge_iterator ei;
22918 edge e;
22919
22920 if (!crtl->tail_call_emit)
22921 return false;
22922 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22923 if (e->flags & EDGE_SIBCALL)
22924 {
22925 rtx_insn *call = BB_END (e->src);
22926 if (!CALL_P (call))
22927 call = prev_nonnote_nondebug_insn (call);
22928 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22929 if (find_regno_fusage (call, USE, 3)
22930 || is_indirect_tailcall_p (call))
22931 return true;
22932 }
22933 return false;
22934 }
22935
22936
22937 /* Compute the distance from register FROM to register TO.
22938 These can be the arg pointer (26), the soft frame pointer (25),
22939 the stack pointer (13) or the hard frame pointer (11).
22940 In thumb mode r7 is used as the soft frame pointer, if needed.
22941 Typical stack layout looks like this:
22942
22943 old stack pointer -> | |
22944 ----
22945 | | \
22946 | | saved arguments for
22947 | | vararg functions
22948 | | /
22949 --
22950 hard FP & arg pointer -> | | \
22951 | | stack
22952 | | frame
22953 | | /
22954 --
22955 | | \
22956 | | call saved
22957 | | registers
22958 soft frame pointer -> | | /
22959 --
22960 | | \
22961 | | local
22962 | | variables
22963 locals base pointer -> | | /
22964 --
22965 | | \
22966 | | outgoing
22967 | | arguments
22968 current stack pointer -> | | /
22969 --
22970
22971 For a given function some or all of these stack components
22972 may not be needed, giving rise to the possibility of
22973 eliminating some of the registers.
22974
22975 The values returned by this function must reflect the behavior
22976 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22977
22978 The sign of the number returned reflects the direction of stack
22979 growth, so the values are positive for all eliminations except
22980 from the soft frame pointer to the hard frame pointer.
22981
22982 SFP may point just inside the local variables block to ensure correct
22983 alignment. */
22984
22985
22986 /* Return cached stack offsets. */
22987
22988 static arm_stack_offsets *
22989 arm_get_frame_offsets (void)
22990 {
22991 struct arm_stack_offsets *offsets;
22992
22993 offsets = &cfun->machine->stack_offsets;
22994
22995 return offsets;
22996 }
22997
22998
22999 /* Calculate stack offsets. These are used to calculate register elimination
23000 offsets and in prologue/epilogue code. Also calculates which registers
23001 should be saved. */
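/* In the common case the offsets computed below are monotonically
   non-decreasing: saved_args <= frame <= saved_regs <= soft_frame
   <= locals_base <= outgoing_args, matching the stack layout diagram
   above.  */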
23002
23003 static void
23004 arm_compute_frame_layout (void)
23005 {
23006 struct arm_stack_offsets *offsets;
23007 unsigned long func_type;
23008 int saved;
23009 int core_saved;
23010 HOST_WIDE_INT frame_size;
23011 int i;
23012
23013 offsets = &cfun->machine->stack_offsets;
23014
23015 /* Initially this is the size of the local variables. It will be translated
23016 into an offset once we have determined the size of the preceding data. */
23017 frame_size = ROUND_UP_WORD (get_frame_size ());
23018
23019 /* Space for variadic functions. */
23020 offsets->saved_args = crtl->args.pretend_args_size;
23021
23022 /* In Thumb mode this is incorrect, but never used. */
23023 offsets->frame
23024 = (offsets->saved_args
23025 + arm_compute_static_chain_stack_bytes ()
23026 + (frame_pointer_needed ? 4 : 0));
23027
23028 if (TARGET_32BIT)
23029 {
23030 unsigned int regno;
23031
23032 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
23033 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23034 saved = core_saved;
23035
23036 /* We know that SP will be doubleword aligned on entry, and we must
23037 preserve that condition at any subroutine call. We also require the
23038 soft frame pointer to be doubleword aligned. */
23039
23040 if (TARGET_REALLY_IWMMXT)
23041 {
23042 /* Check for the call-saved iWMMXt registers. */
23043 for (regno = FIRST_IWMMXT_REGNUM;
23044 regno <= LAST_IWMMXT_REGNUM;
23045 regno++)
23046 if (reg_needs_saving_p (regno))
23047 saved += 8;
23048 }
23049
23050 func_type = arm_current_func_type ();
23051 /* Space for saved VFP registers. */
23052 if (! IS_VOLATILE (func_type)
23053 && TARGET_VFP_BASE)
23054 saved += arm_get_vfp_saved_size ();
23055
23056 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
23057 nonsecure entry functions with VSTR/VLDR. */
23058 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23059 saved += 4;
23060 }
23061 else /* TARGET_THUMB1 */
23062 {
23063 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
23064 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23065 saved = core_saved;
23066 if (TARGET_BACKTRACE)
23067 saved += 16;
23068 }
23069
23070 /* Saved registers include the stack frame. */
23071 offsets->saved_regs
23072 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
23073 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
23074
23075 /* A leaf function does not need any stack alignment if it has nothing
23076 on the stack. */
23077 if (crtl->is_leaf && frame_size == 0
23078 /* However if it calls alloca(), we have a dynamically allocated
23079 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23080 && ! cfun->calls_alloca)
23081 {
23082 offsets->outgoing_args = offsets->soft_frame;
23083 offsets->locals_base = offsets->soft_frame;
23084 return;
23085 }
23086
23087 /* Ensure SFP has the correct alignment. */
23088 if (ARM_DOUBLEWORD_ALIGN
23089 && (offsets->soft_frame & 7))
23090 {
23091 offsets->soft_frame += 4;
23092 /* Try to align stack by pushing an extra reg. Don't bother doing this
23093 when there is a stack frame as the alignment will be rolled into
23094 the normal stack adjustment. */
23095 if (frame_size + crtl->outgoing_args_size == 0)
23096 {
23097 int reg = -1;
23098
23099 /* Register r3 is caller-saved. Normally it does not need to be
23100 saved on entry by the prologue. However if we choose to save
23101 it for padding then we may confuse the compiler into thinking
23102 a prologue sequence is required when in fact it is not. This
23103 will occur when shrink-wrapping if r3 is used as a scratch
23104 register and there are no other callee-saved writes.
23105
23106 This situation can be avoided when other callee-saved registers
23107 are available and r3 is not mandatory: choosing a callee-saved
23108 register for the padding avoids the problem.
23109 bool prefer_callee_reg_p = false;
23110
23111 /* If it is safe to use r3, then do so. This sometimes
23112 generates better code on Thumb-2 by avoiding the need to
23113 use 32-bit push/pop instructions. */
23114 if (! any_sibcall_could_use_r3 ()
23115 && arm_size_return_regs () <= 12
23116 && (offsets->saved_regs_mask & (1 << 3)) == 0
23117 && (TARGET_THUMB2
23118 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23119 {
23120 reg = 3;
23121 if (!TARGET_THUMB2)
23122 prefer_callee_reg_p = true;
23123 }
23124 if (reg == -1
23125 || prefer_callee_reg_p)
23126 {
23127 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23128 {
23129 /* Avoid fixed registers; they may be changed at
23130 arbitrary times so it's unsafe to restore them
23131 during the epilogue. */
23132 if (!fixed_regs[i]
23133 && (offsets->saved_regs_mask & (1 << i)) == 0)
23134 {
23135 reg = i;
23136 break;
23137 }
23138 }
23139 }
23140
23141 if (reg != -1)
23142 {
23143 offsets->saved_regs += 4;
23144 offsets->saved_regs_mask |= (1 << reg);
23145 }
23146 }
23147 }
23148
23149 offsets->locals_base = offsets->soft_frame + frame_size;
23150 offsets->outgoing_args = (offsets->locals_base
23151 + crtl->outgoing_args_size);
23152
23153 if (ARM_DOUBLEWORD_ALIGN)
23154 {
23155 /* Ensure SP remains doubleword aligned. */
23156 if (offsets->outgoing_args & 7)
23157 offsets->outgoing_args += 4;
23158 gcc_assert (!(offsets->outgoing_args & 7));
23159 }
23160 }
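/* Worked example (illustrative only, not from the sources): a 32-bit function
   with no pretend args or static chain, {r4, r5, lr} saved, 16 bytes of locals
   and 8 bytes of outgoing arguments would get roughly
       saved_args    = 0
       saved_regs    = 0 + 12
       soft_frame    = saved_regs + CALLER_INTERWORKING_SLOT_SIZE
       locals_base   = soft_frame + 16
       outgoing_args = locals_base + 8, rounded up so SP stays 8-byte aligned
   (ignoring the extra padding register the code above may decide to push).  */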
23161
23162
23163 /* Calculate the relative offsets for the different stack pointers. Positive
23164 offsets are in the direction of stack growth. */
23165
23166 HOST_WIDE_INT
23167 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23168 {
23169 arm_stack_offsets *offsets;
23170
23171 offsets = arm_get_frame_offsets ();
23172
23173 /* OK, now we have enough information to compute the distances.
23174 There must be an entry in these switch tables for each pair
23175 of registers in ELIMINABLE_REGS, even if some of the entries
23176 seem to be redundant or useless. */
23177 switch (from)
23178 {
23179 case ARG_POINTER_REGNUM:
23180 switch (to)
23181 {
23182 case THUMB_HARD_FRAME_POINTER_REGNUM:
23183 return 0;
23184
23185 case FRAME_POINTER_REGNUM:
23186 /* This is the reverse of the soft frame pointer
23187 to hard frame pointer elimination below. */
23188 return offsets->soft_frame - offsets->saved_args;
23189
23190 case ARM_HARD_FRAME_POINTER_REGNUM:
23191 /* This is only non-zero in the case where the static chain register
23192 is stored above the frame. */
23193 return offsets->frame - offsets->saved_args - 4;
23194
23195 case STACK_POINTER_REGNUM:
23196 /* If nothing has been pushed on the stack at all
23197 then this will return -4. This *is* correct! */
23198 return offsets->outgoing_args - (offsets->saved_args + 4);
23199
23200 default:
23201 gcc_unreachable ();
23202 }
23203 gcc_unreachable ();
23204
23205 case FRAME_POINTER_REGNUM:
23206 switch (to)
23207 {
23208 case THUMB_HARD_FRAME_POINTER_REGNUM:
23209 return 0;
23210
23211 case ARM_HARD_FRAME_POINTER_REGNUM:
23212 /* The hard frame pointer points to the top entry in the
23213 stack frame. The soft frame pointer points to the bottom entry
23214 in the stack frame. If there is no stack frame at all,
23215 then they are identical. */
23216
23217 return offsets->frame - offsets->soft_frame;
23218
23219 case STACK_POINTER_REGNUM:
23220 return offsets->outgoing_args - offsets->soft_frame;
23221
23222 default:
23223 gcc_unreachable ();
23224 }
23225 gcc_unreachable ();
23226
23227 default:
23228 /* You cannot eliminate from the stack pointer.
23229 In theory you could eliminate from the hard frame
23230 pointer to the stack pointer, but this will never
23231 happen, since if a stack frame is not needed the
23232 hard frame pointer will never be used. */
23233 gcc_unreachable ();
23234 }
23235 }
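/* Worked example (illustrative figures): with saved_args == 0, soft_frame == 12
   and outgoing_args == 40, eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM yields 40 - (0 + 4) = 36, while eliminating
   FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM yields 40 - 12 = 28.  */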
23236
23237 /* Given FROM and TO register numbers, say whether this elimination is
23238 allowed. Frame pointer elimination is automatically handled.
23239
23240 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23241 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23242 pointer, we must eliminate FRAME_POINTER_REGNUM into
23243 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23244 ARG_POINTER_REGNUM. */
23245
23246 bool
23247 arm_can_eliminate (const int from, const int to)
23248 {
23249 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23250 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23251 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23252 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23253 true);
23254 }
23255
23256 /* Emit RTL to save coprocessor registers on function entry. Returns the
23257 number of bytes pushed. */
23258
23259 static int
23260 arm_save_coproc_regs(void)
23261 {
23262 int saved_size = 0;
23263 unsigned reg;
23264 unsigned start_reg;
23265 rtx insn;
23266
23267 if (TARGET_REALLY_IWMMXT)
23268 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23269 if (reg_needs_saving_p (reg))
23270 {
23271 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23272 insn = gen_rtx_MEM (V2SImode, insn);
23273 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23274 RTX_FRAME_RELATED_P (insn) = 1;
23275 saved_size += 8;
23276 }
23277
23278 if (TARGET_VFP_BASE)
23279 {
23280 start_reg = FIRST_VFP_REGNUM;
23281
23282 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23283 {
23284 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23285 {
23286 if (start_reg != reg)
23287 saved_size += vfp_emit_fstmd (start_reg,
23288 (reg - start_reg) / 2);
23289 start_reg = reg + 2;
23290 }
23291 }
23292 if (start_reg != reg)
23293 saved_size += vfp_emit_fstmd (start_reg,
23294 (reg - start_reg) / 2);
23295 }
23296 return saved_size;
23297 }
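/* A minimal sketch of what the routine above emits (illustrative only): each
   iWMMXt register that needs saving becomes a pre-decrement store of the form
       (set (mem:V2SI (pre_dec:SI sp)) (reg:V2SI wrN))
   costing 8 bytes of stack, while contiguous runs of call-saved VFP D
   registers (e.g. d8-d15) are saved by a single vfp_emit_fstmd call per run,
   again at 8 bytes per register.  */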
23298
23299
23300 /* Set the Thumb frame pointer from the stack pointer. */
23301
23302 static void
23303 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23304 {
23305 HOST_WIDE_INT amount;
23306 rtx insn, dwarf;
23307
23308 amount = offsets->outgoing_args - offsets->locals_base;
23309 if (amount < 1024)
23310 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23311 stack_pointer_rtx, GEN_INT (amount)));
23312 else
23313 {
23314 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23315 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23316 expects the first two operands to be the same. */
23317 if (TARGET_THUMB2)
23318 {
23319 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23320 stack_pointer_rtx,
23321 hard_frame_pointer_rtx));
23322 }
23323 else
23324 {
23325 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23326 hard_frame_pointer_rtx,
23327 stack_pointer_rtx));
23328 }
23329 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23330 plus_constant (Pmode, stack_pointer_rtx, amount));
23331 RTX_FRAME_RELATED_P (dwarf) = 1;
23332 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23333 }
23334
23335 RTX_FRAME_RELATED_P (insn) = 1;
23336 }
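/* Illustrative only (assuming r7 is the Thumb hard frame pointer): for a
   16-byte outgoing-argument area the routine above emits
       add     r7, sp, #16
   while for an offset of 1024 or more it first loads the constant and then
   adds SP, roughly
       mov     r7, #<amount>
       add     r7, sp, r7        @ Thumb-2 operand order
       add     r7, r7, sp        @ Thumb-1 operand order  */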
23337
23338 struct scratch_reg {
23339 rtx reg;
23340 bool saved;
23341 };
23342
23343 /* Return a short-lived scratch register for use as a 2nd scratch register on
23344 function entry after the registers are saved in the prologue. This register
23345 must be released by means of release_scratch_register_on_entry. IP is not
23346 considered since it is always used as the 1st scratch register if available.
23347
23348 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23349 mask of live registers. */
23350
23351 static void
23352 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23353 unsigned long live_regs)
23354 {
23355 int regno = -1;
23356
23357 sr->saved = false;
23358
23359 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23360 regno = LR_REGNUM;
23361 else
23362 {
23363 unsigned int i;
23364
23365 for (i = 4; i < 11; i++)
23366 if (regno1 != i && (live_regs & (1 << i)) != 0)
23367 {
23368 regno = i;
23369 break;
23370 }
23371
23372 if (regno < 0)
23373 {
23374 /* If IP is used as the 1st scratch register for a nested function,
23375 then either r3 wasn't available or is used to preserve IP. */
23376 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23377 regno1 = 3;
23378 regno = (regno1 == 3 ? 2 : 3);
23379 sr->saved
23380 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23381 regno);
23382 }
23383 }
23384
23385 sr->reg = gen_rtx_REG (SImode, regno);
23386 if (sr->saved)
23387 {
23388 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23389 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23390 rtx x = gen_rtx_SET (stack_pointer_rtx,
23391 plus_constant (Pmode, stack_pointer_rtx, -4));
23392 RTX_FRAME_RELATED_P (insn) = 1;
23393 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23394 }
23395 }
23396
23397 /* Release a scratch register obtained from the preceding function. */
23398
23399 static void
23400 release_scratch_register_on_entry (struct scratch_reg *sr)
23401 {
23402 if (sr->saved)
23403 {
23404 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23405 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23406 rtx x = gen_rtx_SET (stack_pointer_rtx,
23407 plus_constant (Pmode, stack_pointer_rtx, 4));
23408 RTX_FRAME_RELATED_P (insn) = 1;
23409 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23410 }
23411 }
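/* Illustrative sketch: when the chosen scratch register is live on entry, the
   pair of helpers above wrap its use with
       str     rN, [sp, #-4]!    @ get_scratch_register_on_entry
       ...
       ldr     rN, [sp], #4      @ release_scratch_register_on_entry
   where rN is whichever register was picked (LR, one of r4-r10, r2 or r3).  */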
23412
23413 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23414
23415 #if PROBE_INTERVAL > 4096
23416 #error Cannot use indexed addressing mode for stack probing
23417 #endif
23418
23419 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23420 inclusive. These are offsets from the current stack pointer. REGNO1
23421 is the index number of the 1st scratch register and LIVE_REGS is the
23422 mask of live registers. */
23423
23424 static void
23425 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23426 unsigned int regno1, unsigned long live_regs)
23427 {
23428 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23429
23430 /* See if we have a constant small number of probes to generate. If so,
23431 that's the easy case. */
23432 if (size <= PROBE_INTERVAL)
23433 {
23434 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23435 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23436 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23437 }
23438
23439 /* The run-time loop is made up of 10 insns in the generic case while the
23440 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
23441 else if (size <= 5 * PROBE_INTERVAL)
23442 {
23443 HOST_WIDE_INT i, rem;
23444
23445 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23446 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23447 emit_stack_probe (reg1);
23448
23449 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23450 it exceeds SIZE. If only two probes are needed, this will not
23451 generate any code. Then probe at FIRST + SIZE. */
23452 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23453 {
23454 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23455 emit_stack_probe (reg1);
23456 }
23457
23458 rem = size - (i - PROBE_INTERVAL);
23459 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23460 {
23461 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23462 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23463 }
23464 else
23465 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23466 }
23467
23468 /* Otherwise, do the same as above, but in a loop. Note that we must be
23469 extra careful with variables wrapping around because we might be at
23470 the very top (or the very bottom) of the address space and we have
23471 to be able to handle this case properly; in particular, we use an
23472 equality test for the loop condition. */
23473 else
23474 {
23475 HOST_WIDE_INT rounded_size;
23476 struct scratch_reg sr;
23477
23478 get_scratch_register_on_entry (&sr, regno1, live_regs);
23479
23480 emit_move_insn (reg1, GEN_INT (first));
23481
23482
23483 /* Step 1: round SIZE to the previous multiple of the interval. */
23484
23485 rounded_size = size & -PROBE_INTERVAL;
23486 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23487
23488
23489 /* Step 2: compute initial and final value of the loop counter. */
23490
23491 /* TEST_ADDR = SP + FIRST. */
23492 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23493
23494 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23495 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23496
23497
23498 /* Step 3: the loop
23499
23500 do
23501 {
23502 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23503 probe at TEST_ADDR
23504 }
23505 while (TEST_ADDR != LAST_ADDR)
23506
23507 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23508 until it is equal to ROUNDED_SIZE. */
23509
23510 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23511
23512
23513 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23514 that SIZE is equal to ROUNDED_SIZE. */
23515
23516 if (size != rounded_size)
23517 {
23518 HOST_WIDE_INT rem = size - rounded_size;
23519
23520 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23521 {
23522 emit_set_insn (sr.reg,
23523 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23524 emit_stack_probe (plus_constant (Pmode, sr.reg,
23525 PROBE_INTERVAL - rem));
23526 }
23527 else
23528 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23529 }
23530
23531 release_scratch_register_on_entry (&sr);
23532 }
23533
23534 /* Make sure nothing is scheduled before we are done. */
23535 emit_insn (gen_blockage ());
23536 }
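/* Worked example (assuming the default 4 kB PROBE_INTERVAL): for FIRST == 4 kB
   and SIZE == 10 kB the routine above emits probes at SP - 8 kB, SP - 12 kB and
   SP - 14 kB, i.e. at FIRST + 4 kB, FIRST + 8 kB and FIRST + SIZE.  */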
23537
23538 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23539 absolute addresses. */
23540
23541 const char *
23542 output_probe_stack_range (rtx reg1, rtx reg2)
23543 {
23544 static int labelno = 0;
23545 char loop_lab[32];
23546 rtx xops[2];
23547
23548 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23549
23550 /* Loop. */
23551 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23552
23553 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23554 xops[0] = reg1;
23555 xops[1] = GEN_INT (PROBE_INTERVAL);
23556 output_asm_insn ("sub\t%0, %0, %1", xops);
23557
23558 /* Probe at TEST_ADDR. */
23559 output_asm_insn ("str\tr0, [%0, #0]", xops);
23560
23561 /* Test if TEST_ADDR == LAST_ADDR. */
23562 xops[1] = reg2;
23563 output_asm_insn ("cmp\t%0, %1", xops);
23564
23565 /* Branch. */
23566 fputs ("\tbne\t", asm_out_file);
23567 assemble_name_raw (asm_out_file, loop_lab);
23568 fputc ('\n', asm_out_file);
23569
23570 return "";
23571 }
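/* A sketch of the loop printed above, assuming the default 4 kB probe interval
   and made-up register numbers:
       .LPSRL0:
               sub     r4, r4, #4096   @ TEST_ADDR -= PROBE_INTERVAL
               str     r0, [r4, #0]    @ probe at TEST_ADDR
               cmp     r4, r5          @ reached LAST_ADDR yet?
               bne     .LPSRL0  */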
23572
23573 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23574 function. */
23575 void
23576 arm_expand_prologue (void)
23577 {
23578 rtx amount;
23579 rtx insn;
23580 rtx ip_rtx;
23581 unsigned long live_regs_mask;
23582 unsigned long func_type;
23583 int fp_offset = 0;
23584 int saved_pretend_args = 0;
23585 int saved_regs = 0;
23586 unsigned HOST_WIDE_INT args_to_push;
23587 HOST_WIDE_INT size;
23588 arm_stack_offsets *offsets;
23589 bool clobber_ip;
23590
23591 func_type = arm_current_func_type ();
23592
23593 /* Naked functions don't have prologues. */
23594 if (IS_NAKED (func_type))
23595 {
23596 if (flag_stack_usage_info)
23597 current_function_static_stack_size = 0;
23598 return;
23599 }
23600
23601 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
23602 args_to_push = crtl->args.pretend_args_size;
23603
23604 /* Compute which register we will have to save onto the stack. */
23605 offsets = arm_get_frame_offsets ();
23606 live_regs_mask = offsets->saved_regs_mask;
23607
23608 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23609
23610 if (IS_STACKALIGN (func_type))
23611 {
23612 rtx r0, r1;
23613
23614 /* Handle a word-aligned stack pointer. We generate the following:
23615
23616 mov r0, sp
23617 bic r1, r0, #7
23618 mov sp, r1
23619 <save and restore r0 in normal prologue/epilogue>
23620 mov sp, r0
23621 bx lr
23622
23623 The unwinder doesn't need to know about the stack realignment.
23624 Just tell it we saved SP in r0. */
23625 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23626
23627 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23628 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23629
23630 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23631 RTX_FRAME_RELATED_P (insn) = 1;
23632 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23633
23634 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23635
23636 /* ??? The CFA changes here, which may cause GDB to conclude that it
23637 has entered a different function. That said, the unwind info is
23638 correct, individually, before and after this instruction because
23639 we've described the save of SP, which will override the default
23640 handling of SP as restoring from the CFA. */
23641 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23642 }
23643
23644 /* Compute the static_chain_stack_bytes required and store it. At this
23645 point the value must still be -1, as set by arm_init_machine_status (). */
23646 cfun->machine->static_chain_stack_bytes
23647 = arm_compute_static_chain_stack_bytes ();
23648
23649 /* The static chain register is the same as the IP register. If it is
23650 clobbered when creating the frame, we need to save and restore it. */
23651 clobber_ip = (IS_NESTED (func_type)
23652 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23653 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23654 || flag_stack_clash_protection)
23655 && !df_regs_ever_live_p (LR_REGNUM)
23656 && arm_r3_live_at_start_p ()))
23657 || arm_current_function_pac_enabled_p ()));
23658
23659 /* Find somewhere to store IP whilst the frame is being created.
23660 We try the following places in order:
23661
23662 1. The last argument register r3 if it is available.
23663 2. A slot on the stack above the frame if there are no
23664 arguments to push onto the stack.
23665 3. Register r3 again, after pushing the argument registers
23666 onto the stack, if this is a varargs function.
23667 4. The last slot on the stack created for the arguments to
23668 push, if this isn't a varargs function.
23669
23670 Note - we only need to tell the dwarf2 backend about the SP
23671 adjustment in the second variant; the static chain register
23672 doesn't need to be unwound, as it doesn't contain a value
23673 inherited from the caller. */
23674 if (clobber_ip)
23675 {
23676 if (!arm_r3_live_at_start_p ())
23677 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23678 else if (args_to_push == 0)
23679 {
23680 rtx addr, dwarf;
23681
23682 saved_regs += 4;
23683
23684 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23685 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23686 fp_offset = 4;
23687
23688 /* Just tell the dwarf backend that we adjusted SP. */
23689 dwarf = gen_rtx_SET (stack_pointer_rtx,
23690 plus_constant (Pmode, stack_pointer_rtx,
23691 -fp_offset));
23692 RTX_FRAME_RELATED_P (insn) = 1;
23693 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23694 if (arm_current_function_pac_enabled_p ())
23695 cfun->machine->pacspval_needed = 1;
23696 }
23697 else
23698 {
23699 /* Store the args on the stack. */
23700 if (cfun->machine->uses_anonymous_args)
23701 {
23702 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23703 (0xf0 >> (args_to_push / 4)) & 0xf);
23704 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23705 saved_pretend_args = 1;
23706 }
23707 else
23708 {
23709 rtx addr, dwarf;
23710
23711 if (args_to_push == 4)
23712 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23713 else
23714 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23715 plus_constant (Pmode,
23716 stack_pointer_rtx,
23717 -args_to_push));
23718
23719 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23720
23721 /* Just tell the dwarf backend that we adjusted SP. */
23722 dwarf = gen_rtx_SET (stack_pointer_rtx,
23723 plus_constant (Pmode, stack_pointer_rtx,
23724 -args_to_push));
23725 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23726 }
23727
23728 RTX_FRAME_RELATED_P (insn) = 1;
23729 fp_offset = args_to_push;
23730 args_to_push = 0;
23731 if (arm_current_function_pac_enabled_p ())
23732 cfun->machine->pacspval_needed = 1;
23733 }
23734 }
23735
23736 if (arm_current_function_pac_enabled_p ())
23737 {
23738 /* If IP was clobbered we only emit a PAC instruction as the BTI
23739 one will be added before the push of the clobbered IP (if
23740 necessary) by the bti pass. */
23741 if (aarch_bti_enabled () && !clobber_ip)
23742 insn = emit_insn (gen_pacbti_nop ());
23743 else
23744 insn = emit_insn (gen_pac_nop ());
23745
23746 rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
23747 RTX_FRAME_RELATED_P (insn) = 1;
23748 add_reg_note (insn, REG_CFA_REGISTER, dwarf);
23749 }
23750
23751 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23752 {
23753 if (IS_INTERRUPT (func_type))
23754 {
23755 /* Interrupt functions must not corrupt any registers.
23756 Creating a frame pointer however, corrupts the IP
23757 register, so we must push it first. */
23758 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23759
23760 /* Do not set RTX_FRAME_RELATED_P on this insn.
23761 The dwarf stack unwinding code only wants to see one
23762 stack decrement per function, and this is not it. If
23763 this instruction is labeled as being part of the frame
23764 creation sequence then dwarf2out_frame_debug_expr will
23765 die when it encounters the assignment of IP to FP
23766 later on, since the use of SP here establishes SP as
23767 the CFA register and not IP.
23768
23769 Anyway this instruction is not really part of the stack
23770 frame creation although it is part of the prologue. */
23771 }
23772
23773 insn = emit_set_insn (ip_rtx,
23774 plus_constant (Pmode, stack_pointer_rtx,
23775 fp_offset));
23776 RTX_FRAME_RELATED_P (insn) = 1;
23777 }
23778
23779 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23780 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23781 {
23782 saved_regs += 4;
23783 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23784 GEN_INT (FPCXTNS_ENUM)));
23785 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23786 plus_constant (Pmode, stack_pointer_rtx, -4));
23787 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23788 RTX_FRAME_RELATED_P (insn) = 1;
23789 }
23790
23791 if (args_to_push)
23792 {
23793 /* Push the argument registers, or reserve space for them. */
23794 if (cfun->machine->uses_anonymous_args)
23795 insn = emit_multi_reg_push
23796 ((0xf0 >> (args_to_push / 4)) & 0xf,
23797 (0xf0 >> (args_to_push / 4)) & 0xf);
23798 else
23799 insn = emit_insn
23800 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23801 GEN_INT (- args_to_push)));
23802 RTX_FRAME_RELATED_P (insn) = 1;
23803 }
23804
23805 /* If this is an interrupt service routine, and the link register
23806 is going to be pushed, and we're not generating extra
23807 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
23808 subtracting four from LR now will mean that the function return
23809 can be done with a single instruction. */
23810 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23811 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23812 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23813 && TARGET_ARM)
23814 {
23815 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23816
23817 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23818 }
23819
23820 if (live_regs_mask)
23821 {
23822 unsigned long dwarf_regs_mask = live_regs_mask;
23823
23824 saved_regs += bit_count (live_regs_mask) * 4;
23825 if (optimize_size && !frame_pointer_needed
23826 && saved_regs == offsets->saved_regs - offsets->saved_args)
23827 {
23828 /* If no coprocessor registers are being pushed and we don't have
23829 to worry about a frame pointer then push extra registers to
23830 create the stack frame. This is done in a way that does not
23831 alter the frame layout, so is independent of the epilogue. */
23832 int n;
23833 int frame;
23834 n = 0;
23835 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23836 n++;
23837 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23838 if (frame && n * 4 >= frame)
23839 {
23840 n = frame / 4;
23841 live_regs_mask |= (1 << n) - 1;
23842 saved_regs += frame;
23843 }
23844 }
23845
23846 if (TARGET_LDRD
23847 && current_tune->prefer_ldrd_strd
23848 && !optimize_function_for_size_p (cfun))
23849 {
23850 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23851 if (TARGET_THUMB2)
23852 thumb2_emit_strd_push (live_regs_mask);
23853 else if (TARGET_ARM
23854 && !TARGET_APCS_FRAME
23855 && !IS_INTERRUPT (func_type))
23856 arm_emit_strd_push (live_regs_mask);
23857 else
23858 {
23859 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23860 RTX_FRAME_RELATED_P (insn) = 1;
23861 }
23862 }
23863 else
23864 {
23865 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23866 RTX_FRAME_RELATED_P (insn) = 1;
23867 }
23868 }
23869
23870 if (! IS_VOLATILE (func_type))
23871 saved_regs += arm_save_coproc_regs ();
23872
23873 if (frame_pointer_needed && TARGET_ARM)
23874 {
23875 /* Create the new frame pointer. */
23876 if (TARGET_APCS_FRAME)
23877 {
23878 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23879 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23880 RTX_FRAME_RELATED_P (insn) = 1;
23881 }
23882 else
23883 {
23884 insn = GEN_INT (saved_regs - (4 + fp_offset));
23885 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23886 stack_pointer_rtx, insn));
23887 RTX_FRAME_RELATED_P (insn) = 1;
23888 }
23889 }
23890
23891 size = offsets->outgoing_args - offsets->saved_args;
23892 if (flag_stack_usage_info)
23893 current_function_static_stack_size = size;
23894
23895 /* If this isn't an interrupt service routine and we have a frame, then do
23896 stack checking. We use IP as the first scratch register, except for the
23897 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23898 if (!IS_INTERRUPT (func_type)
23899 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23900 || flag_stack_clash_protection))
23901 {
23902 unsigned int regno;
23903
23904 if (!IS_NESTED (func_type) || clobber_ip)
23905 regno = IP_REGNUM;
23906 else if (df_regs_ever_live_p (LR_REGNUM))
23907 regno = LR_REGNUM;
23908 else
23909 regno = 3;
23910
23911 if (crtl->is_leaf && !cfun->calls_alloca)
23912 {
23913 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23914 arm_emit_probe_stack_range (get_stack_check_protect (),
23915 size - get_stack_check_protect (),
23916 regno, live_regs_mask);
23917 }
23918 else if (size > 0)
23919 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23920 regno, live_regs_mask);
23921 }
23922
23923 /* Recover the static chain register. */
23924 if (clobber_ip)
23925 {
23926 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23927 insn = gen_rtx_REG (SImode, 3);
23928 else
23929 {
23930 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23931 insn = gen_frame_mem (SImode, insn);
23932 }
23933 emit_set_insn (ip_rtx, insn);
23934 emit_insn (gen_force_register_use (ip_rtx));
23935 }
23936
23937 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23938 {
23939 /* This add can produce multiple insns for a large constant, so we
23940 need to get tricky. */
23941 rtx_insn *last = get_last_insn ();
23942
23943 amount = GEN_INT (offsets->saved_args + saved_regs
23944 - offsets->outgoing_args);
23945
23946 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23947 amount));
23948 do
23949 {
23950 last = last ? NEXT_INSN (last) : get_insns ();
23951 RTX_FRAME_RELATED_P (last) = 1;
23952 }
23953 while (last != insn);
23954
23955 /* If the frame pointer is needed, emit a special barrier that
23956 will prevent the scheduler from moving stores to the frame
23957 before the stack adjustment. */
23958 if (frame_pointer_needed)
23959 emit_insn (gen_stack_tie (stack_pointer_rtx,
23960 hard_frame_pointer_rtx));
23961 }
23962
23963
23964 if (frame_pointer_needed && TARGET_THUMB2)
23965 thumb_set_frame_pointer (offsets);
23966
23967 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23968 {
23969 unsigned long mask;
23970
23971 mask = live_regs_mask;
23972 mask &= THUMB2_WORK_REGS;
23973 if (!IS_NESTED (func_type))
23974 mask |= (1 << IP_REGNUM);
23975 arm_load_pic_register (mask, NULL_RTX);
23976 }
23977
23978 /* If we are profiling, make sure no instructions are scheduled before
23979 the call to mcount. Similarly if the user has requested no
23980 scheduling in the prolog. Similarly if we want non-call exceptions
23981 using the EABI unwinder, to prevent faulting instructions from being
23982 swapped with a stack adjustment. */
23983 if (crtl->profile || !TARGET_SCHED_PROLOG
23984 || (arm_except_unwind_info (&global_options) == UI_TARGET
23985 && cfun->can_throw_non_call_exceptions))
23986 emit_insn (gen_blockage ());
23987
23988 /* If the link register is being kept alive, with the return address in it,
23989 then make sure that it does not get reused by the ce2 pass. */
23990 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23991 cfun->machine->lr_save_eliminated = 1;
23992 }
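/* A hedged sketch of a typical ARM -mapcs-frame prologue produced by the steps
   above (the exact register set and offsets depend on the function):
       mov     ip, sp
       push    {fp, ip, lr, pc}
       sub     fp, ip, #4
       sub     sp, sp, #<locals + outgoing args>  */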
23993 \f
23994 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23995 static void
23996 arm_print_condition (FILE *stream)
23997 {
23998 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23999 {
24000 /* Branch conversion is not implemented for Thumb-2. */
24001 if (TARGET_THUMB)
24002 {
24003 output_operand_lossage ("predicated Thumb instruction");
24004 return;
24005 }
24006 if (current_insn_predicate != NULL)
24007 {
24008 output_operand_lossage
24009 ("predicated instruction in conditional sequence");
24010 return;
24011 }
24012
24013 fputs (arm_condition_codes[arm_current_cc], stream);
24014 }
24015 else if (current_insn_predicate)
24016 {
24017 enum arm_cond_code code;
24018
24019 if (TARGET_THUMB1)
24020 {
24021 output_operand_lossage ("predicated Thumb instruction");
24022 return;
24023 }
24024
24025 code = get_arm_condition_code (current_insn_predicate);
24026 fputs (arm_condition_codes[code], stream);
24027 }
24028 }
24029
24030
24031 /* Globally reserved letters: acln
24032 Punctuation letters currently used: @_|?().!#
24033 Lower case letters currently used: bcdefhimpqtvwxyz
24034 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
24035 Letters previously used, but now deprecated/obsolete: sWXYZ.
24036
24037 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
24038
24039 If CODE is 'd', then X is a condition operand and the instruction
24040 should only be executed if the condition is true.
24041 If CODE is 'D', then X is a condition operand and the instruction
24042 should only be executed if the condition is false: however, if the mode
24043 of the comparison is CCFPEmode, then always execute the instruction -- we
24044 do this because in these circumstances !GE does not necessarily imply LT;
24045 in these cases the instruction pattern will take care to make sure that
24046 an instruction containing %d will follow, thereby undoing the effects of
24047 doing this instruction unconditionally.
24048 If CODE is 'N' then X is a floating point operand that must be negated
24049 before output.
24050 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24051 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24052 If CODE is 'V', then the operand must be a CONST_INT representing
24053 the bits to preserve in the modified register (Rd) of a BFI or BFC
24054 instruction: print out both the width and lsb (shift) fields. */
24055 static void
24056 arm_print_operand (FILE *stream, rtx x, int code)
24057 {
24058 switch (code)
24059 {
24060 case '@':
24061 fputs (ASM_COMMENT_START, stream);
24062 return;
24063
24064 case '_':
24065 fputs (user_label_prefix, stream);
24066 return;
24067
24068 case '|':
24069 fputs (REGISTER_PREFIX, stream);
24070 return;
24071
24072 case '?':
24073 arm_print_condition (stream);
24074 return;
24075
24076 case '.':
24077 /* The current condition code for a condition code setting instruction.
24078 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24079 fputc('s', stream);
24080 arm_print_condition (stream);
24081 return;
24082
24083 case '!':
24084 /* If the instruction is conditionally executed then print
24085 the current condition code, otherwise print 's'. */
24086 gcc_assert (TARGET_THUMB2);
24087 if (current_insn_predicate)
24088 arm_print_condition (stream);
24089 else
24090 fputc('s', stream);
24091 break;
24092
24093 /* %# is a "break" sequence. It doesn't output anything, but is used to
24094 separate e.g. operand numbers from following text, if that text consists
24095 of further digits which we don't want to be part of the operand
24096 number. */
24097 case '#':
24098 return;
24099
24100 case 'N':
24101 {
24102 REAL_VALUE_TYPE r;
24103 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
24104 fprintf (stream, "%s", fp_const_from_val (&r));
24105 }
24106 return;
24107
24108 /* An integer or symbol address without a preceding # sign. */
24109 case 'c':
24110 switch (GET_CODE (x))
24111 {
24112 case CONST_INT:
24113 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24114 break;
24115
24116 case SYMBOL_REF:
24117 output_addr_const (stream, x);
24118 break;
24119
24120 case CONST:
24121 if (GET_CODE (XEXP (x, 0)) == PLUS
24122 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24123 {
24124 output_addr_const (stream, x);
24125 break;
24126 }
24127 /* Fall through. */
24128
24129 default:
24130 output_operand_lossage ("Unsupported operand for code '%c'", code);
24131 }
24132 return;
24133
24134 /* An integer that we want to print in HEX. */
24135 case 'x':
24136 switch (GET_CODE (x))
24137 {
24138 case CONST_INT:
24139 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24140 break;
24141
24142 default:
24143 output_operand_lossage ("Unsupported operand for code '%c'", code);
24144 }
24145 return;
24146
24147 case 'B':
24148 if (CONST_INT_P (x))
24149 {
24150 HOST_WIDE_INT val;
24151 val = ARM_SIGN_EXTEND (~INTVAL (x));
24152 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24153 }
24154 else
24155 {
24156 putc ('~', stream);
24157 output_addr_const (stream, x);
24158 }
24159 return;
24160
24161 case 'b':
24162 /* Print the log2 of a CONST_INT. */
24163 {
24164 HOST_WIDE_INT val;
24165
24166 if (!CONST_INT_P (x)
24167 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24168 output_operand_lossage ("Unsupported operand for code '%c'", code);
24169 else
24170 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24171 }
24172 return;
24173
24174 case 'L':
24175 /* The low 16 bits of an immediate constant. */
24176 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24177 return;
24178
24179 case 'i':
24180 fprintf (stream, "%s", arithmetic_instr (x, 1));
24181 return;
24182
24183 case 'I':
24184 fprintf (stream, "%s", arithmetic_instr (x, 0));
24185 return;
24186
24187 case 'S':
24188 {
24189 HOST_WIDE_INT val;
24190 const char *shift;
24191
24192 shift = shift_op (x, &val);
24193
24194 if (shift)
24195 {
24196 fprintf (stream, ", %s ", shift);
24197 if (val == -1)
24198 arm_print_operand (stream, XEXP (x, 1), 0);
24199 else
24200 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24201 }
24202 }
24203 return;
24204
24205 /* An explanation of the 'Q', 'R' and 'H' register operands:
24206
24207 In a pair of registers containing a DI or DF value the 'Q'
24208 operand returns the register number of the register containing
24209 the least significant part of the value. The 'R' operand returns
24210 the register number of the register containing the most
24211 significant part of the value.
24212
24213 The 'H' operand returns the higher of the two register numbers.
24214 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24215 same as the 'Q' operand, since the most significant part of the
24216 value is held in the lower number register. The reverse is true
24217 on systems where WORDS_BIG_ENDIAN is false.
24218
24219 The purpose of these operands is to distinguish between cases
24220 where the endian-ness of the values is important (for example
24221 when they are added together), and cases where the endian-ness
24222 is irrelevant, but the order of register operations is important.
24223 For example when loading a value from memory into a register
24224 pair, the endian-ness does not matter. Provided that the value
24225 from the lower memory address is put into the lower numbered
24226 register, and the value from the higher address is put into the
24227 higher numbered register, the load will work regardless of whether
24228 the value being loaded is big-wordian or little-wordian. The
24229 order of the two register loads can matter however, if the address
24230 of the memory location is actually held in one of the registers
24231 being overwritten by the load.
24232
24233 The 'Q' and 'R' constraints are also available for 64-bit
24234 constants. */
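/* For example (little-endian, illustrative): a DImode value held in the pair
   {r4, r5} prints r4 for %Q (least significant word), r5 for %R (most
   significant word) and r5 for %H (the higher register number).  */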
24235 case 'Q':
24236 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24237 {
24238 rtx part = gen_lowpart (SImode, x);
24239 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24240 return;
24241 }
24242
24243 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24244 {
24245 output_operand_lossage ("invalid operand for code '%c'", code);
24246 return;
24247 }
24248
24249 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24250 return;
24251
24252 case 'R':
24253 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24254 {
24255 machine_mode mode = GET_MODE (x);
24256 rtx part;
24257
24258 if (mode == VOIDmode)
24259 mode = DImode;
24260 part = gen_highpart_mode (SImode, mode, x);
24261 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24262 return;
24263 }
24264
24265 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24266 {
24267 output_operand_lossage ("invalid operand for code '%c'", code);
24268 return;
24269 }
24270
24271 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24272 return;
24273
24274 case 'H':
24275 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24276 {
24277 output_operand_lossage ("invalid operand for code '%c'", code);
24278 return;
24279 }
24280
24281 asm_fprintf (stream, "%r", REGNO (x) + 1);
24282 return;
24283
24284 case 'J':
24285 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24286 {
24287 output_operand_lossage ("invalid operand for code '%c'", code);
24288 return;
24289 }
24290
24291 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24292 return;
24293
24294 case 'K':
24295 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24296 {
24297 output_operand_lossage ("invalid operand for code '%c'", code);
24298 return;
24299 }
24300
24301 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24302 return;
24303
24304 case 'm':
24305 asm_fprintf (stream, "%r",
24306 REG_P (XEXP (x, 0))
24307 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24308 return;
24309
24310 case 'M':
24311 asm_fprintf (stream, "{%r-%r}",
24312 REGNO (x),
24313 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24314 return;
24315
24316 /* Like 'M', but writing doubleword vector registers, for use by Neon
24317 insns. */
24318 case 'h':
24319 {
24320 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24321 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24322 if (numregs == 1)
24323 asm_fprintf (stream, "{d%d}", regno);
24324 else
24325 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24326 }
24327 return;
24328
24329 case 'd':
24330 /* CONST_TRUE_RTX means always -- that's the default. */
24331 if (x == const_true_rtx)
24332 return;
24333
24334 if (!COMPARISON_P (x))
24335 {
24336 output_operand_lossage ("invalid operand for code '%c'", code);
24337 return;
24338 }
24339
24340 fputs (arm_condition_codes[get_arm_condition_code (x)],
24341 stream);
24342 return;
24343
24344 case 'D':
24345 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24346 want to do that. */
24347 if (x == const_true_rtx)
24348 {
24349 output_operand_lossage ("instruction never executed");
24350 return;
24351 }
24352 if (!COMPARISON_P (x))
24353 {
24354 output_operand_lossage ("invalid operand for code '%c'", code);
24355 return;
24356 }
24357
24358 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24359 (get_arm_condition_code (x))],
24360 stream);
24361 return;
24362
24363 case 'V':
24364 {
24365 /* Output the LSB (shift) and width for a bitmask instruction
24366 based on a literal mask. The LSB is printed first,
24367 followed by the width.
24368
24369 Eg. For 0b1...1110001, the result is #1, #3. */
24370 if (!CONST_INT_P (x))
24371 {
24372 output_operand_lossage ("invalid operand for code '%c'", code);
24373 return;
24374 }
24375
24376 unsigned HOST_WIDE_INT val
24377 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24378 int lsb = exact_log2 (val & -val);
24379 asm_fprintf (stream, "#%d, #%d", lsb,
24380 (exact_log2 (val + (val & -val)) - lsb));
24381 }
24382 return;
24383
24384 case 's':
24385 case 'W':
24386 case 'X':
24387 case 'Y':
24388 case 'Z':
24389 /* Former Maverick support, removed after GCC-4.7. */
24390 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24391 return;
24392
24393 case 'U':
24394 if (!REG_P (x)
24395 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24396 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24397 /* Bad value for wCG register number. */
24398 {
24399 output_operand_lossage ("invalid operand for code '%c'", code);
24400 return;
24401 }
24402
24403 else
24404 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24405 return;
24406
24407 /* Print an iWMMXt control register name. */
24408 case 'w':
24409 if (!CONST_INT_P (x)
24410 || INTVAL (x) < 0
24411 || INTVAL (x) >= 16)
24412 /* Bad value for wC register number. */
24413 {
24414 output_operand_lossage ("invalid operand for code '%c'", code);
24415 return;
24416 }
24417
24418 else
24419 {
24420 static const char * wc_reg_names [16] =
24421 {
24422 "wCID", "wCon", "wCSSF", "wCASF",
24423 "wC4", "wC5", "wC6", "wC7",
24424 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24425 "wC12", "wC13", "wC14", "wC15"
24426 };
24427
24428 fputs (wc_reg_names [INTVAL (x)], stream);
24429 }
24430 return;
24431
24432 /* Print the high single-precision register of a VFP double-precision
24433 register. */
24434 case 'p':
24435 {
24436 machine_mode mode = GET_MODE (x);
24437 int regno;
24438
24439 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24440 {
24441 output_operand_lossage ("invalid operand for code '%c'", code);
24442 return;
24443 }
24444
24445 regno = REGNO (x);
24446 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24447 {
24448 output_operand_lossage ("invalid operand for code '%c'", code);
24449 return;
24450 }
24451
24452 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24453 }
24454 return;
24455
24456 /* Print a VFP/Neon double precision or quad precision register name. */
24457 case 'P':
24458 case 'q':
24459 {
24460 machine_mode mode = GET_MODE (x);
24461 int is_quad = (code == 'q');
24462 int regno;
24463
24464 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24465 {
24466 output_operand_lossage ("invalid operand for code '%c'", code);
24467 return;
24468 }
24469
24470 if (!REG_P (x)
24471 || !IS_VFP_REGNUM (REGNO (x)))
24472 {
24473 output_operand_lossage ("invalid operand for code '%c'", code);
24474 return;
24475 }
24476
24477 regno = REGNO (x);
24478 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24479 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24480 {
24481 output_operand_lossage ("invalid operand for code '%c'", code);
24482 return;
24483 }
24484
24485 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24486 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24487 }
24488 return;
24489
24490 /* These two codes print the low/high doubleword register of a Neon quad
24491 register, respectively. For pair-structure types, can also print
24492 low/high quadword registers. */
24493 case 'e':
24494 case 'f':
24495 {
24496 machine_mode mode = GET_MODE (x);
24497 int regno;
24498
24499 if ((GET_MODE_SIZE (mode) != 16
24500 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24501 {
24502 output_operand_lossage ("invalid operand for code '%c'", code);
24503 return;
24504 }
24505
24506 regno = REGNO (x);
24507 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24508 {
24509 output_operand_lossage ("invalid operand for code '%c'", code);
24510 return;
24511 }
24512
24513 if (GET_MODE_SIZE (mode) == 16)
24514 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24515 + (code == 'f' ? 1 : 0));
24516 else
24517 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24518 + (code == 'f' ? 1 : 0));
24519 }
24520 return;
24521
24522 /* Print a VFPv3 floating-point constant, represented as an integer
24523 index. */
24524 case 'G':
24525 {
24526 int index = vfp3_const_double_index (x);
24527 gcc_assert (index != -1);
24528 fprintf (stream, "%d", index);
24529 }
24530 return;
24531
24532 /* Print bits representing opcode features for Neon.
24533
24534 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24535 and polynomials as unsigned.
24536
24537 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24538
24539 Bit 2 is 1 for rounding functions, 0 otherwise. */
24540
24541 /* Identify the type as 's', 'u', 'p' or 'f'. */
24542 case 'T':
24543 {
24544 HOST_WIDE_INT bits = INTVAL (x);
24545 fputc ("uspf"[bits & 3], stream);
24546 }
24547 return;
24548
24549 /* Likewise, but signed and unsigned integers are both 'i'. */
24550 case 'F':
24551 {
24552 HOST_WIDE_INT bits = INTVAL (x);
24553 fputc ("iipf"[bits & 3], stream);
24554 }
24555 return;
24556
24557 /* As for 'T', but emit 'u' instead of 'p'. */
24558 case 't':
24559 {
24560 HOST_WIDE_INT bits = INTVAL (x);
24561 fputc ("usuf"[bits & 3], stream);
24562 }
24563 return;
24564
24565 /* Bit 2: rounding (vs none). */
24566 case 'O':
24567 {
24568 HOST_WIDE_INT bits = INTVAL (x);
24569 fputs ((bits & 4) != 0 ? "r" : "", stream);
24570 }
24571 return;
24572
24573 /* Memory operand for vld1/vst1 instruction. */
24574 case 'A':
24575 {
24576 rtx addr;
24577 bool postinc = FALSE;
24578 rtx postinc_reg = NULL;
24579 unsigned align, memsize, align_bits;
24580
24581 gcc_assert (MEM_P (x));
24582 addr = XEXP (x, 0);
24583 if (GET_CODE (addr) == POST_INC)
24584 {
24585 postinc = 1;
24586 addr = XEXP (addr, 0);
24587 }
24588 if (GET_CODE (addr) == POST_MODIFY)
24589 {
24590 postinc_reg = XEXP( XEXP (addr, 1), 1);
24591 addr = XEXP (addr, 0);
24592 }
24593 asm_fprintf (stream, "[%r", REGNO (addr));
24594
24595 /* We know the alignment of this access, so we can emit a hint in the
24596 instruction (for some alignments) as an aid to the memory subsystem
24597 of the target. */
24598 align = MEM_ALIGN (x) >> 3;
24599 memsize = MEM_SIZE (x);
24600
24601 /* Only certain alignment specifiers are supported by the hardware. */
24602 if (memsize == 32 && (align % 32) == 0)
24603 align_bits = 256;
24604 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24605 align_bits = 128;
24606 else if (memsize >= 8 && (align % 8) == 0)
24607 align_bits = 64;
24608 else
24609 align_bits = 0;
24610
24611 if (align_bits != 0)
24612 asm_fprintf (stream, ":%d", align_bits);
24613
24614 asm_fprintf (stream, "]");
24615
24616 if (postinc)
24617 fputs("!", stream);
24618 if (postinc_reg)
24619 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24620 }
24621 return;
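/* Illustrative examples of the hint emitted above: a 16-byte access from a
   16-byte-aligned address prints "[r0:128]", an 8-byte access aligned to 8
   bytes prints "[r0:64]", and a 32-byte access aligned to 32 bytes prints
   "[r0:256]"; otherwise no ":<bits>" qualifier is added.  */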
24622
24623 /* Print the memory operand for an "Ux" or "Uj" constraint. Depending on the
24624 rtx_code, the output takes one of the following forms:
24625 1. [Rn], #+/-<imm>
24626 2. [Rn, #+/-<imm>]!
24627 3. [Rn, #+/-<imm>]
24628 4. [Rn]. */
24629 case 'E':
24630 {
24631 rtx addr;
24632 rtx postinc_reg = NULL;
24633 unsigned inc_val = 0;
24634 enum rtx_code code;
24635
24636 gcc_assert (MEM_P (x));
24637 addr = XEXP (x, 0);
24638 code = GET_CODE (addr);
24639 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24640 || code == PRE_DEC)
24641 {
24642 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24643 inc_val = GET_MODE_SIZE (GET_MODE (x));
24644 if (code == POST_INC || code == POST_DEC)
24645 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24646 ? "": "-", inc_val);
24647 else
24648 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24649 ? "": "-", inc_val);
24650 }
24651 else if (code == POST_MODIFY || code == PRE_MODIFY)
24652 {
24653 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24654 postinc_reg = XEXP (XEXP (addr, 1), 1);
24655 if (postinc_reg && CONST_INT_P (postinc_reg))
24656 {
24657 if (code == POST_MODIFY)
24658 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24659 else
24660 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24661 }
24662 }
24663 else if (code == PLUS)
24664 {
24665 rtx base = XEXP (addr, 0);
24666 rtx index = XEXP (addr, 1);
24667
24668 gcc_assert (REG_P (base) && CONST_INT_P (index));
24669
24670 HOST_WIDE_INT offset = INTVAL (index);
24671 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24672 }
24673 else
24674 {
24675 gcc_assert (REG_P (addr));
24676 asm_fprintf (stream, "[%r]",REGNO (addr));
24677 }
24678 }
24679 return;
24680
24681 case 'C':
24682 {
24683 rtx addr;
24684
24685 gcc_assert (MEM_P (x));
24686 addr = XEXP (x, 0);
24687 gcc_assert (REG_P (addr));
24688 asm_fprintf (stream, "[%r]", REGNO (addr));
24689 }
24690 return;
24691
24692 /* Translate an S register number into a D register number and element index. */
24693 case 'y':
24694 {
24695 machine_mode mode = GET_MODE (x);
24696 int regno;
24697
24698 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24699 {
24700 output_operand_lossage ("invalid operand for code '%c'", code);
24701 return;
24702 }
24703
24704 regno = REGNO (x);
24705 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24706 {
24707 output_operand_lossage ("invalid operand for code '%c'", code);
24708 return;
24709 }
24710
24711 regno = regno - FIRST_VFP_REGNUM;
24712 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24713 }
24714 return;
24715
24716 case 'v':
24717 gcc_assert (CONST_DOUBLE_P (x));
24718 int result;
24719 result = vfp3_const_double_for_fract_bits (x);
24720 if (result == 0)
24721 result = vfp3_const_double_for_bits (x);
24722 fprintf (stream, "#%d", result);
24723 return;
24724
24725 /* Register specifier for vld1.16/vst1.16. Translate the S register
24726 number into a D register number and element index. */
24727 case 'z':
24728 {
24729 machine_mode mode = GET_MODE (x);
24730 int regno;
24731
24732 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24733 {
24734 output_operand_lossage ("invalid operand for code '%c'", code);
24735 return;
24736 }
24737
24738 regno = REGNO (x);
24739 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24740 {
24741 output_operand_lossage ("invalid operand for code '%c'", code);
24742 return;
24743 }
24744
24745 regno = regno - FIRST_VFP_REGNUM;
24746 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24747 }
24748 return;
24749
24750 default:
24751 if (x == 0)
24752 {
24753 output_operand_lossage ("missing operand");
24754 return;
24755 }
24756
24757 switch (GET_CODE (x))
24758 {
24759 case REG:
24760 asm_fprintf (stream, "%r", REGNO (x));
24761 break;
24762
24763 case MEM:
24764 output_address (GET_MODE (x), XEXP (x, 0));
24765 break;
24766
24767 case CONST_DOUBLE:
24768 {
24769 char fpstr[20];
24770 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24771 sizeof (fpstr), 0, 1);
24772 fprintf (stream, "#%s", fpstr);
24773 }
24774 break;
24775
24776 default:
24777 gcc_assert (GET_CODE (x) != NEG);
24778 fputc ('#', stream);
24779 if (GET_CODE (x) == HIGH)
24780 {
24781 fputs (":lower16:", stream);
24782 x = XEXP (x, 0);
24783 }
24784
24785 output_addr_const (stream, x);
24786 break;
24787 }
24788 }
24789 }
24790 \f
24791 /* Target hook for printing a memory address. */
24792 static void
24793 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24794 {
24795 if (TARGET_32BIT)
24796 {
24797 int is_minus = GET_CODE (x) == MINUS;
24798
24799 if (REG_P (x))
24800 asm_fprintf (stream, "[%r]", REGNO (x));
24801 else if (GET_CODE (x) == PLUS || is_minus)
24802 {
24803 rtx base = XEXP (x, 0);
24804 rtx index = XEXP (x, 1);
24805 HOST_WIDE_INT offset = 0;
24806 if (!REG_P (base)
24807 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24808 {
24809 /* Ensure that BASE is a register. */
24810 /* (one of them must be). */
24811 /* Also ensure that SP is not used as an index register. */
24812 std::swap (base, index);
24813 }
24814 switch (GET_CODE (index))
24815 {
24816 case CONST_INT:
24817 offset = INTVAL (index);
24818 if (is_minus)
24819 offset = -offset;
24820 asm_fprintf (stream, "[%r, #%wd]",
24821 REGNO (base), offset);
24822 break;
24823
24824 case REG:
24825 asm_fprintf (stream, "[%r, %s%r]",
24826 REGNO (base), is_minus ? "-" : "",
24827 REGNO (index));
24828 break;
24829
24830 case MULT:
24831 case ASHIFTRT:
24832 case LSHIFTRT:
24833 case ASHIFT:
24834 case ROTATERT:
24835 {
24836 asm_fprintf (stream, "[%r, %s%r",
24837 REGNO (base), is_minus ? "-" : "",
24838 REGNO (XEXP (index, 0)));
24839 arm_print_operand (stream, index, 'S');
24840 fputs ("]", stream);
24841 break;
24842 }
24843
24844 default:
24845 gcc_unreachable ();
24846 }
24847 }
24848 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24849 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24850 {
24851 gcc_assert (REG_P (XEXP (x, 0)));
24852
24853 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24854 asm_fprintf (stream, "[%r, #%s%d]!",
24855 REGNO (XEXP (x, 0)),
24856 GET_CODE (x) == PRE_DEC ? "-" : "",
24857 GET_MODE_SIZE (mode));
24858 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24859 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24860 else
24861 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24862 GET_CODE (x) == POST_DEC ? "-" : "",
24863 GET_MODE_SIZE (mode));
24864 }
24865 else if (GET_CODE (x) == PRE_MODIFY)
24866 {
24867 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24868 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24869 asm_fprintf (stream, "#%wd]!",
24870 INTVAL (XEXP (XEXP (x, 1), 1)));
24871 else
24872 asm_fprintf (stream, "%r]!",
24873 REGNO (XEXP (XEXP (x, 1), 1)));
24874 }
24875 else if (GET_CODE (x) == POST_MODIFY)
24876 {
24877 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24878 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24879 asm_fprintf (stream, "#%wd",
24880 INTVAL (XEXP (XEXP (x, 1), 1)));
24881 else
24882 asm_fprintf (stream, "%r",
24883 REGNO (XEXP (XEXP (x, 1), 1)));
24884 }
24885 else output_addr_const (stream, x);
24886 }
24887 else
24888 {
24889 if (REG_P (x))
24890 asm_fprintf (stream, "[%r]", REGNO (x));
24891 else if (GET_CODE (x) == POST_INC)
24892 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24893 else if (GET_CODE (x) == PLUS)
24894 {
24895 gcc_assert (REG_P (XEXP (x, 0)));
24896 if (CONST_INT_P (XEXP (x, 1)))
24897 asm_fprintf (stream, "[%r, #%wd]",
24898 REGNO (XEXP (x, 0)),
24899 INTVAL (XEXP (x, 1)));
24900 else
24901 asm_fprintf (stream, "[%r, %r]",
24902 REGNO (XEXP (x, 0)),
24903 REGNO (XEXP (x, 1)));
24904 }
24905 else
24906 output_addr_const (stream, x);
24907 }
24908 }
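/* Illustrative outputs of the routine above (made-up registers):
       [r3]               plain register
       [r0, #8]           PLUS with constant index
       [r1, -r2]          MINUS with register index
       [r0, r1, lsl #2]   shifted index (printed via the 'S' operand code)
       [r5, #4]!          PRE_INC / PRE_DEC
       [r5], #4           POST_INC / POST_DEC  */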
24909 \f
24910 /* Target hook for indicating whether a punctuation character for
24911 TARGET_PRINT_OPERAND is valid. */
24912 static bool
24913 arm_print_operand_punct_valid_p (unsigned char code)
24914 {
24915 return (code == '@' || code == '|' || code == '.'
24916 || code == '(' || code == ')' || code == '#'
24917 || (TARGET_32BIT && (code == '?'))
24918 || (TARGET_THUMB2 && (code == '!'))
24919 || (TARGET_THUMB && (code == '_')));
24920 }
24921 \f
24922 /* Target hook for assembling integer objects. The ARM version needs to
24923 handle word-sized values specially. */
24924 static bool
24925 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24926 {
24927 machine_mode mode;
24928
24929 if (size == UNITS_PER_WORD && aligned_p)
24930 {
24931 fputs ("\t.word\t", asm_out_file);
24932 output_addr_const (asm_out_file, x);
24933
24934 /* Mark symbols as position independent. We only do this in the
24935 .text segment, not in the .data segment. */
24936 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24937 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24938 {
24939 /* See legitimize_pic_address for an explanation of the
24940 TARGET_VXWORKS_RTP check. */
24941 /* References to weak symbols cannot be resolved locally:
24942 they may be overridden by a non-weak definition at link
24943 time. */
24944 if (!arm_pic_data_is_text_relative
24945 || (SYMBOL_REF_P (x)
24946 && (!SYMBOL_REF_LOCAL_P (x)
24947 || (SYMBOL_REF_DECL (x)
24948 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24949 || (SYMBOL_REF_FUNCTION_P (x)
24950 && !arm_fdpic_local_funcdesc_p (x)))))
24951 {
24952 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24953 fputs ("(GOTFUNCDESC)", asm_out_file);
24954 else
24955 fputs ("(GOT)", asm_out_file);
24956 }
24957 else
24958 {
24959 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24960 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24961 else
24962 {
24963 bool is_readonly;
24964
24965 if (!TARGET_FDPIC
24966 || arm_is_segment_info_known (x, &is_readonly))
24967 fputs ("(GOTOFF)", asm_out_file);
24968 else
24969 fputs ("(GOT)", asm_out_file);
24970 }
24971 }
24972 }
24973
24974 /* For FDPIC we also have to mark symbol for .data section. */
24975 if (TARGET_FDPIC
24976 && !making_const_table
24977 && SYMBOL_REF_P (x)
24978 && SYMBOL_REF_FUNCTION_P (x))
24979 fputs ("(FUNCDESC)", asm_out_file);
24980
24981 fputc ('\n', asm_out_file);
24982 return true;
24983 }
24984
24985 mode = GET_MODE (x);
24986
24987 if (arm_vector_mode_supported_p (mode))
24988 {
24989 int i, units;
24990
24991 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24992
24993 units = CONST_VECTOR_NUNITS (x);
24994 size = GET_MODE_UNIT_SIZE (mode);
24995
24996 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24997 for (i = 0; i < units; i++)
24998 {
24999 rtx elt = CONST_VECTOR_ELT (x, i);
25000 assemble_integer
25001 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
25002 }
25003 else
25004 for (i = 0; i < units; i++)
25005 {
25006 rtx elt = CONST_VECTOR_ELT (x, i);
25007 assemble_real
25008 (*CONST_DOUBLE_REAL_VALUE (elt),
25009 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
25010 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
25011 }
25012
25013 return true;
25014 }
25015
25016 return default_assemble_integer (x, size, aligned_p);
25017 }
25018
25019 static void
25020 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
25021 {
25022 section *s;
25023
25024 if (!TARGET_AAPCS_BASED)
25025 {
25026 (is_ctor ?
25027 default_named_section_asm_out_constructor
25028 : default_named_section_asm_out_destructor) (symbol, priority);
25029 return;
25030 }
25031
25032 /* Put these in the .init_array section, using a special relocation. */
25033 if (priority != DEFAULT_INIT_PRIORITY)
25034 {
25035 char buf[18];
25036 sprintf (buf, "%s.%.5u",
25037 is_ctor ? ".init_array" : ".fini_array",
25038 priority);
25039 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
25040 }
25041 else if (is_ctor)
25042 s = ctors_section;
25043 else
25044 s = dtors_section;
25045
25046 switch_to_section (s);
25047 assemble_align (POINTER_SIZE);
25048 fputs ("\t.word\t", asm_out_file);
25049 output_addr_const (asm_out_file, symbol);
25050 fputs ("(target1)\n", asm_out_file);
25051 }
25052
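/* For example, a constructor with priority 100 is placed in section
".init_array.00100" and emitted roughly as "\t.word\t<symbol>(target1)",
where <symbol> stands for whatever SYMBOL was passed in (illustrative values).  */
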
25053 /* Add a function to the list of static constructors. */
25054
25055 static void
25056 arm_elf_asm_constructor (rtx symbol, int priority)
25057 {
25058 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
25059 }
25060
25061 /* Add a function to the list of static destructors. */
25062
25063 static void
25064 arm_elf_asm_destructor (rtx symbol, int priority)
25065 {
25066 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
25067 }
25068 \f
25069 /* A finite state machine takes care of noticing whether or not instructions
25070 can be conditionally executed, thus decreasing execution time and code
25071 size by deleting branch instructions. The fsm is controlled by
25072 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25073
25074 /* The states of the fsm controlling condition codes are:
25075 0: normal, do nothing special
25076 1: make ASM_OUTPUT_OPCODE not output this instruction
25077 2: make ASM_OUTPUT_OPCODE not output this instruction
25078 3: make instructions conditional
25079 4: make instructions conditional
25080
25081 State transitions (state->state by whom under condition):
25082 0 -> 1 final_prescan_insn if the `target' is a label
25083 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25084 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25085 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25086 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25087 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25088 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25089 (the target insn is arm_target_insn).
25090
25091 If the jump clobbers the conditions then we use states 2 and 4.
25092
25093 A similar thing can be done with conditional return insns.
25094
25095 XXX In case the `target' is an unconditional branch, this conditionalising
25096 of the instructions always reduces code size, but not always execution
25097 time. But then, I want to reduce the code size to somewhere near what
25098 /bin/cc produces. */
25099
25100 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25101 instructions. When a COND_EXEC instruction is seen the subsequent
25102 instructions are scanned so that multiple conditional instructions can be
25103 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25104 specify the length and true/false mask for the IT block. These will be
25105 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
25106
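/* As a rough illustration only (register names and the exact sequence are
made up for this example), ARM code of the form

cmp r0, #0
beq .L1
add r1, r1, #1
.L1:

can be rewritten by this machinery as

cmp r0, #0
addne r1, r1, #1

while on Thumb-2 the conditionalised instruction would instead be covered by
an "it ne" prefix emitted via the IT-block state handled below.  */
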
25107 /* Returns the index of the ARM condition code string in
25108 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25109 COMPARISON should be an rtx like `(eq (...) (...))'. */
25110
25111 enum arm_cond_code
25112 maybe_get_arm_condition_code (rtx comparison)
25113 {
25114 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25115 enum arm_cond_code code;
25116 enum rtx_code comp_code = GET_CODE (comparison);
25117
25118 if (GET_MODE_CLASS (mode) != MODE_CC)
25119 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25120 XEXP (comparison, 1));
25121
25122 switch (mode)
25123 {
25124 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25125 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25126 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25127 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25128 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25129 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25130 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25131 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25132 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25133 case E_CC_DLTUmode: code = ARM_CC;
25134
25135 dominance:
25136 if (comp_code == EQ)
25137 return ARM_INVERSE_CONDITION_CODE (code);
25138 if (comp_code == NE)
25139 return code;
25140 return ARM_NV;
25141
25142 case E_CC_NZmode:
25143 switch (comp_code)
25144 {
25145 case NE: return ARM_NE;
25146 case EQ: return ARM_EQ;
25147 case GE: return ARM_PL;
25148 case LT: return ARM_MI;
25149 default: return ARM_NV;
25150 }
25151
25152 case E_CC_Zmode:
25153 switch (comp_code)
25154 {
25155 case NE: return ARM_NE;
25156 case EQ: return ARM_EQ;
25157 default: return ARM_NV;
25158 }
25159
25160 case E_CC_Nmode:
25161 switch (comp_code)
25162 {
25163 case NE: return ARM_MI;
25164 case EQ: return ARM_PL;
25165 default: return ARM_NV;
25166 }
25167
25168 case E_CCFPEmode:
25169 case E_CCFPmode:
25170 /* We can handle all cases except UNEQ and LTGT. */
25171 switch (comp_code)
25172 {
25173 case GE: return ARM_GE;
25174 case GT: return ARM_GT;
25175 case LE: return ARM_LS;
25176 case LT: return ARM_MI;
25177 case NE: return ARM_NE;
25178 case EQ: return ARM_EQ;
25179 case ORDERED: return ARM_VC;
25180 case UNORDERED: return ARM_VS;
25181 case UNLT: return ARM_LT;
25182 case UNLE: return ARM_LE;
25183 case UNGT: return ARM_HI;
25184 case UNGE: return ARM_PL;
25185 /* UNEQ and LTGT do not have a representation. */
25186 case UNEQ: /* Fall through. */
25187 case LTGT: /* Fall through. */
25188 default: return ARM_NV;
25189 }
25190
25191 case E_CC_SWPmode:
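/* The comparison operands were swapped when this mode was selected, so the
returned condition must be swapped as well (GE becomes LE, GT becomes LT,
and so on).  */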
25192 switch (comp_code)
25193 {
25194 case NE: return ARM_NE;
25195 case EQ: return ARM_EQ;
25196 case GE: return ARM_LE;
25197 case GT: return ARM_LT;
25198 case LE: return ARM_GE;
25199 case LT: return ARM_GT;
25200 case GEU: return ARM_LS;
25201 case GTU: return ARM_CC;
25202 case LEU: return ARM_CS;
25203 case LTU: return ARM_HI;
25204 default: return ARM_NV;
25205 }
25206
25207 case E_CC_Cmode:
25208 switch (comp_code)
25209 {
25210 case LTU: return ARM_CS;
25211 case GEU: return ARM_CC;
25212 default: return ARM_NV;
25213 }
25214
25215 case E_CC_NVmode:
25216 switch (comp_code)
25217 {
25218 case GE: return ARM_GE;
25219 case LT: return ARM_LT;
25220 default: return ARM_NV;
25221 }
25222
25223 case E_CC_Bmode:
25224 switch (comp_code)
25225 {
25226 case GEU: return ARM_CS;
25227 case LTU: return ARM_CC;
25228 default: return ARM_NV;
25229 }
25230
25231 case E_CC_Vmode:
25232 switch (comp_code)
25233 {
25234 case NE: return ARM_VS;
25235 case EQ: return ARM_VC;
25236 default: return ARM_NV;
25237 }
25238
25239 case E_CC_ADCmode:
25240 switch (comp_code)
25241 {
25242 case GEU: return ARM_CS;
25243 case LTU: return ARM_CC;
25244 default: return ARM_NV;
25245 }
25246
25247 case E_CCmode:
25248 case E_CC_RSBmode:
25249 switch (comp_code)
25250 {
25251 case NE: return ARM_NE;
25252 case EQ: return ARM_EQ;
25253 case GE: return ARM_GE;
25254 case GT: return ARM_GT;
25255 case LE: return ARM_LE;
25256 case LT: return ARM_LT;
25257 case GEU: return ARM_CS;
25258 case GTU: return ARM_HI;
25259 case LEU: return ARM_LS;
25260 case LTU: return ARM_CC;
25261 default: return ARM_NV;
25262 }
25263
25264 default: gcc_unreachable ();
25265 }
25266 }
25267
25268 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25269 static enum arm_cond_code
25270 get_arm_condition_code (rtx comparison)
25271 {
25272 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25273 gcc_assert (code != ARM_NV);
25274 return code;
25275 }
25276
25277 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25278 code registers when not targeting Thumb1. The VFP condition register
25279 only exists when generating hard-float code. */
25280 static bool
25281 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25282 {
25283 if (!TARGET_32BIT)
25284 return false;
25285
25286 *p1 = CC_REGNUM;
25287 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25288 return true;
25289 }
25290
25291 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25292 instructions. */
25293 void
25294 thumb2_final_prescan_insn (rtx_insn *insn)
25295 {
25296 rtx_insn *first_insn = insn;
25297 rtx body = PATTERN (insn);
25298 rtx predicate;
25299 enum arm_cond_code code;
25300 int n;
25301 int mask;
25302 int max;
25303
25304 /* max_insns_skipped in the tune was already taken into account in the
25305 cost model of the ifcvt pass when generating COND_EXEC insns. At this
25306 stage just emit the IT blocks as large as we can; it does not make sense
25307 to split them. */
25308 max = MAX_INSN_PER_IT_BLOCK;
25309
25310 /* Remove the previous insn from the count of insns to be output. */
25311 if (arm_condexec_count)
25312 arm_condexec_count--;
25313
25314 /* Nothing to do if we are already inside a conditional block. */
25315 if (arm_condexec_count)
25316 return;
25317
25318 if (GET_CODE (body) != COND_EXEC)
25319 return;
25320
25321 /* Conditional jumps are implemented directly. */
25322 if (JUMP_P (insn))
25323 return;
25324
25325 predicate = COND_EXEC_TEST (body);
25326 arm_current_cc = get_arm_condition_code (predicate);
25327
25328 n = get_attr_ce_count (insn);
25329 arm_condexec_count = 1;
25330 arm_condexec_mask = (1 << n) - 1;
25331 arm_condexec_masklen = n;
25332 /* See if subsequent instructions can be combined into the same block. */
25333 for (;;)
25334 {
25335 insn = next_nonnote_insn (insn);
25336
25337 /* Jumping into the middle of an IT block is illegal, so a label or
25338 barrier terminates the block. */
25339 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25340 break;
25341
25342 body = PATTERN (insn);
25343 /* USE and CLOBBER aren't really insns, so just skip them. */
25344 if (GET_CODE (body) == USE
25345 || GET_CODE (body) == CLOBBER)
25346 continue;
25347
25348 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25349 if (GET_CODE (body) != COND_EXEC)
25350 break;
25351 /* Maximum number of conditionally executed instructions in a block. */
25352 n = get_attr_ce_count (insn);
25353 if (arm_condexec_masklen + n > max)
25354 break;
25355
25356 predicate = COND_EXEC_TEST (body);
25357 code = get_arm_condition_code (predicate);
25358 mask = (1 << n) - 1;
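/* Insns using the block's own condition contribute set bits ('t' in the IT
mnemonic) to arm_condexec_mask; insns using the inverse condition leave
their bits clear ('e').  Any other condition cannot join this IT block.  */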
25359 if (arm_current_cc == code)
25360 arm_condexec_mask |= (mask << arm_condexec_masklen);
25361 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25362 break;
25363
25364 arm_condexec_count++;
25365 arm_condexec_masklen += n;
25366
25367 /* A jump must be the last instruction in a conditional block. */
25368 if (JUMP_P (insn))
25369 break;
25370 }
25371 /* Restore recog_data (getting the attributes of other insns can
25372 destroy this array, but final.cc assumes that it remains intact
25373 across this call). */
25374 extract_constrain_insn_cached (first_insn);
25375 }
25376
25377 void
25378 arm_final_prescan_insn (rtx_insn *insn)
25379 {
25380 /* BODY will hold the body of INSN. */
25381 rtx body = PATTERN (insn);
25382
25383 /* This will be 1 if trying to repeat the trick, and things need to be
25384 reversed if it appears to fail. */
25385 int reverse = 0;
25386
25387 /* If we start with a return insn, we only succeed if we find another one. */
25388 int seeking_return = 0;
25389 enum rtx_code return_code = UNKNOWN;
25390
25391 /* START_INSN will hold the insn from where we start looking. This is the
25392 first insn after the following code_label if REVERSE is true. */
25393 rtx_insn *start_insn = insn;
25394
25395 /* If in state 4, check if the target branch is reached, in order to
25396 change back to state 0. */
25397 if (arm_ccfsm_state == 4)
25398 {
25399 if (insn == arm_target_insn)
25400 {
25401 arm_target_insn = NULL;
25402 arm_ccfsm_state = 0;
25403 }
25404 return;
25405 }
25406
25407 /* If in state 3, it is possible to repeat the trick, if this insn is an
25408 unconditional branch to a label, and immediately following this branch
25409 is the previous target label which is only used once, and the label this
25410 branch jumps to is not too far off. */
25411 if (arm_ccfsm_state == 3)
25412 {
25413 if (simplejump_p (insn))
25414 {
25415 start_insn = next_nonnote_insn (start_insn);
25416 if (BARRIER_P (start_insn))
25417 {
25418 /* XXX Isn't this always a barrier? */
25419 start_insn = next_nonnote_insn (start_insn);
25420 }
25421 if (LABEL_P (start_insn)
25422 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25423 && LABEL_NUSES (start_insn) == 1)
25424 reverse = TRUE;
25425 else
25426 return;
25427 }
25428 else if (ANY_RETURN_P (body))
25429 {
25430 start_insn = next_nonnote_insn (start_insn);
25431 if (BARRIER_P (start_insn))
25432 start_insn = next_nonnote_insn (start_insn);
25433 if (LABEL_P (start_insn)
25434 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25435 && LABEL_NUSES (start_insn) == 1)
25436 {
25437 reverse = TRUE;
25438 seeking_return = 1;
25439 return_code = GET_CODE (body);
25440 }
25441 else
25442 return;
25443 }
25444 else
25445 return;
25446 }
25447
25448 gcc_assert (!arm_ccfsm_state || reverse);
25449 if (!JUMP_P (insn))
25450 return;
25451
25452 /* This jump might be paralleled with a clobber of the condition codes;
25453 the jump should always come first. */
25454 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25455 body = XVECEXP (body, 0, 0);
25456
25457 if (reverse
25458 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25459 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25460 {
25461 int insns_skipped;
25462 int fail = FALSE, succeed = FALSE;
25463 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25464 int then_not_else = TRUE;
25465 rtx_insn *this_insn = start_insn;
25466 rtx label = 0;
25467
25468 /* Register the insn jumped to. */
25469 if (reverse)
25470 {
25471 if (!seeking_return)
25472 label = XEXP (SET_SRC (body), 0);
25473 }
25474 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25475 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25476 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25477 {
25478 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25479 then_not_else = FALSE;
25480 }
25481 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25482 {
25483 seeking_return = 1;
25484 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25485 }
25486 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25487 {
25488 seeking_return = 1;
25489 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25490 then_not_else = FALSE;
25491 }
25492 else
25493 gcc_unreachable ();
25494
25495 /* See how many insns this branch skips, and what kind of insns. If all
25496 insns are okay, and the label or unconditional branch to the same
25497 label is not too far away, succeed. */
25498 for (insns_skipped = 0;
25499 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25500 {
25501 rtx scanbody;
25502
25503 this_insn = next_nonnote_insn (this_insn);
25504 if (!this_insn)
25505 break;
25506
25507 switch (GET_CODE (this_insn))
25508 {
25509 case CODE_LABEL:
25510 /* Succeed if it is the target label, otherwise fail since
25511 control falls in from somewhere else. */
25512 if (this_insn == label)
25513 {
25514 arm_ccfsm_state = 1;
25515 succeed = TRUE;
25516 }
25517 else
25518 fail = TRUE;
25519 break;
25520
25521 case BARRIER:
25522 /* Succeed if the following insn is the target label.
25523 Otherwise fail.
25524 If return insns are used then the last insn in a function
25525 will be a barrier. */
25526 this_insn = next_nonnote_insn (this_insn);
25527 if (this_insn && this_insn == label)
25528 {
25529 arm_ccfsm_state = 1;
25530 succeed = TRUE;
25531 }
25532 else
25533 fail = TRUE;
25534 break;
25535
25536 case CALL_INSN:
25537 /* The AAPCS says that conditional calls should not be
25538 used since they make interworking inefficient (the
25539 linker can't transform BL<cond> into BLX). That's
25540 only a problem if the machine has BLX. */
25541 if (arm_arch5t)
25542 {
25543 fail = TRUE;
25544 break;
25545 }
25546
25547 /* Succeed if the following insn is the target label, or
25548 if the following two insns are a barrier and the
25549 target label. */
25550 this_insn = next_nonnote_insn (this_insn);
25551 if (this_insn && BARRIER_P (this_insn))
25552 this_insn = next_nonnote_insn (this_insn);
25553
25554 if (this_insn && this_insn == label
25555 && insns_skipped < max_insns_skipped)
25556 {
25557 arm_ccfsm_state = 1;
25558 succeed = TRUE;
25559 }
25560 else
25561 fail = TRUE;
25562 break;
25563
25564 case JUMP_INSN:
25565 /* If this is an unconditional branch to the same label, succeed.
25566 If it is to another label, do nothing. If it is conditional,
25567 fail. */
25568 /* XXX Probably, the tests for SET and the PC are
25569 unnecessary. */
25570
25571 scanbody = PATTERN (this_insn);
25572 if (GET_CODE (scanbody) == SET
25573 && GET_CODE (SET_DEST (scanbody)) == PC)
25574 {
25575 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25576 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25577 {
25578 arm_ccfsm_state = 2;
25579 succeed = TRUE;
25580 }
25581 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25582 fail = TRUE;
25583 }
25584 /* Fail if a conditional return is undesirable (e.g. on a
25585 StrongARM), but still allow this if optimizing for size. */
25586 else if (GET_CODE (scanbody) == return_code
25587 && !use_return_insn (TRUE, NULL)
25588 && !optimize_size)
25589 fail = TRUE;
25590 else if (GET_CODE (scanbody) == return_code)
25591 {
25592 arm_ccfsm_state = 2;
25593 succeed = TRUE;
25594 }
25595 else if (GET_CODE (scanbody) == PARALLEL)
25596 {
25597 switch (get_attr_conds (this_insn))
25598 {
25599 case CONDS_NOCOND:
25600 break;
25601 default:
25602 fail = TRUE;
25603 break;
25604 }
25605 }
25606 else
25607 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25608
25609 break;
25610
25611 case INSN:
25612 /* Instructions using or affecting the condition codes make it
25613 fail. */
25614 scanbody = PATTERN (this_insn);
25615 if (!(GET_CODE (scanbody) == SET
25616 || GET_CODE (scanbody) == PARALLEL)
25617 || get_attr_conds (this_insn) != CONDS_NOCOND)
25618 fail = TRUE;
25619 break;
25620
25621 default:
25622 break;
25623 }
25624 }
25625 if (succeed)
25626 {
25627 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25628 arm_target_label = CODE_LABEL_NUMBER (label);
25629 else
25630 {
25631 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25632
25633 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25634 {
25635 this_insn = next_nonnote_insn (this_insn);
25636 gcc_assert (!this_insn
25637 || (!BARRIER_P (this_insn)
25638 && !LABEL_P (this_insn)));
25639 }
25640 if (!this_insn)
25641 {
25642 /* Oh, dear!  We ran off the end... give up. */
25643 extract_constrain_insn_cached (insn);
25644 arm_ccfsm_state = 0;
25645 arm_target_insn = NULL;
25646 return;
25647 }
25648 arm_target_insn = this_insn;
25649 }
25650
25651 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25652 what it was. */
25653 if (!reverse)
25654 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25655
25656 if (reverse || then_not_else)
25657 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25658 }
25659
25660 /* Restore recog_data (getting the attributes of other insns can
25661 destroy this array, but final.cc assumes that it remains intact
25662 across this call). */
25663 extract_constrain_insn_cached (insn);
25664 }
25665 }
25666
25667 /* Output IT instructions. */
25668 void
25669 thumb2_asm_output_opcode (FILE * stream)
25670 {
25671 char buff[5];
25672 int n;
25673
25674 if (arm_condexec_mask)
25675 {
25676 for (n = 0; n < arm_condexec_masklen; n++)
25677 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25678 buff[n] = 0;
25679 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25680 arm_condition_codes[arm_current_cc]);
25681 arm_condexec_mask = 0;
25682 }
25683 }
25684
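/* For instance (example values only), a two-insn block whose first insn is
predicated on EQ and whose second is predicated on NE has
arm_condexec_mask == 0b01 and arm_current_cc == ARM_EQ, so the code above
prints "ite eq" in front of the first opcode.  */
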
25685 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25686 UNITS_PER_WORD bytes wide. */
25687 static unsigned int
25688 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25689 {
25690 if (IS_VPR_REGNUM (regno))
25691 return CEIL (GET_MODE_SIZE (mode), 2);
25692
25693 if (TARGET_32BIT
25694 && regno > PC_REGNUM
25695 && regno != FRAME_POINTER_REGNUM
25696 && regno != ARG_POINTER_REGNUM
25697 && !IS_VFP_REGNUM (regno))
25698 return 1;
25699
25700 return ARM_NUM_REGS (mode);
25701 }
25702
25703 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25704 static bool
25705 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25706 {
25707 if (GET_MODE_CLASS (mode) == MODE_CC)
25708 return (regno == CC_REGNUM
25709 || (TARGET_VFP_BASE
25710 && regno == VFPCC_REGNUM));
25711
25712 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25713 return false;
25714
25715 if (IS_VPR_REGNUM (regno))
25716 return VALID_MVE_PRED_MODE (mode);
25717
25718 if (TARGET_THUMB1)
25719 /* For the Thumb we only allow values bigger than SImode in
25720 registers 0 - 6, so that there is always a second low
25721 register available to hold the upper part of the value.
25722 We probably ought to ensure that the register is the
25723 start of an even numbered register pair. */
25724 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25725
25726 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25727 {
25728 if (mode == DFmode || mode == DImode)
25729 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25730
25731 if (mode == HFmode || mode == BFmode || mode == HImode
25732 || mode == SFmode || mode == SImode)
25733 return VFP_REGNO_OK_FOR_SINGLE (regno);
25734
25735 if (TARGET_NEON)
25736 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25737 || (VALID_NEON_QREG_MODE (mode)
25738 && NEON_REGNO_OK_FOR_QUAD (regno))
25739 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25740 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25741 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25742 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25743 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25744 if (TARGET_HAVE_MVE)
25745 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25746 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25747 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25748
25749 return false;
25750 }
25751
25752 if (TARGET_REALLY_IWMMXT)
25753 {
25754 if (IS_IWMMXT_GR_REGNUM (regno))
25755 return mode == SImode;
25756
25757 if (IS_IWMMXT_REGNUM (regno))
25758 return VALID_IWMMXT_REG_MODE (mode);
25759 }
25760
25761 /* We allow almost any value to be stored in the general registers.
25762 Restrict doubleword quantities to even register pairs in ARM state
25763 so that we can use ldrd. The same restriction applies for MVE
25764 in order to support Armv8.1-M Mainline instructions.
25765 Do not allow very large Neon structure opaque modes in general
25766 registers; they would use too many. */
25767 if (regno <= LAST_ARM_REGNUM)
25768 {
25769 if (ARM_NUM_REGS (mode) > 4)
25770 return false;
25771
25772 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25773 return true;
25774
25775 return !((TARGET_LDRD || TARGET_CDE)
25776 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25777 }
25778
25779 if (regno == FRAME_POINTER_REGNUM
25780 || regno == ARG_POINTER_REGNUM)
25781 /* We only allow integers in the fake hard registers. */
25782 return GET_MODE_CLASS (mode) == MODE_INT;
25783
25784 return false;
25785 }
25786
25787 /* Implement TARGET_MODES_TIEABLE_P. */
25788
25789 static bool
25790 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25791 {
25792 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25793 return true;
25794
25795 if (TARGET_HAVE_MVE
25796 && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
25797 return true;
25798
25799 /* We specifically want to allow elements of "structure" modes to
25800 be tieable to the structure. This more general condition allows
25801 other rarer situations too. */
25802 if ((TARGET_NEON
25803 && (VALID_NEON_DREG_MODE (mode1)
25804 || VALID_NEON_QREG_MODE (mode1)
25805 || VALID_NEON_STRUCT_MODE (mode1))
25806 && (VALID_NEON_DREG_MODE (mode2)
25807 || VALID_NEON_QREG_MODE (mode2)
25808 || VALID_NEON_STRUCT_MODE (mode2)))
25809 || (TARGET_HAVE_MVE
25810 && (VALID_MVE_MODE (mode1)
25811 || VALID_MVE_STRUCT_MODE (mode1))
25812 && (VALID_MVE_MODE (mode2)
25813 || VALID_MVE_STRUCT_MODE (mode2))))
25814 return true;
25815
25816 return false;
25817 }
25818
25819 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25820 not used in arm mode. */
25821
25822 enum reg_class
25823 arm_regno_class (int regno)
25824 {
25825 if (regno == PC_REGNUM)
25826 return NO_REGS;
25827
25828 if (IS_VPR_REGNUM (regno))
25829 return VPR_REG;
25830
25831 if (IS_PAC_REGNUM (regno))
25832 return PAC_REG;
25833
25834 if (TARGET_THUMB1)
25835 {
25836 if (regno == STACK_POINTER_REGNUM)
25837 return STACK_REG;
25838 if (regno == CC_REGNUM)
25839 return CC_REG;
25840 if (regno < 8)
25841 return LO_REGS;
25842 return HI_REGS;
25843 }
25844
25845 if (TARGET_THUMB2 && regno < 8)
25846 return LO_REGS;
25847
25848 if ( regno <= LAST_ARM_REGNUM
25849 || regno == FRAME_POINTER_REGNUM
25850 || regno == ARG_POINTER_REGNUM)
25851 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25852
25853 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25854 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25855
25856 if (IS_VFP_REGNUM (regno))
25857 {
25858 if (regno <= D7_VFP_REGNUM)
25859 return VFP_D0_D7_REGS;
25860 else if (regno <= LAST_LO_VFP_REGNUM)
25861 return VFP_LO_REGS;
25862 else
25863 return VFP_HI_REGS;
25864 }
25865
25866 if (IS_IWMMXT_REGNUM (regno))
25867 return IWMMXT_REGS;
25868
25869 if (IS_IWMMXT_GR_REGNUM (regno))
25870 return IWMMXT_GR_REGS;
25871
25872 return NO_REGS;
25873 }
25874
25875 /* Handle a special case when computing the offset
25876 of an argument from the frame pointer. */
25877 int
25878 arm_debugger_arg_offset (int value, rtx addr)
25879 {
25880 rtx_insn *insn;
25881
25882 /* We are only interested if dbxout_parms() failed to compute the offset. */
25883 if (value != 0)
25884 return 0;
25885
25886 /* We can only cope with the case where the address is held in a register. */
25887 if (!REG_P (addr))
25888 return 0;
25889
25890 /* If we are using the frame pointer to point at the argument, then
25891 an offset of 0 is correct. */
25892 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25893 return 0;
25894
25895 /* If we are using the stack pointer to point at the
25896 argument, then an offset of 0 is correct. */
25897 /* ??? Check this is consistent with thumb2 frame layout. */
25898 if ((TARGET_THUMB || !frame_pointer_needed)
25899 && REGNO (addr) == SP_REGNUM)
25900 return 0;
25901
25902 /* Oh dear. The argument is pointed to by a register rather
25903 than being held in a register, or being stored at a known
25904 offset from the frame pointer. Since GDB only understands
25905 those two kinds of argument we must translate the address
25906 held in the register into an offset from the frame pointer.
25907 We do this by searching through the insns for the function
25908 looking to see where this register gets its value. If the
25909 register is initialized from the frame pointer plus an offset
25910 then we are in luck and we can continue, otherwise we give up.
25911
25912 This code is exercised by producing debugging information
25913 for a function with arguments like this:
25914
25915 double func (double a, double b, int c, double d) {return d;}
25916
25917 Without this code the stab for parameter 'd' will be set to
25918 an offset of 0 from the frame pointer, rather than 8. */
25919
25920 /* The if() statement says:
25921
25922 If the insn is a normal instruction
25923 and if the insn is setting the value in a register
25924 and if the register being set is the register holding the address of the argument
25925 and if the address is computed by an addition
25926 that involves adding to a register
25927 which is the frame pointer
25928 a constant integer
25929
25930 then... */
25931
25932 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25933 {
25934 if ( NONJUMP_INSN_P (insn)
25935 && GET_CODE (PATTERN (insn)) == SET
25936 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25937 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25938 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25939 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25940 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25941 )
25942 {
25943 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25944
25945 break;
25946 }
25947 }
25948
25949 if (value == 0)
25950 {
25951 debug_rtx (addr);
25952 warning (0, "unable to compute real location of stacked parameter");
25953 value = 8; /* XXX magic hack */
25954 }
25955
25956 return value;
25957 }
25958 \f
25959 /* Implement TARGET_PROMOTED_TYPE. */
25960
25961 static tree
25962 arm_promoted_type (const_tree t)
25963 {
25964 if (SCALAR_FLOAT_TYPE_P (t)
25965 && TYPE_PRECISION (t) == 16
25966 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25967 return float_type_node;
25968 return NULL_TREE;
25969 }
25970
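/* In practice this means that, for the ARM __fp16 type, an expression such as
a + b (with __fp16 a, b) is evaluated as (float) a + (float) b; this is only
an informal illustration of the hook above.  */
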
25971 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25972 This simply adds HFmode as a supported mode; even though we don't
25973 implement arithmetic on this type directly, it's supported by
25974 optabs conversions, much the way the double-word arithmetic is
25975 special-cased in the default hook. */
25976
25977 static bool
25978 arm_scalar_mode_supported_p (scalar_mode mode)
25979 {
25980 if (mode == HFmode)
25981 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25982 else if (ALL_FIXED_POINT_MODE_P (mode))
25983 return true;
25984 else
25985 return default_scalar_mode_supported_p (mode);
25986 }
25987
25988 /* Set the value of FLT_EVAL_METHOD.
25989 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25990
25991 0: evaluate all operations and constants, whose semantic type has at
25992 most the range and precision of type float, to the range and
25993 precision of float; evaluate all other operations and constants to
25994 the range and precision of the semantic type;
25995
25996 N, where _FloatN is a supported interchange floating type
25997 evaluate all operations and constants, whose semantic type has at
25998 most the range and precision of _FloatN type, to the range and
25999 precision of the _FloatN type; evaluate all other operations and
26000 constants to the range and precision of the semantic type;
26001
26002 If we have the ARMv8.2-A extensions then we support _Float16 in native
26003 precision, so we should set this to 16. Otherwise, we support the type,
26004 but want to evaluate expressions in float precision, so set this to
26005 0. */
26006
26007 static enum flt_eval_method
26008 arm_excess_precision (enum excess_precision_type type)
26009 {
26010 switch (type)
26011 {
26012 case EXCESS_PRECISION_TYPE_FAST:
26013 case EXCESS_PRECISION_TYPE_STANDARD:
26014 /* We can calculate either in 16-bit range and precision or
26015 32-bit range and precision. Make that decision based on whether
26016 we have native support for the ARMv8.2-A 16-bit floating-point
26017 instructions or not. */
26018 return (TARGET_VFP_FP16INST
26019 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26020 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
26021 case EXCESS_PRECISION_TYPE_IMPLICIT:
26022 case EXCESS_PRECISION_TYPE_FLOAT16:
26023 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26024 default:
26025 gcc_unreachable ();
26026 }
26027 return FLT_EVAL_METHOD_UNPREDICTABLE;
26028 }
26029
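/* Informally: on a target with the ARMv8.2-A half-precision instructions
(TARGET_VFP_FP16INST), a _Float16 expression such as x * y can be evaluated
directly in half precision (FLT_EVAL_METHOD == 16); otherwise the operands are
promoted and the operation is carried out in float (FLT_EVAL_METHOD == 0).
The expression is an example only.  */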
26030
26031 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
26032 _Float16 if we are using anything other than ieee format for 16-bit
26033 floating point. Otherwise, punt to the default implementation. */
26034 static opt_scalar_float_mode
26035 arm_floatn_mode (int n, bool extended)
26036 {
26037 if (!extended && n == 16)
26038 {
26039 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
26040 return HFmode;
26041 return opt_scalar_float_mode ();
26042 }
26043
26044 return default_floatn_mode (n, extended);
26045 }
26046
26047
26048 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26049 not to early-clobber SRC registers in the process.
26050
26051 We assume that the operands described by SRC and DEST represent a
26052 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26053 number of components into which the copy has been decomposed. */
26054 void
26055 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
26056 {
26057 unsigned int i;
26058
26059 if (!reg_overlap_mentioned_p (operands[0], operands[1])
26060 || REGNO (operands[0]) < REGNO (operands[1]))
26061 {
26062 for (i = 0; i < count; i++)
26063 {
26064 operands[2 * i] = dest[i];
26065 operands[2 * i + 1] = src[i];
26066 }
26067 }
26068 else
26069 {
26070 for (i = 0; i < count; i++)
26071 {
26072 operands[2 * i] = dest[count - i - 1];
26073 operands[2 * i + 1] = src[count - i - 1];
26074 }
26075 }
26076 }
26077
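/* For example (illustrative register numbers): copying {d0,d1} into {d1,d2}
overlaps and the destination starts above the source, so the loop above emits
the component moves in reverse order (d2 <- d1, then d1 <- d0) so that d1 is
read before it is overwritten.  */
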
26078 /* Split operands into moves from op[1] + op[2] into op[0]. */
26079
26080 void
26081 neon_split_vcombine (rtx operands[3])
26082 {
26083 unsigned int dest = REGNO (operands[0]);
26084 unsigned int src1 = REGNO (operands[1]);
26085 unsigned int src2 = REGNO (operands[2]);
26086 machine_mode halfmode = GET_MODE (operands[1]);
26087 unsigned int halfregs = REG_NREGS (operands[1]);
26088 rtx destlo, desthi;
26089
26090 if (src1 == dest && src2 == dest + halfregs)
26091 {
26092 /* No-op move. Can't split to nothing; emit something. */
26093 emit_note (NOTE_INSN_DELETED);
26094 return;
26095 }
26096
26097 /* Preserve register attributes for variable tracking. */
26098 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
26099 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26100 GET_MODE_SIZE (halfmode));
26101
26102 /* Special case of reversed high/low parts. Use VSWP. */
26103 if (src2 == dest && src1 == dest + halfregs)
26104 {
26105 rtx x = gen_rtx_SET (destlo, operands[1]);
26106 rtx y = gen_rtx_SET (desthi, operands[2]);
26107 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26108 return;
26109 }
26110
26111 if (!reg_overlap_mentioned_p (operands[2], destlo))
26112 {
26113 /* Try to avoid unnecessary moves if part of the result
26114 is in the right place already. */
26115 if (src1 != dest)
26116 emit_move_insn (destlo, operands[1]);
26117 if (src2 != dest + halfregs)
26118 emit_move_insn (desthi, operands[2]);
26119 }
26120 else
26121 {
26122 if (src2 != dest + halfregs)
26123 emit_move_insn (desthi, operands[2]);
26124 if (src1 != dest)
26125 emit_move_insn (destlo, operands[1]);
26126 }
26127 }
26128 \f
26129 /* Return the number (counting from 0) of
26130 the least significant set bit in MASK. */
26131
26132 inline static int
26133 number_of_first_bit_set (unsigned mask)
26134 {
26135 return ctz_hwi (mask);
26136 }
26137
26138 /* Like emit_multi_reg_push, but allowing for a different set of
26139 registers to be described as saved. MASK is the set of registers
26140 to be saved; REAL_REGS is the set of registers to be described as
26141 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26142
26143 static rtx_insn *
26144 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26145 {
26146 unsigned long regno;
26147 rtx par[10], tmp, reg;
26148 rtx_insn *insn;
26149 int i, j;
26150
26151 /* Build the parallel of the registers actually being stored. */
26152 for (i = 0; mask; ++i, mask &= mask - 1)
26153 {
26154 regno = ctz_hwi (mask);
26155 reg = gen_rtx_REG (SImode, regno);
26156
26157 if (i == 0)
26158 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26159 else
26160 tmp = gen_rtx_USE (VOIDmode, reg);
26161
26162 par[i] = tmp;
26163 }
26164
26165 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26166 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26167 tmp = gen_frame_mem (BLKmode, tmp);
26168 tmp = gen_rtx_SET (tmp, par[0]);
26169 par[0] = tmp;
26170
26171 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26172 insn = emit_insn (tmp);
26173
26174 /* Always build the stack adjustment note for unwind info. */
26175 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26176 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26177 par[0] = tmp;
26178
26179 /* Build the parallel of the registers recorded as saved for unwind. */
26180 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26181 {
26182 regno = ctz_hwi (real_regs);
26183 reg = gen_rtx_REG (SImode, regno);
26184
26185 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26186 tmp = gen_frame_mem (SImode, tmp);
26187 tmp = gen_rtx_SET (tmp, reg);
26188 RTX_FRAME_RELATED_P (tmp) = 1;
26189 par[j + 1] = tmp;
26190 }
26191
26192 if (j == 0)
26193 tmp = par[0];
26194 else
26195 {
26196 RTX_FRAME_RELATED_P (par[0]) = 1;
26197 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26198 }
26199
26200 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26201
26202 return insn;
26203 }
26204
26205 /* Emit code to push or pop registers to or from the stack. F is the
26206 assembly file. MASK is the registers to pop. */
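/* For example, a MASK of 0x8010 (r4 and pc) normally produces "pop {r4, pc}";
with interworking, backtracing, __builtin_eh_return or CMSE entry code the PC
is not popped directly and thumb_exit takes over instead.  The mask value is
only an illustration.  */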
26207 static void
26208 thumb_pop (FILE *f, unsigned long mask)
26209 {
26210 int regno;
26211 int lo_mask = mask & 0xFF;
26212
26213 gcc_assert (mask);
26214
26215 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26216 {
26217 /* Special case. Do not generate a POP PC statement here, do it in
26218 thumb_exit (). */
26219 thumb_exit (f, -1);
26220 return;
26221 }
26222
26223 fprintf (f, "\tpop\t{");
26224
26225 /* Look at the low registers first. */
26226 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26227 {
26228 if (lo_mask & 1)
26229 {
26230 asm_fprintf (f, "%r", regno);
26231
26232 if ((lo_mask & ~1) != 0)
26233 fprintf (f, ", ");
26234 }
26235 }
26236
26237 if (mask & (1 << PC_REGNUM))
26238 {
26239 /* Catch popping the PC. */
26240 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26241 || IS_CMSE_ENTRY (arm_current_func_type ()))
26242 {
26243 /* The PC is never popped directly; instead
26244 it is popped into r3 and then BX is used. */
26245 fprintf (f, "}\n");
26246
26247 thumb_exit (f, -1);
26248
26249 return;
26250 }
26251 else
26252 {
26253 if (mask & 0xFF)
26254 fprintf (f, ", ");
26255
26256 asm_fprintf (f, "%r", PC_REGNUM);
26257 }
26258 }
26259
26260 fprintf (f, "}\n");
26261 }
26262
26263 /* Generate code to return from a thumb function.
26264 If 'reg_containing_return_addr' is -1, then the return address is
26265 actually on the stack, at the stack pointer.
26266
26267 Note: do not forget to update the length attribute of the corresponding insn pattern
26268 when changing assembly output (e.g. the length attribute of epilogue_insns when
26269 updating Armv8-M Baseline Security Extensions register clearing
26270 sequences). */
26271 static void
26272 thumb_exit (FILE *f, int reg_containing_return_addr)
26273 {
26274 unsigned regs_available_for_popping;
26275 unsigned regs_to_pop;
26276 int pops_needed;
26277 unsigned available;
26278 unsigned required;
26279 machine_mode mode;
26280 int size;
26281 int restore_a4 = FALSE;
26282
26283 /* Compute the registers we need to pop. */
26284 regs_to_pop = 0;
26285 pops_needed = 0;
26286
26287 if (reg_containing_return_addr == -1)
26288 {
26289 regs_to_pop |= 1 << LR_REGNUM;
26290 ++pops_needed;
26291 }
26292
26293 if (TARGET_BACKTRACE)
26294 {
26295 /* Restore the (ARM) frame pointer and stack pointer. */
26296 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26297 pops_needed += 2;
26298 }
26299
26300 /* If there is nothing to pop then just emit the BX instruction and
26301 return. */
26302 if (pops_needed == 0)
26303 {
26304 if (crtl->calls_eh_return)
26305 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26306
26307 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26308 {
26309 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26310 emitted by cmse_nonsecure_entry_clear_before_return (). */
26311 if (!TARGET_HAVE_FPCXT_CMSE)
26312 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26313 reg_containing_return_addr);
26314 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26315 }
26316 else
26317 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26318 return;
26319 }
26320 /* Otherwise if we are not supporting interworking and we have not created
26321 a backtrace structure and the function was not entered in ARM mode then
26322 just pop the return address straight into the PC. */
26323 else if (!TARGET_INTERWORK
26324 && !TARGET_BACKTRACE
26325 && !is_called_in_ARM_mode (current_function_decl)
26326 && !crtl->calls_eh_return
26327 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26328 {
26329 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26330 return;
26331 }
26332
26333 /* Find out how many of the (return) argument registers we can corrupt. */
26334 regs_available_for_popping = 0;
26335
26336 /* If returning via __builtin_eh_return, the bottom three registers
26337 all contain information needed for the return. */
26338 if (crtl->calls_eh_return)
26339 size = 12;
26340 else
26341 {
26342 /* See if we can deduce the registers used from the function's
26343 return value. This is more reliable than examining
26344 df_regs_ever_live_p () because that will be set if the register is
26345 ever used in the function, not just if the register is used
26346 to hold a return value. */
26347
26348 if (crtl->return_rtx != 0)
26349 mode = GET_MODE (crtl->return_rtx);
26350 else
26351 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26352
26353 size = GET_MODE_SIZE (mode);
26354
26355 if (size == 0)
26356 {
26357 /* In a void function we can use any argument register.
26358 In a function that returns a structure on the stack
26359 we can use the second and third argument registers. */
26360 if (mode == VOIDmode)
26361 regs_available_for_popping =
26362 (1 << ARG_REGISTER (1))
26363 | (1 << ARG_REGISTER (2))
26364 | (1 << ARG_REGISTER (3));
26365 else
26366 regs_available_for_popping =
26367 (1 << ARG_REGISTER (2))
26368 | (1 << ARG_REGISTER (3));
26369 }
26370 else if (size <= 4)
26371 regs_available_for_popping =
26372 (1 << ARG_REGISTER (2))
26373 | (1 << ARG_REGISTER (3));
26374 else if (size <= 8)
26375 regs_available_for_popping =
26376 (1 << ARG_REGISTER (3));
26377 }
26378
26379 /* Match registers to be popped with registers into which we pop them. */
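/* Each iteration below clears the lowest set bit of both masks (x & -x
isolates that bit), pairing one register that must be popped with one
register we are allowed to pop into.  */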
26380 for (available = regs_available_for_popping,
26381 required = regs_to_pop;
26382 required != 0 && available != 0;
26383 available &= ~(available & - available),
26384 required &= ~(required & - required))
26385 -- pops_needed;
26386
26387 /* If we have any popping registers left over, remove them. */
26388 if (available > 0)
26389 regs_available_for_popping &= ~available;
26390
26391 /* Otherwise if we need another popping register we can use
26392 the fourth argument register. */
26393 else if (pops_needed)
26394 {
26395 /* If we have not found any free argument registers and
26396 reg a4 contains the return address, we must move it. */
26397 if (regs_available_for_popping == 0
26398 && reg_containing_return_addr == LAST_ARG_REGNUM)
26399 {
26400 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26401 reg_containing_return_addr = LR_REGNUM;
26402 }
26403 else if (size > 12)
26404 {
26405 /* Register a4 is being used to hold part of the return value,
26406 but we have dire need of a free, low register. */
26407 restore_a4 = TRUE;
26408
26409 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26410 }
26411
26412 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26413 {
26414 /* The fourth argument register is available. */
26415 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26416
26417 --pops_needed;
26418 }
26419 }
26420
26421 /* Pop as many registers as we can. */
26422 thumb_pop (f, regs_available_for_popping);
26423
26424 /* Process the registers we popped. */
26425 if (reg_containing_return_addr == -1)
26426 {
26427 /* The return address was popped into the lowest numbered register. */
26428 regs_to_pop &= ~(1 << LR_REGNUM);
26429
26430 reg_containing_return_addr =
26431 number_of_first_bit_set (regs_available_for_popping);
26432
26433 /* Remove this register from the mask of available registers, so that
26434 the return address will not be corrupted by further pops. */
26435 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26436 }
26437
26438 /* If we popped other registers then handle them here. */
26439 if (regs_available_for_popping)
26440 {
26441 int frame_pointer;
26442
26443 /* Work out which register currently contains the frame pointer. */
26444 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26445
26446 /* Move it into the correct place. */
26447 asm_fprintf (f, "\tmov\t%r, %r\n",
26448 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26449
26450 /* (Temporarily) remove it from the mask of popped registers. */
26451 regs_available_for_popping &= ~(1 << frame_pointer);
26452 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26453
26454 if (regs_available_for_popping)
26455 {
26456 int stack_pointer;
26457
26458 /* We popped the stack pointer as well,
26459 find the register that contains it. */
26460 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26461
26462 /* Move it into the stack register. */
26463 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26464
26465 /* At this point we have popped all necessary registers, so
26466 do not worry about restoring regs_available_for_popping
26467 to its correct value:
26468
26469 assert (pops_needed == 0)
26470 assert (regs_available_for_popping == (1 << frame_pointer))
26471 assert (regs_to_pop == (1 << STACK_POINTER)) */
26472 }
26473 else
26474 {
26475 /* Since we have just moved the popped value into the frame
26476 pointer, the popping register is available for reuse, and
26477 we know that we still have the stack pointer left to pop. */
26478 regs_available_for_popping |= (1 << frame_pointer);
26479 }
26480 }
26481
26482 /* If we still have registers left on the stack, but we no longer have
26483 any registers into which we can pop them, then we must move the return
26484 address into the link register and make available the register that
26485 contained it. */
26486 if (regs_available_for_popping == 0 && pops_needed > 0)
26487 {
26488 regs_available_for_popping |= 1 << reg_containing_return_addr;
26489
26490 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26491 reg_containing_return_addr);
26492
26493 reg_containing_return_addr = LR_REGNUM;
26494 }
26495
26496 /* If we have registers left on the stack then pop some more.
26497 We know that at most we will want to pop FP and SP. */
26498 if (pops_needed > 0)
26499 {
26500 int popped_into;
26501 int move_to;
26502
26503 thumb_pop (f, regs_available_for_popping);
26504
26505 /* We have popped either FP or SP.
26506 Move whichever one it is into the correct register. */
26507 popped_into = number_of_first_bit_set (regs_available_for_popping);
26508 move_to = number_of_first_bit_set (regs_to_pop);
26509
26510 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26511 --pops_needed;
26512 }
26513
26514 /* If we still have not popped everything then we must have only
26515 had one register available to us and we are now popping the SP. */
26516 if (pops_needed > 0)
26517 {
26518 int popped_into;
26519
26520 thumb_pop (f, regs_available_for_popping);
26521
26522 popped_into = number_of_first_bit_set (regs_available_for_popping);
26523
26524 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26525 /*
26526 assert (regs_to_pop == (1 << STACK_POINTER))
26527 assert (pops_needed == 1)
26528 */
26529 }
26530
26531 /* If necessary restore the a4 register. */
26532 if (restore_a4)
26533 {
26534 if (reg_containing_return_addr != LR_REGNUM)
26535 {
26536 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26537 reg_containing_return_addr = LR_REGNUM;
26538 }
26539
26540 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26541 }
26542
26543 if (crtl->calls_eh_return)
26544 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26545
26546 /* Return to caller. */
26547 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26548 {
26549 /* This is for the cases where LR is not being used to contain the return
26550 address. It may therefore contain information that we might not want
26551 to leak, hence it must be cleared. The value in R0 will never be a
26552 secret at this point, so it is safe to use it, see the clearing code
26553 in cmse_nonsecure_entry_clear_before_return (). */
26554 if (reg_containing_return_addr != LR_REGNUM)
26555 asm_fprintf (f, "\tmov\tlr, r0\n");
26556
26557 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26558 by cmse_nonsecure_entry_clear_before_return (). */
26559 if (!TARGET_HAVE_FPCXT_CMSE)
26560 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26561 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26562 }
26563 else
26564 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26565 }
26566 \f
26567 /* Scan INSN just before assembler is output for it.
26568 For Thumb-1, we track the status of the condition codes; this
26569 information is used in the cbranchsi4_insn pattern. */
26570 void
26571 thumb1_final_prescan_insn (rtx_insn *insn)
26572 {
26573 if (flag_print_asm_name)
26574 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26575 INSN_ADDRESSES (INSN_UID (insn)));
26576 /* Don't overwrite the previous setter when we get to a cbranch. */
26577 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26578 {
26579 enum attr_conds conds;
26580
26581 if (cfun->machine->thumb1_cc_insn)
26582 {
26583 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26584 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26585 CC_STATUS_INIT;
26586 }
26587 conds = get_attr_conds (insn);
26588 if (conds == CONDS_SET)
26589 {
26590 rtx set = single_set (insn);
26591 cfun->machine->thumb1_cc_insn = insn;
26592 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26593 cfun->machine->thumb1_cc_op1 = const0_rtx;
26594 cfun->machine->thumb1_cc_mode = CC_NZmode;
26595 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26596 {
26597 rtx src1 = XEXP (SET_SRC (set), 1);
26598 if (src1 == const0_rtx)
26599 cfun->machine->thumb1_cc_mode = CCmode;
26600 }
26601 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26602 {
26603 /* Record the src register operand instead of dest because
26604 cprop_hardreg pass propagates src. */
26605 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26606 }
26607 }
26608 else if (conds != CONDS_NOCOND)
26609 cfun->machine->thumb1_cc_insn = NULL_RTX;
26610 }
26611
26612 /* Check if an unexpected far jump is used. */
26613 if (cfun->machine->lr_save_eliminated
26614 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26615 internal_error("Unexpected thumb1 far jump");
26616 }
26617
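/* Return nonzero if VAL, viewed as a 32-bit value, is an 8-bit constant
shifted left by some amount, e.g. 0x000000ff, 0x0001fe00 or 0xff000000;
a value such as 0x101 does not qualify.  */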
26618 int
26619 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26620 {
26621 unsigned HOST_WIDE_INT mask = 0xff;
26622 int i;
26623
26624 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26625 if (val == 0) /* XXX */
26626 return 0;
26627
26628 for (i = 0; i < 25; i++)
26629 if ((val & (mask << i)) == val)
26630 return 1;
26631
26632 return 0;
26633 }
26634
26635 /* Returns nonzero if the current function contains,
26636 or might contain, a far jump. */
26637 static int
26638 thumb_far_jump_used_p (void)
26639 {
26640 rtx_insn *insn;
26641 bool far_jump = false;
26642 unsigned int func_size = 0;
26643
26644 /* If we have already decided that far jumps may be used,
26645 do not bother checking again, and always return true even if
26646 it turns out that they are not being used. Once we have made
26647 the decision that far jumps are present (and that hence the link
26648 register will be pushed onto the stack) we cannot go back on it. */
26649 if (cfun->machine->far_jump_used)
26650 return 1;
26651
26652 /* If this function is not being called from the prologue/epilogue
26653 generation code then it must be being called from the
26654 INITIAL_ELIMINATION_OFFSET macro. */
26655 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26656 {
26657 /* In this case we know that we are being asked about the elimination
26658 of the arg pointer register. If that register is not being used,
26659 then there are no arguments on the stack, and we do not have to
26660 worry that a far jump might force the prologue to push the link
26661 register, changing the stack offsets. In this case we can just
26662 return false, since the presence of far jumps in the function will
26663 not affect stack offsets.
26664
26665 If the arg pointer is live (or if it was live, but has now been
26666 eliminated and so set to dead) then we do have to test to see if
26667 the function might contain a far jump. This test can lead to some
26668 false negatives, since before reload is completed the length of
26669 branch instructions is not known, so gcc defaults to returning their
26670 longest length, which in turn sets the far jump attribute to true.
26671
26672 A false negative will not result in bad code being generated, but it
26673 will result in a needless push and pop of the link register. We
26674 hope that this does not occur too often.
26675
26676 If we need doubleword stack alignment this could affect the other
26677 elimination offsets so we can't risk getting it wrong. */
26678 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26679 cfun->machine->arg_pointer_live = 1;
26680 else if (!cfun->machine->arg_pointer_live)
26681 return 0;
26682 }
26683
26684 /* We should not change far_jump_used during or after reload, as there is
26685 no chance to change stack frame layout. */
26686 if (reload_in_progress || reload_completed)
26687 return 0;
26688
26689 /* Check to see if the function contains a branch
26690 insn with the far jump attribute set. */
26691 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26692 {
26693 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26694 {
26695 far_jump = true;
26696 }
26697 func_size += get_attr_length (insn);
26698 }
26699
26700 /* The far_jump attribute is always true for Thumb-1 before the
26701 shorten_branch pass, so checking it before that pass is not very
26702 useful.
26703
26704 The following heuristic tries to estimate more accurately whether a
26705 far jump will actually be needed. It is very conservative, as there
26706 is no way to roll back a decision not to use far jumps.
26707
26708 Thumb-1 long branch offsets range from -2048 to 2046. In the worst
26709 case each 2-byte insn is associated with a 4-byte constant pool
26710 entry, so using function size 2048/3 as the threshold is conservative enough. */
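  /* Worked example of that bound: a function whose insns total 700 bytes
     could, in the worst case, drag along 2 * 700 bytes of literal pool,
     giving a span of roughly 3 * 700 = 2100 bytes, which exceeds the
     2048-byte branch range; hence the func_size * 3 >= 2048 test below.  */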
26711 if (far_jump)
26712 {
26713 if ((func_size * 3) >= 2048)
26714 {
26715 /* Record the fact that we have decided that
26716 the function does use far jumps. */
26717 cfun->machine->far_jump_used = 1;
26718 return 1;
26719 }
26720 }
26721
26722 return 0;
26723 }
26724
26725 /* Return nonzero if FUNC must be entered in ARM mode. */
26726 static bool
26727 is_called_in_ARM_mode (tree func)
26728 {
26729 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26730
26731 /* Ignore the problem about functions whose address is taken. */
26732 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26733 return true;
26734
26735 #ifdef ARM_PE
26736 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26737 #else
26738 return false;
26739 #endif
26740 }
26741
26742 /* Given the stack offsets and register mask in OFFSETS, decide how
26743 many additional registers to push instead of subtracting a constant
26744 from SP. For epilogues the principle is the same except we use pop.
26745 FOR_PROLOGUE indicates which we're generating. */
26746 static int
26747 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26748 {
26749 HOST_WIDE_INT amount;
26750 unsigned long live_regs_mask = offsets->saved_regs_mask;
26751 /* Extract a mask of the ones we can give to the Thumb's push/pop
26752 instruction. */
26753 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26754 /* Then count how many other high registers will need to be pushed. */
26755 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26756 int n_free, reg_base, size;
26757
26758 if (!for_prologue && frame_pointer_needed)
26759 amount = offsets->locals_base - offsets->saved_regs;
26760 else
26761 amount = offsets->outgoing_args - offsets->saved_regs;
26762
26763 /* If the stack frame size is 512 exactly, we can save one load
26764 instruction, which should make this a win even when optimizing
26765 for speed. */
26766 if (!optimize_size && amount != 512)
26767 return 0;
26768
26769 /* Can't do this if there are high registers to push. */
26770 if (high_regs_pushed != 0)
26771 return 0;
26772
26773 /* Shouldn't do it in the prologue if no registers would normally
26774 be pushed at all. In the epilogue, also allow it if we'll have
26775 a pop insn for the PC. */
26776 if (l_mask == 0
26777 && (for_prologue
26778 || TARGET_BACKTRACE
26779 || (live_regs_mask & 1 << LR_REGNUM) == 0
26780 || TARGET_INTERWORK
26781 || crtl->args.pretend_args_size != 0))
26782 return 0;
26783
26784 /* Don't do this if thumb_expand_prologue wants to emit instructions
26785 between the push and the stack frame allocation. */
26786 if (for_prologue
26787 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26788 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26789 return 0;
26790
26791 reg_base = 0;
26792 n_free = 0;
26793 if (!for_prologue)
26794 {
26795 size = arm_size_return_regs ();
26796 reg_base = ARM_NUM_INTS (size);
26797 live_regs_mask >>= reg_base;
26798 }
26799
26800 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26801 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26802 {
26803 live_regs_mask >>= 1;
26804 n_free++;
26805 }
26806
26807 if (n_free == 0)
26808 return 0;
26809 gcc_assert (amount / 4 * 4 == amount);
26810
26811 if (amount >= 512 && (amount - n_free * 4) < 512)
26812 return (amount - 508) / 4;
26813 if (amount <= n_free * 4)
26814 return amount / 4;
26815 return 0;
26816 }
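/* Illustrative example (register choice is hypothetical): when optimizing
   for size, a prologue that would emit
	push	{r4, lr}
	sub	sp, sp, #8
   can instead emit
	push	{r2, r3, r4, lr}
   if two low registers are free, folding the 8-byte frame allocation into
   the push; the epilogue can similarly fold the deallocation into its pop
   instead of adjusting SP.  */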
26817
26818 /* The parts of the epilogue which aren't usefully expanded as rtl. */
26819 const char *
26820 thumb1_unexpanded_epilogue (void)
26821 {
26822 arm_stack_offsets *offsets;
26823 int regno;
26824 unsigned long live_regs_mask = 0;
26825 int high_regs_pushed = 0;
26826 int extra_pop;
26827 int had_to_push_lr;
26828 int size;
26829
26830 if (cfun->machine->return_used_this_function != 0)
26831 return "";
26832
26833 if (IS_NAKED (arm_current_func_type ()))
26834 return "";
26835
26836 offsets = arm_get_frame_offsets ();
26837 live_regs_mask = offsets->saved_regs_mask;
26838 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26839
26840 /* Deduce the registers used from the function's return value. This is
26841 more reliable than examining df_regs_ever_live_p () because that
26842 will be set if the register is ever used in the function, not just if
26843 the register is used to hold a return value. */
26844 size = arm_size_return_regs ();
26845
26846 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26847 if (extra_pop > 0)
26848 {
26849 unsigned long extra_mask = (1 << extra_pop) - 1;
26850 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26851 }
26852
26853 /* The prologue may have pushed some high registers to use as
26854 work registers. For example, the testsuite file:
26855 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26856 compiles to produce:
26857 push {r4, r5, r6, r7, lr}
26858 mov r7, r9
26859 mov r6, r8
26860 push {r6, r7}
26861 as part of the prologue. We have to undo that pushing here. */
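  /* For that example, the undo emitted below is roughly:
	pop	{r6, r7}
	mov	r9, r7
	mov	r8, r6
     i.e. the saved values are popped into free low registers and then
     moved back into the high registers (the exact registers depend on
     which low registers are available).  */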
26862
26863 if (high_regs_pushed)
26864 {
26865 unsigned long mask = live_regs_mask & 0xff;
26866 int next_hi_reg;
26867
26868 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26869
26870 if (mask == 0)
26871 /* Oh dear! We have no low registers into which we can pop
26872 high registers! */
26873 internal_error
26874 ("no low registers available for popping high registers");
26875
26876 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26877 if (live_regs_mask & (1 << next_hi_reg))
26878 break;
26879
26880 while (high_regs_pushed)
26881 {
26882 /* Find lo register(s) into which the high register(s) can
26883 be popped. */
26884 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26885 {
26886 if (mask & (1 << regno))
26887 high_regs_pushed--;
26888 if (high_regs_pushed == 0)
26889 break;
26890 }
26891
26892 if (high_regs_pushed == 0 && regno >= 0)
26893 mask &= ~((1 << regno) - 1);
26894
26895 /* Pop the values into the low register(s). */
26896 thumb_pop (asm_out_file, mask);
26897
26898 /* Move the value(s) into the high registers. */
26899 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26900 {
26901 if (mask & (1 << regno))
26902 {
26903 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26904 regno);
26905
26906 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26907 next_hi_reg--)
26908 if (live_regs_mask & (1 << next_hi_reg))
26909 break;
26910 }
26911 }
26912 }
26913 live_regs_mask &= ~0x0f00;
26914 }
26915
26916 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26917 live_regs_mask &= 0xff;
26918
26919 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26920 {
26921 /* Pop the return address into the PC. */
26922 if (had_to_push_lr)
26923 live_regs_mask |= 1 << PC_REGNUM;
26924
26925 /* Either no argument registers were pushed or a backtrace
26926 structure was created which includes an adjusted stack
26927 pointer, so just pop everything. */
26928 if (live_regs_mask)
26929 thumb_pop (asm_out_file, live_regs_mask);
26930
26931 /* We have either just popped the return address into the
26932 PC, or it was kept in LR for the entire function.
26933 Note that thumb_pop has already called thumb_exit if the
26934 PC was in the list. */
26935 if (!had_to_push_lr)
26936 thumb_exit (asm_out_file, LR_REGNUM);
26937 }
26938 else
26939 {
26940 /* Pop everything but the return address. */
26941 if (live_regs_mask)
26942 thumb_pop (asm_out_file, live_regs_mask);
26943
26944 if (had_to_push_lr)
26945 {
26946 if (size > 12)
26947 {
26948 /* We have no free low regs, so save one. */
26949 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26950 LAST_ARG_REGNUM);
26951 }
26952
26953 /* Get the return address into a temporary register. */
26954 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26955
26956 if (size > 12)
26957 {
26958 /* Move the return address to lr. */
26959 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26960 LAST_ARG_REGNUM);
26961 /* Restore the low register. */
26962 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26963 IP_REGNUM);
26964 regno = LR_REGNUM;
26965 }
26966 else
26967 regno = LAST_ARG_REGNUM;
26968 }
26969 else
26970 regno = LR_REGNUM;
26971
26972 /* Remove the argument registers that were pushed onto the stack. */
26973 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26974 SP_REGNUM, SP_REGNUM,
26975 crtl->args.pretend_args_size);
26976
26977 thumb_exit (asm_out_file, regno);
26978 }
26979
26980 return "";
26981 }
26982
26983 /* Functions to save and restore machine-specific function data. */
26984 static struct machine_function *
26985 arm_init_machine_status (void)
26986 {
26987 struct machine_function *machine;
26988 machine = ggc_cleared_alloc<machine_function> ();
26989
26990 #if ARM_FT_UNKNOWN != 0
26991 machine->func_type = ARM_FT_UNKNOWN;
26992 #endif
26993 machine->static_chain_stack_bytes = -1;
26994 machine->pacspval_needed = 0;
26995 return machine;
26996 }
26997
26998 /* Return an RTX indicating where the return address to the
26999 calling function can be found. */
27000 rtx
27001 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27002 {
27003 if (count != 0)
27004 return NULL_RTX;
27005
27006 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27007 }
27008
27009 /* Do anything needed before RTL is emitted for each function. */
27010 void
27011 arm_init_expanders (void)
27012 {
27013 /* Arrange to initialize and mark the machine per-function status. */
27014 init_machine_status = arm_init_machine_status;
27015
27016 /* This is to stop the combine pass optimizing away the alignment
27017 adjustment of va_arg. */
27018 /* ??? It is claimed that this should not be necessary. */
27019 if (cfun)
27020 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27021 }
27022
27023 /* Check whether FUNC is compiled in a different mode (ARM vs Thumb) from the current one. */
27024
27025 bool
27026 arm_change_mode_p (tree func)
27027 {
27028 if (TREE_CODE (func) != FUNCTION_DECL)
27029 return false;
27030
27031 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
27032
27033 if (!callee_tree)
27034 callee_tree = target_option_default_node;
27035
27036 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
27037 int flags = callee_opts->x_target_flags;
27038
27039 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
27040 }
27041
27042 /* Like arm_compute_initial_elimination_offset. Simpler because there
27043 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27044 to point at the base of the local variables after static stack
27045 space for a function has been allocated. */
27046
27047 HOST_WIDE_INT
27048 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27049 {
27050 arm_stack_offsets *offsets;
27051
27052 offsets = arm_get_frame_offsets ();
27053
27054 switch (from)
27055 {
27056 case ARG_POINTER_REGNUM:
27057 switch (to)
27058 {
27059 case STACK_POINTER_REGNUM:
27060 return offsets->outgoing_args - offsets->saved_args;
27061
27062 case FRAME_POINTER_REGNUM:
27063 return offsets->soft_frame - offsets->saved_args;
27064
27065 case ARM_HARD_FRAME_POINTER_REGNUM:
27066 return offsets->saved_regs - offsets->saved_args;
27067
27068 case THUMB_HARD_FRAME_POINTER_REGNUM:
27069 return offsets->locals_base - offsets->saved_args;
27070
27071 default:
27072 gcc_unreachable ();
27073 }
27074 break;
27075
27076 case FRAME_POINTER_REGNUM:
27077 switch (to)
27078 {
27079 case STACK_POINTER_REGNUM:
27080 return offsets->outgoing_args - offsets->soft_frame;
27081
27082 case ARM_HARD_FRAME_POINTER_REGNUM:
27083 return offsets->saved_regs - offsets->soft_frame;
27084
27085 case THUMB_HARD_FRAME_POINTER_REGNUM:
27086 return offsets->locals_base - offsets->soft_frame;
27087
27088 default:
27089 gcc_unreachable ();
27090 }
27091 break;
27092
27093 default:
27094 gcc_unreachable ();
27095 }
27096 }
27097
27098 /* Generate the function's prologue. */
27099
27100 void
27101 thumb1_expand_prologue (void)
27102 {
27103 rtx_insn *insn;
27104
27105 HOST_WIDE_INT amount;
27106 HOST_WIDE_INT size;
27107 arm_stack_offsets *offsets;
27108 unsigned long func_type;
27109 int regno;
27110 unsigned long live_regs_mask;
27111 unsigned long l_mask;
27112 unsigned high_regs_pushed = 0;
27113 bool lr_needs_saving;
27114
27115 func_type = arm_current_func_type ();
27116
27117 /* Naked functions don't have prologues. */
27118 if (IS_NAKED (func_type))
27119 {
27120 if (flag_stack_usage_info)
27121 current_function_static_stack_size = 0;
27122 return;
27123 }
27124
27125 if (IS_INTERRUPT (func_type))
27126 {
27127 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27128 return;
27129 }
27130
27131 if (is_called_in_ARM_mode (current_function_decl))
27132 emit_insn (gen_prologue_thumb1_interwork ());
27133
27134 offsets = arm_get_frame_offsets ();
27135 live_regs_mask = offsets->saved_regs_mask;
27136 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27137
27138 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27139 l_mask = live_regs_mask & 0x40ff;
27140 /* Then count how many other high registers will need to be pushed. */
27141 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27142
27143 if (crtl->args.pretend_args_size)
27144 {
27145 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27146
27147 if (cfun->machine->uses_anonymous_args)
27148 {
27149 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27150 unsigned long mask;
27151
27152 mask = 1ul << (LAST_ARG_REGNUM + 1);
27153 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27154
27155 insn = thumb1_emit_multi_reg_push (mask, 0);
27156 }
27157 else
27158 {
27159 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27160 stack_pointer_rtx, x));
27161 }
27162 RTX_FRAME_RELATED_P (insn) = 1;
27163 }
27164
27165 if (TARGET_BACKTRACE)
27166 {
27167 HOST_WIDE_INT offset = 0;
27168 unsigned work_register;
27169 rtx work_reg, x, arm_hfp_rtx;
27170
27171 /* We have been asked to create a stack backtrace structure.
27172 The code looks like this:
27173
27174 0 .align 2
27175 0 func:
27176 0 sub SP, #16 Reserve space for 4 registers.
27177 2 push {R7} Push low registers.
27178 4 add R7, SP, #20 Get the stack pointer before the push.
27179 6 str R7, [SP, #8] Store the stack pointer
27180 (before reserving the space).
27181 8 mov R7, PC Get hold of the start of this code + 12.
27182 10 str R7, [SP, #16] Store it.
27183 12 mov R7, FP Get hold of the current frame pointer.
27184 14 str R7, [SP, #4] Store it.
27185 16 mov R7, LR Get hold of the current return address.
27186 18 str R7, [SP, #12] Store it.
27187 20 add R7, SP, #16 Point at the start of the
27188 backtrace structure.
27189 22 mov FP, R7 Put this value into the frame pointer. */
27190
27191 work_register = thumb_find_work_register (live_regs_mask);
27192 work_reg = gen_rtx_REG (SImode, work_register);
27193 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27194
27195 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27196 stack_pointer_rtx, GEN_INT (-16)));
27197 RTX_FRAME_RELATED_P (insn) = 1;
27198
27199 if (l_mask)
27200 {
27201 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27202 RTX_FRAME_RELATED_P (insn) = 1;
27203 lr_needs_saving = false;
27204
27205 offset = bit_count (l_mask) * UNITS_PER_WORD;
27206 }
27207
27208 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27209 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27210
27211 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27212 x = gen_frame_mem (SImode, x);
27213 emit_move_insn (x, work_reg);
27214
27215 /* Make sure that the instruction fetching the PC is in the right place
27216 to calculate "start of backtrace creation code + 12". */
27217 /* ??? The stores using the common WORK_REG ought to be enough to
27218 prevent the scheduler from doing anything weird. Failing that
27219 we could always move all of the following into an UNSPEC_VOLATILE. */
27220 if (l_mask)
27221 {
27222 x = gen_rtx_REG (SImode, PC_REGNUM);
27223 emit_move_insn (work_reg, x);
27224
27225 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27226 x = gen_frame_mem (SImode, x);
27227 emit_move_insn (x, work_reg);
27228
27229 emit_move_insn (work_reg, arm_hfp_rtx);
27230
27231 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27232 x = gen_frame_mem (SImode, x);
27233 emit_move_insn (x, work_reg);
27234 }
27235 else
27236 {
27237 emit_move_insn (work_reg, arm_hfp_rtx);
27238
27239 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27240 x = gen_frame_mem (SImode, x);
27241 emit_move_insn (x, work_reg);
27242
27243 x = gen_rtx_REG (SImode, PC_REGNUM);
27244 emit_move_insn (work_reg, x);
27245
27246 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27247 x = gen_frame_mem (SImode, x);
27248 emit_move_insn (x, work_reg);
27249 }
27250
27251 x = gen_rtx_REG (SImode, LR_REGNUM);
27252 emit_move_insn (work_reg, x);
27253
27254 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27255 x = gen_frame_mem (SImode, x);
27256 emit_move_insn (x, work_reg);
27257
27258 x = GEN_INT (offset + 12);
27259 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27260
27261 emit_move_insn (arm_hfp_rtx, work_reg);
27262 }
27263 /* Optimization: If we are not pushing any low registers but we are going
27264 to push some high registers then delay our first push. This will just
27265 be a push of LR and we can combine it with the push of the first high
27266 register. */
27267 else if ((l_mask & 0xff) != 0
27268 || (high_regs_pushed == 0 && lr_needs_saving))
27269 {
27270 unsigned long mask = l_mask;
27271 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27272 insn = thumb1_emit_multi_reg_push (mask, mask);
27273 RTX_FRAME_RELATED_P (insn) = 1;
27274 lr_needs_saving = false;
27275 }
27276
27277 if (high_regs_pushed)
27278 {
27279 unsigned pushable_regs;
27280 unsigned next_hi_reg;
27281 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27282 : crtl->args.info.nregs;
27283 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27284
27285 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27286 if (live_regs_mask & (1 << next_hi_reg))
27287 break;
27288
27289 /* Here we need to mask out registers used for passing arguments,
27290 even if they could be pushed. This is to avoid using them to
27291 stash the high registers, since such a stash could clobber
27292 argument values that are still in use. */
27293 pushable_regs = l_mask & (~arg_regs_mask);
27294 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27295
27296 /* Normally, LR can be used as a scratch register once it has been
27297 saved; but if the function examines its own return address then
27298 the value is still live and we need to avoid using it. */
27299 bool return_addr_live
27300 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27301 LR_REGNUM);
27302
27303 if (lr_needs_saving || return_addr_live)
27304 pushable_regs &= ~(1 << LR_REGNUM);
27305
27306 if (pushable_regs == 0)
27307 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27308
27309 while (high_regs_pushed > 0)
27310 {
27311 unsigned long real_regs_mask = 0;
27312 unsigned long push_mask = 0;
27313
27314 for (regno = LR_REGNUM; regno >= 0; regno --)
27315 {
27316 if (pushable_regs & (1 << regno))
27317 {
27318 emit_move_insn (gen_rtx_REG (SImode, regno),
27319 gen_rtx_REG (SImode, next_hi_reg));
27320
27321 high_regs_pushed --;
27322 real_regs_mask |= (1 << next_hi_reg);
27323 push_mask |= (1 << regno);
27324
27325 if (high_regs_pushed)
27326 {
27327 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27328 next_hi_reg --)
27329 if (live_regs_mask & (1 << next_hi_reg))
27330 break;
27331 }
27332 else
27333 break;
27334 }
27335 }
27336
27337 /* If we had to find a work register and we have not yet
27338 saved the LR then add it to the list of regs to push. */
27339 if (lr_needs_saving)
27340 {
27341 push_mask |= 1 << LR_REGNUM;
27342 real_regs_mask |= 1 << LR_REGNUM;
27343 lr_needs_saving = false;
27344 /* If the return address is not live at this point, we
27345 can add LR to the list of registers that we can use
27346 for pushes. */
27347 if (!return_addr_live)
27348 pushable_regs |= 1 << LR_REGNUM;
27349 }
27350
27351 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27352 RTX_FRAME_RELATED_P (insn) = 1;
27353 }
27354 }
27355
27356 /* Load the pic register before setting the frame pointer,
27357 so we can use r7 as a temporary work register. */
27358 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27359 arm_load_pic_register (live_regs_mask, NULL_RTX);
27360
27361 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27362 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27363 stack_pointer_rtx);
27364
27365 size = offsets->outgoing_args - offsets->saved_args;
27366 if (flag_stack_usage_info)
27367 current_function_static_stack_size = size;
27368
27369 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27370 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27371 || flag_stack_clash_protection)
27372 && size)
27373 sorry ("%<-fstack-check=specific%> for Thumb-1");
27374
27375 amount = offsets->outgoing_args - offsets->saved_regs;
27376 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27377 if (amount)
27378 {
27379 if (amount < 512)
27380 {
27381 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27382 GEN_INT (- amount)));
27383 RTX_FRAME_RELATED_P (insn) = 1;
27384 }
27385 else
27386 {
27387 rtx reg, dwarf;
27388
27389 /* The stack decrement is too big for an immediate value in a single
27390 insn. In theory we could issue multiple subtracts, but after
27391 three of them it becomes more space efficient to place the full
27392 value in the constant pool and load into a register. (Also the
27393 ARM debugger really likes to see only one stack decrement per
27394 function). So instead we look for a scratch register into which
27395 we can load the decrement, and then we subtract this from the
27396 stack pointer. Unfortunately on the thumb the only available
27397 scratch registers are the argument registers, and we cannot use
27398 these as they may hold arguments to the function. Instead we
27399 attempt to locate a call preserved register which is used by this
27400 function. If we can find one, then we know that it will have
27401 been pushed at the start of the prologue and so we can corrupt
27402 it now. */
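	  /* For instance (purely illustrative; the label name is made up), a
	     1024-byte frame in a function that also saves r4 might become:
		ldr	r4, .Lframe_size	@ .Lframe_size holds -1024
		add	sp, sp, r4
	     r4's real value is restored later by the epilogue pop.  */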
27403 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27404 if (live_regs_mask & (1 << regno))
27405 break;
27406
27407 gcc_assert(regno <= LAST_LO_REGNUM);
27408
27409 reg = gen_rtx_REG (SImode, regno);
27410
27411 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27412
27413 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27414 stack_pointer_rtx, reg));
27415
27416 dwarf = gen_rtx_SET (stack_pointer_rtx,
27417 plus_constant (Pmode, stack_pointer_rtx,
27418 -amount));
27419 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27420 RTX_FRAME_RELATED_P (insn) = 1;
27421 }
27422 }
27423
27424 if (frame_pointer_needed)
27425 thumb_set_frame_pointer (offsets);
27426
27427 /* If we are profiling, make sure no instructions are scheduled before
27428 the call to mcount. Similarly if the user has requested no
27429 scheduling in the prologue. Likewise if we want non-call exceptions
27430 using the EABI unwinder, to prevent faulting instructions from being
27431 swapped with a stack adjustment. */
27432 if (crtl->profile || !TARGET_SCHED_PROLOG
27433 || (arm_except_unwind_info (&global_options) == UI_TARGET
27434 && cfun->can_throw_non_call_exceptions))
27435 emit_insn (gen_blockage ());
27436
27437 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27438 if (live_regs_mask & 0xff)
27439 cfun->machine->lr_save_eliminated = 0;
27440 }
27441
27442 /* Clear caller-saved registers that are not used to pass return values,
27443 as well as leaked condition flags, before exiting a cmse_nonsecure_entry function. */
27444
27445 void
27446 cmse_nonsecure_entry_clear_before_return (void)
27447 {
27448 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27449 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27450 uint32_t padding_bits_to_clear = 0;
27451 auto_sbitmap to_clear_bitmap (maxregno + 1);
27452 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27453 tree result_type;
27454
27455 bitmap_clear (to_clear_bitmap);
27456 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27457 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27458
27459 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27460 registers. */
27461 if (clear_vfpregs)
27462 {
27463 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27464
27465 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27466
27467 if (!TARGET_HAVE_FPCXT_CMSE)
27468 {
27469 /* Make sure we don't clear the two scratch registers used to clear
27470 the relevant FPSCR bits in output_return_instruction. */
27471 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27472 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27473 emit_use (gen_rtx_REG (SImode, 4));
27474 bitmap_clear_bit (to_clear_bitmap, 4);
27475 }
27476 }
27477
27478 /* If the user has defined registers to be caller saved, these are no longer
27479 restored by the function before returning and must thus be cleared for
27480 security purposes. */
27481 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27482 {
27483 /* We do not touch registers that can be used to pass arguments as per
27484 the AAPCS, since these should never be made callee-saved by user
27485 options. */
27486 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27487 continue;
27488 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27489 continue;
27490 if (!callee_saved_reg_p (regno)
27491 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27492 || TARGET_HARD_FLOAT))
27493 bitmap_set_bit (to_clear_bitmap, regno);
27494 }
27495
27496 /* Make sure we do not clear the registers used to return the result in. */
27497 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27498 if (!VOID_TYPE_P (result_type))
27499 {
27500 uint64_t to_clear_return_mask;
27501 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27502
27503 /* No need to check that we return in registers, because we don't
27504 support returning on stack yet. */
27505 gcc_assert (REG_P (result_rtl));
27506 to_clear_return_mask
27507 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27508 &padding_bits_to_clear);
27509 if (to_clear_return_mask)
27510 {
27511 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27512 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27513 {
27514 if (to_clear_return_mask & (1ULL << regno))
27515 bitmap_clear_bit (to_clear_bitmap, regno);
27516 }
27517 }
27518 }
27519
27520 if (padding_bits_to_clear != 0)
27521 {
27522 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27523 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27524
27525 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27526 returning a composite type, which only uses r0. Let's make sure that
27527 r1-r3 are cleared too. */
27528 bitmap_clear (to_clear_arg_regs_bitmap);
27529 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27530 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27531 }
27532
27533 /* Clear full registers that leak before returning. */
27534 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27535 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27536 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27537 clearing_reg);
27538 }
27539
27540 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27541 single POP instruction can be generated. LR should be replaced by PC.
27542 All the checks required are already done by USE_RETURN_INSN (). Hence,
27543 all we really need to check here is whether a single register or
27544 multiple registers need to be popped. */
27545 void
27546 thumb2_expand_return (bool simple_return)
27547 {
27548 int i, num_regs;
27549 unsigned long saved_regs_mask;
27550 arm_stack_offsets *offsets;
27551
27552 offsets = arm_get_frame_offsets ();
27553 saved_regs_mask = offsets->saved_regs_mask;
27554
27555 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27556 if (saved_regs_mask & (1 << i))
27557 num_regs++;
27558
27559 if (!simple_return && saved_regs_mask)
27560 {
27561 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27562 functions or adapt code to handle according to ACLE. This path should
27563 not be reachable for cmse_nonsecure_entry functions though we prefer
27564 to assert it for now to ensure that future code changes do not silently
27565 change this behavior. */
27566 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27567 if (arm_current_function_pac_enabled_p ())
27568 {
27569 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27570 arm_emit_multi_reg_pop (saved_regs_mask);
27571 emit_insn (gen_aut_nop ());
27572 emit_jump_insn (simple_return_rtx);
27573 }
27574 else if (num_regs == 1)
27575 {
27576 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27577 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27578 rtx addr = gen_rtx_MEM (SImode,
27579 gen_rtx_POST_INC (SImode,
27580 stack_pointer_rtx));
27581 set_mem_alias_set (addr, get_frame_alias_set ());
27582 XVECEXP (par, 0, 0) = ret_rtx;
27583 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27584 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27585 emit_jump_insn (par);
27586 }
27587 else
27588 {
27589 saved_regs_mask &= ~ (1 << LR_REGNUM);
27590 saved_regs_mask |= (1 << PC_REGNUM);
27591 arm_emit_multi_reg_pop (saved_regs_mask);
27592 }
27593 }
27594 else
27595 {
27596 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27597 cmse_nonsecure_entry_clear_before_return ();
27598 emit_jump_insn (simple_return_rtx);
27599 }
27600 }
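/* For the single-register case above, the PARALLEL of a return and a
   post-incremented SP load of the PC corresponds to a single
   "ldr pc, [sp], #4" (i.e. "pop {pc}"), so the return address is reloaded
   and the stack popped in one instruction.  */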
27601
27602 void
27603 thumb1_expand_epilogue (void)
27604 {
27605 HOST_WIDE_INT amount;
27606 arm_stack_offsets *offsets;
27607 int regno;
27608
27609 /* Naked functions don't have epilogues. */
27610 if (IS_NAKED (arm_current_func_type ()))
27611 return;
27612
27613 offsets = arm_get_frame_offsets ();
27614 amount = offsets->outgoing_args - offsets->saved_regs;
27615
27616 if (frame_pointer_needed)
27617 {
27618 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27619 amount = offsets->locals_base - offsets->saved_regs;
27620 }
27621 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27622
27623 gcc_assert (amount >= 0);
27624 if (amount)
27625 {
27626 emit_insn (gen_blockage ());
27627
27628 if (amount < 512)
27629 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27630 GEN_INT (amount)));
27631 else
27632 {
27633 /* r3 is always free in the epilogue. */
27634 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27635
27636 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27637 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27638 }
27639 }
27640
27641 /* Emit a USE (stack_pointer_rtx), so that
27642 the stack adjustment will not be deleted. */
27643 emit_insn (gen_force_register_use (stack_pointer_rtx));
27644
27645 if (crtl->profile || !TARGET_SCHED_PROLOG)
27646 emit_insn (gen_blockage ());
27647
27648 /* Emit a clobber for each insn that will be restored in the epilogue,
27649 so that flow2 will get register lifetimes correct. */
27650 for (regno = 0; regno < 13; regno++)
27651 if (reg_needs_saving_p (regno))
27652 emit_clobber (gen_rtx_REG (SImode, regno));
27653
27654 if (! df_regs_ever_live_p (LR_REGNUM))
27655 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27656
27657 /* Clear all caller-saved regs that are not used to return. */
27658 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27659 cmse_nonsecure_entry_clear_before_return ();
27660 }
27661
27662 /* Epilogue code for APCS frame. */
27663 static void
27664 arm_expand_epilogue_apcs_frame (bool really_return)
27665 {
27666 unsigned long func_type;
27667 unsigned long saved_regs_mask;
27668 int num_regs = 0;
27669 int i;
27670 int floats_from_frame = 0;
27671 arm_stack_offsets *offsets;
27672
27673 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27674 func_type = arm_current_func_type ();
27675
27676 /* Get frame offsets for ARM. */
27677 offsets = arm_get_frame_offsets ();
27678 saved_regs_mask = offsets->saved_regs_mask;
27679
27680 /* Find the offset of the floating-point save area in the frame. */
27681 floats_from_frame
27682 = (offsets->saved_args
27683 + arm_compute_static_chain_stack_bytes ()
27684 - offsets->frame);
27685
27686 /* Compute how many core registers saved and how far away the floats are. */
27687 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27688 if (saved_regs_mask & (1 << i))
27689 {
27690 num_regs++;
27691 floats_from_frame += 4;
27692 }
27693
27694 if (TARGET_VFP_BASE)
27695 {
27696 int start_reg;
27697 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27698
27699 /* The offset is from IP_REGNUM. */
27700 int saved_size = arm_get_vfp_saved_size ();
27701 if (saved_size > 0)
27702 {
27703 rtx_insn *insn;
27704 floats_from_frame += saved_size;
27705 insn = emit_insn (gen_addsi3 (ip_rtx,
27706 hard_frame_pointer_rtx,
27707 GEN_INT (-floats_from_frame)));
27708 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27709 ip_rtx, hard_frame_pointer_rtx);
27710 }
27711
27712 /* Generate VFP register multi-pop. */
27713 start_reg = FIRST_VFP_REGNUM;
27714
27715 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27716 /* Look for a case where a reg does not need restoring. */
27717 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27718 {
27719 if (start_reg != i)
27720 arm_emit_vfp_multi_reg_pop (start_reg,
27721 (i - start_reg) / 2,
27722 gen_rtx_REG (SImode,
27723 IP_REGNUM));
27724 start_reg = i + 2;
27725 }
27726
27727 /* Restore the remaining regs that we have discovered (or possibly
27728 even all of them, if the conditional in the for loop never
27729 fired). */
27730 if (start_reg != i)
27731 arm_emit_vfp_multi_reg_pop (start_reg,
27732 (i - start_reg) / 2,
27733 gen_rtx_REG (SImode, IP_REGNUM));
27734 }
27735
27736 if (TARGET_IWMMXT)
27737 {
27738 /* The frame pointer is guaranteed to be non-double-word aligned, as
27739 it is set to double-word-aligned old_stack_pointer - 4. */
27740 rtx_insn *insn;
27741 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27742
27743 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27744 if (reg_needs_saving_p (i))
27745 {
27746 rtx addr = gen_frame_mem (V2SImode,
27747 plus_constant (Pmode, hard_frame_pointer_rtx,
27748 - lrm_count * 4));
27749 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27750 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27751 gen_rtx_REG (V2SImode, i),
27752 NULL_RTX);
27753 lrm_count += 2;
27754 }
27755 }
27756
27757 /* saved_regs_mask should contain IP, which holds the old stack pointer
27758 from the time the activation record was created. Since SP and IP are
27759 adjacent registers, we can restore the value directly into SP. */
27760 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27761 saved_regs_mask &= ~(1 << IP_REGNUM);
27762 saved_regs_mask |= (1 << SP_REGNUM);
27763
27764 /* There are two registers left in saved_regs_mask - LR and PC. We
27765 only need to restore LR (the return address), but to
27766 save time we can load it directly into PC, unless we need a
27767 special function exit sequence, or we are not really returning. */
27768 if (really_return
27769 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27770 && !crtl->calls_eh_return)
27771 /* Delete LR from the register mask, so that LR on
27772 the stack is loaded into the PC in the register mask. */
27773 saved_regs_mask &= ~(1 << LR_REGNUM);
27774 else
27775 saved_regs_mask &= ~(1 << PC_REGNUM);
27776
27777 num_regs = bit_count (saved_regs_mask);
27778 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27779 {
27780 rtx_insn *insn;
27781 emit_insn (gen_blockage ());
27782 /* Unwind the stack to just below the saved registers. */
27783 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27784 hard_frame_pointer_rtx,
27785 GEN_INT (- 4 * num_regs)));
27786
27787 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27788 stack_pointer_rtx, hard_frame_pointer_rtx);
27789 }
27790
27791 arm_emit_multi_reg_pop (saved_regs_mask);
27792
27793 if (IS_INTERRUPT (func_type))
27794 {
27795 /* Interrupt handlers will have pushed the
27796 IP onto the stack, so restore it now. */
27797 rtx_insn *insn;
27798 rtx addr = gen_rtx_MEM (SImode,
27799 gen_rtx_POST_INC (SImode,
27800 stack_pointer_rtx));
27801 set_mem_alias_set (addr, get_frame_alias_set ());
27802 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27803 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27804 gen_rtx_REG (SImode, IP_REGNUM),
27805 NULL_RTX);
27806 }
27807
27808 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27809 return;
27810
27811 if (crtl->calls_eh_return)
27812 emit_insn (gen_addsi3 (stack_pointer_rtx,
27813 stack_pointer_rtx,
27814 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27815
27816 if (IS_STACKALIGN (func_type))
27817 /* Restore the original stack pointer. Before prologue, the stack was
27818 realigned and the original stack pointer saved in r0. For details,
27819 see comment in arm_expand_prologue. */
27820 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27821
27822 emit_jump_insn (simple_return_rtx);
27823 }
27824
27825 /* Generate RTL to represent the ARM epilogue. REALLY_RETURN is true if the
27826 function is not a sibcall. */
27827 void
27828 arm_expand_epilogue (bool really_return)
27829 {
27830 unsigned long func_type;
27831 unsigned long saved_regs_mask;
27832 int num_regs = 0;
27833 int i;
27834 int amount;
27835 arm_stack_offsets *offsets;
27836
27837 func_type = arm_current_func_type ();
27838
27839 /* Naked functions don't have an epilogue. Hence, generate a return pattern
27840 and let output_return_instruction take care of any instruction emission. */
27841 if (IS_NAKED (func_type)
27842 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27843 {
27844 if (really_return)
27845 emit_jump_insn (simple_return_rtx);
27846 return;
27847 }
27848
27849 /* If we are throwing an exception, then we really must be doing a
27850 return, so we can't tail-call. */
27851 gcc_assert (!crtl->calls_eh_return || really_return);
27852
27853 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27854 {
27855 arm_expand_epilogue_apcs_frame (really_return);
27856 return;
27857 }
27858
27859 /* Get frame offsets for ARM. */
27860 offsets = arm_get_frame_offsets ();
27861 saved_regs_mask = offsets->saved_regs_mask;
27862 num_regs = bit_count (saved_regs_mask);
27863
27864 if (frame_pointer_needed)
27865 {
27866 rtx_insn *insn;
27867 /* Restore stack pointer if necessary. */
27868 if (TARGET_ARM)
27869 {
27870 /* In ARM mode, the frame pointer points to the first saved register.
27871 Restore the stack pointer to the last saved register. */
27872 amount = offsets->frame - offsets->saved_regs;
27873
27874 /* Force out any pending memory operations that reference stacked data
27875 before stack de-allocation occurs. */
27876 emit_insn (gen_blockage ());
27877 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27878 hard_frame_pointer_rtx,
27879 GEN_INT (amount)));
27880 arm_add_cfa_adjust_cfa_note (insn, amount,
27881 stack_pointer_rtx,
27882 hard_frame_pointer_rtx);
27883
27884 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27885 deleted. */
27886 emit_insn (gen_force_register_use (stack_pointer_rtx));
27887 }
27888 else
27889 {
27890 /* In Thumb-2 mode, the frame pointer points to the last saved
27891 register. */
27892 amount = offsets->locals_base - offsets->saved_regs;
27893 if (amount)
27894 {
27895 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27896 hard_frame_pointer_rtx,
27897 GEN_INT (amount)));
27898 arm_add_cfa_adjust_cfa_note (insn, amount,
27899 hard_frame_pointer_rtx,
27900 hard_frame_pointer_rtx);
27901 }
27902
27903 /* Force out any pending memory operations that reference stacked data
27904 before stack de-allocation occurs. */
27905 emit_insn (gen_blockage ());
27906 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27907 hard_frame_pointer_rtx));
27908 arm_add_cfa_adjust_cfa_note (insn, 0,
27909 stack_pointer_rtx,
27910 hard_frame_pointer_rtx);
27911 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27912 deleted. */
27913 emit_insn (gen_force_register_use (stack_pointer_rtx));
27914 }
27915 }
27916 else
27917 {
27918 /* Pop off outgoing args and local frame to adjust stack pointer to
27919 last saved register. */
27920 amount = offsets->outgoing_args - offsets->saved_regs;
27921 if (amount)
27922 {
27923 rtx_insn *tmp;
27924 /* Force out any pending memory operations that reference stacked data
27925 before stack de-allocation occurs. */
27926 emit_insn (gen_blockage ());
27927 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27928 stack_pointer_rtx,
27929 GEN_INT (amount)));
27930 arm_add_cfa_adjust_cfa_note (tmp, amount,
27931 stack_pointer_rtx, stack_pointer_rtx);
27932 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27933 not deleted. */
27934 emit_insn (gen_force_register_use (stack_pointer_rtx));
27935 }
27936 }
27937
27938 if (TARGET_VFP_BASE)
27939 {
27940 /* Generate VFP register multi-pop. */
27941 int end_reg = LAST_VFP_REGNUM + 1;
27942
27943 /* Scan the registers in reverse order. We need to match
27944 any groupings made in the prologue and generate matching
27945 vldm operations. The need to match groups is because,
27946 unlike pop, vldm can only do consecutive regs. */
27947 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27948 /* Look for a case where a reg does not need restoring. */
27949 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27950 {
27951 /* Restore the regs discovered so far (from reg+2 to
27952 end_reg). */
27953 if (end_reg > i + 2)
27954 arm_emit_vfp_multi_reg_pop (i + 2,
27955 (end_reg - (i + 2)) / 2,
27956 stack_pointer_rtx);
27957 end_reg = i;
27958 }
27959
27960 /* Restore the remaining regs that we have discovered (or possibly
27961 even all of them, if the conditional in the for loop never
27962 fired). */
27963 if (end_reg > i + 2)
27964 arm_emit_vfp_multi_reg_pop (i + 2,
27965 (end_reg - (i + 2)) / 2,
27966 stack_pointer_rtx);
27967 }
27968
27969 if (TARGET_IWMMXT)
27970 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27971 if (reg_needs_saving_p (i))
27972 {
27973 rtx_insn *insn;
27974 rtx addr = gen_rtx_MEM (V2SImode,
27975 gen_rtx_POST_INC (SImode,
27976 stack_pointer_rtx));
27977 set_mem_alias_set (addr, get_frame_alias_set ());
27978 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27979 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27980 gen_rtx_REG (V2SImode, i),
27981 NULL_RTX);
27982 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27983 stack_pointer_rtx, stack_pointer_rtx);
27984 }
27985
27986 if (saved_regs_mask)
27987 {
27988 rtx insn;
27989 bool return_in_pc = false;
27990
27991 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27992 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27993 && !IS_CMSE_ENTRY (func_type)
27994 && !IS_STACKALIGN (func_type)
27995 && really_return
27996 && crtl->args.pretend_args_size == 0
27997 && saved_regs_mask & (1 << LR_REGNUM)
27998 && !crtl->calls_eh_return
27999 && !arm_current_function_pac_enabled_p ())
28000 {
28001 saved_regs_mask &= ~(1 << LR_REGNUM);
28002 saved_regs_mask |= (1 << PC_REGNUM);
28003 return_in_pc = true;
28004 }
28005
28006 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
28007 {
28008 for (i = 0; i <= LAST_ARM_REGNUM; i++)
28009 if (saved_regs_mask & (1 << i))
28010 {
28011 rtx addr = gen_rtx_MEM (SImode,
28012 gen_rtx_POST_INC (SImode,
28013 stack_pointer_rtx));
28014 set_mem_alias_set (addr, get_frame_alias_set ());
28015
28016 if (i == PC_REGNUM)
28017 {
28018 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
28019 XVECEXP (insn, 0, 0) = ret_rtx;
28020 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
28021 addr);
28022 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
28023 insn = emit_jump_insn (insn);
28024 }
28025 else
28026 {
28027 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
28028 addr));
28029 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
28030 gen_rtx_REG (SImode, i),
28031 NULL_RTX);
28032 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
28033 stack_pointer_rtx,
28034 stack_pointer_rtx);
28035 }
28036 }
28037 }
28038 else
28039 {
28040 if (TARGET_LDRD
28041 && current_tune->prefer_ldrd_strd
28042 && !optimize_function_for_size_p (cfun))
28043 {
28044 if (TARGET_THUMB2)
28045 thumb2_emit_ldrd_pop (saved_regs_mask);
28046 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28047 arm_emit_ldrd_pop (saved_regs_mask);
28048 else
28049 arm_emit_multi_reg_pop (saved_regs_mask);
28050 }
28051 else
28052 arm_emit_multi_reg_pop (saved_regs_mask);
28053 }
28054
28055 if (return_in_pc)
28056 return;
28057 }
28058
28059 amount
28060 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
28061 if (amount)
28062 {
28063 int i, j;
28064 rtx dwarf = NULL_RTX;
28065 rtx_insn *tmp =
28066 emit_insn (gen_addsi3 (stack_pointer_rtx,
28067 stack_pointer_rtx,
28068 GEN_INT (amount)));
28069
28070 RTX_FRAME_RELATED_P (tmp) = 1;
28071
28072 if (cfun->machine->uses_anonymous_args)
28073 {
28074 /* Restore pretend args. Refer to arm_expand_prologue for how pretend
28075 args are saved on the stack. */
28076 int num_regs = crtl->args.pretend_args_size / 4;
28077 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
28078 for (j = 0, i = 0; j < num_regs; i++)
28079 if (saved_regs_mask & (1 << i))
28080 {
28081 rtx reg = gen_rtx_REG (SImode, i);
28082 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28083 j++;
28084 }
28085 REG_NOTES (tmp) = dwarf;
28086 }
28087 arm_add_cfa_adjust_cfa_note (tmp, amount,
28088 stack_pointer_rtx, stack_pointer_rtx);
28089 }
28090
28091 if (IS_CMSE_ENTRY (func_type))
28092 {
28093 /* CMSE_ENTRY always returns. */
28094 gcc_assert (really_return);
28095 /* Clear all caller-saved regs that are not used to return. */
28096 cmse_nonsecure_entry_clear_before_return ();
28097
28098 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28099 VLDR. */
28100 if (TARGET_HAVE_FPCXT_CMSE)
28101 {
28102 rtx_insn *insn;
28103
28104 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28105 GEN_INT (FPCXTNS_ENUM)));
28106 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28107 plus_constant (Pmode, stack_pointer_rtx, 4));
28108 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28109 RTX_FRAME_RELATED_P (insn) = 1;
28110 }
28111 }
28112
28113 if (arm_current_function_pac_enabled_p ())
28114 emit_insn (gen_aut_nop ());
28115
28116 if (!really_return)
28117 return;
28118
28119 if (crtl->calls_eh_return)
28120 emit_insn (gen_addsi3 (stack_pointer_rtx,
28121 stack_pointer_rtx,
28122 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28123
28124 if (IS_STACKALIGN (func_type))
28125 /* Restore the original stack pointer. Before prologue, the stack was
28126 realigned and the original stack pointer saved in r0. For details,
28127 see comment in arm_expand_prologue. */
28128 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28129
28130 emit_jump_insn (simple_return_rtx);
28131 }
28132
28133 /* Implementation of insn prologue_thumb1_interwork. This is the first
28134 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28135
28136 const char *
28137 thumb1_output_interwork (void)
28138 {
28139 const char * name;
28140 FILE *f = asm_out_file;
28141
28142 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28143 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28144 == SYMBOL_REF);
28145 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28146
28147 /* Generate code sequence to switch us into Thumb mode. */
28148 /* The .code 32 directive has already been emitted by
28149 ASM_DECLARE_FUNCTION_NAME. */
28150 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28151 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28152
28153 /* Generate a label, so that the debugger will notice the
28154 change in instruction sets. This label is also used by
28155 the assembler to bypass the ARM code when this function
28156 is called from a Thumb encoded function elsewhere in the
28157 same file. Hence the definition of STUB_NAME here must
28158 agree with the definition in gas/config/tc-arm.c. */
28159
28160 #define STUB_NAME ".real_start_of"
28161
28162 fprintf (f, "\t.code\t16\n");
28163 #ifdef ARM_PE
28164 if (arm_dllexport_name_p (name))
28165 name = arm_strip_name_encoding (name);
28166 #endif
28167 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28168 fprintf (f, "\t.thumb_func\n");
28169 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28170
28171 return "";
28172 }
28173
28174 /* Handle the case of a double word load into a low register from
28175 a computed memory address. The computed address may involve a
28176 register which is overwritten by the load. */
28177 const char *
28178 thumb_load_double_from_address (rtx *operands)
28179 {
28180 rtx addr;
28181 rtx base;
28182 rtx offset;
28183 rtx arg1;
28184 rtx arg2;
28185
28186 gcc_assert (REG_P (operands[0]));
28187 gcc_assert (MEM_P (operands[1]));
28188
28189 /* Get the memory address. */
28190 addr = XEXP (operands[1], 0);
28191
28192 /* Work out how the memory address is computed. */
28193 switch (GET_CODE (addr))
28194 {
28195 case REG:
28196 operands[2] = adjust_address (operands[1], SImode, 4);
28197
28198 if (REGNO (operands[0]) == REGNO (addr))
28199 {
28200 output_asm_insn ("ldr\t%H0, %2", operands);
28201 output_asm_insn ("ldr\t%0, %1", operands);
28202 }
28203 else
28204 {
28205 output_asm_insn ("ldr\t%0, %1", operands);
28206 output_asm_insn ("ldr\t%H0, %2", operands);
28207 }
28208 break;
28209
28210 case CONST:
28211 /* Compute <address> + 4 for the high order load. */
28212 operands[2] = adjust_address (operands[1], SImode, 4);
28213
28214 output_asm_insn ("ldr\t%0, %1", operands);
28215 output_asm_insn ("ldr\t%H0, %2", operands);
28216 break;
28217
28218 case PLUS:
28219 arg1 = XEXP (addr, 0);
28220 arg2 = XEXP (addr, 1);
28221
28222 if (CONSTANT_P (arg1))
28223 base = arg2, offset = arg1;
28224 else
28225 base = arg1, offset = arg2;
28226
28227 gcc_assert (REG_P (base));
28228
28229 /* Catch the case of <address> = <reg> + <reg> */
28230 if (REG_P (offset))
28231 {
28232 int reg_offset = REGNO (offset);
28233 int reg_base = REGNO (base);
28234 int reg_dest = REGNO (operands[0]);
28235
28236 /* Add the base and offset registers together into the
28237 higher destination register. */
28238 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28239 reg_dest + 1, reg_base, reg_offset);
28240
28241 /* Load the lower destination register from the address in
28242 the higher destination register. */
28243 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28244 reg_dest, reg_dest + 1);
28245
28246 /* Load the higher destination register from its own address
28247 plus 4. */
28248 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28249 reg_dest + 1, reg_dest + 1);
28250 }
28251 else
28252 {
28253 /* Compute <address> + 4 for the high order load. */
28254 operands[2] = adjust_address (operands[1], SImode, 4);
28255
28256 /* If the computed address is held in the low order register
28257 then load the high order register first, otherwise always
28258 load the low order register first. */
28259 if (REGNO (operands[0]) == REGNO (base))
28260 {
28261 output_asm_insn ("ldr\t%H0, %2", operands);
28262 output_asm_insn ("ldr\t%0, %1", operands);
28263 }
28264 else
28265 {
28266 output_asm_insn ("ldr\t%0, %1", operands);
28267 output_asm_insn ("ldr\t%H0, %2", operands);
28268 }
28269 }
28270 break;
28271
28272 case LABEL_REF:
28273 /* With no registers to worry about we can just load the value
28274 directly. */
28275 operands[2] = adjust_address (operands[1], SImode, 4);
28276
28277 output_asm_insn ("ldr\t%H0, %2", operands);
28278 output_asm_insn ("ldr\t%0, %1", operands);
28279 break;
28280
28281 default:
28282 gcc_unreachable ();
28283 }
28284
28285 return "";
28286 }
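/* Example of the overlap handling above (hypothetical registers): for a
   double-word load into r2/r3 from the address held in r2, the high word
   is loaded first,
	ldr	r3, [r2, #4]
	ldr	r2, [r2]
   so that the base register is not clobbered before its last use.  */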
28287
28288 const char *
28289 thumb_output_move_mem_multiple (int n, rtx *operands)
28290 {
28291 switch (n)
28292 {
28293 case 2:
28294 if (REGNO (operands[4]) > REGNO (operands[5]))
28295 std::swap (operands[4], operands[5]);
28296
28297 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28298 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28299 break;
28300
28301 case 3:
28302 if (REGNO (operands[4]) > REGNO (operands[5]))
28303 std::swap (operands[4], operands[5]);
28304 if (REGNO (operands[5]) > REGNO (operands[6]))
28305 std::swap (operands[5], operands[6]);
28306 if (REGNO (operands[4]) > REGNO (operands[5]))
28307 std::swap (operands[4], operands[5]);
28308
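	  /* The three conditional swaps above sort operands[4..6] into
	     ascending order, so the ldmia/stmia register lists below are
	     printed lowest-first, as the assembler expects.  */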
28309 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28310 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28311 break;
28312
28313 default:
28314 gcc_unreachable ();
28315 }
28316
28317 return "";
28318 }
28319
28320 /* Output a call-via instruction for thumb state. */
28321 const char *
28322 thumb_call_via_reg (rtx reg)
28323 {
28324 int regno = REGNO (reg);
28325 rtx *labelp;
28326
28327 gcc_assert (regno < LR_REGNUM);
28328
28329 /* If we are in the normal text section we can use a single instance
28330 per compilation unit. If we are doing function sections, then we need
28331 an entry per section, since we can't rely on reachability. */
28332 if (in_section == text_section)
28333 {
28334 thumb_call_reg_needed = 1;
28335
28336 if (thumb_call_via_label[regno] == NULL)
28337 thumb_call_via_label[regno] = gen_label_rtx ();
28338 labelp = thumb_call_via_label + regno;
28339 }
28340 else
28341 {
28342 if (cfun->machine->call_via[regno] == NULL)
28343 cfun->machine->call_via[regno] = gen_label_rtx ();
28344 labelp = cfun->machine->call_via + regno;
28345 }
28346
28347 output_asm_insn ("bl\t%a0", labelp);
28348 return "";
28349 }
28350
28351 /* Routines for generating rtl. */
28352 void
28353 thumb_expand_cpymemqi (rtx *operands)
28354 {
28355 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28356 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28357 HOST_WIDE_INT len = INTVAL (operands[2]);
28358 HOST_WIDE_INT offset = 0;
28359
28360 while (len >= 12)
28361 {
28362 emit_insn (gen_cpymem12b (out, in, out, in));
28363 len -= 12;
28364 }
28365
28366 if (len >= 8)
28367 {
28368 emit_insn (gen_cpymem8b (out, in, out, in));
28369 len -= 8;
28370 }
28371
28372 if (len >= 4)
28373 {
28374 rtx reg = gen_reg_rtx (SImode);
28375 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28376 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28377 len -= 4;
28378 offset += 4;
28379 }
28380
28381 if (len >= 2)
28382 {
28383 rtx reg = gen_reg_rtx (HImode);
28384 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28385 plus_constant (Pmode, in,
28386 offset))));
28387 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28388 offset)),
28389 reg));
28390 len -= 2;
28391 offset += 2;
28392 }
28393
28394 if (len)
28395 {
28396 rtx reg = gen_reg_rtx (QImode);
28397 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28398 plus_constant (Pmode, in,
28399 offset))));
28400 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28401 offset)),
28402 reg));
28403 }
28404 }
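/* Worked example: a 27-byte copy emits two cpymem12b blocks (24 bytes,
   with the pointer registers advanced by the pattern itself), and the
   remaining 3 bytes are then handled as one halfword move at offset 0
   followed by one byte move at offset 2.  */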
28405
28406 void
28407 thumb_reload_out_hi (rtx *operands)
28408 {
28409 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28410 }
28411
28412 /* Return the length of a function name prefix
28413 that starts with the character 'c'. */
28414 static int
28415 arm_get_strip_length (int c)
28416 {
28417 switch (c)
28418 {
28419 ARM_NAME_ENCODING_LENGTHS
28420 default: return 0;
28421 }
28422 }
28423
28424 /* Return a pointer to a function's name with any
28425 and all prefix encodings stripped from it. */
28426 const char *
28427 arm_strip_name_encoding (const char *name)
28428 {
28429 int skip;
28430
28431 while ((skip = arm_get_strip_length (* name)))
28432 name += skip;
28433
28434 return name;
28435 }
28436
28437 /* If there is a '*' anywhere in the name's prefix, then
28438 emit the stripped name verbatim, otherwise prepend an
28439 underscore if leading underscores are being used. */
28440 void
28441 arm_asm_output_labelref (FILE *stream, const char *name)
28442 {
28443 int skip;
28444 int verbatim = 0;
28445
28446 while ((skip = arm_get_strip_length (* name)))
28447 {
28448 verbatim |= (*name == '*');
28449 name += skip;
28450 }
28451
28452 if (verbatim)
28453 fputs (name, stream);
28454 else
28455 asm_fprintf (stream, "%U%s", name);
28456 }
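
/* For example (illustrative only): a name encoded as "*foo" is emitted
   verbatim as "foo", whereas a plain "foo" goes through %U and so picks up
   the user label prefix, e.g. "_foo" on targets that use leading
   underscores.  */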
28457
28458 /* This function is used to emit an EABI tag and its associated value.
28459 We emit the numerical value of the tag in case the assembler does not
28460 support textual tags (e.g. gas prior to 2.20). If requested we include
28461 the tag name in a comment so that anyone reading the assembler output
28462 will know which tag is being set.
28463
28464 This function is not static because arm-c.cc needs it too. */
28465
28466 void
28467 arm_emit_eabi_attribute (const char *name, int num, int val)
28468 {
28469 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28470 if (flag_verbose_asm || flag_debug_asm)
28471 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28472 asm_fprintf (asm_out_file, "\n");
28473 }
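
/* As an illustration, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2)
   prints

	.eabi_attribute 26, 2	@ Tag_ABI_enum_size

   where the trailing comment appears only when flag_verbose_asm or
   flag_debug_asm is set (-fverbose-asm or -dA).  */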
28474
28475 /* This function is used to print CPU tuning information as a comment
28476 in the assembler file. Pointers are not printed for now. */
28477
28478 void
28479 arm_print_tune_info (void)
28480 {
28481 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28482 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28483 current_tune->constant_limit);
28484 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28485 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28486 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28487 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28488 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28489 "prefetch.l1_cache_size:\t%d\n",
28490 current_tune->prefetch.l1_cache_size);
28491 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28492 "prefetch.l1_cache_line_size:\t%d\n",
28493 current_tune->prefetch.l1_cache_line_size);
28494 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28495 "prefer_constant_pool:\t%d\n",
28496 (int) current_tune->prefer_constant_pool);
28497 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28498 "branch_cost:\t(s:speed, p:predictable)\n");
28499 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28500 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28501 current_tune->branch_cost (false, false));
28502 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28503 current_tune->branch_cost (false, true));
28504 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28505 current_tune->branch_cost (true, false));
28506 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28507 current_tune->branch_cost (true, true));
28508 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28509 "prefer_ldrd_strd:\t%d\n",
28510 (int) current_tune->prefer_ldrd_strd);
28511 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28512 "logical_op_non_short_circuit:\t[%d,%d]\n",
28513 (int) current_tune->logical_op_non_short_circuit_thumb,
28514 (int) current_tune->logical_op_non_short_circuit_arm);
28515 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28516 "disparage_flag_setting_t16_encodings:\t%d\n",
28517 (int) current_tune->disparage_flag_setting_t16_encodings);
28518 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28519 "string_ops_prefer_neon:\t%d\n",
28520 (int) current_tune->string_ops_prefer_neon);
28521 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28522 "max_insns_inline_memset:\t%d\n",
28523 current_tune->max_insns_inline_memset);
28524 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28525 current_tune->fusible_ops);
28526 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28527 (int) current_tune->sched_autopref);
28528 }
28529
28530 /* The last set of target options used to emit .arch directives, etc. This
28531 could be a function-local static if it were not required to expose it as a
28532 root to the garbage collector. */
28533 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28534
28535 /* Print .arch and .arch_extension directives corresponding to the
28536 current architecture configuration. */
28537 static void
28538 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28539 {
28540 arm_build_target build_target;
28541 /* If the target options haven't changed since the last time we were called
28542 there is nothing to do. This should be sufficient to suppress the
28543 majority of redundant work. */
28544 if (last_asm_targ_options == targ_options)
28545 return;
28546
28547 last_asm_targ_options = targ_options;
28548
28549 build_target.isa = sbitmap_alloc (isa_num_bits);
28550 arm_configure_build_target (&build_target, targ_options, false);
28551
28552 if (build_target.core_name
28553 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28554 {
28555 const char* truncated_name
28556 = arm_rewrite_selected_cpu (build_target.core_name);
28557 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28558 }
28559
28560 const arch_option *arch
28561 = arm_parse_arch_option_name (all_architectures, "-march",
28562 build_target.arch_name);
28563 auto_sbitmap opt_bits (isa_num_bits);
28564
28565 gcc_assert (arch);
28566
28567 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28568 {
28569 /* Keep backward compatibility for assemblers which don't support
28570 armv7ve. Fortunately, none of the following extensions are reset
28571 by a .fpu directive. */
28572 asm_fprintf (stream, "\t.arch armv7-a\n");
28573 asm_fprintf (stream, "\t.arch_extension virt\n");
28574 asm_fprintf (stream, "\t.arch_extension idiv\n");
28575 asm_fprintf (stream, "\t.arch_extension sec\n");
28576 asm_fprintf (stream, "\t.arch_extension mp\n");
28577 }
28578 else
28579 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28580
28581 /* The .fpu directive will reset any architecture extensions from the
28582 assembler that relate to the fp/vector extensions. So put this out before
28583 any .arch_extension directives. */
28584 const char *fpu_name = (TARGET_SOFT_FLOAT
28585 ? "softvfp"
28586 : arm_identify_fpu_from_isa (build_target.isa));
28587 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28588
28589 if (!arch->common.extensions)
28590 return;
28591
28592 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28593 opt->name != NULL;
28594 opt++)
28595 {
28596 if (!opt->remove)
28597 {
28598 arm_initialize_isa (opt_bits, opt->isa_bits);
28599
28600 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28601 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
28602 floating point instructions are disabled. So the following check
28603 restricts the printing of ".arch_extension mve" and
28604 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28605 this special behaviour because the feature bits "mve" and
28606 "mve_float" are not part of "fpu bits", so they are not cleared
28607 when -mfloat-abi=soft (i.e. nofp) but the macros TARGET_HAVE_MVE and
28608 TARGET_HAVE_MVE_FLOAT are disabled. */
28609 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28610 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28611 && !TARGET_HAVE_MVE_FLOAT))
28612 continue;
28613
28614 /* If every feature bit of this option is set in the target ISA
28615 specification, print out the option name. However, don't print
28616 anything if all the bits are part of the FPU specification. */
28617 if (bitmap_subset_p (opt_bits, build_target.isa)
28618 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28619 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28620 }
28621 }
28622 }
28623
28624 static void
28625 arm_file_start (void)
28626 {
28627 int val;
28628 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28629 bool bti = (aarch_enable_bti == 1);
28630
28631 arm_print_asm_arch_directives
28632 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28633
28634 if (TARGET_BPABI)
28635 {
28636 /* If we have a named cpu, but the assembler does not support that
28637 name via .cpu, put out a cpu name attribute; but don't do this if the
28638 name starts with the fictitious prefix, 'generic'. */
28639 if (arm_active_target.core_name
28640 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28641 && !startswith (arm_active_target.core_name, "generic"))
28642 {
28643 const char* truncated_name
28644 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28645 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28646 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28647 truncated_name);
28648 }
28649
28650 if (print_tune_info)
28651 arm_print_tune_info ();
28652
28653 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28654 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28655
28656 if (TARGET_HARD_FLOAT_ABI)
28657 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28658
28659 /* Some of these attributes only apply when the corresponding features
28660 are used. However we don't have any easy way of figuring this out.
28661 Conservatively record the setting that would have been used. */
28662
28663 if (flag_rounding_math)
28664 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28665
28666 if (!flag_unsafe_math_optimizations)
28667 {
28668 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28669 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28670 }
28671 if (flag_signaling_nans)
28672 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28673
28674 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28675 flag_finite_math_only ? 1 : 3);
28676
28677 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28678 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28679 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28680 flag_short_enums ? 1 : 2);
28681
28682 /* Tag_ABI_optimization_goals. */
28683 if (optimize_size)
28684 val = 4;
28685 else if (optimize >= 2)
28686 val = 2;
28687 else if (optimize)
28688 val = 1;
28689 else
28690 val = 6;
28691 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28692
28693 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28694 unaligned_access);
28695
28696 if (arm_fp16_format)
28697 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28698 (int) arm_fp16_format);
28699
28700 if (TARGET_HAVE_PACBTI)
28701 {
28702 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28703 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28704 }
28705 else if (pac || bti)
28706 {
28707 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28708 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28709 }
28710
28711 if (bti)
28712 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28713 if (pac)
28714 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28715
28716 if (arm_lang_output_object_attributes_hook)
28717 arm_lang_output_object_attributes_hook();
28718 }
28719
28720 default_file_start ();
28721 }
28722
28723 static void
28724 arm_file_end (void)
28725 {
28726 int regno;
28727
28728 /* Just in case the last function output in the assembler had non-default
28729 architecture directives, we force the assembler state back to the default
28730 set, so that any 'calculated' build attributes are based on the default
28731 options rather than the special options for that function. */
28732 arm_print_asm_arch_directives
28733 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28734
28735 if (NEED_INDICATE_EXEC_STACK)
28736 /* Add .note.GNU-stack. */
28737 file_end_indicate_exec_stack ();
28738
28739 if (! thumb_call_reg_needed)
28740 return;
28741
28742 switch_to_section (text_section);
28743 asm_fprintf (asm_out_file, "\t.code 16\n");
28744 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28745
28746 for (regno = 0; regno < LR_REGNUM; regno++)
28747 {
28748 rtx label = thumb_call_via_label[regno];
28749
28750 if (label != 0)
28751 {
28752 targetm.asm_out.internal_label (asm_out_file, "L",
28753 CODE_LABEL_NUMBER (label));
28754 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28755 }
28756 }
28757 }
28758
28759 #ifndef ARM_PE
28760 /* Symbols in the text segment can be accessed without indirecting via the
28761 constant pool; it may take an extra binary operation, but this is still
28762 faster than indirecting via memory. Don't do this when not optimizing,
28763 since we won't be calculating all of the offsets necessary to do this
28764 simplification. */
28765
28766 static void
28767 arm_encode_section_info (tree decl, rtx rtl, int first)
28768 {
28769 if (optimize > 0 && TREE_CONSTANT (decl))
28770 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28771
28772 default_encode_section_info (decl, rtl, first);
28773 }
28774 #endif /* !ARM_PE */
28775
28776 static void
28777 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28778 {
28779 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28780 && !strcmp (prefix, "L"))
28781 {
28782 arm_ccfsm_state = 0;
28783 arm_target_insn = NULL;
28784 }
28785 default_internal_label (stream, prefix, labelno);
28786 }
28787
28788 /* Define classes to generate code as RTL or output asm to a file.
28789 Using templates then allows the same code to be used to output code
28790 sequences in the two formats. */
28791 class thumb1_const_rtl
28792 {
28793 public:
28794 thumb1_const_rtl (rtx dst) : dst (dst) {}
28795
28796 void mov (HOST_WIDE_INT val)
28797 {
28798 emit_set_insn (dst, GEN_INT (val));
28799 }
28800
28801 void add (HOST_WIDE_INT val)
28802 {
28803 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28804 }
28805
28806 void ashift (HOST_WIDE_INT shift)
28807 {
28808 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28809 }
28810
28811 void neg ()
28812 {
28813 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28814 }
28815
28816 private:
28817 rtx dst;
28818 };
28819
28820 class thumb1_const_print
28821 {
28822 public:
28823 thumb1_const_print (FILE *f, int regno)
28824 {
28825 t_file = f;
28826 dst_regname = reg_names[regno];
28827 }
28828
28829 void mov (HOST_WIDE_INT val)
28830 {
28831 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28832 dst_regname, val);
28833 }
28834
28835 void add (HOST_WIDE_INT val)
28836 {
28837 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28838 dst_regname, val);
28839 }
28840
28841 void ashift (HOST_WIDE_INT shift)
28842 {
28843 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28844 dst_regname, shift);
28845 }
28846
28847 void neg ()
28848 {
28849 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28850 }
28851
28852 private:
28853 FILE *t_file;
28854 const char *dst_regname;
28855 };
28856
28857 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28858 Avoid generating useless code when one of the bytes is zero. */
28859 template <class T>
28860 void
28861 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28862 {
28863 bool mov_done_p = false;
28864 unsigned HOST_WIDE_INT val = op1;
28865 int shift = 0;
28866 int i;
28867
28868 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28869
28870 if (val <= 255)
28871 {
28872 dst.mov (val);
28873 return;
28874 }
28875
28876 /* For negative numbers with the top nine bits set, build the
28877 opposite of OP1, then negate it; this is generally shorter and never
28878 longer. */
28879 if ((val & 0xFF800000) == 0xFF800000)
28880 {
28881 thumb1_gen_const_int_1 (dst, -op1);
28882 dst.neg ();
28883 return;
28884 }
28885
28886 /* In the general case, we need 7 instructions to build
28887 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28888 do better if VAL is small enough, or
28889 right-shiftable by a suitable amount. If the
28890 right-shift lets us encode at least one byte fewer,
28891 it's worth it: we save an adds and an lsls at the
28892 expense of a final lsls. */
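/* For instance (illustrative value): OP1 == 0x44000 has its lowest set
   bit at position 14, so it is right-shifted down to 17 and emitted as
   "movs rN, #17" followed by "lsls rN, #14" instead of being built
   byte by byte.  */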
28893 int final_shift = number_of_first_bit_set (val);
28894
28895 int leading_zeroes = clz_hwi (val);
28896 int number_of_bytes_needed
28897 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28898 / BITS_PER_UNIT) + 1;
28899 int number_of_bytes_needed2
28900 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28901 / BITS_PER_UNIT) + 1;
28902
28903 if (number_of_bytes_needed2 < number_of_bytes_needed)
28904 val >>= final_shift;
28905 else
28906 final_shift = 0;
28907
28908 /* If we are in a very small range, we can use either a single movs
28909 or movs+adds. */
28910 if (val <= 510)
28911 {
28912 if (val > 255)
28913 {
28914 unsigned HOST_WIDE_INT high = val - 255;
28915
28916 dst.mov (high);
28917 dst.add (255);
28918 }
28919 else
28920 dst.mov (val);
28921
28922 if (final_shift > 0)
28923 dst.ashift (final_shift);
28924 }
28925 else
28926 {
28927 /* General case, emit upper 3 bytes as needed. */
28928 for (i = 0; i < 3; i++)
28929 {
28930 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28931
28932 if (byte)
28933 {
28934 /* We are about to emit new bits, stop accumulating a
28935 shift amount, and left-shift only if we have already
28936 emitted some upper bits. */
28937 if (mov_done_p)
28938 {
28939 dst.ashift (shift);
28940 dst.add (byte);
28941 }
28942 else
28943 dst.mov (byte);
28944
28945 /* Stop accumulating shift amount since we've just
28946 emitted some bits. */
28947 shift = 0;
28948
28949 mov_done_p = true;
28950 }
28951
28952 if (mov_done_p)
28953 shift += 8;
28954 }
28955
28956 /* Emit lower byte. */
28957 if (!mov_done_p)
28958 dst.mov (val & 0xff);
28959 else
28960 {
28961 dst.ashift (shift);
28962 if (val & 0xff)
28963 dst.add (val & 0xff);
28964 }
28965
28966 if (final_shift > 0)
28967 dst.ashift (final_shift);
28968 }
28969 }
28970
28971 /* Proxies for thumb1.md, since the thumb1_const_print and
28972 thumb1_const_rtl classes are not exported. */
28973 void
28974 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28975 {
28976 thumb1_const_rtl t (dst);
28977 thumb1_gen_const_int_1 (t, op1);
28978 }
28979
28980 void
28981 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28982 {
28983 thumb1_const_print t (asm_out_file, REGNO (dst));
28984 thumb1_gen_const_int_1 (t, op1);
28985 }
28986
28987 /* Output code to add DELTA to the first argument, and then jump
28988 to FUNCTION. Used for C++ multiple inheritance. */
28989
28990 static void
28991 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28992 HOST_WIDE_INT, tree function)
28993 {
28994 static int thunk_label = 0;
28995 char label[256];
28996 char labelpc[256];
28997 int mi_delta = delta;
28998 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28999 int shift = 0;
29000 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
29001 ? 1 : 0);
29002 if (mi_delta < 0)
29003 mi_delta = - mi_delta;
29004
29005 final_start_function (emit_barrier (), file, 1);
29006
29007 if (TARGET_THUMB1)
29008 {
29009 int labelno = thunk_label++;
29010 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
29011 /* Thunks are entered in arm mode when available. */
29012 if (TARGET_THUMB1_ONLY)
29013 {
29014 /* push r3 so we can use it as a temporary. */
29015 /* TODO: Omit this save if r3 is not used. */
29016 fputs ("\tpush {r3}\n", file);
29017
29018 /* With -mpure-code, we cannot load the address from the
29019 constant pool: we build it explicitly. */
29020 if (target_pure_code)
29021 {
29022 fputs ("\tmovs\tr3, #:upper8_15:#", file);
29023 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29024 fputc ('\n', file);
29025 fputs ("\tlsls r3, #8\n", file);
29026 fputs ("\tadds\tr3, #:upper0_7:#", file);
29027 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29028 fputc ('\n', file);
29029 fputs ("\tlsls r3, #8\n", file);
29030 fputs ("\tadds\tr3, #:lower8_15:#", file);
29031 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29032 fputc ('\n', file);
29033 fputs ("\tlsls r3, #8\n", file);
29034 fputs ("\tadds\tr3, #:lower0_7:#", file);
29035 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29036 fputc ('\n', file);
29037 }
29038 else
29039 fputs ("\tldr\tr3, ", file);
29040 }
29041 else
29042 {
29043 fputs ("\tldr\tr12, ", file);
29044 }
29045
29046 if (!target_pure_code)
29047 {
29048 assemble_name (file, label);
29049 fputc ('\n', file);
29050 }
29051
29052 if (flag_pic)
29053 {
29054 /* If we are generating PIC, the ldr instruction below loads
29055 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29056 the address of the add + 8, so we have:
29057
29058 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29059 = target + 1.
29060
29061 Note that we have "+ 1" because some versions of GNU ld
29062 don't set the low bit of the result for R_ARM_REL32
29063 relocations against thumb function symbols.
29064 On ARMv6M this is +4, not +8. */
29065 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
29066 assemble_name (file, labelpc);
29067 fputs (":\n", file);
29068 if (TARGET_THUMB1_ONLY)
29069 {
29070 /* This is 2 insns after the start of the thunk, so we know it
29071 is 4-byte aligned. */
29072 fputs ("\tadd\tr3, pc, r3\n", file);
29073 fputs ("\tmov r12, r3\n", file);
29074 }
29075 else
29076 fputs ("\tadd\tr12, pc, r12\n", file);
29077 }
29078 else if (TARGET_THUMB1_ONLY)
29079 fputs ("\tmov r12, r3\n", file);
29080 }
29081 if (TARGET_THUMB1_ONLY)
29082 {
29083 if (mi_delta > 255)
29084 {
29085 /* With -mpure-code, we cannot load MI_DELTA from the
29086 constant pool: we build it explicitly. */
29087 if (target_pure_code)
29088 {
29089 thumb1_const_print r3 (file, 3);
29090 thumb1_gen_const_int_1 (r3, mi_delta);
29091 }
29092 else
29093 {
29094 fputs ("\tldr\tr3, ", file);
29095 assemble_name (file, label);
29096 fputs ("+4\n", file);
29097 }
29098 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
29099 mi_op, this_regno, this_regno);
29100 }
29101 else if (mi_delta != 0)
29102 {
29103 /* Thumb1 unified syntax requires an s suffix in the instruction name when
29104 one of the operands is an immediate. */
29105 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29106 mi_op, this_regno, this_regno,
29107 mi_delta);
29108 }
29109 }
29110 else
29111 {
29112 /* TODO: Use movw/movt for large constants when available. */
29113 while (mi_delta != 0)
29114 {
29115 if ((mi_delta & (3 << shift)) == 0)
29116 shift += 2;
29117 else
29118 {
29119 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29120 mi_op, this_regno, this_regno,
29121 mi_delta & (0xff << shift));
29122 mi_delta &= ~(0xff << shift);
29123 shift += 8;
29124 }
29125 }
29126 }
29127 if (TARGET_THUMB1)
29128 {
29129 if (TARGET_THUMB1_ONLY)
29130 fputs ("\tpop\t{r3}\n", file);
29131
29132 fprintf (file, "\tbx\tr12\n");
29133
29134 /* With -mpure-code, we don't need to emit literals for the
29135 function address and delta since we emitted code to build
29136 them. */
29137 if (!target_pure_code)
29138 {
29139 ASM_OUTPUT_ALIGN (file, 2);
29140 assemble_name (file, label);
29141 fputs (":\n", file);
29142 if (flag_pic)
29143 {
29144 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29145 rtx tem = XEXP (DECL_RTL (function), 0);
29146 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29147 pipeline offset is four rather than eight. Adjust the offset
29148 accordingly. */
29149 tem = plus_constant (GET_MODE (tem), tem,
29150 TARGET_THUMB1_ONLY ? -3 : -7);
29151 tem = gen_rtx_MINUS (GET_MODE (tem),
29152 tem,
29153 gen_rtx_SYMBOL_REF (Pmode,
29154 ggc_strdup (labelpc)));
29155 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29156 }
29157 else
29158 /* Output ".word .LTHUNKn". */
29159 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29160
29161 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29162 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29163 }
29164 }
29165 else
29166 {
29167 fputs ("\tb\t", file);
29168 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29169 if (NEED_PLT_RELOC)
29170 fputs ("(PLT)", file);
29171 fputc ('\n', file);
29172 }
29173
29174 final_end_function ();
29175 }
29176
29177 /* MI thunk handling for TARGET_32BIT. */
29178
29179 static void
29180 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29181 HOST_WIDE_INT vcall_offset, tree function)
29182 {
29183 const bool long_call_p = arm_is_long_call_p (function);
29184
29185 /* On ARM, this_regno is R0 or R1 depending on
29186 whether the function returns an aggregate or not.
29187 */
29188 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29189 function)
29190 ? R1_REGNUM : R0_REGNUM);
29191
29192 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29193 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29194 reload_completed = 1;
29195 emit_note (NOTE_INSN_PROLOGUE_END);
29196
29197 /* Add DELTA to THIS_RTX. */
29198 if (delta != 0)
29199 arm_split_constant (PLUS, Pmode, NULL_RTX,
29200 delta, this_rtx, this_rtx, false);
29201
29202 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29203 if (vcall_offset != 0)
29204 {
29205 /* Load *THIS_RTX. */
29206 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29207 /* Compute *THIS_RTX + VCALL_OFFSET. */
29208 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29209 false);
29210 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29211 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29212 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29213 }
29214
29215 /* Generate a tail call to the target function. */
29216 if (!TREE_USED (function))
29217 {
29218 assemble_external (function);
29219 TREE_USED (function) = 1;
29220 }
29221 rtx funexp = XEXP (DECL_RTL (function), 0);
29222 if (long_call_p)
29223 {
29224 emit_move_insn (temp, funexp);
29225 funexp = temp;
29226 }
29227 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29228 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29229 SIBLING_CALL_P (insn) = 1;
29230 emit_barrier ();
29231
29232 /* Indirect calls require a bit of fixup in PIC mode. */
29233 if (long_call_p)
29234 {
29235 split_all_insns_noflow ();
29236 arm_reorg ();
29237 }
29238
29239 insn = get_insns ();
29240 shorten_branches (insn);
29241 final_start_function (insn, file, 1);
29242 final (insn, file, 1);
29243 final_end_function ();
29244
29245 /* Stop pretending this is a post-reload pass. */
29246 reload_completed = 0;
29247 }
29248
29249 /* Output code to add DELTA to the first argument, and then jump
29250 to FUNCTION. Used for C++ multiple inheritance. */
29251
29252 static void
29253 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29254 HOST_WIDE_INT vcall_offset, tree function)
29255 {
29256 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29257
29258 assemble_start_function (thunk, fnname);
29259 if (TARGET_32BIT)
29260 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29261 else
29262 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29263 assemble_end_function (thunk, fnname);
29264 }
29265
29266 int
29267 arm_emit_vector_const (FILE *file, rtx x)
29268 {
29269 int i;
29270 const char * pattern;
29271
29272 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29273
29274 switch (GET_MODE (x))
29275 {
29276 case E_V2SImode: pattern = "%08x"; break;
29277 case E_V4HImode: pattern = "%04x"; break;
29278 case E_V8QImode: pattern = "%02x"; break;
29279 default: gcc_unreachable ();
29280 }
29281
29282 fprintf (file, "0x");
29283 for (i = CONST_VECTOR_NUNITS (x); i--;)
29284 {
29285 rtx element;
29286
29287 element = CONST_VECTOR_ELT (x, i);
29288 fprintf (file, pattern, INTVAL (element));
29289 }
29290
29291 return 1;
29292 }
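
/* For example (illustrative constant): a V4HImode vector {1, 2, 3, 4} is
   printed as 0x0004000300020001, i.e. the elements are emitted from the
   highest-numbered lane downwards, each zero-padded to its element
   width.  */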
29293
29294 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29295 HFmode constant pool entries are actually loaded with ldr. */
29296 void
29297 arm_emit_fp16_const (rtx c)
29298 {
29299 long bits;
29300
29301 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29302 if (WORDS_BIG_ENDIAN)
29303 assemble_zeros (2);
29304 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29305 if (!WORDS_BIG_ENDIAN)
29306 assemble_zeros (2);
29307 }
29308
29309 const char *
29310 arm_output_load_gr (rtx *operands)
29311 {
29312 rtx reg;
29313 rtx offset;
29314 rtx wcgr;
29315 rtx sum;
29316
29317 if (!MEM_P (operands [1])
29318 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29319 || !REG_P (reg = XEXP (sum, 0))
29320 || !CONST_INT_P (offset = XEXP (sum, 1))
29321 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29322 return "wldrw%?\t%0, %1";
29323
29324 /* Fix up an out-of-range load of a GR register. */
29325 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29326 wcgr = operands[0];
29327 operands[0] = reg;
29328 output_asm_insn ("ldr%?\t%0, %1", operands);
29329
29330 operands[0] = wcgr;
29331 operands[1] = reg;
29332 output_asm_insn ("tmcr%?\t%0, %1", operands);
29333 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29334
29335 return "";
29336 }
29337
29338 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29339
29340 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29341 named arg and all anonymous args onto the stack.
29342 XXX I know the prologue shouldn't be pushing registers, but it is faster
29343 that way. */
29344
29345 static void
29346 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29347 const function_arg_info &arg,
29348 int *pretend_size,
29349 int second_time ATTRIBUTE_UNUSED)
29350 {
29351 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29352 int nregs;
29353
29354 cfun->machine->uses_anonymous_args = 1;
29355 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29356 {
29357 nregs = pcum->aapcs_ncrn;
29358 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29359 && (nregs & 1))
29360 {
29361 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29362 if (res < 0 && warn_psabi)
29363 inform (input_location, "parameter passing for argument of "
29364 "type %qT changed in GCC 7.1", arg.type);
29365 else if (res > 0)
29366 {
29367 nregs++;
29368 if (res > 1 && warn_psabi)
29369 inform (input_location,
29370 "parameter passing for argument of type "
29371 "%qT changed in GCC 9.1", arg.type);
29372 }
29373 }
29374 }
29375 else
29376 nregs = pcum->nregs;
29377
29378 if (nregs < NUM_ARG_REGS)
29379 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29380 }
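
/* Worked example (AAPCS, illustrative): with a single named argument
   passed in r0 and no doubleword-alignment adjustment, nregs is 1, so
   *pretend_size becomes (NUM_ARG_REGS - 1) * UNITS_PER_WORD = 12 bytes
   and the prologue pushes r1-r3 for the anonymous arguments.  */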
29381
29382 /* We can't rely on the caller doing the proper promotion when
29383 using APCS or ATPCS. */
29384
29385 static bool
29386 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29387 {
29388 return !TARGET_AAPCS_BASED;
29389 }
29390
29391 static machine_mode
29392 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29393 machine_mode mode,
29394 int *punsignedp ATTRIBUTE_UNUSED,
29395 const_tree fntype ATTRIBUTE_UNUSED,
29396 int for_return ATTRIBUTE_UNUSED)
29397 {
29398 if (GET_MODE_CLASS (mode) == MODE_INT
29399 && GET_MODE_SIZE (mode) < 4)
29400 return SImode;
29401
29402 return mode;
29403 }
29404
29405
29406 static bool
29407 arm_default_short_enums (void)
29408 {
29409 return ARM_DEFAULT_SHORT_ENUMS;
29410 }
29411
29412
29413 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29414
29415 static bool
29416 arm_align_anon_bitfield (void)
29417 {
29418 return TARGET_AAPCS_BASED;
29419 }
29420
29421
29422 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29423
29424 static tree
29425 arm_cxx_guard_type (void)
29426 {
29427 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29428 }
29429
29430
29431 /* The EABI says test the least significant bit of a guard variable. */
29432
29433 static bool
29434 arm_cxx_guard_mask_bit (void)
29435 {
29436 return TARGET_AAPCS_BASED;
29437 }
29438
29439
29440 /* The EABI specifies that all array cookies are 8 bytes long. */
29441
29442 static tree
29443 arm_get_cookie_size (tree type)
29444 {
29445 tree size;
29446
29447 if (!TARGET_AAPCS_BASED)
29448 return default_cxx_get_cookie_size (type);
29449
29450 size = build_int_cst (sizetype, 8);
29451 return size;
29452 }
29453
29454
29455 /* The EABI says that array cookies should also contain the element size. */
29456
29457 static bool
29458 arm_cookie_has_size (void)
29459 {
29460 return TARGET_AAPCS_BASED;
29461 }
29462
29463
29464 /* The EABI says constructors and destructors should return a pointer to
29465 the object constructed/destroyed. */
29466
29467 static bool
29468 arm_cxx_cdtor_returns_this (void)
29469 {
29470 return TARGET_AAPCS_BASED;
29471 }
29472
29473 /* The EABI says that an inline function may never be the key
29474 method. */
29475
29476 static bool
29477 arm_cxx_key_method_may_be_inline (void)
29478 {
29479 return !TARGET_AAPCS_BASED;
29480 }
29481
29482 static void
29483 arm_cxx_determine_class_data_visibility (tree decl)
29484 {
29485 if (!TARGET_AAPCS_BASED
29486 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29487 return;
29488
29489 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29490 is exported. However, on systems without dynamic vague linkage,
29491 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29492 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29493 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29494 else
29495 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29496 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29497 }
29498
29499 static bool
29500 arm_cxx_class_data_always_comdat (void)
29501 {
29502 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29503 vague linkage if the class has no key function. */
29504 return !TARGET_AAPCS_BASED;
29505 }
29506
29507
29508 /* The EABI says __aeabi_atexit should be used to register static
29509 destructors. */
29510
29511 static bool
29512 arm_cxx_use_aeabi_atexit (void)
29513 {
29514 return TARGET_AAPCS_BASED;
29515 }
29516
29517
29518 void
29519 arm_set_return_address (rtx source, rtx scratch)
29520 {
29521 arm_stack_offsets *offsets;
29522 HOST_WIDE_INT delta;
29523 rtx addr, mem;
29524 unsigned long saved_regs;
29525
29526 offsets = arm_get_frame_offsets ();
29527 saved_regs = offsets->saved_regs_mask;
29528
29529 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29530 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29531 else
29532 {
29533 if (frame_pointer_needed)
29534 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29535 else
29536 {
29537 /* LR will be the first saved register. */
29538 delta = offsets->outgoing_args - (offsets->frame + 4);
29539
29540
29541 if (delta >= 4096)
29542 {
29543 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29544 GEN_INT (delta & ~4095)));
29545 addr = scratch;
29546 delta &= 4095;
29547 }
29548 else
29549 addr = stack_pointer_rtx;
29550
29551 addr = plus_constant (Pmode, addr, delta);
29552 }
29553
29554 /* The store needs to be marked to prevent DSE from deleting
29555 it as dead if it is based on fp. */
29556 mem = gen_frame_mem (Pmode, addr);
29557 MEM_VOLATILE_P (mem) = true;
29558 emit_move_insn (mem, source);
29559 }
29560 }
29561
29562
29563 void
29564 thumb_set_return_address (rtx source, rtx scratch)
29565 {
29566 arm_stack_offsets *offsets;
29567 HOST_WIDE_INT delta;
29568 HOST_WIDE_INT limit;
29569 int reg;
29570 rtx addr, mem;
29571 unsigned long mask;
29572
29573 emit_use (source);
29574
29575 offsets = arm_get_frame_offsets ();
29576 mask = offsets->saved_regs_mask;
29577 if (mask & (1 << LR_REGNUM))
29578 {
29579 limit = 1024;
29580 /* Find the saved regs. */
29581 if (frame_pointer_needed)
29582 {
29583 delta = offsets->soft_frame - offsets->saved_args;
29584 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29585 if (TARGET_THUMB1)
29586 limit = 128;
29587 }
29588 else
29589 {
29590 delta = offsets->outgoing_args - offsets->saved_args;
29591 reg = SP_REGNUM;
29592 }
29593 /* Allow for the stack frame. */
29594 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29595 delta -= 16;
29596 /* The link register is always the first saved register. */
29597 delta -= 4;
29598
29599 /* Construct the address. */
29600 addr = gen_rtx_REG (SImode, reg);
29601 if (delta > limit)
29602 {
29603 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29604 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29605 addr = scratch;
29606 }
29607 else
29608 addr = plus_constant (Pmode, addr, delta);
29609
29610 /* The store needs to be marked to prevent DSE from deleting
29611 it as dead if it is based on fp. */
29612 mem = gen_frame_mem (Pmode, addr);
29613 MEM_VOLATILE_P (mem) = true;
29614 emit_move_insn (mem, source);
29615 }
29616 else
29617 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29618 }
29619
29620 /* Implements target hook vector_mode_supported_p. */
29621 bool
29622 arm_vector_mode_supported_p (machine_mode mode)
29623 {
29624 /* Neon also supports V2SImode, etc. listed in the clause below. */
29625 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29626 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29627 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29628 || mode == V8BFmode))
29629 return true;
29630
29631 if ((TARGET_NEON || TARGET_IWMMXT)
29632 && ((mode == V2SImode)
29633 || (mode == V4HImode)
29634 || (mode == V8QImode)))
29635 return true;
29636
29637 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29638 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29639 || mode == V2HAmode))
29640 return true;
29641
29642 if (TARGET_HAVE_MVE
29643 && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
29644 return true;
29645
29646 if (TARGET_HAVE_MVE_FLOAT
29647 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29648 return true;
29649
29650 return false;
29651 }
29652
29653 /* Implements target hook array_mode_supported_p. */
29654
29655 static bool
29656 arm_array_mode_supported_p (machine_mode mode,
29657 unsigned HOST_WIDE_INT nelems)
29658 {
29659 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29660 for now, as the lane-swapping logic needs to be extended in the expanders.
29661 See PR target/82518. */
29662 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29663 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29664 && (nelems >= 2 && nelems <= 4))
29665 return true;
29666
29667 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29668 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29669 return true;
29670
29671 return false;
29672 }
29673
29674 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29675 registers when autovectorizing for Neon, at least until multiple vector
29676 widths are supported properly by the middle-end. */
29677
29678 static machine_mode
29679 arm_preferred_simd_mode (scalar_mode mode)
29680 {
29681 if (TARGET_NEON)
29682 switch (mode)
29683 {
29684 case E_HFmode:
29685 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29686 case E_SFmode:
29687 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29688 case E_SImode:
29689 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29690 case E_HImode:
29691 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29692 case E_QImode:
29693 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29694 case E_DImode:
29695 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29696 return V2DImode;
29697 break;
29698
29699 default:;
29700 }
29701
29702 if (TARGET_REALLY_IWMMXT)
29703 switch (mode)
29704 {
29705 case E_SImode:
29706 return V2SImode;
29707 case E_HImode:
29708 return V4HImode;
29709 case E_QImode:
29710 return V8QImode;
29711
29712 default:;
29713 }
29714
29715 if (TARGET_HAVE_MVE)
29716 switch (mode)
29717 {
29718 case E_QImode:
29719 return V16QImode;
29720 case E_HImode:
29721 return V8HImode;
29722 case E_SImode:
29723 return V4SImode;
29724
29725 default:;
29726 }
29727
29728 if (TARGET_HAVE_MVE_FLOAT)
29729 switch (mode)
29730 {
29731 case E_HFmode:
29732 return V8HFmode;
29733 case E_SFmode:
29734 return V4SFmode;
29735
29736 default:;
29737 }
29738
29739 return word_mode;
29740 }
29741
29742 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29743
29744 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29745 using r0-r4 for function arguments and r7 for the stack frame, and not have
29746 enough left over to do doubleword arithmetic. For Thumb-2 all the
29747 potentially problematic instructions accept high registers so this is not
29748 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29749 that require many low registers. */
29750 static bool
29751 arm_class_likely_spilled_p (reg_class_t rclass)
29752 {
29753 if ((TARGET_THUMB1 && rclass == LO_REGS)
29754 || rclass == CC_REG)
29755 return true;
29756
29757 return default_class_likely_spilled_p (rclass);
29758 }
29759
29760 /* Implements target hook small_register_classes_for_mode_p. */
29761 bool
29762 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29763 {
29764 return TARGET_THUMB1;
29765 }
29766
29767 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29768 ARM insns and therefore guarantee that the shift count is modulo 256.
29769 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29770 guarantee no particular behavior for out-of-range counts. */
29771
29772 static unsigned HOST_WIDE_INT
29773 arm_shift_truncation_mask (machine_mode mode)
29774 {
29775 return mode == SImode ? 255 : 0;
29776 }
29777
29778
29779 /* Map internal gcc register numbers to DWARF2 register numbers. */
29780
29781 unsigned int
29782 arm_debugger_regno (unsigned int regno)
29783 {
29784 if (regno < 16)
29785 return regno;
29786
29787 if (IS_VFP_REGNUM (regno))
29788 {
29789 /* See comment in arm_dwarf_register_span. */
29790 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29791 return 64 + regno - FIRST_VFP_REGNUM;
29792 else
29793 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29794 }
29795
29796 if (IS_IWMMXT_GR_REGNUM (regno))
29797 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29798
29799 if (IS_IWMMXT_REGNUM (regno))
29800 return 112 + regno - FIRST_IWMMXT_REGNUM;
29801
29802 if (IS_PAC_REGNUM (regno))
29803 return DWARF_PAC_REGNUM;
29804
29805 return DWARF_FRAME_REGISTERS;
29806 }
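
/* Concrete illustration of the mapping above: core registers keep their
   own numbers (sp stays 13), the first single-precision VFP register maps
   to DWARF register 64, and D registers with no single-precision view
   fall into the 256-based range.  */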
29807
29808 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29809 GCC models them as 64 32-bit registers, so we need to describe this to
29810 the DWARF generation code. Other registers can use the default. */
29811 static rtx
29812 arm_dwarf_register_span (rtx rtl)
29813 {
29814 machine_mode mode;
29815 unsigned regno;
29816 rtx parts[16];
29817 int nregs;
29818 int i;
29819
29820 regno = REGNO (rtl);
29821 if (!IS_VFP_REGNUM (regno))
29822 return NULL_RTX;
29823
29824 /* XXX FIXME: The EABI defines two VFP register ranges:
29825 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29826 256-287: D0-D31
29827 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29828 corresponding D register. Until GDB supports this, we shall use the
29829 legacy encodings. We also use these encodings for D0-D15 for
29830 compatibility with older debuggers. */
29831 mode = GET_MODE (rtl);
29832 if (GET_MODE_SIZE (mode) < 8)
29833 return NULL_RTX;
29834
29835 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29836 {
29837 nregs = GET_MODE_SIZE (mode) / 4;
29838 for (i = 0; i < nregs; i += 2)
29839 if (TARGET_BIG_END)
29840 {
29841 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29842 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29843 }
29844 else
29845 {
29846 parts[i] = gen_rtx_REG (SImode, regno + i);
29847 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29848 }
29849 }
29850 else
29851 {
29852 nregs = GET_MODE_SIZE (mode) / 8;
29853 for (i = 0; i < nregs; i++)
29854 parts[i] = gen_rtx_REG (DImode, regno + i);
29855 }
29856
29857 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29858 }
29859
29860 #if ARM_UNWIND_INFO
29861 /* Emit unwind directives for a store-multiple instruction or stack pointer
29862 push during alignment.
29863 These should only ever be generated by the function prologue code, so
29864 expect them to have a particular form.
29865 The store-multiple instruction sometimes pushes pc as the last register,
29866 although it should not be tracked in the unwind information; for -Os it
29867 sometimes pushes some dummy registers before the first register that needs
29868 to be tracked in the unwind information; such dummy registers are there just
29869 to avoid separate stack adjustment, and will not be restored in the
29870 epilogue. */
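
/* For instance (hypothetical prologue): a "push {r4, r7, lr}" is annotated
   as ".save {r4, r7, lr}", a vpush of d8-d9 as ".vsave {d8, d9}", and any
   untracked pc or dummy slots show up only as ".pad" adjustments.  */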
29871
29872 static void
29873 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29874 {
29875 int i;
29876 HOST_WIDE_INT offset;
29877 HOST_WIDE_INT nregs;
29878 int reg_size;
29879 unsigned reg;
29880 unsigned lastreg;
29881 unsigned padfirst = 0, padlast = 0;
29882 rtx e;
29883
29884 e = XVECEXP (p, 0, 0);
29885 gcc_assert (GET_CODE (e) == SET);
29886
29887 /* First insn will adjust the stack pointer. */
29888 gcc_assert (GET_CODE (e) == SET
29889 && REG_P (SET_DEST (e))
29890 && REGNO (SET_DEST (e)) == SP_REGNUM
29891 && GET_CODE (SET_SRC (e)) == PLUS);
29892
29893 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29894 nregs = XVECLEN (p, 0) - 1;
29895 gcc_assert (nregs);
29896
29897 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29898 if (reg < 16 || IS_PAC_REGNUM (reg))
29899 {
29900 /* For -Os dummy registers can be pushed at the beginning to
29901 avoid separate stack pointer adjustment. */
29902 e = XVECEXP (p, 0, 1);
29903 e = XEXP (SET_DEST (e), 0);
29904 if (GET_CODE (e) == PLUS)
29905 padfirst = INTVAL (XEXP (e, 1));
29906 gcc_assert (padfirst == 0 || optimize_size);
29907 /* The function prologue may also push pc, but not annotate it as it is
29908 never restored. We turn this into a stack pointer adjustment. */
29909 e = XVECEXP (p, 0, nregs);
29910 e = XEXP (SET_DEST (e), 0);
29911 if (GET_CODE (e) == PLUS)
29912 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29913 else
29914 padlast = offset - 4;
29915 gcc_assert (padlast == 0 || padlast == 4);
29916 if (padlast == 4)
29917 fprintf (out_file, "\t.pad #4\n");
29918 reg_size = 4;
29919 fprintf (out_file, "\t.save {");
29920 }
29921 else if (IS_VFP_REGNUM (reg))
29922 {
29923 reg_size = 8;
29924 fprintf (out_file, "\t.vsave {");
29925 }
29926 else
29927 /* Unknown register type. */
29928 gcc_unreachable ();
29929
29930 /* If the stack increment doesn't match the size of the saved registers,
29931 something has gone horribly wrong. */
29932 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29933
29934 offset = padfirst;
29935 lastreg = 0;
29936 /* The remaining insns will describe the stores. */
29937 for (i = 1; i <= nregs; i++)
29938 {
29939 /* Expect (set (mem <addr>) (reg)).
29940 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29941 e = XVECEXP (p, 0, i);
29942 gcc_assert (GET_CODE (e) == SET
29943 && MEM_P (SET_DEST (e))
29944 && REG_P (SET_SRC (e)));
29945
29946 reg = REGNO (SET_SRC (e));
29947 gcc_assert (reg >= lastreg);
29948
29949 if (i != 1)
29950 fprintf (out_file, ", ");
29951 /* We can't use %r for vfp because we need to use the
29952 double precision register names. */
29953 if (IS_VFP_REGNUM (reg))
29954 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29955 else if (IS_PAC_REGNUM (reg))
29956 asm_fprintf (asm_out_file, "ra_auth_code");
29957 else
29958 asm_fprintf (out_file, "%r", reg);
29959
29960 if (flag_checking)
29961 {
29962 /* Check that the addresses are consecutive. */
29963 e = XEXP (SET_DEST (e), 0);
29964 if (GET_CODE (e) == PLUS)
29965 gcc_assert (REG_P (XEXP (e, 0))
29966 && REGNO (XEXP (e, 0)) == SP_REGNUM
29967 && CONST_INT_P (XEXP (e, 1))
29968 && offset == INTVAL (XEXP (e, 1)));
29969 else
29970 gcc_assert (i == 1
29971 && REG_P (e)
29972 && REGNO (e) == SP_REGNUM);
29973 offset += reg_size;
29974 }
29975 }
29976 fprintf (out_file, "}\n");
29977 if (padfirst)
29978 fprintf (out_file, "\t.pad #%d\n", padfirst);
29979 }
29980
29981 /* Emit unwind directives for a SET. */
29982
29983 static void
29984 arm_unwind_emit_set (FILE * out_file, rtx p)
29985 {
29986 rtx e0;
29987 rtx e1;
29988 unsigned reg;
29989
29990 e0 = XEXP (p, 0);
29991 e1 = XEXP (p, 1);
29992 switch (GET_CODE (e0))
29993 {
29994 case MEM:
29995 /* Pushing a single register. */
29996 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29997 || !REG_P (XEXP (XEXP (e0, 0), 0))
29998 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29999 abort ();
30000
30001 asm_fprintf (out_file, "\t.save ");
30002 if (IS_VFP_REGNUM (REGNO (e1)))
30003 asm_fprintf(out_file, "{d%d}\n",
30004 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
30005 else
30006 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
30007 break;
30008
30009 case REG:
30010 if (REGNO (e0) == SP_REGNUM)
30011 {
30012 /* A stack increment. */
30013 if (GET_CODE (e1) != PLUS
30014 || !REG_P (XEXP (e1, 0))
30015 || REGNO (XEXP (e1, 0)) != SP_REGNUM
30016 || !CONST_INT_P (XEXP (e1, 1)))
30017 abort ();
30018
30019 asm_fprintf (out_file, "\t.pad #%wd\n",
30020 -INTVAL (XEXP (e1, 1)));
30021 }
30022 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
30023 {
30024 HOST_WIDE_INT offset;
30025
30026 if (GET_CODE (e1) == PLUS)
30027 {
30028 if (!REG_P (XEXP (e1, 0))
30029 || !CONST_INT_P (XEXP (e1, 1)))
30030 abort ();
30031 reg = REGNO (XEXP (e1, 0));
30032 offset = INTVAL (XEXP (e1, 1));
30033 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
30034 HARD_FRAME_POINTER_REGNUM, reg,
30035 offset);
30036 }
30037 else if (REG_P (e1))
30038 {
30039 reg = REGNO (e1);
30040 asm_fprintf (out_file, "\t.setfp %r, %r\n",
30041 HARD_FRAME_POINTER_REGNUM, reg);
30042 }
30043 else
30044 abort ();
30045 }
30046 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
30047 {
30048 /* Move from sp to reg. */
30049 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
30050 }
30051 else if (GET_CODE (e1) == PLUS
30052 && REG_P (XEXP (e1, 0))
30053 && REGNO (XEXP (e1, 0)) == SP_REGNUM
30054 && CONST_INT_P (XEXP (e1, 1)))
30055 {
30056 /* Set reg to offset from sp. */
30057 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
30058 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
30059 }
30060 else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
30061 {
30062 if (cfun->machine->pacspval_needed)
30063 asm_fprintf (out_file, "\t.pacspval\n");
30064 }
30065 else
30066 abort ();
30067 break;
30068
30069 default:
30070 abort ();
30071 }
30072 }
30073
30074
30075 /* Emit unwind directives for the given insn. */
30076
30077 static void
30078 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
30079 {
30080 rtx note, pat;
30081 bool handled_one = false;
30082
30083 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30084 return;
30085
30086 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30087 && (TREE_NOTHROW (current_function_decl)
30088 || crtl->all_throwers_are_sibcalls))
30089 return;
30090
30091 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
30092 return;
30093
30094 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
30095 {
30096 switch (REG_NOTE_KIND (note))
30097 {
30098 case REG_FRAME_RELATED_EXPR:
30099 pat = XEXP (note, 0);
30100 goto found;
30101
30102 case REG_CFA_REGISTER:
30103 pat = XEXP (note, 0);
30104 if (pat == NULL)
30105 {
30106 pat = PATTERN (insn);
30107 if (GET_CODE (pat) == PARALLEL)
30108 pat = XVECEXP (pat, 0, 0);
30109 }
30110
30111 /* Only emitted for IS_STACKALIGN re-alignment. */
30112 {
30113 rtx dest, src;
30114 unsigned reg;
30115
30116 src = SET_SRC (pat);
30117 dest = SET_DEST (pat);
30118
30119 gcc_assert (src == stack_pointer_rtx
30120 || IS_PAC_REGNUM (REGNO (src)));
30121 reg = REGNO (dest);
30122
30123 if (IS_PAC_REGNUM (REGNO (src)))
30124 arm_unwind_emit_set (out_file, PATTERN (insn));
30125 else
30126 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30127 reg + 0x90, reg);
30128 }
30129 handled_one = true;
30130 break;
30131
30132 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
30133 to get correct dwarf information for shrink-wrap. We should not
30134 emit unwind information for it because these are used either for
30135 pretend arguments or notes to adjust sp and restore registers from
30136 stack. */
30137 case REG_CFA_DEF_CFA:
30138 case REG_CFA_ADJUST_CFA:
30139 case REG_CFA_RESTORE:
30140 return;
30141
30142 case REG_CFA_EXPRESSION:
30143 case REG_CFA_OFFSET:
30144 /* ??? Only handling here what we actually emit. */
30145 gcc_unreachable ();
30146
30147 default:
30148 break;
30149 }
30150 }
30151 if (handled_one)
30152 return;
30153 pat = PATTERN (insn);
30154 found:
30155
30156 switch (GET_CODE (pat))
30157 {
30158 case SET:
30159 arm_unwind_emit_set (out_file, pat);
30160 break;
30161
30162 case SEQUENCE:
30163 /* Store multiple. */
30164 arm_unwind_emit_sequence (out_file, pat);
30165 break;
30166
30167 default:
30168 abort();
30169 }
30170 }
30171
30172
30173 /* Output a reference from a function exception table to the type_info
30174 object X. The EABI specifies that the symbol should be relocated by
30175 an R_ARM_TARGET2 relocation. */
30176
30177 static bool
30178 arm_output_ttype (rtx x)
30179 {
30180 fputs ("\t.word\t", asm_out_file);
30181 output_addr_const (asm_out_file, x);
30182 /* Use special relocations for symbol references. */
30183 if (!CONST_INT_P (x))
30184 fputs ("(TARGET2)", asm_out_file);
30185 fputc ('\n', asm_out_file);
30186
30187 return TRUE;
30188 }
30189
30190 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30191
30192 static void
30193 arm_asm_emit_except_personality (rtx personality)
30194 {
30195 fputs ("\t.personality\t", asm_out_file);
30196 output_addr_const (asm_out_file, personality);
30197 fputc ('\n', asm_out_file);
30198 }
30199 #endif /* ARM_UNWIND_INFO */
30200
30201 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30202
30203 static void
30204 arm_asm_init_sections (void)
30205 {
30206 #if ARM_UNWIND_INFO
30207 exception_section = get_unnamed_section (0, output_section_asm_op,
30208 "\t.handlerdata");
30209 #endif /* ARM_UNWIND_INFO */
30210
30211 #ifdef OBJECT_FORMAT_ELF
30212 if (target_pure_code)
30213 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30214 #endif
30215 }
30216
30217 /* Output unwind directives for the start/end of a function. */
30218
30219 void
30220 arm_output_fn_unwind (FILE * f, bool prologue)
30221 {
30222 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30223 return;
30224
30225 if (prologue)
30226 fputs ("\t.fnstart\n", f);
30227 else
30228 {
30229 /* If this function will never be unwound, then mark it as such.
30230 The same condition is used in arm_unwind_emit to suppress
30231 the frame annotations. */
30232 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30233 && (TREE_NOTHROW (current_function_decl)
30234 || crtl->all_throwers_are_sibcalls))
30235 fputs("\t.cantunwind\n", f);
30236
30237 fputs ("\t.fnend\n", f);
30238 }
30239 }
30240
30241 static bool
30242 arm_emit_tls_decoration (FILE *fp, rtx x)
30243 {
30244 enum tls_reloc reloc;
30245 rtx val;
30246
30247 val = XVECEXP (x, 0, 0);
30248 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30249
30250 output_addr_const (fp, val);
30251
30252 switch (reloc)
30253 {
30254 case TLS_GD32:
30255 fputs ("(tlsgd)", fp);
30256 break;
30257 case TLS_GD32_FDPIC:
30258 fputs ("(tlsgd_fdpic)", fp);
30259 break;
30260 case TLS_LDM32:
30261 fputs ("(tlsldm)", fp);
30262 break;
30263 case TLS_LDM32_FDPIC:
30264 fputs ("(tlsldm_fdpic)", fp);
30265 break;
30266 case TLS_LDO32:
30267 fputs ("(tlsldo)", fp);
30268 break;
30269 case TLS_IE32:
30270 fputs ("(gottpoff)", fp);
30271 break;
30272 case TLS_IE32_FDPIC:
30273 fputs ("(gottpoff_fdpic)", fp);
30274 break;
30275 case TLS_LE32:
30276 fputs ("(tpoff)", fp);
30277 break;
30278 case TLS_DESCSEQ:
30279 fputs ("(tlsdesc)", fp);
30280 break;
30281 default:
30282 gcc_unreachable ();
30283 }
30284
30285 switch (reloc)
30286 {
30287 case TLS_GD32:
30288 case TLS_LDM32:
30289 case TLS_IE32:
30290 case TLS_DESCSEQ:
30291 fputs (" + (. - ", fp);
30292 output_addr_const (fp, XVECEXP (x, 0, 2));
30293 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
30294 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30295 output_addr_const (fp, XVECEXP (x, 0, 3));
30296 fputc (')', fp);
30297 break;
30298 default:
30299 break;
30300 }
30301
30302 return TRUE;
30303 }
30304
30305 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30306
30307 static void
30308 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30309 {
30310 gcc_assert (size == 4);
30311 fputs ("\t.word\t", file);
30312 output_addr_const (file, x);
30313 fputs ("(tlsldo)", file);
30314 }
30315
30316 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30317
30318 static bool
30319 arm_output_addr_const_extra (FILE *fp, rtx x)
30320 {
30321 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30322 return arm_emit_tls_decoration (fp, x);
30323 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30324 {
30325 char label[256];
30326 int labelno = INTVAL (XVECEXP (x, 0, 0));
30327
30328 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30329 assemble_name_raw (fp, label);
30330
30331 return TRUE;
30332 }
30333 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30334 {
30335 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30336 if (GOT_PCREL)
30337 fputs ("+.", fp);
30338 fputs ("-(", fp);
30339 output_addr_const (fp, XVECEXP (x, 0, 0));
30340 fputc (')', fp);
30341 return TRUE;
30342 }
30343 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30344 {
30345 output_addr_const (fp, XVECEXP (x, 0, 0));
30346 if (GOT_PCREL)
30347 fputs ("+.", fp);
30348 fputs ("-(", fp);
30349 output_addr_const (fp, XVECEXP (x, 0, 1));
30350 fputc (')', fp);
30351 return TRUE;
30352 }
30353 else if (GET_CODE (x) == CONST_VECTOR)
30354 return arm_emit_vector_const (fp, x);
30355
30356 return FALSE;
30357 }
30358
30359 /* Output assembly for a shift instruction.
30360 SET_FLAGS determines how the instruction modifies the condition codes.
30361 0 - Do not set condition codes.
30362 1 - Set condition codes.
30363 2 - Use smallest instruction. */
30364 const char *
30365 arm_output_shift (rtx *operands, int set_flags)
30366 {
30367 char pattern[100];
30368 static const char flag_chars[3] = {'?', '.', '!'};
30369 const char *shift;
30370 HOST_WIDE_INT val;
30371 char c;
30372
30373 c = flag_chars[set_flags];
30374 shift = shift_op (operands[3], &val);
30375 if (shift)
30376 {
30377 if (val != -1)
30378 operands[2] = GEN_INT (val);
30379 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30380 }
30381 else
30382 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30383
30384 output_asm_insn (pattern, operands);
30385 return "";
30386 }
30387
30388 /* Output assembly for a WMMX immediate shift instruction. */
30389 const char *
30390 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30391 {
30392 int shift = INTVAL (operands[2]);
30393 char templ[50];
30394 machine_mode opmode = GET_MODE (operands[0]);
30395
30396 gcc_assert (shift >= 0);
30397
30398 /* Handle shift values in the register versions that exceed the element
30399 width: > 63 (D qualifier), > 31 (W qualifier) or > 15 (H qualifier). */
30400 if (((opmode == V4HImode) && (shift > 15))
30401 || ((opmode == V2SImode) && (shift > 31))
30402 || ((opmode == DImode) && (shift > 63)))
30403 {
30404 if (wror_or_wsra)
30405 {
30406 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30407 output_asm_insn (templ, operands);
30408 if (opmode == DImode)
30409 {
30410 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30411 output_asm_insn (templ, operands);
30412 }
30413 }
30414 else
30415 {
30416 /* The destination register will contain all zeros. */
30417 sprintf (templ, "wzero\t%%0");
30418 output_asm_insn (templ, operands);
30419 }
30420 return "";
30421 }
30422
30423 if ((opmode == DImode) && (shift > 32))
30424 {
30425 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30426 output_asm_insn (templ, operands);
30427 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30428 output_asm_insn (templ, operands);
30429 }
30430 else
30431 {
30432 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30433 output_asm_insn (templ, operands);
30434 }
30435 return "";
30436 }
30437
30438 /* Output assembly for a WMMX tinsr instruction. */
30439 const char *
30440 arm_output_iwmmxt_tinsr (rtx *operands)
30441 {
30442 int mask = INTVAL (operands[3]);
30443 int i;
30444 char templ[50];
30445 int units = mode_nunits[GET_MODE (operands[0])];
30446 gcc_assert ((mask & (mask - 1)) == 0);
30447 for (i = 0; i < units; ++i)
30448 {
30449 if ((mask & 0x01) == 1)
30450 {
30451 break;
30452 }
30453 mask >>= 1;
30454 }
30455 gcc_assert (i < units);
30456 {
30457 switch (GET_MODE (operands[0]))
30458 {
30459 case E_V8QImode:
30460 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30461 break;
30462 case E_V4HImode:
30463 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30464 break;
30465 case E_V2SImode:
30466 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30467 break;
30468 default:
30469 gcc_unreachable ();
30470 break;
30471 }
30472 output_asm_insn (templ, operands);
30473 }
30474 return "";
30475 }
30476
30477 /* Output an ARM casesi dispatch sequence. Used by the arm_casesi_internal insn.
30478 Responsible for the handling of switch statements in ARM state. */
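/* Illustrative sketch only: for a QImode unsigned dispatch table the code
   below emits roughly
	cmp	%0, %1
	bhi	%l3
	ldrb	%4, [%5, %0]
	add	pc, pc, %4, lsl #2
   followed by the internal Lrtx label and a nop before the jump table.  */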
30479 const char *
30480 arm_output_casesi (rtx *operands)
30481 {
30482 char label[100];
30483 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30484 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30485 output_asm_insn ("cmp\t%0, %1", operands);
30486 output_asm_insn ("bhi\t%l3", operands);
30487 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
30488 switch (GET_MODE (diff_vec))
30489 {
30490 case E_QImode:
30491 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30492 output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
30493 else
30494 output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
30495 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30496 break;
30497 case E_HImode:
30498 if (REGNO (operands[4]) != REGNO (operands[5]))
30499 {
30500 output_asm_insn ("add\t%4, %0, %0", operands);
30501 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30502 output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
30503 else
30504 output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
30505 }
30506 else
30507 {
30508 output_asm_insn ("add\t%4, %5, %0", operands);
30509 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30510 output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
30511 else
30512 output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
30513 }
30514 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30515 break;
30516 case E_SImode:
30517 if (flag_pic)
30518 {
30519 output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
30520 output_asm_insn ("add\t%|pc, %|pc, %4", operands);
30521 }
30522 else
30523 output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
30524 break;
30525 default:
30526 gcc_unreachable ();
30527 }
30528 assemble_label (asm_out_file, label);
30529 output_asm_insn ("nop", operands);
30530 return "";
30531 }
30532
30533 /* Output a Thumb-1 casesi dispatch sequence. */
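/* Note (assumption based on libgcc): the %___gnu_thumb1_case_* helpers
   referenced below are provided by libgcc (lib1funcs.S); roughly, each helper
   indexes the dispatch table that follows the call site and adjusts the
   return address to the selected case.  */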
30534 const char *
30535 thumb1_output_casesi (rtx *operands)
30536 {
30537 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30538
30539 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30540
30541 switch (GET_MODE (diff_vec))
30542 {
30543 case E_QImode:
30544 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30545 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30546 case E_HImode:
30547 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30548 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30549 case E_SImode:
30550 return "bl\t%___gnu_thumb1_case_si";
30551 default:
30552 gcc_unreachable ();
30553 }
30554 }
30555
30556 /* Output a Thumb-2 casesi instruction. */
30557 const char *
30558 thumb2_output_casesi (rtx *operands)
30559 {
30560 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30561
30562 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30563
30564 output_asm_insn ("cmp\t%0, %1", operands);
30565 output_asm_insn ("bhi\t%l3", operands);
30566 switch (GET_MODE (diff_vec))
30567 {
30568 case E_QImode:
30569 return "tbb\t[%|pc, %0]";
30570 case E_HImode:
30571 return "tbh\t[%|pc, %0, lsl #1]";
30572 case E_SImode:
30573 if (flag_pic)
30574 {
30575 output_asm_insn ("adr\t%4, %l2", operands);
30576 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30577 output_asm_insn ("add\t%4, %4, %5", operands);
30578 return "bx\t%4";
30579 }
30580 else
30581 {
30582 output_asm_insn ("adr\t%4, %l2", operands);
30583 return "ldr\t%|pc, [%4, %0, lsl #2]";
30584 }
30585 default:
30586 gcc_unreachable ();
30587 }
30588 }
30589
30590 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30591 per-core tuning structs. */
30592 static int
30593 arm_issue_rate (void)
30594 {
30595 return current_tune->issue_rate;
30596 }
30597
30598 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30599 static int
30600 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30601 {
30602 if (DEBUG_INSN_P (insn))
30603 return more;
30604
30605 rtx_code code = GET_CODE (PATTERN (insn));
30606 if (code == USE || code == CLOBBER)
30607 return more;
30608
30609 if (get_attr_type (insn) == TYPE_NO_INSN)
30610 return more;
30611
30612 return more - 1;
30613 }
30614
30615 /* Return how many instructions the scheduler should look ahead to choose
30616 the best one. */
30617 static int
30618 arm_first_cycle_multipass_dfa_lookahead (void)
30619 {
30620 int issue_rate = arm_issue_rate ();
30621
30622 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30623 }
30624
30625 /* Enable modeling of L2 auto-prefetcher. */
30626 static int
30627 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30628 {
30629 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30630 }
30631
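/* Implement TARGET_MANGLE_TYPE. Return the mangled name for types that need
   ARM-specific mangling (the AAPCS va_list, half-precision floating-point
   types and Neon builtin types), or NULL to use the default mangling.  */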
30632 const char *
30633 arm_mangle_type (const_tree type)
30634 {
30635 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30636 has to be mangled as if it is in the "std" namespace. */
30637 if (TARGET_AAPCS_BASED
30638 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30639 return "St9__va_list";
30640
30641 /* Half-precision floating point types. */
30642 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
30643 {
30644 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30645 return NULL;
30646 if (TYPE_MODE (type) == BFmode)
30647 return "u6__bf16";
30648 else
30649 return "Dh";
30650 }
30651
30652 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30653 builtin type. */
30654 if (TYPE_NAME (type) != NULL)
30655 return arm_mangle_builtin_type (type);
30656
30657 /* Use the default mangling. */
30658 return NULL;
30659 }
30660
30661 /* Order of allocation of core registers for Thumb: this allocation is
30662 written over the corresponding initial entries of the array
30663 initialized with REG_ALLOC_ORDER. We allocate all low registers
30664 first. Saving and restoring a low register is usually cheaper than
30665 using a call-clobbered high register. */
30666
30667 static const int thumb_core_reg_alloc_order[] =
30668 {
30669 3, 2, 1, 0, 4, 5, 6, 7,
30670 12, 14, 8, 9, 10, 11
30671 };
30672
30673 /* Adjust register allocation order when compiling for Thumb. */
30674
30675 void
30676 arm_order_regs_for_local_alloc (void)
30677 {
30678 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30679 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30680 if (TARGET_THUMB)
30681 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30682 sizeof (thumb_core_reg_alloc_order));
30683 }
30684
30685 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30686
30687 bool
30688 arm_frame_pointer_required (void)
30689 {
30690 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30691 return true;
30692
30693 /* If the function receives nonlocal gotos, it needs to save the frame
30694 pointer in the nonlocal_goto_save_area object. */
30695 if (cfun->has_nonlocal_label)
30696 return true;
30697
30698 /* The frame pointer is required for non-leaf APCS frames. */
30699 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30700 return true;
30701
30702 /* If we are probing the stack in the prologue, we will have a faulting
30703 instruction prior to the stack adjustment and this requires a frame
30704 pointer if we want to catch the exception using the EABI unwinder. */
30705 if (!IS_INTERRUPT (arm_current_func_type ())
30706 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30707 || flag_stack_clash_protection)
30708 && arm_except_unwind_info (&global_options) == UI_TARGET
30709 && cfun->can_throw_non_call_exceptions)
30710 {
30711 HOST_WIDE_INT size = get_frame_size ();
30712
30713 /* That's irrelevant if there is no stack adjustment. */
30714 if (size <= 0)
30715 return false;
30716
30717 /* That's relevant only if there is a stack probe. */
30718 if (crtl->is_leaf && !cfun->calls_alloca)
30719 {
30720 /* We don't have the final size of the frame so adjust. */
30721 size += 32 * UNITS_PER_WORD;
30722 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30723 return true;
30724 }
30725 else
30726 return true;
30727 }
30728
30729 return false;
30730 }
30731
30732 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30733 All modes except THUMB1 have conditional execution.
30734 If we have conditional arithmetic, return false before reload to
30735 enable some ifcvt transformations. */
30736 static bool
30737 arm_have_conditional_execution (void)
30738 {
30739 bool has_cond_exec, enable_ifcvt_trans;
30740
30741 /* Only THUMB1 cannot support conditional execution. */
30742 has_cond_exec = !TARGET_THUMB1;
30743
30744 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30745 before reload. */
30746 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30747
30748 return has_cond_exec && !enable_ifcvt_trans;
30749 }
30750
30751 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30752 static HOST_WIDE_INT
30753 arm_vector_alignment (const_tree type)
30754 {
30755 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30756
30757 if (TARGET_AAPCS_BASED)
30758 align = MIN (align, 64);
30759
30760 return align;
30761 }
30762
30763 static unsigned int
30764 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30765 {
30766 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30767 {
30768 modes->safe_push (V16QImode);
30769 modes->safe_push (V8QImode);
30770 }
30771 return 0;
30772 }
30773
30774 static bool
30775 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30776 {
30777 /* Vectors which aren't in packed structures will not be less aligned than
30778 the natural alignment of their element type, so this is safe. */
30779 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30780 return !is_packed;
30781
30782 return default_builtin_vector_alignment_reachable (type, is_packed);
30783 }
30784
30785 static bool
30786 arm_builtin_support_vector_misalignment (machine_mode mode,
30787 const_tree type, int misalignment,
30788 bool is_packed)
30789 {
30790 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30791 {
30792 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30793
30794 if (is_packed)
30795 return align == 1;
30796
30797 /* If the misalignment is unknown, we should be able to handle the access
30798 so long as it is not to a member of a packed data structure. */
30799 if (misalignment == -1)
30800 return true;
30801
30802 /* Return true if the misalignment is a multiple of the natural alignment
30803 of the vector's element type. This is probably always going to be
30804 true in practice, since we've already established that this isn't a
30805 packed access. */
30806 return ((misalignment % align) == 0);
30807 }
30808
30809 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30810 is_packed);
30811 }
30812
30813 static void
30814 arm_conditional_register_usage (void)
30815 {
30816 int regno;
30817
30818 if (TARGET_THUMB1 && optimize_size)
30819 {
30820 /* When optimizing for size on Thumb-1, it's better not
30821 to use the HI regs, because of the overhead of
30822 stacking them. */
30823 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30824 fixed_regs[regno] = call_used_regs[regno] = 1;
30825 }
30826
30827 /* The link register can be clobbered by any branch insn,
30828 but we have no way to track that at present, so mark
30829 it as unavailable. */
30830 if (TARGET_THUMB1)
30831 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30832
30833 if (TARGET_32BIT && TARGET_VFP_BASE)
30834 {
30835 /* VFPv3 registers are disabled when earlier VFP
30836 versions are selected due to the definition of
30837 LAST_VFP_REGNUM. */
30838 for (regno = FIRST_VFP_REGNUM;
30839 regno <= LAST_VFP_REGNUM; ++ regno)
30840 {
30841 fixed_regs[regno] = 0;
30842 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30843 || regno >= FIRST_VFP_REGNUM + 32;
30844 }
30845 if (TARGET_HAVE_MVE)
30846 fixed_regs[VPR_REGNUM] = 0;
30847 }
30848
30849 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30850 {
30851 regno = FIRST_IWMMXT_GR_REGNUM;
30852 /* The 2002/10/09 revision of the XScale ABI has wCG0
30853 and wCG1 as call-preserved registers. The 2002/11/21
30854 revision changed this so that all wCG registers are
30855 scratch registers. */
30856 for (regno = FIRST_IWMMXT_GR_REGNUM;
30857 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30858 fixed_regs[regno] = 0;
30859 /* The XScale ABI has wR0 - wR9 as scratch registers,
30860 the rest as call-preserved registers. */
30861 for (regno = FIRST_IWMMXT_REGNUM;
30862 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30863 {
30864 fixed_regs[regno] = 0;
30865 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30866 }
30867 }
30868
30869 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30870 {
30871 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30872 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30873 }
30874 else if (TARGET_APCS_STACK)
30875 {
30876 fixed_regs[10] = 1;
30877 call_used_regs[10] = 1;
30878 }
30879 /* -mcaller-super-interworking reserves r11 for calls to
30880 _interwork_r11_call_via_rN(). Making the register global
30881 is an easy way of ensuring that it remains valid for all
30882 calls. */
30883 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30884 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30885 {
30886 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30887 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30888 if (TARGET_CALLER_INTERWORKING)
30889 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30890 }
30891
30892 /* The Q and GE bits are only accessed via special ACLE patterns. */
30893 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30894 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30895
30896 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30897 }
30898
30899 static reg_class_t
30900 arm_preferred_rename_class (reg_class_t rclass)
30901 {
30902 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30903 using GENERAL_REGS, so during the register rename pass we prefer
30904 LO_REGS in order to reduce code size. */
30905 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30906 return LO_REGS;
30907 else
30908 return NO_REGS;
30909 }
30910
30911 /* Compute the attribute "length" of insn "*push_multi".
30912 So this function MUST be kept in sync with that insn pattern. */
30913 int
30914 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
30915 {
30916 int i, regno, hi_reg;
30917 int num_saves = XVECLEN (parallel_op, 0);
30918
30919 /* ARM mode. */
30920 if (TARGET_ARM)
30921 return 4;
30922 /* Thumb1 mode. */
30923 if (TARGET_THUMB1)
30924 return 2;
30925
30926 /* Thumb2 mode. */
30927 regno = REGNO (first_op);
30928 /* For PUSH/STM in Thumb-2 mode, a 16-bit encoding can be used if the
30929 register list fits in 8 bits. Normally this means every register in the
30930 list must be in LO_REGS, that is R0-R7. If any HI_REGS register is used,
30931 a 32-bit encoding is required. The one exception is PUSH, where LR (a
30932 HI_REGS register) may still be used with the 16-bit encoding. */
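/* For example (illustrative only), "push {r0-r7, lr}" still has a 16-bit
   encoding and so contributes 2 to the length attribute, while
   "push {r0, r8}" needs the 32-bit encoding and contributes 4.  */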
30933 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30934 for (i = 1; i < num_saves && !hi_reg; i++)
30935 {
30936 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30937 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30938 }
30939
30940 if (!hi_reg)
30941 return 2;
30942 return 4;
30943 }
30944
30945 /* Compute the attribute "length" of insn. Currently, this function is used
30946 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30947 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30948 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P
30949 is true if OPERANDS contains an insn that explicitly updates the base register. */
30950
30951 int
30952 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30953 {
30954 /* ARM mode. */
30955 if (TARGET_ARM)
30956 return 4;
30957 /* Thumb1 mode. */
30958 if (TARGET_THUMB1)
30959 return 2;
30960
30961 rtx parallel_op = operands[0];
30962 /* Start from the last element of the PARALLEL. */
30963 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30964 /* Start with the base register. */
30965 unsigned regno = REGNO (operands[1]);
30966 /* Skip return and write back pattern.
30967 We only need register pop pattern for later analysis. */
30968 unsigned first_indx = 0;
30969 first_indx += return_pc ? 1 : 0;
30970 first_indx += write_back_p ? 1 : 0;
30971
30972 /* A pop operation can be done through LDM or POP. If the base register is
30973 SP and write-back is used, then LDM is an alias of POP. */
30974 bool pop_p = (regno == SP_REGNUM && write_back_p);
30975 bool ldm_p = !pop_p;
30976
30977 /* Check base register for LDM. */
30978 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30979 return 4;
30980
30981 /* Check each register in the list. */
30982 for (; indx >= first_indx; indx--)
30983 {
30984 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30985 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30986 comment in arm_attr_length_push_multi. */
30987 if (REGNO_REG_CLASS (regno) == HI_REGS
30988 && (regno != PC_REGNUM || ldm_p))
30989 return 4;
30990 }
30991
30992 return 2;
30993 }
30994
30995 /* Compute the number of instructions emitted by output_move_double. */
30996 int
30997 arm_count_output_move_double_insns (rtx *operands)
30998 {
30999 int count;
31000 rtx ops[2];
31001 /* output_move_double may modify the operands array, so call it
31002 here on a copy of the array. */
31003 ops[0] = operands[0];
31004 ops[1] = operands[1];
31005 output_move_double (ops, false, &count);
31006 return count;
31007 }
31008
31009 /* Same as above, but operands are a register/memory pair in SImode.
31010 Assumes OPERANDS has the base register in position 0 and the memory
31011 operand in position 2 (the order provided by the arm_{ldrd,strd} patterns). */
31012 int
31013 arm_count_ldrdstrd_insns (rtx *operands, bool load)
31014 {
31015 int count;
31016 rtx ops[2];
31017 int regnum, memnum;
31018 if (load)
31019 regnum = 0, memnum = 1;
31020 else
31021 regnum = 1, memnum = 0;
31022 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
31023 ops[memnum] = adjust_address (operands[2], DImode, 0);
31024 output_move_double (ops, false, &count);
31025 return count;
31026 }
31027
31028
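/* If OPERAND is a CONST_DOUBLE equal to 1 / 2^N for some N in the range
   [0, 31], return N; otherwise return 0.  Used to recognize the fixed-point
   "fraction bits" operand of the VFPv3 vcvt conversions.  */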
31029 int
31030 vfp3_const_double_for_fract_bits (rtx operand)
31031 {
31032 REAL_VALUE_TYPE r0;
31033
31034 if (!CONST_DOUBLE_P (operand))
31035 return 0;
31036
31037 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
31038 if (exact_real_inverse (DFmode, &r0)
31039 && !REAL_VALUE_NEGATIVE (r0))
31040 {
31041 if (exact_real_truncate (DFmode, &r0))
31042 {
31043 HOST_WIDE_INT value = real_to_integer (&r0);
31044 value = value & 0xffffffff;
31045 if ((value != 0) && ( (value & (value - 1)) == 0))
31046 {
31047 int ret = exact_log2 (value);
31048 gcc_assert (IN_RANGE (ret, 0, 31));
31049 return ret;
31050 }
31051 }
31052 }
31053 return 0;
31054 }
31055
31056 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
31057 log2 is in [1, 32], return that log2. Otherwise return -1.
31058 This is used in the patterns for vcvt.s32.f32 floating-point to
31059 fixed-point conversions. */
31060
31061 int
31062 vfp3_const_double_for_bits (rtx x)
31063 {
31064 const REAL_VALUE_TYPE *r;
31065
31066 if (!CONST_DOUBLE_P (x))
31067 return -1;
31068
31069 r = CONST_DOUBLE_REAL_VALUE (x);
31070
31071 if (REAL_VALUE_NEGATIVE (*r)
31072 || REAL_VALUE_ISNAN (*r)
31073 || REAL_VALUE_ISINF (*r)
31074 || !real_isinteger (r, SFmode))
31075 return -1;
31076
31077 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
31078
31079 /* The exact_log2 above will have returned -1 if this is
31080 not an exact log2. */
31081 if (!IN_RANGE (hwint, 1, 32))
31082 return -1;
31083
31084 return hwint;
31085 }
31086
31087 \f
31088 /* Emit a memory barrier around an atomic sequence according to MODEL. */
31089
31090 static void
31091 arm_pre_atomic_barrier (enum memmodel model)
31092 {
31093 if (need_atomic_barrier_p (model, true))
31094 emit_insn (gen_memory_barrier ());
31095 }
31096
31097 static void
31098 arm_post_atomic_barrier (enum memmodel model)
31099 {
31100 if (need_atomic_barrier_p (model, false))
31101 emit_insn (gen_memory_barrier ());
31102 }
31103
31104 /* Emit the load-exclusive and store-exclusive instructions.
31105 Use acquire and release versions if necessary. */
31106
31107 static void
31108 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
31109 {
31110 rtx (*gen) (rtx, rtx);
31111
31112 if (acq)
31113 {
31114 switch (mode)
31115 {
31116 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
31117 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
31118 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
31119 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
31120 default:
31121 gcc_unreachable ();
31122 }
31123 }
31124 else
31125 {
31126 switch (mode)
31127 {
31128 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
31129 case E_HImode: gen = gen_arm_load_exclusivehi; break;
31130 case E_SImode: gen = gen_arm_load_exclusivesi; break;
31131 case E_DImode: gen = gen_arm_load_exclusivedi; break;
31132 default:
31133 gcc_unreachable ();
31134 }
31135 }
31136
31137 emit_insn (gen (rval, mem));
31138 }
31139
31140 static void
31141 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
31142 rtx mem, bool rel)
31143 {
31144 rtx (*gen) (rtx, rtx, rtx);
31145
31146 if (rel)
31147 {
31148 switch (mode)
31149 {
31150 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
31151 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
31152 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
31153 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
31154 default:
31155 gcc_unreachable ();
31156 }
31157 }
31158 else
31159 {
31160 switch (mode)
31161 {
31162 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
31163 case E_HImode: gen = gen_arm_store_exclusivehi; break;
31164 case E_SImode: gen = gen_arm_store_exclusivesi; break;
31165 case E_DImode: gen = gen_arm_store_exclusivedi; break;
31166 default:
31167 gcc_unreachable ();
31168 }
31169 }
31170
31171 emit_insn (gen (bval, rval, mem));
31172 }
31173
31174 /* Mark the previous jump instruction as unlikely. */
31175
31176 static void
31177 emit_unlikely_jump (rtx insn)
31178 {
31179 rtx_insn *jump = emit_jump_insn (insn);
31180 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31181 }
31182
31183 /* Expand a compare and swap pattern. */
31184
31185 void
31186 arm_expand_compare_and_swap (rtx operands[])
31187 {
31188 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31189 machine_mode mode, cmp_mode;
31190
31191 bval = operands[0];
31192 rval = operands[1];
31193 mem = operands[2];
31194 oldval = operands[3];
31195 newval = operands[4];
31196 is_weak = operands[5];
31197 mod_s = operands[6];
31198 mod_f = operands[7];
31199 mode = GET_MODE (mem);
31200
31201 /* Normally the succ memory model must be stronger than fail, but in the
31202 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31203 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31204
31205 if (TARGET_HAVE_LDACQ
31206 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31207 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31208 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31209
31210 switch (mode)
31211 {
31212 case E_QImode:
31213 case E_HImode:
31214 /* For narrow modes, we're going to perform the comparison in SImode,
31215 so do the zero-extension now. */
31216 rval = gen_reg_rtx (SImode);
31217 oldval = convert_modes (SImode, mode, oldval, true);
31218 /* FALLTHRU */
31219
31220 case E_SImode:
31221 /* Force the value into a register if needed. We waited until after
31222 the zero-extension above to do this properly. */
31223 if (!arm_add_operand (oldval, SImode))
31224 oldval = force_reg (SImode, oldval);
31225 break;
31226
31227 case E_DImode:
31228 if (!cmpdi_operand (oldval, mode))
31229 oldval = force_reg (mode, oldval);
31230 break;
31231
31232 default:
31233 gcc_unreachable ();
31234 }
31235
31236 if (TARGET_THUMB1)
31237 cmp_mode = E_SImode;
31238 else
31239 cmp_mode = CC_Zmode;
31240
31241 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31242 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31243 oldval, newval, is_weak, mod_s, mod_f));
31244
31245 if (mode == QImode || mode == HImode)
31246 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31247
31248 /* In all cases, we arrange for success to be signaled by Z set.
31249 This arrangement allows for the boolean result to be used directly
31250 in a subsequent branch, post optimization. For Thumb-1 targets, the
31251 boolean negation of the result is also stored in bval because the Thumb-1
31252 backend lacks dependency tracking for the CC flag, as flag-setting is not
31253 represented at the RTL level. */
31254 if (TARGET_THUMB1)
31255 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31256 else
31257 {
31258 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31259 emit_insn (gen_rtx_SET (bval, x));
31260 }
31261 }
31262
31263 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31264 another memory store between the load-exclusive and store-exclusive can
31265 reset the monitor from Exclusive to Open state. This means we must wait
31266 until after reload to split the pattern, lest we get a register spill in
31267 the middle of the atomic sequence. Success of the compare and swap is
31268 indicated by the Z flag being set for 32-bit targets and by neg_bval being
31269 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
31270 by the atomic_compare_and_swapmode standard pattern in operand 0). */
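/* Illustrative sketch only: for a strong SImode compare-and-swap the split
   form expands to roughly
     1:	ldrex	rval, [mem]
	cmp	rval, oldval
	bne	2f
	strex	neg_bval, newval, [mem]
	cmp	neg_bval, #0
	bne	1b
     2:
   with ldaex/stlex and/or barriers substituted as the memory model requires.  */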
31271
31272 void
31273 arm_split_compare_and_swap (rtx operands[])
31274 {
31275 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31276 machine_mode mode;
31277 enum memmodel mod_s, mod_f;
31278 bool is_weak;
31279 rtx_code_label *label1, *label2;
31280 rtx x, cond;
31281
31282 rval = operands[1];
31283 mem = operands[2];
31284 oldval = operands[3];
31285 newval = operands[4];
31286 is_weak = (operands[5] != const0_rtx);
31287 mod_s_rtx = operands[6];
31288 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31289 mod_f = memmodel_from_int (INTVAL (operands[7]));
31290 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31291 mode = GET_MODE (mem);
31292
31293 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31294
31295 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31296 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31297
31298 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31299 a full barrier is emitted after the store-release. */
31300 if (is_armv8_sync)
31301 use_acquire = false;
31302
31303 /* Checks whether a barrier is needed and emits one accordingly. */
31304 if (!(use_acquire || use_release))
31305 arm_pre_atomic_barrier (mod_s);
31306
31307 label1 = NULL;
31308 if (!is_weak)
31309 {
31310 label1 = gen_label_rtx ();
31311 emit_label (label1);
31312 }
31313 label2 = gen_label_rtx ();
31314
31315 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31316
31317 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1) if
31318 oldval != rval, as required to communicate with arm_expand_compare_and_swap. */
31319 if (TARGET_32BIT)
31320 {
31321 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31322 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31323 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31324 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31325 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31326 }
31327 else
31328 {
31329 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31330 if (thumb1_cmpneg_operand (oldval, SImode))
31331 {
31332 rtx src = rval;
31333 if (!satisfies_constraint_L (oldval))
31334 {
31335 gcc_assert (satisfies_constraint_J (oldval));
31336
31337 /* For such immediates, ADDS needs the source and destination regs
31338 to be the same.
31339
31340 Normally this would be handled by RA, but this is all happening
31341 after RA. */
31342 emit_move_insn (neg_bval, rval);
31343 src = neg_bval;
31344 }
31345
31346 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31347 label2, cond));
31348 }
31349 else
31350 {
31351 emit_move_insn (neg_bval, const1_rtx);
31352 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31353 }
31354 }
31355
31356 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31357
31358 /* Weak or strong, we want EQ to be true for success, so that we
31359 match the flags that we got from the compare above. */
31360 if (TARGET_32BIT)
31361 {
31362 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31363 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31364 emit_insn (gen_rtx_SET (cond, x));
31365 }
31366
31367 if (!is_weak)
31368 {
31369 /* Z is set to boolean value of !neg_bval, as required to communicate
31370 with arm_expand_compare_and_swap. */
31371 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31372 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31373 }
31374
31375 if (!is_mm_relaxed (mod_f))
31376 emit_label (label2);
31377
31378 /* Checks whether a barrier is needed and emits one accordingly. */
31379 if (is_armv8_sync
31380 || !(use_acquire || use_release))
31381 arm_post_atomic_barrier (mod_s);
31382
31383 if (is_mm_relaxed (mod_f))
31384 emit_label (label2);
31385 }
31386
31387 /* Split an atomic operation pattern. Operation is given by CODE and is one
31388 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31389 operation). Operation is performed on the content at MEM and on VALUE
31390 following the memory model MODEL_RTX. The content at MEM before and after
31391 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31392 success of the operation is returned in COND. Using a scratch register or
31393 an operand register for these determines what result is returned for that
31394 pattern. */
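/* Illustrative sketch only: for an SImode atomic PLUS this expands to roughly
     1:	ldrex	old_out, [mem]
	add	new_out, old_out, value
	strex	cond, new_out, [mem]
	cmp	cond, #0
	bne	1b
   with acquire/release forms and barriers inserted as MODEL_RTX requires.  */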
31395
31396 void
31397 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31398 rtx value, rtx model_rtx, rtx cond)
31399 {
31400 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31401 machine_mode mode = GET_MODE (mem);
31402 machine_mode wmode = (mode == DImode ? DImode : SImode);
31403 rtx_code_label *label;
31404 bool all_low_regs, bind_old_new;
31405 rtx x;
31406
31407 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31408
31409 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31410 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31411
31412 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31413 a full barrier is emitted after the store-release. */
31414 if (is_armv8_sync)
31415 use_acquire = false;
31416
31417 /* Checks whether a barrier is needed and emits one accordingly. */
31418 if (!(use_acquire || use_release))
31419 arm_pre_atomic_barrier (model);
31420
31421 label = gen_label_rtx ();
31422 emit_label (label);
31423
31424 if (new_out)
31425 new_out = gen_lowpart (wmode, new_out);
31426 if (old_out)
31427 old_out = gen_lowpart (wmode, old_out);
31428 else
31429 old_out = new_out;
31430 value = simplify_gen_subreg (wmode, value, mode, 0);
31431
31432 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31433
31434 /* Does the operation require destination and first operand to use the same
31435 register? This is decided by register constraints of relevant insn
31436 patterns in thumb1.md. */
31437 gcc_assert (!new_out || REG_P (new_out));
31438 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31439 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31440 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31441 bind_old_new =
31442 (TARGET_THUMB1
31443 && code != SET
31444 && code != MINUS
31445 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31446
31447 /* We want to return the old value while putting the result of the operation
31448 in the same register as the old value so copy the old value over to the
31449 destination register and use that register for the operation. */
31450 if (old_out && bind_old_new)
31451 {
31452 emit_move_insn (new_out, old_out);
31453 old_out = new_out;
31454 }
31455
31456 switch (code)
31457 {
31458 case SET:
31459 new_out = value;
31460 break;
31461
31462 case NOT:
31463 x = gen_rtx_AND (wmode, old_out, value);
31464 emit_insn (gen_rtx_SET (new_out, x));
31465 x = gen_rtx_NOT (wmode, new_out);
31466 emit_insn (gen_rtx_SET (new_out, x));
31467 break;
31468
31469 case MINUS:
31470 if (CONST_INT_P (value))
31471 {
31472 value = gen_int_mode (-INTVAL (value), wmode);
31473 code = PLUS;
31474 }
31475 /* FALLTHRU */
31476
31477 case PLUS:
31478 if (mode == DImode)
31479 {
31480 /* DImode plus/minus need to clobber flags. */
31481 /* The adddi3 and subdi3 patterns are incorrectly written so that
31482 they require matching operands, even when we could easily support
31483 three operands. Thankfully, this can be fixed up post-splitting,
31484 as the individual add+adc patterns do accept three operands and
31485 post-reload cprop can make these moves go away. */
31486 emit_move_insn (new_out, old_out);
31487 if (code == PLUS)
31488 x = gen_adddi3 (new_out, new_out, value);
31489 else
31490 x = gen_subdi3 (new_out, new_out, value);
31491 emit_insn (x);
31492 break;
31493 }
31494 /* FALLTHRU */
31495
31496 default:
31497 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31498 emit_insn (gen_rtx_SET (new_out, x));
31499 break;
31500 }
31501
31502 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31503 use_release);
31504
31505 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31506 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31507
31508 /* Checks whether a barrier is needed and emits one accordingly. */
31509 if (is_armv8_sync
31510 || !(use_acquire || use_release))
31511 arm_post_atomic_barrier (model);
31512 }
31513 \f
31514 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31515 opt_machine_mode
31516 arm_mode_to_pred_mode (machine_mode mode)
31517 {
31518 switch (GET_MODE_NUNITS (mode))
31519 {
31520 case 16: return V16BImode;
31521 case 8: return V8BImode;
31522 case 4: return V4BImode;
31523 case 2: return V2QImode;
31524 }
31525 return opt_machine_mode ();
31526 }
31527
31528 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31529 If CAN_INVERT, store either the result or its inverse in TARGET
31530 and return true if TARGET contains the inverse. If !CAN_INVERT,
31531 always store the result in TARGET, never its inverse.
31532
31533 Note that the handling of floating-point comparisons is not
31534 IEEE compliant. */
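/* For example, on Neon an UNLT comparison is handled by emitting the reversed
   GE comparison and then either inverting the mask or reporting the result as
   inverted when CAN_INVERT is true.  */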
31535
31536 bool
31537 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31538 bool can_invert)
31539 {
31540 machine_mode cmp_result_mode = GET_MODE (target);
31541 machine_mode cmp_mode = GET_MODE (op0);
31542
31543 bool inverted;
31544
31545 /* MVE supports more comparisons than Neon. */
31546 if (TARGET_HAVE_MVE)
31547 inverted = false;
31548 else
31549 switch (code)
31550 {
31551 /* For these we need to compute the inverse of the requested
31552 comparison. */
31553 case UNORDERED:
31554 case UNLT:
31555 case UNLE:
31556 case UNGT:
31557 case UNGE:
31558 case UNEQ:
31559 case NE:
31560 code = reverse_condition_maybe_unordered (code);
31561 if (!can_invert)
31562 {
31563 /* Recursively emit the inverted comparison into a temporary
31564 and then store its inverse in TARGET. This avoids reusing
31565 TARGET (which for integer NE could be one of the inputs). */
31566 rtx tmp = gen_reg_rtx (cmp_result_mode);
31567 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31568 gcc_unreachable ();
31569 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31570 return false;
31571 }
31572 inverted = true;
31573 break;
31574
31575 default:
31576 inverted = false;
31577 break;
31578 }
31579
31580 switch (code)
31581 {
31582 /* These are natively supported by Neon for zero comparisons, but otherwise
31583 require the operands to be swapped. For MVE, we can only compare
31584 registers. */
31585 case LE:
31586 case LT:
31587 if (!TARGET_HAVE_MVE)
31588 if (op1 != CONST0_RTX (cmp_mode))
31589 {
31590 code = swap_condition (code);
31591 std::swap (op0, op1);
31592 }
31593 /* Fall through. */
31594
31595 /* These are natively supported by Neon for both register and zero
31596 operands. MVE supports registers only. */
31597 case EQ:
31598 case GE:
31599 case GT:
31600 case NE:
31601 if (TARGET_HAVE_MVE)
31602 {
31603 switch (GET_MODE_CLASS (cmp_mode))
31604 {
31605 case MODE_VECTOR_INT:
31606 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31607 op0, force_reg (cmp_mode, op1)));
31608 break;
31609 case MODE_VECTOR_FLOAT:
31610 if (TARGET_HAVE_MVE_FLOAT)
31611 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31612 op0, force_reg (cmp_mode, op1)));
31613 else
31614 gcc_unreachable ();
31615 break;
31616 default:
31617 gcc_unreachable ();
31618 }
31619 }
31620 else
31621 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31622 return inverted;
31623
31624 /* These are natively supported for register operands only.
31625 Comparisons with zero aren't useful and should be folded
31626 or canonicalized by target-independent code. */
31627 case GEU:
31628 case GTU:
31629 if (TARGET_HAVE_MVE)
31630 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31631 op0, force_reg (cmp_mode, op1)));
31632 else
31633 emit_insn (gen_neon_vc (code, cmp_mode, target,
31634 op0, force_reg (cmp_mode, op1)));
31635 return inverted;
31636
31637 /* These require the operands to be swapped and likewise do not
31638 support comparisons with zero. */
31639 case LEU:
31640 case LTU:
31641 if (TARGET_HAVE_MVE)
31642 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31643 force_reg (cmp_mode, op1), op0));
31644 else
31645 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31646 target, force_reg (cmp_mode, op1), op0));
31647 return inverted;
31648
31649 /* These need a combination of two comparisons. */
31650 case LTGT:
31651 case ORDERED:
31652 {
31653 /* Operands are LTGT iff (a > b || a < b).
31654 Operands are ORDERED iff (a > b || a <= b). */
31655 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31656 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31657 rtx_code alt_code = (code == LTGT ? LT : LE);
31658 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31659 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31660 gcc_unreachable ();
31661 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31662 gt_res, alt_res)));
31663 return inverted;
31664 }
31665
31666 default:
31667 gcc_unreachable ();
31668 }
31669 }
31670
31671 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31672 CMP_RESULT_MODE is the mode of the comparison result. */
31673
31674 void
31675 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31676 {
31677 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31678 arm_expand_vector_compare, and another one here. */
31679 rtx mask;
31680
31681 if (TARGET_HAVE_MVE)
31682 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31683 else
31684 mask = gen_reg_rtx (cmp_result_mode);
31685
31686 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31687 operands[4], operands[5], true);
31688 if (inverted)
31689 std::swap (operands[1], operands[2]);
31690 if (TARGET_NEON)
31691 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31692 mask, operands[1], operands[2]));
31693 else
31694 {
31695 machine_mode cmp_mode = GET_MODE (operands[0]);
31696
31697 switch (GET_MODE_CLASS (cmp_mode))
31698 {
31699 case MODE_VECTOR_INT:
31700 emit_insn (gen_mve_q (VPSELQ_S, VPSELQ_S, cmp_mode, operands[0],
31701 operands[1], operands[2], mask));
31702 break;
31703 case MODE_VECTOR_FLOAT:
31704 if (TARGET_HAVE_MVE_FLOAT)
31705 emit_insn (gen_mve_q_f (VPSELQ_F, cmp_mode, operands[0],
31706 operands[1], operands[2], mask));
31707 else
31708 gcc_unreachable ();
31709 break;
31710 default:
31711 gcc_unreachable ();
31712 }
31713 }
31714 }
31715 \f
31716 #define MAX_VECT_LEN 16
31717
31718 struct expand_vec_perm_d
31719 {
31720 rtx target, op0, op1;
31721 vec_perm_indices perm;
31722 machine_mode vmode;
31723 bool one_vector_p;
31724 bool testing_p;
31725 };
31726
31727 /* Generate a variable permutation. */
31728
31729 static void
31730 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31731 {
31732 machine_mode vmode = GET_MODE (target);
31733 bool one_vector_p = rtx_equal_p (op0, op1);
31734
31735 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31736 gcc_checking_assert (GET_MODE (op0) == vmode);
31737 gcc_checking_assert (GET_MODE (op1) == vmode);
31738 gcc_checking_assert (GET_MODE (sel) == vmode);
31739 gcc_checking_assert (TARGET_NEON);
31740
31741 if (one_vector_p)
31742 {
31743 if (vmode == V8QImode)
31744 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31745 else
31746 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31747 }
31748 else
31749 {
31750 rtx pair;
31751
31752 if (vmode == V8QImode)
31753 {
31754 pair = gen_reg_rtx (V16QImode);
31755 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31756 pair = gen_lowpart (TImode, pair);
31757 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31758 }
31759 else
31760 {
31761 pair = gen_reg_rtx (OImode);
31762 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31763 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31764 }
31765 }
31766 }
31767
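/* Expand a variable vector permutation: select elements of OP0/OP1 according
   to SEL and store them in TARGET, masking SEL first so that out-of-range
   indexes wrap as VEC_PERM_EXPR requires.  */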
31768 void
31769 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31770 {
31771 machine_mode vmode = GET_MODE (target);
31772 unsigned int nelt = GET_MODE_NUNITS (vmode);
31773 bool one_vector_p = rtx_equal_p (op0, op1);
31774 rtx mask;
31775
31776 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31777 numbering of elements for big-endian, we must reverse the order. */
31778 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31779
31780 /* The VTBL instruction does not use a modulo index, so we must take care
31781 of that ourselves. */
31782 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31783 mask = gen_const_vec_duplicate (vmode, mask);
31784 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31785
31786 arm_expand_vec_perm_1 (target, op0, op1, sel);
31787 }
31788
31789 /* Map lane ordering between the architectural lane order and GCC's lane
31790 order, taking the ABI into account. See the comment above output_move_neon for details. */
31791
31792 static int
31793 neon_endian_lane_map (machine_mode mode, int lane)
31794 {
31795 if (BYTES_BIG_ENDIAN)
31796 {
31797 int nelems = GET_MODE_NUNITS (mode);
31798 /* Reverse lane order. */
31799 lane = (nelems - 1 - lane);
31800 /* Reverse D register order, to match ABI. */
31801 if (GET_MODE_SIZE (mode) == 16)
31802 lane = lane ^ (nelems / 2);
31803 }
31804 return lane;
31805 }
31806
31807 /* Some permutations index into pairs of vectors; this is a helper function
31808 to map indexes into those pairs of vectors. */
31809
31810 static int
31811 neon_pair_endian_lane_map (machine_mode mode, int lane)
31812 {
31813 int nelem = GET_MODE_NUNITS (mode);
31814 if (BYTES_BIG_ENDIAN)
31815 lane =
31816 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31817 return lane;
31818 }
31819
31820 /* Generate or test for an insn that supports a constant permutation. */
31821
31822 /* Recognize patterns for the VUZP insns. */
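/* For example (little-endian, illustrative), the V8QI selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } (or its odd counterpart) is matched here and
   implemented with a single VUZP, keeping only the required output.  */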
31823
31824 static bool
31825 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31826 {
31827 unsigned int i, odd, mask, nelt = d->perm.length ();
31828 rtx out0, out1, in0, in1;
31829 int first_elem;
31830 int swap_nelt;
31831
31832 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31833 return false;
31834
31835 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31836 big endian pattern on 64 bit vectors, so we correct for that. */
31837 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31838 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31839
31840 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31841
31842 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31843 odd = 0;
31844 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31845 odd = 1;
31846 else
31847 return false;
31848 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31849
31850 for (i = 0; i < nelt; i++)
31851 {
31852 unsigned elt =
31853 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31854 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31855 return false;
31856 }
31857
31858 /* Success! */
31859 if (d->testing_p)
31860 return true;
31861
31862 in0 = d->op0;
31863 in1 = d->op1;
31864 if (swap_nelt != 0)
31865 std::swap (in0, in1);
31866
31867 out0 = d->target;
31868 out1 = gen_reg_rtx (d->vmode);
31869 if (odd)
31870 std::swap (out0, out1);
31871
31872 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31873 return true;
31874 }
31875
31876 /* Recognize patterns for the VZIP insns. */
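/* For example (little-endian, illustrative), the V8QI selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of the two inputs
   and is matched here as the first output of a VZIP.  */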
31877
31878 static bool
31879 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31880 {
31881 unsigned int i, high, mask, nelt = d->perm.length ();
31882 rtx out0, out1, in0, in1;
31883 int first_elem;
31884 bool is_swapped;
31885
31886 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31887 return false;
31888
31889 is_swapped = BYTES_BIG_ENDIAN;
31890
31891 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31892
31893 high = nelt / 2;
31894 if (first_elem == neon_endian_lane_map (d->vmode, high))
31895 ;
31896 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31897 high = 0;
31898 else
31899 return false;
31900 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31901
31902 for (i = 0; i < nelt / 2; i++)
31903 {
31904 unsigned elt =
31905 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31906 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31907 != elt)
31908 return false;
31909 elt =
31910 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31911 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31912 != elt)
31913 return false;
31914 }
31915
31916 /* Success! */
31917 if (d->testing_p)
31918 return true;
31919
31920 in0 = d->op0;
31921 in1 = d->op1;
31922 if (is_swapped)
31923 std::swap (in0, in1);
31924
31925 out0 = d->target;
31926 out1 = gen_reg_rtx (d->vmode);
31927 if (high)
31928 std::swap (out0, out1);
31929
31930 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31931 return true;
31932 }
31933
31934 /* Recognize patterns for the VREV insns. */
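/* For example (illustrative), the V8QI selector { 3, 2, 1, 0, 7, 6, 5, 4 }
   reverses the bytes within each 32-bit group and is matched here as
   VREV32.8.  */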
31935 static bool
31936 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31937 {
31938 unsigned int i, j, diff, nelt = d->perm.length ();
31939 rtx (*gen) (machine_mode, rtx, rtx);
31940
31941 if (!d->one_vector_p)
31942 return false;
31943
31944 diff = d->perm[0];
31945 switch (diff)
31946 {
31947 case 7:
31948 switch (d->vmode)
31949 {
31950 case E_V16QImode:
31951 case E_V8QImode:
31952 gen = gen_neon_vrev64;
31953 break;
31954 default:
31955 return false;
31956 }
31957 break;
31958 case 3:
31959 switch (d->vmode)
31960 {
31961 case E_V16QImode:
31962 case E_V8QImode:
31963 gen = gen_neon_vrev32;
31964 break;
31965 case E_V8HImode:
31966 case E_V4HImode:
31967 case E_V8HFmode:
31968 case E_V4HFmode:
31969 gen = gen_neon_vrev64;
31970 break;
31971 default:
31972 return false;
31973 }
31974 break;
31975 case 1:
31976 switch (d->vmode)
31977 {
31978 case E_V16QImode:
31979 case E_V8QImode:
31980 gen = gen_neon_vrev16;
31981 break;
31982 case E_V8HImode:
31983 case E_V4HImode:
31984 gen = gen_neon_vrev32;
31985 break;
31986 case E_V4SImode:
31987 case E_V2SImode:
31988 case E_V4SFmode:
31989 case E_V2SFmode:
31990 gen = gen_neon_vrev64;
31991 break;
31992 default:
31993 return false;
31994 }
31995 break;
31996 default:
31997 return false;
31998 }
31999
32000 for (i = 0; i < nelt ; i += diff + 1)
32001 for (j = 0; j <= diff; j += 1)
32002 {
32003 /* This is guaranteed to hold since the value of diff
32004 is one of 7, 3 or 1, so there are enough elements in
32005 the vector to index. Seeing a permutation with any
32006 other value of diff here implies that something has
32007 gone wrong earlier. */
32008 gcc_assert (i + j < nelt);
32009 if (d->perm[i + j] != i + diff - j)
32010 return false;
32011 }
32012
32013 /* Success! */
32014 if (d->testing_p)
32015 return true;
32016
32017 emit_insn (gen (d->vmode, d->target, d->op0));
32018 return true;
32019 }
32020
32021 /* Recognize patterns for the VTRN insns. */
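/* For example (little-endian, illustrative), the V8QI selector
   { 0, 8, 2, 10, 4, 12, 6, 14 } is matched here as the even-lane output of a
   VTRN of the two inputs.  */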
32022
32023 static bool
32024 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
32025 {
32026 unsigned int i, odd, mask, nelt = d->perm.length ();
32027 rtx out0, out1, in0, in1;
32028
32029 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
32030 return false;
32031
32032 /* Note that these are little-endian tests. Adjust for big-endian later. */
32033 if (d->perm[0] == 0)
32034 odd = 0;
32035 else if (d->perm[0] == 1)
32036 odd = 1;
32037 else
32038 return false;
32039 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
32040
32041 for (i = 0; i < nelt; i += 2)
32042 {
32043 if (d->perm[i] != i + odd)
32044 return false;
32045 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
32046 return false;
32047 }
32048
32049 /* Success! */
32050 if (d->testing_p)
32051 return true;
32052
32053 in0 = d->op0;
32054 in1 = d->op1;
32055 if (BYTES_BIG_ENDIAN)
32056 {
32057 std::swap (in0, in1);
32058 odd = !odd;
32059 }
32060
32061 out0 = d->target;
32062 out1 = gen_reg_rtx (d->vmode);
32063 if (odd)
32064 std::swap (out0, out1);
32065
32066 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
32067 return true;
32068 }
32069
32070 /* Recognize patterns for the VEXT insns. */
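/* For example (little-endian, illustrative), the V8QI selector
   { 3, 4, 5, 6, 7, 8, 9, 10 } extracts a window starting at byte 3 of the
   concatenated inputs and is matched here as VEXT with offset #3.  */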
32071
32072 static bool
32073 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
32074 {
32075 unsigned int i, nelt = d->perm.length ();
32076 rtx offset;
32077
32078 unsigned int location;
32079
32080 unsigned int next = d->perm[0] + 1;
32081
32082 /* TODO: Handle GCC's numbering of elements for big-endian. */
32083 if (BYTES_BIG_ENDIAN)
32084 return false;
32085
32086 /* Check if the extracted indexes are increasing by one. */
32087 for (i = 1; i < nelt; next++, i++)
32088 {
32089 /* If we hit the most significant element of the 2nd vector in
32090 the previous iteration, no need to test further. */
32091 if (next == 2 * nelt)
32092 return false;
32093
32094 /* If we are operating on only one vector: it could be a
32095 rotation. If there are only two elements of size < 64, let
32096 arm_evpc_neon_vrev catch it. */
32097 if (d->one_vector_p && (next == nelt))
32098 {
32099 if ((nelt == 2) && (d->vmode != V2DImode))
32100 return false;
32101 else
32102 next = 0;
32103 }
32104
32105 if (d->perm[i] != next)
32106 return false;
32107 }
32108
32109 location = d->perm[0];
32110
32111 /* Success! */
32112 if (d->testing_p)
32113 return true;
32114
32115 offset = GEN_INT (location);
32116
32117 if (d->vmode == E_DImode)
32118 return false;
32119
32120 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
32121 return true;
32122 }
32123
32124 /* The NEON VTBL instruction is a fully variable permutation that's even
32125 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32126 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32127 can do slightly better by expanding this as a constant where we don't
32128 have to apply a mask. */
32129
32130 static bool
32131 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32132 {
32133 rtx rperm[MAX_VECT_LEN], sel;
32134 machine_mode vmode = d->vmode;
32135 unsigned int i, nelt = d->perm.length ();
32136
32137 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32138 numbering of elements for big-endian, we must reverse the order. */
32139 if (BYTES_BIG_ENDIAN)
32140 return false;
32141
32142 if (d->testing_p)
32143 return true;
32144
32145 /* Generic code will try constant permutation twice: once with the
32146 original mode and again with the elements lowered to QImode.
32147 So wait and don't do the selector expansion ourselves. */
32148 if (vmode != V8QImode && vmode != V16QImode)
32149 return false;
32150
32151 for (i = 0; i < nelt; ++i)
32152 rperm[i] = GEN_INT (d->perm[i]);
32153 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32154 sel = force_reg (vmode, sel);
32155
32156 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32157 return true;
32158 }
32159
32160 static bool
32161 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32162 {
32163 /* Check if the input mask matches vext before reordering the
32164 operands. */
32165 if (TARGET_NEON)
32166 if (arm_evpc_neon_vext (d))
32167 return true;
32168
32169 /* The pattern matching functions above are written to look for a small
32170 number to begin the sequence (0, 1, N/2). If we begin with an index
32171 from the second operand, we can swap the operands. */
32172 unsigned int nelt = d->perm.length ();
32173 if (d->perm[0] >= nelt)
32174 {
32175 d->perm.rotate_inputs (1);
32176 std::swap (d->op0, d->op1);
32177 }
32178
32179 if (TARGET_NEON)
32180 {
32181 if (arm_evpc_neon_vuzp (d))
32182 return true;
32183 if (arm_evpc_neon_vzip (d))
32184 return true;
32185 if (arm_evpc_neon_vrev (d))
32186 return true;
32187 if (arm_evpc_neon_vtrn (d))
32188 return true;
32189 return arm_evpc_neon_vtbl (d);
32190 }
32191 return false;
32192 }
32193
32194 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32195
32196 static bool
32197 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32198 rtx target, rtx op0, rtx op1,
32199 const vec_perm_indices &sel)
32200 {
32201 if (vmode != op_mode)
32202 return false;
32203
32204 struct expand_vec_perm_d d;
32205 int i, nelt, which;
32206
32207 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32208 return false;
32209
32210 d.target = target;
32211 if (op0)
32212 {
32213 rtx nop0 = force_reg (vmode, op0);
32214 if (op0 == op1)
32215 op1 = nop0;
32216 op0 = nop0;
32217 }
32218 if (op1)
32219 op1 = force_reg (vmode, op1);
32220 d.op0 = op0;
32221 d.op1 = op1;
32222
32223 d.vmode = vmode;
32224 gcc_assert (VECTOR_MODE_P (d.vmode));
32225 d.testing_p = !target;
32226
32227 nelt = GET_MODE_NUNITS (d.vmode);
32228 for (i = which = 0; i < nelt; ++i)
32229 {
32230 int ei = sel[i] & (2 * nelt - 1);
32231 which |= (ei < nelt ? 1 : 2);
32232 }
32233
32234 switch (which)
32235 {
32236 default:
32237 gcc_unreachable ();
32238
32239 case 3:
32240 d.one_vector_p = false;
32241 if (d.testing_p || !rtx_equal_p (op0, op1))
32242 break;
32243
32244 /* The elements of PERM do not suggest that only the first operand
32245 is used, but both operands are identical. Allow easier matching
32246 of the permutation by folding the permutation into the single
32247 input vector. */
32248 /* FALLTHRU */
32249 case 2:
32250 d.op0 = op1;
32251 d.one_vector_p = true;
32252 break;
32253
32254 case 1:
32255 d.op1 = op0;
32256 d.one_vector_p = true;
32257 break;
32258 }
32259
32260 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32261
32262 if (!d.testing_p)
32263 return arm_expand_vec_perm_const_1 (&d);
32264
32265 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32266 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32267 if (!d.one_vector_p)
32268 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32269
32270 start_sequence ();
32271 bool ret = arm_expand_vec_perm_const_1 (&d);
32272 end_sequence ();
32273
32274 return ret;
32275 }
32276
32277 bool
32278 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32279 {
32280 /* If we are soft float and we do not have ldrd
32281 then all auto increment forms are ok. */
32282 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32283 return true;
32284
32285 switch (code)
32286 {
32287 /* Post-increment is supported for all instruction forms; pre-decrement
32288 is supported for all forms except the vector ones. */
32289 case ARM_POST_INC:
32290 case ARM_PRE_DEC:
32291 if (VECTOR_MODE_P (mode))
32292 {
32293 if (code != ARM_PRE_DEC)
32294 return true;
32295 else
32296 return false;
32297 }
32298
32299 return true;
32300
32301 case ARM_POST_DEC:
32302 case ARM_PRE_INC:
32303 /* Without LDRD, and with a mode size greater than
32304 word size, there is no point in auto-incrementing
32305 because ldm and stm will not have these forms. */
32306 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32307 return false;
32308
32309 /* Vector and floating point modes do not support
32310 these auto increment forms. */
32311 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32312 return false;
32313
32314 return true;
32315
32316 default:
32317 return false;
32318
32319 }
32320
32321 return false;
32322 }
32323
32324 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32325 on ARM, since we know that shifts by negative amounts are no-ops.
32326 Additionally, the default expansion code is not available or suitable
32327 for post-reload insn splits (this can occur when the register allocator
32328 chooses not to do a shift in NEON).
32329
32330 This function is used in both initial expand and post-reload splits, and
32331 handles all kinds of 64-bit shifts.
32332
32333 Input requirements:
32334 - It is safe for the input and output to be the same register, but
32335 early-clobber rules apply for the shift amount and scratch registers.
32336 - Shift by register requires both scratch registers. In all other cases
32337 the scratch registers may be NULL.
32338 - Ashiftrt by a register also clobbers the CC register. */
32339 void
32340 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32341 rtx amount, rtx scratch1, rtx scratch2)
32342 {
32343 rtx out_high = gen_highpart (SImode, out);
32344 rtx out_low = gen_lowpart (SImode, out);
32345 rtx in_high = gen_highpart (SImode, in);
32346 rtx in_low = gen_lowpart (SImode, in);
32347
32348 /* Terminology:
32349 in = the register pair containing the input value.
32350 out = the destination register pair.
32351 up = the high- or low-part of each pair.
32352 down = the opposite part to "up".
32353 In a shift, we can consider bits to shift from "up"-stream to
32354 "down"-stream, so in a left-shift "up" is the low-part and "down"
32355 is the high-part of each register pair. */
32356
32357 rtx out_up = code == ASHIFT ? out_low : out_high;
32358 rtx out_down = code == ASHIFT ? out_high : out_low;
32359 rtx in_up = code == ASHIFT ? in_low : in_high;
32360 rtx in_down = code == ASHIFT ? in_high : in_low;
32361
32362 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32363 gcc_assert (out
32364 && (REG_P (out) || SUBREG_P (out))
32365 && GET_MODE (out) == DImode);
32366 gcc_assert (in
32367 && (REG_P (in) || SUBREG_P (in))
32368 && GET_MODE (in) == DImode);
32369 gcc_assert (amount
32370 && (((REG_P (amount) || SUBREG_P (amount))
32371 && GET_MODE (amount) == SImode)
32372 || CONST_INT_P (amount)));
32373 gcc_assert (scratch1 == NULL
32374 || (GET_CODE (scratch1) == SCRATCH)
32375 || (GET_MODE (scratch1) == SImode
32376 && REG_P (scratch1)));
32377 gcc_assert (scratch2 == NULL
32378 || (GET_CODE (scratch2) == SCRATCH)
32379 || (GET_MODE (scratch2) == SImode
32380 && REG_P (scratch2)));
32381 gcc_assert (!REG_P (out) || !REG_P (amount)
32382 || !HARD_REGISTER_P (out)
32383 || (REGNO (out) != REGNO (amount)
32384 && REGNO (out) + 1 != REGNO (amount)));
32385
32386 /* Macros to make following code more readable. */
32387 #define SUB_32(DEST,SRC) \
32388 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32389 #define RSB_32(DEST,SRC) \
32390 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32391 #define SUB_S_32(DEST,SRC) \
32392 gen_addsi3_compare0 ((DEST), (SRC), \
32393 GEN_INT (-32))
32394 #define SET(DEST,SRC) \
32395 gen_rtx_SET ((DEST), (SRC))
32396 #define SHIFT(CODE,SRC,AMOUNT) \
32397 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32398 #define LSHIFT(CODE,SRC,AMOUNT) \
32399 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32400 SImode, (SRC), (AMOUNT))
32401 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32402 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32403 SImode, (SRC), (AMOUNT))
32404 #define ORR(A,B) \
32405 gen_rtx_IOR (SImode, (A), (B))
32406 #define BRANCH(COND,LABEL) \
32407 gen_arm_cond_branch ((LABEL), \
32408 gen_rtx_ ## COND (CCmode, cc_reg, \
32409 const0_rtx), \
32410 cc_reg)
32411
32412 /* Shifts by register and shifts by constant are handled separately. */
32413 if (CONST_INT_P (amount))
32414 {
32415 /* We have a shift-by-constant. */
32416
32417 /* First, handle out-of-range shift amounts.
32418 In both cases we try to match the result that an ARM instruction in a
32419 shift-by-register would give. This helps reduce execution
32420 differences between optimization levels, but it won't stop other
32421 parts of the compiler doing different things. This is "undefined
32422 behavior", in any case. */
32423 if (INTVAL (amount) <= 0)
32424 emit_insn (gen_movdi (out, in));
32425 else if (INTVAL (amount) >= 64)
32426 {
32427 if (code == ASHIFTRT)
32428 {
32429 rtx const31_rtx = GEN_INT (31);
32430 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32431 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32432 }
32433 else
32434 emit_insn (gen_movdi (out, const0_rtx));
32435 }
32436
32437 /* Now handle valid shifts. */
32438 else if (INTVAL (amount) < 32)
32439 {
32440 /* Shifts by a constant less than 32. */
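/* As an illustration (constants chosen for exposition), a DImode left
   shift by 10 emits:
     out_high = in_high << 10;
     out_high |= (unsigned) in_low >> 22;
     out_low = in_low << 10;  */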
32441 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32442
32443 /* Clearing the out register in DImode first avoids lots
32444 of spilling and results in less stack usage.
32445 Later this redundant insn is completely removed.
32446 Do that only if "in" and "out" are different registers. */
32447 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32448 emit_insn (SET (out, const0_rtx));
32449 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32450 emit_insn (SET (out_down,
32451 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32452 out_down)));
32453 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32454 }
32455 else
32456 {
32457 /* Shifts by a constant greater than 31. */
32458 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32459
32460 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32461 emit_insn (SET (out, const0_rtx));
32462 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32463 if (code == ASHIFTRT)
32464 emit_insn (gen_ashrsi3 (out_up, in_up,
32465 GEN_INT (31)));
32466 else
32467 emit_insn (SET (out_up, const0_rtx));
32468 }
32469 }
32470 else
32471 {
32472 /* We have a shift-by-register. */
32473 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32474
32475 /* This alternative requires the scratch registers. */
32476 gcc_assert (scratch1 && REG_P (scratch1));
32477 gcc_assert (scratch2 && REG_P (scratch2));
32478
32479 /* We will need the values "amount-32" and "32-amount" later.
32480 Swapping them around now allows the later code to be more general. */
32481 switch (code)
32482 {
32483 case ASHIFT:
32484 emit_insn (SUB_32 (scratch1, amount));
32485 emit_insn (RSB_32 (scratch2, amount));
32486 break;
32487 case ASHIFTRT:
32488 emit_insn (RSB_32 (scratch1, amount));
32489 /* Also set CC = amount > 32. */
32490 emit_insn (SUB_S_32 (scratch2, amount));
32491 break;
32492 case LSHIFTRT:
32493 emit_insn (RSB_32 (scratch1, amount));
32494 emit_insn (SUB_32 (scratch2, amount));
32495 break;
32496 default:
32497 gcc_unreachable ();
32498 }
32499
32500 /* Emit code like this:
32501
32502 arithmetic-left:
32503 out_down = in_down << amount;
32504 out_down = (in_up << (amount - 32)) | out_down;
32505 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32506 out_up = in_up << amount;
32507
32508 arithmetic-right:
32509 out_down = in_down >> amount;
32510 out_down = (in_up << (32 - amount)) | out_down;
32511 if (amount < 32)
32512 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32513 out_up = in_up << amount;
32514
32515 logical-right:
32516 out_down = in_down >> amount;
32517 out_down = (in_up << (32 - amount)) | out_down;
32518 if (amount < 32)
32519 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32520 out_up = in_up << amount;
32521
32522 The ARM and Thumb2 variants are the same but implemented slightly
32523 differently. If this were only called during expand we could just
32524 use the Thumb2 case and let combine do the right thing, but this
32525 can also be called from post-reload splitters. */
32526
32527 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32528
32529 if (!TARGET_THUMB2)
32530 {
32531 /* Emit code for ARM mode. */
32532 emit_insn (SET (out_down,
32533 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32534 if (code == ASHIFTRT)
32535 {
32536 rtx_code_label *done_label = gen_label_rtx ();
32537 emit_jump_insn (BRANCH (LT, done_label));
32538 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32539 out_down)));
32540 emit_label (done_label);
32541 }
32542 else
32543 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32544 out_down)));
32545 }
32546 else
32547 {
32548 /* Emit code for Thumb2 mode.
32549 Thumb2 can't do shift and or in one insn. */
32550 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32551 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32552
32553 if (code == ASHIFTRT)
32554 {
32555 rtx_code_label *done_label = gen_label_rtx ();
32556 emit_jump_insn (BRANCH (LT, done_label));
32557 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32558 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32559 emit_label (done_label);
32560 }
32561 else
32562 {
32563 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32564 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32565 }
32566 }
32567
32568 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32569 }
32570
32571 #undef SUB_32
32572 #undef RSB_32
32573 #undef SUB_S_32
32574 #undef SET
32575 #undef SHIFT
32576 #undef LSHIFT
32577 #undef REV_LSHIFT
32578 #undef ORR
32579 #undef BRANCH
32580 }
32581
32582 /* Returns true if the pattern is a valid symbolic address, which is either a
32583 symbol_ref or (symbol_ref + addend).
32584
32585 According to the ARM ELF ABI, the initial addend of REL-type relocations
32586 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32587 literal field of the instruction as a 16-bit signed value in the range
32588 -32768 <= A < 32768.
32589
32590 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32591 unsigned range of 0 <= A < 256 as described in the AAELF32
32592 relocation handling documentation: REL-type relocations are encoded
32593 as unsigned in this case. */
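/* For example (illustrative RTL): (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 4))) are accepted, while an
   addend of 0x10000 is rejected because it is outside the signed 16-bit
   range (or the unsigned 8-bit range for Thumb-1 without MOVT).  */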
32594
32595 bool
32596 arm_valid_symbolic_address_p (rtx addr)
32597 {
32598 rtx xop0, xop1 = NULL_RTX;
32599 rtx tmp = addr;
32600
32601 if (target_word_relocations)
32602 return false;
32603
32604 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32605 return true;
32606
32607 /* (const (plus: symbol_ref const_int)) */
32608 if (GET_CODE (addr) == CONST)
32609 tmp = XEXP (addr, 0);
32610
32611 if (GET_CODE (tmp) == PLUS)
32612 {
32613 xop0 = XEXP (tmp, 0);
32614 xop1 = XEXP (tmp, 1);
32615
32616 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32617 {
32618 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32619 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32620 else
32621 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32622 }
32623 }
32624
32625 return false;
32626 }
32627
32628 /* Returns true if *COMPARISON is a valid comparison operation, and
32629 puts the operands into a form that is valid for it. */
32630 bool
32631 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32632 {
32633 enum rtx_code code = GET_CODE (*comparison);
32634 int code_int;
32635 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32636 ? GET_MODE (*op2) : GET_MODE (*op1);
32637
32638 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32639
32640 if (code == UNEQ || code == LTGT)
32641 return false;
32642
32643 code_int = (int)code;
32644 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32645 PUT_CODE (*comparison, (enum rtx_code)code_int);
32646
32647 switch (mode)
32648 {
32649 case E_SImode:
32650 if (!arm_add_operand (*op1, mode))
32651 *op1 = force_reg (mode, *op1);
32652 if (!arm_add_operand (*op2, mode))
32653 *op2 = force_reg (mode, *op2);
32654 return true;
32655
32656 case E_DImode:
32657 /* gen_compare_reg() will sort out any invalid operands. */
32658 return true;
32659
32660 case E_HFmode:
32661 if (!TARGET_VFP_FP16INST)
32662 break;
32663 /* FP16 comparisons are done in SF mode. */
32664 mode = SFmode;
32665 *op1 = convert_to_mode (mode, *op1, 1);
32666 *op2 = convert_to_mode (mode, *op2, 1);
32667 /* Fall through. */
32668 case E_SFmode:
32669 case E_DFmode:
32670 if (!vfp_compare_operand (*op1, mode))
32671 *op1 = force_reg (mode, *op1);
32672 if (!vfp_compare_operand (*op2, mode))
32673 *op2 = force_reg (mode, *op2);
32674 return true;
32675 default:
32676 break;
32677 }
32678
32679 return false;
32680
32681 }
32682
32683 /* Maximum number of instructions to set block of memory. */
32684 static int
32685 arm_block_set_max_insns (void)
32686 {
32687 if (optimize_function_for_size_p (cfun))
32688 return 4;
32689 else
32690 return current_tune->max_insns_inline_memset;
32691 }
32692
32693 /* Return TRUE if it's profitable to set a block of memory in the
32694 non-vectorized case. VAL is the value to set the memory
32695 with. LENGTH is the number of bytes to set. ALIGN is the
32696 alignment of the destination memory in bytes. UNALIGNED_P
32697 is TRUE if we can only set the memory with instructions
32698 meeting alignment requirements. USE_STRD_P is TRUE if we
32699 can use strd to set the memory. */
32700 static bool
32701 arm_block_set_non_vect_profit_p (rtx val,
32702 unsigned HOST_WIDE_INT length,
32703 unsigned HOST_WIDE_INT align,
32704 bool unaligned_p, bool use_strd_p)
32705 {
32706 int num = 0;
32707 /* For a leftover of 0-7 bytes, this table gives the minimum number of
32708 strb/strh/str instructions needed to set it. */
32709 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
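/* An illustrative count (the cost of loading VAL is omitted): LENGTH == 15
   with word alignment and no strd needs (15 >> 2) == 3 str instructions
   plus leftover[3] == 2 (strh + strb), one of which may be saved below by
   merging the trailing strh/strb into a single unaligned str.  */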
32710
32711 if (unaligned_p)
32712 {
32713 num = arm_const_inline_cost (SET, val);
32714 num += length / align + length % align;
32715 }
32716 else if (use_strd_p)
32717 {
32718 num = arm_const_double_inline_cost (val);
32719 num += (length >> 3) + leftover[length & 7];
32720 }
32721 else
32722 {
32723 num = arm_const_inline_cost (SET, val);
32724 num += (length >> 2) + leftover[length & 3];
32725 }
32726
32727 /* We may be able to combine last pair STRH/STRB into a single STR
32728 by shifting one byte back. */
32729 if (unaligned_access && length > 3 && (length & 3) == 3)
32730 num--;
32731
32732 return (num <= arm_block_set_max_insns ());
32733 }
32734
32735 /* Return TRUE if it's profitable to set a block of memory in the
32736 vectorized case. LENGTH is the number of bytes to set.
32737 ALIGN is the alignment of destination memory in bytes.
32738 MODE is the vector mode used to set the memory. */
32739 static bool
32740 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32741 unsigned HOST_WIDE_INT align,
32742 machine_mode mode)
32743 {
32744 int num;
32745 bool unaligned_p = ((align & 3) != 0);
32746 unsigned int nelt = GET_MODE_NUNITS (mode);
32747
32748 /* Instruction loading constant value. */
32749 num = 1;
32750 /* Instructions storing the memory. */
32751 num += (length + nelt - 1) / nelt;
32752 /* Instructions adjusting the address expression. The address only
32753 needs adjusting if the destination is 4-byte aligned and the
32754 leftover bytes can only be stored with a misaligned store. */
32755 if (!unaligned_p && (length & 3) != 0)
32756 num++;
32757
32758 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32759 if (!unaligned_p && mode == V16QImode)
32760 num--;
32761
32762 return (num <= arm_block_set_max_insns ());
32763 }
32764
32765 /* Set a block of memory using vectorization instructions for the
32766 unaligned case. We fill the first LENGTH bytes of the memory
32767 area starting from DSTBASE with byte constant VALUE. ALIGN is
32768 the alignment requirement of memory. Return TRUE if succeeded. */
32769 static bool
32770 arm_block_set_unaligned_vect (rtx dstbase,
32771 unsigned HOST_WIDE_INT length,
32772 unsigned HOST_WIDE_INT value,
32773 unsigned HOST_WIDE_INT align)
32774 {
32775 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32776 rtx dst, mem;
32777 rtx val_vec, reg;
32778 rtx (*gen_func) (rtx, rtx);
32779 machine_mode mode;
32780 unsigned HOST_WIDE_INT v = value;
32781 unsigned int offset = 0;
32782 gcc_assert ((align & 0x3) != 0);
32783 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32784 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32785 if (length >= nelt_v16)
32786 {
32787 mode = V16QImode;
32788 gen_func = gen_movmisalignv16qi;
32789 }
32790 else
32791 {
32792 mode = V8QImode;
32793 gen_func = gen_movmisalignv8qi;
32794 }
32795 nelt_mode = GET_MODE_NUNITS (mode);
32796 gcc_assert (length >= nelt_mode);
32797 /* Skip if it isn't profitable. */
32798 if (!arm_block_set_vect_profit_p (length, align, mode))
32799 return false;
32800
32801 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32802 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32803
32804 v = sext_hwi (v, BITS_PER_WORD);
32805
32806 reg = gen_reg_rtx (mode);
32807 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32808 /* Emit instruction loading the constant value. */
32809 emit_move_insn (reg, val_vec);
32810
32811 /* Handle nelt_mode bytes in a vector. */
32812 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32813 {
32814 emit_insn ((*gen_func) (mem, reg));
32815 if (i + 2 * nelt_mode <= length)
32816 {
32817 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32818 offset += nelt_mode;
32819 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32820 }
32821 }
32822
32823 /* If at least nelt_v8 bytes are left over, we must be in
32824 V16QImode. */
32825 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32826
32827 /* Handle (8, 16) bytes leftover. */
32828 if (i + nelt_v8 < length)
32829 {
32830 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32831 offset += length - i;
32832 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32833
32834 /* We are shifting bytes back, set the alignment accordingly. */
32835 if ((length & 1) != 0 && align >= 2)
32836 set_mem_align (mem, BITS_PER_UNIT);
32837
32838 emit_insn (gen_movmisalignv16qi (mem, reg));
32839 }
32840 /* Handle (0, 8] bytes leftover. */
32841 else if (i < length && i + nelt_v8 >= length)
32842 {
32843 if (mode == V16QImode)
32844 reg = gen_lowpart (V8QImode, reg);
32845
32846 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32847 + (nelt_mode - nelt_v8))));
32848 offset += (length - i) + (nelt_mode - nelt_v8);
32849 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32850
32851 /* We are shifting bytes back, set the alignment accordingly. */
32852 if ((length & 1) != 0 && align >= 2)
32853 set_mem_align (mem, BITS_PER_UNIT);
32854
32855 emit_insn (gen_movmisalignv8qi (mem, reg));
32856 }
32857
32858 return true;
32859 }
32860
32861 /* Set a block of memory using vectorization instructions for the
32862 aligned case. We fill the first LENGTH bytes of the memory area
32863 starting from DSTBASE with byte constant VALUE. ALIGN is the
32864 alignment requirement of memory. Return TRUE if succeeded. */
32865 static bool
32866 arm_block_set_aligned_vect (rtx dstbase,
32867 unsigned HOST_WIDE_INT length,
32868 unsigned HOST_WIDE_INT value,
32869 unsigned HOST_WIDE_INT align)
32870 {
32871 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32872 rtx dst, addr, mem;
32873 rtx val_vec, reg;
32874 machine_mode mode;
32875 unsigned int offset = 0;
32876
32877 gcc_assert ((align & 0x3) == 0);
32878 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32879 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32880 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32881 mode = V16QImode;
32882 else
32883 mode = V8QImode;
32884
32885 nelt_mode = GET_MODE_NUNITS (mode);
32886 gcc_assert (length >= nelt_mode);
32887 /* Skip if it isn't profitable. */
32888 if (!arm_block_set_vect_profit_p (length, align, mode))
32889 return false;
32890
32891 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32892
32893 reg = gen_reg_rtx (mode);
32894 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32895 /* Emit instruction loading the constant value. */
32896 emit_move_insn (reg, val_vec);
32897
32898 i = 0;
32899 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32900 if (mode == V16QImode)
32901 {
32902 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32903 emit_insn (gen_movmisalignv16qi (mem, reg));
32904 i += nelt_mode;
32905 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32906 if (i + nelt_v8 < length && i + nelt_v16 > length)
32907 {
32908 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32909 offset += length - nelt_mode;
32910 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32911 /* We are shifting bytes back, set the alignment accordingly. */
32912 if ((length & 0x3) == 0)
32913 set_mem_align (mem, BITS_PER_UNIT * 4);
32914 else if ((length & 0x1) == 0)
32915 set_mem_align (mem, BITS_PER_UNIT * 2);
32916 else
32917 set_mem_align (mem, BITS_PER_UNIT);
32918
32919 emit_insn (gen_movmisalignv16qi (mem, reg));
32920 return true;
32921 }
32922 /* Fall through for bytes leftover. */
32923 mode = V8QImode;
32924 nelt_mode = GET_MODE_NUNITS (mode);
32925 reg = gen_lowpart (V8QImode, reg);
32926 }
32927
32928 /* Handle 8 bytes in a vector. */
32929 for (; (i + nelt_mode <= length); i += nelt_mode)
32930 {
32931 addr = plus_constant (Pmode, dst, i);
32932 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32933 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32934 emit_move_insn (mem, reg);
32935 else
32936 emit_insn (gen_unaligned_storev8qi (mem, reg));
32937 }
32938
32939 /* Handle single word leftover by shifting 4 bytes back. We can
32940 use aligned access for this case. */
32941 if (i + UNITS_PER_WORD == length)
32942 {
32943 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32944 offset += i - UNITS_PER_WORD;
32945 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32946 /* We are shifting 4 bytes back, set the alignment accordingly. */
32947 if (align > UNITS_PER_WORD)
32948 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32949
32950 emit_insn (gen_unaligned_storev8qi (mem, reg));
32951 }
32952 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32953 We have to use unaligned access for this case. */
32954 else if (i < length)
32955 {
32956 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32957 offset += length - nelt_mode;
32958 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32959 /* We are shifting bytes back, set the alignment accordingly. */
32960 if ((length & 1) == 0)
32961 set_mem_align (mem, BITS_PER_UNIT * 2);
32962 else
32963 set_mem_align (mem, BITS_PER_UNIT);
32964
32965 emit_insn (gen_movmisalignv8qi (mem, reg));
32966 }
32967
32968 return true;
32969 }
32970
32971 /* Set a block of memory using plain strh/strb instructions, only
32972 using instructions allowed by ALIGN on the processor. We fill the
32973 first LENGTH bytes of the memory area starting from DSTBASE
32974 with byte constant VALUE. ALIGN is the alignment requirement
32975 of memory. */
32976 static bool
32977 arm_block_set_unaligned_non_vect (rtx dstbase,
32978 unsigned HOST_WIDE_INT length,
32979 unsigned HOST_WIDE_INT value,
32980 unsigned HOST_WIDE_INT align)
32981 {
32982 unsigned int i;
32983 rtx dst, addr, mem;
32984 rtx val_exp, val_reg, reg;
32985 machine_mode mode;
32986 HOST_WIDE_INT v = value;
32987
32988 gcc_assert (align == 1 || align == 2);
32989
32990 if (align == 2)
32991 v |= (value << BITS_PER_UNIT);
32992
32993 v = sext_hwi (v, BITS_PER_WORD);
32994 val_exp = GEN_INT (v);
32995 /* Skip if it isn't profitable. */
32996 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32997 align, true, false))
32998 return false;
32999
33000 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33001 mode = (align == 2 ? HImode : QImode);
33002 val_reg = force_reg (SImode, val_exp);
33003 reg = gen_lowpart (mode, val_reg);
33004
33005 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
33006 {
33007 addr = plus_constant (Pmode, dst, i);
33008 mem = adjust_automodify_address (dstbase, mode, addr, i);
33009 emit_move_insn (mem, reg);
33010 }
33011
33012 /* Handle single byte leftover. */
33013 if (i + 1 == length)
33014 {
33015 reg = gen_lowpart (QImode, val_reg);
33016 addr = plus_constant (Pmode, dst, i);
33017 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33018 emit_move_insn (mem, reg);
33019 i++;
33020 }
33021
33022 gcc_assert (i == length);
33023 return true;
33024 }
33025
33026 /* Set a block of memory using plain strd/str/strh/strb instructions,
33027 to permit unaligned copies on processors which support unaligned
33028 semantics for those instructions. We fill the first LENGTH bytes
33029 of the memory area starting from DSTBASE with byte constant VALUE.
33030 ALIGN is the alignment requirement of memory. */
33031 static bool
33032 arm_block_set_aligned_non_vect (rtx dstbase,
33033 unsigned HOST_WIDE_INT length,
33034 unsigned HOST_WIDE_INT value,
33035 unsigned HOST_WIDE_INT align)
33036 {
33037 unsigned int i;
33038 rtx dst, addr, mem;
33039 rtx val_exp, val_reg, reg;
33040 unsigned HOST_WIDE_INT v;
33041 bool use_strd_p;
33042
33043 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
33044 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
33045
33046 v = (value | (value << 8) | (value << 16) | (value << 24));
33047 if (length < UNITS_PER_WORD)
33048 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
33049
33050 if (use_strd_p)
33051 v |= (v << BITS_PER_WORD);
33052 else
33053 v = sext_hwi (v, BITS_PER_WORD);
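/* E.g. (illustration) VALUE == 0xab yields 0xabababab for word stores,
   or 0xabababababababab when strd double-word stores are used.  */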
33054
33055 val_exp = GEN_INT (v);
33056 /* Skip if it isn't profitable. */
33057 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33058 align, false, use_strd_p))
33059 {
33060 if (!use_strd_p)
33061 return false;
33062
33063 /* Try without strd. */
33064 v = (v >> BITS_PER_WORD);
33065 v = sext_hwi (v, BITS_PER_WORD);
33066 val_exp = GEN_INT (v);
33067 use_strd_p = false;
33068 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33069 align, false, use_strd_p))
33070 return false;
33071 }
33072
33073 i = 0;
33074 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33075 /* Handle double words using strd if possible. */
33076 if (use_strd_p)
33077 {
33078 val_reg = force_reg (DImode, val_exp);
33079 reg = val_reg;
33080 for (; (i + 8 <= length); i += 8)
33081 {
33082 addr = plus_constant (Pmode, dst, i);
33083 mem = adjust_automodify_address (dstbase, DImode, addr, i);
33084 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
33085 emit_move_insn (mem, reg);
33086 else
33087 emit_insn (gen_unaligned_storedi (mem, reg));
33088 }
33089 }
33090 else
33091 val_reg = force_reg (SImode, val_exp);
33092
33093 /* Handle words. */
33094 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
33095 for (; (i + 4 <= length); i += 4)
33096 {
33097 addr = plus_constant (Pmode, dst, i);
33098 mem = adjust_automodify_address (dstbase, SImode, addr, i);
33099 if ((align & 3) == 0)
33100 emit_move_insn (mem, reg);
33101 else
33102 emit_insn (gen_unaligned_storesi (mem, reg));
33103 }
33104
33105 /* Merge last pair of STRH and STRB into a STR if possible. */
33106 if (unaligned_access && i > 0 && (i + 3) == length)
33107 {
33108 addr = plus_constant (Pmode, dst, i - 1);
33109 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33110 /* We are shifting one byte back, set the alignment accordingly. */
33111 if ((align & 1) == 0)
33112 set_mem_align (mem, BITS_PER_UNIT);
33113
33114 /* Most likely this is an unaligned access, and we can't tell at
33115 compilation time. */
33116 emit_insn (gen_unaligned_storesi (mem, reg));
33117 return true;
33118 }
33119
33120 /* Handle half word leftover. */
33121 if (i + 2 <= length)
33122 {
33123 reg = gen_lowpart (HImode, val_reg);
33124 addr = plus_constant (Pmode, dst, i);
33125 mem = adjust_automodify_address (dstbase, HImode, addr, i);
33126 if ((align & 1) == 0)
33127 emit_move_insn (mem, reg);
33128 else
33129 emit_insn (gen_unaligned_storehi (mem, reg));
33130
33131 i += 2;
33132 }
33133
33134 /* Handle single byte leftover. */
33135 if (i + 1 == length)
33136 {
33137 reg = gen_lowpart (QImode, val_reg);
33138 addr = plus_constant (Pmode, dst, i);
33139 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33140 emit_move_insn (mem, reg);
33141 }
33142
33143 return true;
33144 }
33145
33146 /* Set a block of memory using vectorization instructions for both
33147 aligned and unaligned cases. We fill the first LENGTH bytes of
33148 the memory area starting from DSTBASE with byte constant VALUE.
33149 ALIGN is the alignment requirement of memory. */
33150 static bool
33151 arm_block_set_vect (rtx dstbase,
33152 unsigned HOST_WIDE_INT length,
33153 unsigned HOST_WIDE_INT value,
33154 unsigned HOST_WIDE_INT align)
33155 {
33156 /* Check whether we need to use unaligned store instruction. */
33157 if (((align & 3) != 0 || (length & 3) != 0)
33158 /* Check whether unaligned store instruction is available. */
33159 && (!unaligned_access || BYTES_BIG_ENDIAN))
33160 return false;
33161
33162 if ((align & 3) == 0)
33163 return arm_block_set_aligned_vect (dstbase, length, value, align);
33164 else
33165 return arm_block_set_unaligned_vect (dstbase, length, value, align);
33166 }
33167
33168 /* Expand a string store (memset) operation. First we try to do it using
33169 vectorization instructions, then fall back to ARM unaligned access and
33170 double-word stores if profitable. OPERANDS[0] is the destination,
33171 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33172 initialize the memory with, OPERANDS[3] is the known alignment of the
33173 destination. */
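/* For example (a hypothetical call site): clearing a 15-byte, word-aligned
   buffer arrives here with OPERANDS[1] == 15, OPERANDS[2] == 0 and
   OPERANDS[3] == 4, and is handled either by the NEON path or by
   arm_block_set_aligned_non_vect, depending on the tuning.  */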
33174 bool
33175 arm_gen_setmem (rtx *operands)
33176 {
33177 rtx dstbase = operands[0];
33178 unsigned HOST_WIDE_INT length;
33179 unsigned HOST_WIDE_INT value;
33180 unsigned HOST_WIDE_INT align;
33181
33182 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33183 return false;
33184
33185 length = UINTVAL (operands[1]);
33186 if (length > 64)
33187 return false;
33188
33189 value = (UINTVAL (operands[2]) & 0xFF);
33190 align = UINTVAL (operands[3]);
33191 if (TARGET_NEON && length >= 8
33192 && current_tune->string_ops_prefer_neon
33193 && arm_block_set_vect (dstbase, length, value, align))
33194 return true;
33195
33196 if (!unaligned_access && (align & 3) != 0)
33197 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33198
33199 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33200 }
33201
33202
33203 static bool
33204 arm_macro_fusion_p (void)
33205 {
33206 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33207 }
33208
33209 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33210 for MOVW / MOVT macro fusion. */
33211
33212 static bool
33213 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33214 {
33215 /* We are trying to fuse
33216 movw imm / movt imm
33217 instructions as a group that gets scheduled together. */
33218
33219 rtx set_dest = SET_DEST (curr_set);
33220
33221 if (GET_MODE (set_dest) != SImode)
33222 return false;
33223
33224 /* We are trying to match:
33225 prev (movw) == (set (reg r0) (const_int imm16))
33226 curr (movt) == (set (zero_extract (reg r0)
33227 (const_int 16)
33228 (const_int 16))
33229 (const_int imm16_1))
33230 or
33231 prev (movw) == (set (reg r1)
33232 (high (symbol_ref ("SYM"))))
33233 curr (movt) == (set (reg r0)
33234 (lo_sum (reg r1)
33235 (symbol_ref ("SYM")))) */
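/* In assembly terms this is the usual address-materialization pair,
   e.g. (illustrative only):
     movw r0, #:lower16:SYM
     movt r0, #:upper16:SYM  */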
33236
33237 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33238 {
33239 if (CONST_INT_P (SET_SRC (curr_set))
33240 && CONST_INT_P (SET_SRC (prev_set))
33241 && REG_P (XEXP (set_dest, 0))
33242 && REG_P (SET_DEST (prev_set))
33243 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33244 return true;
33245
33246 }
33247 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33248 && REG_P (SET_DEST (curr_set))
33249 && REG_P (SET_DEST (prev_set))
33250 && GET_CODE (SET_SRC (prev_set)) == HIGH
33251 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33252 return true;
33253
33254 return false;
33255 }
33256
33257 static bool
33258 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33259 {
33260 rtx prev_set = single_set (prev);
33261 rtx curr_set = single_set (curr);
33262
33263 if (!prev_set
33264 || !curr_set)
33265 return false;
33266
33267 if (any_condjump_p (curr))
33268 return false;
33269
33270 if (!arm_macro_fusion_p ())
33271 return false;
33272
33273 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33274 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33275 return true;
33276
33277 return false;
33278 }
33279
33280 /* Return true iff the instruction fusion described by OP is enabled. */
33281 bool
33282 arm_fusion_enabled_p (tune_params::fuse_ops op)
33283 {
33284 return current_tune->fusible_ops & op;
33285 }
33286
33287 /* Return TRUE if the return address signing mechanism is enabled. */
33288 bool
33289 arm_current_function_pac_enabled_p (void)
33290 {
33291 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33292 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33293 && !crtl->is_leaf));
33294 }
33295
33296 /* Raise an error if the current target arch is not BTI compatible. */
33297 void aarch_bti_arch_check (void)
33298 {
33299 if (!arm_arch8m_main)
33300 error ("This architecture does not support branch protection instructions");
33301 }
33302
33303 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33304 bool
33305 aarch_bti_enabled (void)
33306 {
33307 return aarch_enable_bti != 0;
33308 }
33309
33310 /* Check if INSN is a BTI J insn. */
33311 bool
33312 aarch_bti_j_insn_p (rtx_insn *insn)
33313 {
33314 if (!insn || !INSN_P (insn))
33315 return false;
33316
33317 rtx pat = PATTERN (insn);
33318 return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33319 }
33320
33321 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33322 bool
33323 aarch_pac_insn_p (rtx x)
33324 {
33325 if (!x || !INSN_P (x))
33326 return false;
33327
33328 rtx pat = PATTERN (x);
33329
33330 if (GET_CODE (pat) == SET)
33331 {
33332 rtx tmp = XEXP (pat, 1);
33333 if (tmp
33334 && ((GET_CODE (tmp) == UNSPEC
33335 && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33336 || (GET_CODE (tmp) == UNSPEC_VOLATILE
33337 && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33338 return true;
33339 }
33340
33341 return false;
33342 }
33343
33344 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33345 For Arm, both of these map to a simple BTI instruction. */
33346
33347 rtx
33348 aarch_gen_bti_c (void)
33349 {
33350 return gen_bti_nop ();
33351 }
33352
33353 rtx
33354 aarch_gen_bti_j (void)
33355 {
33356 return gen_bti_nop ();
33357 }
33358
33359 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33360 scheduled for speculative execution. Reject the long-running division
33361 and square-root instructions. */
33362
33363 static bool
33364 arm_sched_can_speculate_insn (rtx_insn *insn)
33365 {
33366 switch (get_attr_type (insn))
33367 {
33368 case TYPE_SDIV:
33369 case TYPE_UDIV:
33370 case TYPE_FDIVS:
33371 case TYPE_FDIVD:
33372 case TYPE_FSQRTS:
33373 case TYPE_FSQRTD:
33374 case TYPE_NEON_FP_SQRT_S:
33375 case TYPE_NEON_FP_SQRT_D:
33376 case TYPE_NEON_FP_SQRT_S_Q:
33377 case TYPE_NEON_FP_SQRT_D_Q:
33378 case TYPE_NEON_FP_DIV_S:
33379 case TYPE_NEON_FP_DIV_D:
33380 case TYPE_NEON_FP_DIV_S_Q:
33381 case TYPE_NEON_FP_DIV_D_Q:
33382 return false;
33383 default:
33384 return true;
33385 }
33386 }
33387
33388 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33389
33390 static unsigned HOST_WIDE_INT
33391 arm_asan_shadow_offset (void)
33392 {
33393 return HOST_WIDE_INT_1U << 29;
33394 }
33395
33396
33397 /* This is a temporary fix for PR60655. Ideally we need
33398 to handle most of these cases in the generic part but
33399 currently we reject minus (..) (sym_ref). We try to
33400 ameliorate the case with minus (sym_ref1) (sym_ref2)
33401 where they are in the same section. */
33402
33403 static bool
33404 arm_const_not_ok_for_debug_p (rtx p)
33405 {
33406 tree decl_op0 = NULL;
33407 tree decl_op1 = NULL;
33408
33409 if (GET_CODE (p) == UNSPEC)
33410 return true;
33411 if (GET_CODE (p) == MINUS)
33412 {
33413 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33414 {
33415 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33416 if (decl_op1
33417 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33418 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33419 {
33420 if ((VAR_P (decl_op1)
33421 || TREE_CODE (decl_op1) == CONST_DECL)
33422 && (VAR_P (decl_op0)
33423 || TREE_CODE (decl_op0) == CONST_DECL))
33424 return (get_variable_section (decl_op1, false)
33425 != get_variable_section (decl_op0, false));
33426
33427 if (TREE_CODE (decl_op1) == LABEL_DECL
33428 && TREE_CODE (decl_op0) == LABEL_DECL)
33429 return (DECL_CONTEXT (decl_op1)
33430 != DECL_CONTEXT (decl_op0));
33431 }
33432
33433 return true;
33434 }
33435 }
33436
33437 return false;
33438 }
33439
33440 /* Return TRUE if X is a reference to a value in a constant pool. */
33441 extern bool
33442 arm_is_constant_pool_ref (rtx x)
33443 {
33444 return (MEM_P (x)
33445 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33446 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33447 }
33448
33449 /* Remember the last target of arm_set_current_function. */
33450 static GTY(()) tree arm_previous_fndecl;
33451
33452 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33453
33454 void
33455 save_restore_target_globals (tree new_tree)
33456 {
33457 /* If we have a previous state, use it. */
33458 if (TREE_TARGET_GLOBALS (new_tree))
33459 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33460 else if (new_tree == target_option_default_node)
33461 restore_target_globals (&default_target_globals);
33462 else
33463 {
33464 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33465 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33466 }
33467
33468 arm_option_params_internal ();
33469 }
33470
33471 /* Invalidate arm_previous_fndecl. */
33472
33473 void
33474 arm_reset_previous_fndecl (void)
33475 {
33476 arm_previous_fndecl = NULL_TREE;
33477 }
33478
33479 /* Establish appropriate back-end context for processing the function
33480 FNDECL. The argument might be NULL to indicate processing at top
33481 level, outside of any function scope. */
33482
33483 static void
33484 arm_set_current_function (tree fndecl)
33485 {
33486 if (!fndecl || fndecl == arm_previous_fndecl)
33487 return;
33488
33489 tree old_tree = (arm_previous_fndecl
33490 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33491 : NULL_TREE);
33492
33493 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33494
33495 /* If current function has no attributes but previous one did,
33496 use the default node. */
33497 if (! new_tree && old_tree)
33498 new_tree = target_option_default_node;
33499
33500 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
33501 the default have been handled by save_restore_target_globals from
33502 arm_pragma_target_parse. */
33503 if (old_tree == new_tree)
33504 return;
33505
33506 arm_previous_fndecl = fndecl;
33507
33508 /* First set the target options. */
33509 cl_target_option_restore (&global_options, &global_options_set,
33510 TREE_TARGET_OPTION (new_tree));
33511
33512 save_restore_target_globals (new_tree);
33513
33514 arm_override_options_after_change_1 (&global_options, &global_options_set);
33515 }
33516
33517 /* Implement TARGET_OPTION_PRINT. */
33518
33519 static void
33520 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33521 {
33522 int flags = ptr->x_target_flags;
33523 const char *fpu_name;
33524
33525 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33526 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33527
33528 fprintf (file, "%*sselected isa %s\n", indent, "",
33529 TARGET_THUMB2_P (flags) ? "thumb2" :
33530 TARGET_THUMB_P (flags) ? "thumb1" :
33531 "arm");
33532
33533 if (ptr->x_arm_arch_string)
33534 fprintf (file, "%*sselected architecture %s\n", indent, "",
33535 ptr->x_arm_arch_string);
33536
33537 if (ptr->x_arm_cpu_string)
33538 fprintf (file, "%*sselected CPU %s\n", indent, "",
33539 ptr->x_arm_cpu_string);
33540
33541 if (ptr->x_arm_tune_string)
33542 fprintf (file, "%*sselected tune %s\n", indent, "",
33543 ptr->x_arm_tune_string);
33544
33545 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33546 }
33547
33548 /* Hook to determine if one function can safely inline another. */
33549
33550 static bool
33551 arm_can_inline_p (tree caller, tree callee)
33552 {
33553 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33554 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33555 bool can_inline = true;
33556
33557 struct cl_target_option *caller_opts
33558 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33559 : target_option_default_node);
33560
33561 struct cl_target_option *callee_opts
33562 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33563 : target_option_default_node);
33564
33565 if (callee_opts == caller_opts)
33566 return true;
33567
33568 /* Callee's ISA features should be a subset of the caller's. */
33569 struct arm_build_target caller_target;
33570 struct arm_build_target callee_target;
33571 caller_target.isa = sbitmap_alloc (isa_num_bits);
33572 callee_target.isa = sbitmap_alloc (isa_num_bits);
33573
33574 arm_configure_build_target (&caller_target, caller_opts, false);
33575 arm_configure_build_target (&callee_target, callee_opts, false);
33576 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33577 can_inline = false;
33578
33579 sbitmap_free (caller_target.isa);
33580 sbitmap_free (callee_target.isa);
33581
33582 /* OK to inline between different modes.
33583 Functions with mode-specific instructions, e.g. using asm,
33584 must be explicitly protected with noinline. */
33585 return can_inline;
33586 }
33587
33588 /* Hook to fix function's alignment affected by target attribute. */
33589
33590 static void
33591 arm_relayout_function (tree fndecl)
33592 {
33593 if (DECL_USER_ALIGN (fndecl))
33594 return;
33595
33596 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33597
33598 if (!callee_tree)
33599 callee_tree = target_option_default_node;
33600
33601 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33602 SET_DECL_ALIGN
33603 (fndecl,
33604 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33605 }
33606
33607 /* Inner function to process attribute((target(...))): take an argument and
33608 set the current options from it. If we have a list, recursively
33609 go over the list. */
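/* For example (an illustrative attribute string),
   __attribute__((target("arch=armv7-a,thumb"))) is split at the commas
   below, yielding one "arch=" token and one "thumb" token.  */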
33610
33611 static bool
33612 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33613 {
33614 if (TREE_CODE (args) == TREE_LIST)
33615 {
33616 bool ret = true;
33617
33618 for (; args; args = TREE_CHAIN (args))
33619 if (TREE_VALUE (args)
33620 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33621 ret = false;
33622 return ret;
33623 }
33624
33625 else if (TREE_CODE (args) != STRING_CST)
33626 {
33627 error ("attribute %<target%> argument not a string");
33628 return false;
33629 }
33630
33631 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33632 char *q;
33633
33634 while ((q = strtok (argstr, ",")) != NULL)
33635 {
33636 argstr = NULL;
33637 if (!strcmp (q, "thumb"))
33638 {
33639 opts->x_target_flags |= MASK_THUMB;
33640 if (TARGET_FDPIC && !arm_arch_thumb2)
33641 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33642 }
33643
33644 else if (!strcmp (q, "arm"))
33645 opts->x_target_flags &= ~MASK_THUMB;
33646
33647 else if (!strcmp (q, "general-regs-only"))
33648 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33649
33650 else if (startswith (q, "fpu="))
33651 {
33652 int fpu_index;
33653 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33654 &fpu_index, CL_TARGET))
33655 {
33656 error ("invalid fpu for target attribute or pragma %qs", q);
33657 return false;
33658 }
33659 if (fpu_index == TARGET_FPU_auto)
33660 {
33661 /* This doesn't really make sense until we support
33662 general dynamic selection of the architecture and all
33663 sub-features. */
33664 sorry ("auto fpu selection not currently permitted here");
33665 return false;
33666 }
33667 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33668 }
33669 else if (startswith (q, "arch="))
33670 {
33671 char *arch = q + 5;
33672 const arch_option *arm_selected_arch
33673 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33674
33675 if (!arm_selected_arch)
33676 {
33677 error ("invalid architecture for target attribute or pragma %qs",
33678 q);
33679 return false;
33680 }
33681
33682 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33683 }
33684 else if (q[0] == '+')
33685 {
33686 opts->x_arm_arch_string
33687 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33688 }
33689 else
33690 {
33691 error ("unknown target attribute or pragma %qs", q);
33692 return false;
33693 }
33694 }
33695
33696 return true;
33697 }
33698
33699 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33700
33701 tree
33702 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33703 struct gcc_options *opts_set)
33704 {
33705 struct cl_target_option cl_opts;
33706
33707 if (!arm_valid_target_attribute_rec (args, opts))
33708 return NULL_TREE;
33709
33710 cl_target_option_save (&cl_opts, opts, opts_set);
33711 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33712 arm_option_check_internal (opts);
33713 /* Do any overrides, such as global options arch=xxx.
33714 We do this since arm_active_target was overridden. */
33715 arm_option_reconfigure_globals ();
33716 arm_options_perform_arch_sanity_checks ();
33717 arm_option_override_internal (opts, opts_set);
33718
33719 return build_target_option_node (opts, opts_set);
33720 }
33721
33722 static void
33723 add_attribute (const char * mode, tree *attributes)
33724 {
33725 size_t len = strlen (mode);
33726 tree value = build_string (len, mode);
33727
33728 TREE_TYPE (value) = build_array_type (char_type_node,
33729 build_index_type (size_int (len)));
33730
33731 *attributes = tree_cons (get_identifier ("target"),
33732 build_tree_list (NULL_TREE, value),
33733 *attributes);
33734 }
33735
33736 /* For testing. Insert thumb or arm modes alternately on functions. */
33737
33738 static void
33739 arm_insert_attributes (tree fndecl, tree * attributes)
33740 {
33741 const char *mode;
33742
33743 if (! TARGET_FLIP_THUMB)
33744 return;
33745
33746 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33747 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33748 return;
33749
33750 /* Nested definitions must inherit mode. */
33751 if (current_function_decl)
33752 {
33753 mode = TARGET_THUMB ? "thumb" : "arm";
33754 add_attribute (mode, attributes);
33755 return;
33756 }
33757
33758 /* If there is already a setting don't change it. */
33759 if (lookup_attribute ("target", *attributes) != NULL)
33760 return;
33761
33762 mode = thumb_flipper ? "thumb" : "arm";
33763 add_attribute (mode, attributes);
33764
33765 thumb_flipper = !thumb_flipper;
33766 }
33767
33768 /* Hook to validate attribute((target("string"))). */
33769
33770 static bool
33771 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33772 tree args, int ARG_UNUSED (flags))
33773 {
33774 bool ret = true;
33775 struct gcc_options func_options, func_options_set;
33776 tree cur_tree, new_optimize;
33777 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33778
33779 /* Get the optimization options of the current function. */
33780 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33781
33782 /* If the function changed the optimization levels as well as setting target
33783 options, start with the optimizations specified. */
33784 if (!func_optimize)
33785 func_optimize = optimization_default_node;
33786
33787 /* Init func_options. */
33788 memset (&func_options, 0, sizeof (func_options));
33789 init_options_struct (&func_options, NULL);
33790 lang_hooks.init_options_struct (&func_options);
33791 memset (&func_options_set, 0, sizeof (func_options_set));
33792
33793 /* Initialize func_options to the defaults. */
33794 cl_optimization_restore (&func_options, &func_options_set,
33795 TREE_OPTIMIZATION (func_optimize));
33796
33797 cl_target_option_restore (&func_options, &func_options_set,
33798 TREE_TARGET_OPTION (target_option_default_node));
33799
33800 /* Set func_options flags with new target mode. */
33801 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33802 &func_options_set);
33803
33804 if (cur_tree == NULL_TREE)
33805 ret = false;
33806
33807 new_optimize = build_optimization_node (&func_options, &func_options_set);
33808
33809 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33810
33811 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33812
33813 return ret;
33814 }
33815
33816 /* Match an ISA feature bitmap to a named FPU. We always use the
33817 first entry that exactly matches the feature set, so that we
33818 effectively canonicalize the FPU name for the assembler. */
33819 static const char*
33820 arm_identify_fpu_from_isa (sbitmap isa)
33821 {
33822 auto_sbitmap fpubits (isa_num_bits);
33823 auto_sbitmap cand_fpubits (isa_num_bits);
33824
33825 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33826
33827 /* If there are no ISA feature bits relating to the FPU, we must be
33828 doing soft-float. */
33829 if (bitmap_empty_p (fpubits))
33830 return "softvfp";
33831
33832 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33833 {
33834 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33835 if (bitmap_equal_p (fpubits, cand_fpubits))
33836 return all_fpus[i].name;
33837 }
33838 /* We must find an entry, or things have gone wrong. */
33839 gcc_unreachable ();
33840 }
33841
33842 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33843 by the function FNDECL. */
33844 void
33845 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33846 {
33847 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33848
33849 struct cl_target_option *targ_options;
33850 if (target_parts)
33851 targ_options = TREE_TARGET_OPTION (target_parts);
33852 else
33853 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33854 gcc_assert (targ_options);
33855
33856 arm_print_asm_arch_directives (stream, targ_options);
33857
33858 fprintf (stream, "\t.syntax unified\n");
33859
33860 if (TARGET_THUMB)
33861 {
33862 if (is_called_in_ARM_mode (decl)
33863 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33864 && cfun->is_thunk))
33865 fprintf (stream, "\t.code 32\n");
33866 else if (TARGET_THUMB1)
33867 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33868 else
33869 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33870 }
33871 else
33872 fprintf (stream, "\t.arm\n");
33873
33874 if (TARGET_POKE_FUNCTION_NAME)
33875 arm_poke_function_name (stream, (const char *) name);
33876 }
33877
33878 /* If MEM is in the form of [base+offset], extract the two parts
33879 of the address and store them in BASE and OFFSET; otherwise return
33880 false after clearing BASE and OFFSET. */
33881
33882 static bool
33883 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33884 {
33885 rtx addr;
33886
33887 gcc_assert (MEM_P (mem));
33888
33889 addr = XEXP (mem, 0);
33890
33891 /* Strip off const from addresses like (const (addr)). */
33892 if (GET_CODE (addr) == CONST)
33893 addr = XEXP (addr, 0);
33894
33895 if (REG_P (addr))
33896 {
33897 *base = addr;
33898 *offset = const0_rtx;
33899 return true;
33900 }
33901
33902 if (GET_CODE (addr) == PLUS
33903 && GET_CODE (XEXP (addr, 0)) == REG
33904 && CONST_INT_P (XEXP (addr, 1)))
33905 {
33906 *base = XEXP (addr, 0);
33907 *offset = XEXP (addr, 1);
33908 return true;
33909 }
33910
33911 *base = NULL_RTX;
33912 *offset = NULL_RTX;
33913
33914 return false;
33915 }
33916
33917 /* If INSN is a load or store with an address in the form [base+offset],
33918 extract the two parts into BASE and OFFSET. IS_LOAD is set
33919 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33920 otherwise return FALSE. */
33921
33922 static bool
33923 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33924 {
33925 rtx x, dest, src;
33926
33927 gcc_assert (INSN_P (insn));
33928 x = PATTERN (insn);
33929 if (GET_CODE (x) != SET)
33930 return false;
33931
33932 src = SET_SRC (x);
33933 dest = SET_DEST (x);
33934 if (REG_P (src) && MEM_P (dest))
33935 {
33936 *is_load = false;
33937 extract_base_offset_in_addr (dest, base, offset);
33938 }
33939 else if (MEM_P (src) && REG_P (dest))
33940 {
33941 *is_load = true;
33942 extract_base_offset_in_addr (src, base, offset);
33943 }
33944 else
33945 return false;
33946
33947 return (*base != NULL_RTX && *offset != NULL_RTX);
33948 }
33949
33950 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33951
33952    Currently we only support fusing ldr and str instructions, so FUSION_PRI
33953    and PRI are only calculated for these instructions.  For other instructions,
33954    FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
33955    instruction fusion can be supported by returning different priorities.
33956
33957 It's important that irrelevant instructions get the largest FUSION_PRI. */
33958
33959 static void
33960 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33961 int *fusion_pri, int *pri)
33962 {
33963 int tmp, off_val;
33964 bool is_load;
33965 rtx base, offset;
33966
33967 gcc_assert (INSN_P (insn));
33968
33969 tmp = max_pri - 1;
33970 if (!fusion_load_store (insn, &base, &offset, &is_load))
33971 {
33972 *pri = tmp;
33973 *fusion_pri = tmp;
33974 return;
33975 }
33976
33977 /* Load goes first. */
33978 if (is_load)
33979 *fusion_pri = tmp - 1;
33980 else
33981 *fusion_pri = tmp - 2;
33982
33983 tmp /= 2;
33984
33985 /* INSN with smaller base register goes first. */
33986 tmp -= ((REGNO (base) & 0xff) << 20);
33987
33988 /* INSN with smaller offset goes first. */
33989 off_val = (int)(INTVAL (offset));
33990 if (off_val >= 0)
33991 tmp -= (off_val & 0xfffff);
33992 else
33993 tmp += ((- off_val) & 0xfffff);
33994
33995 *pri = tmp;
33996 return;
33997 }
33998
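/* To sketch the calculation above: two loads from the same base register,
   say ldr r2, [r1, #4] and ldr r3, [r1, #8], receive the same FUSION_PRI
   (loads use tmp - 1), and the access with the smaller offset gets the
   larger PRI, which encourages the scheduler to keep the pair adjacent and
   in increasing-offset order so that it can later be combined (e.g. into
   an ldrd).  */
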
33999
34000 /* Construct and return a PARALLEL RTX vector with elements numbering the
34001 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
34002 the vector - from the perspective of the architecture. This does not
34003 line up with GCC's perspective on lane numbers, so we end up with
34004    different masks depending on our target endianness.  The diagram
34005    below may help.  We must draw the distinction when building masks
34006    which select one half of the vector.  An instruction selecting
34007    architectural low-lanes for a big-endian target must be described using
34008 a mask selecting GCC high-lanes.
34009
34010 Big-Endian Little-Endian
34011
34012 GCC 0 1 2 3 3 2 1 0
34013 | x | x | x | x | | x | x | x | x |
34014 Architecture 3 2 1 0 3 2 1 0
34015
34016 Low Mask: { 2, 3 } { 0, 1 }
34017 High Mask: { 0, 1 } { 2, 3 }
34018 */
34019
34020 rtx
34021 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
34022 {
34023 int nunits = GET_MODE_NUNITS (mode);
34024 rtvec v = rtvec_alloc (nunits / 2);
34025 int high_base = nunits / 2;
34026 int low_base = 0;
34027 int base;
34028 rtx t1;
34029 int i;
34030
34031 if (BYTES_BIG_ENDIAN)
34032 base = high ? low_base : high_base;
34033 else
34034 base = high ? high_base : low_base;
34035
34036 for (i = 0; i < nunits / 2; i++)
34037 RTVEC_ELT (v, i) = GEN_INT (base + i);
34038
34039 t1 = gen_rtx_PARALLEL (mode, v);
34040 return t1;
34041 }
34042
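/* For example, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   "High Mask" row of the diagram above.  */
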
34043 /* Check OP for validity as a PARALLEL RTX vector with elements
34044    numbering the lanes of either the high (HIGH == TRUE) or low
34045    (HIGH == FALSE) half, from the perspective of the architecture.  See
34046    the diagram above arm_simd_vect_par_cnst_half for more details.  */
34047
34048 bool
34049 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
34050 bool high)
34051 {
34052 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
34053 HOST_WIDE_INT count_op = XVECLEN (op, 0);
34054 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
34055 int i = 0;
34056
34057 if (!VECTOR_MODE_P (mode))
34058 return false;
34059
34060 if (count_op != count_ideal)
34061 return false;
34062
34063 for (i = 0; i < count_ideal; i++)
34064 {
34065 rtx elt_op = XVECEXP (op, 0, i);
34066 rtx elt_ideal = XVECEXP (ideal, 0, i);
34067
34068 if (!CONST_INT_P (elt_op)
34069 || INTVAL (elt_ideal) != INTVAL (elt_op))
34070 return false;
34071 }
34072 return true;
34073 }
34074
34075 /* We can output an mi_thunk for all cases except for a non-zero
34076    vcall_offset in Thumb1.  */
34077 static bool
34078 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
34079 const_tree)
34080 {
34081   /* For now, we punt and do not handle this for TARGET_THUMB1.  */
34082 if (vcall_offset && TARGET_THUMB1)
34083 return false;
34084
34085 /* Otherwise ok. */
34086 return true;
34087 }
34088
34089 /* Generate RTL for a conditional branch with rtx comparison CODE in
34090 mode CC_MODE. The destination of the unlikely conditional branch
34091 is LABEL_REF. */
34092
34093 void
34094 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
34095 rtx label_ref)
34096 {
34097 rtx x;
34098 x = gen_rtx_fmt_ee (code, VOIDmode,
34099 gen_rtx_REG (cc_mode, CC_REGNUM),
34100 const0_rtx);
34101
34102 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
34103 gen_rtx_LABEL_REF (VOIDmode, label_ref),
34104 pc_rtx);
34105 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
34106 }
34107
34108 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34109
34110 For pure-code sections there is no letter code for this attribute, so
34111 output all the section flags numerically when this is needed. */
34112
34113 static bool
34114 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
34115 {
34116
34117 if (flags & SECTION_ARM_PURECODE)
34118 {
34119 *num = 0x20000000;
34120
34121 if (!(flags & SECTION_DEBUG))
34122 *num |= 0x2;
34123 if (flags & SECTION_EXCLUDE)
34124 *num |= 0x80000000;
34125 if (flags & SECTION_WRITE)
34126 *num |= 0x1;
34127 if (flags & SECTION_CODE)
34128 *num |= 0x4;
34129 if (flags & SECTION_MERGE)
34130 *num |= 0x10;
34131 if (flags & SECTION_STRINGS)
34132 *num |= 0x20;
34133 if (flags & SECTION_TLS)
34134 *num |= 0x400;
34135 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
34136 *num |= 0x200;
34137
34138 return true;
34139 }
34140
34141 return false;
34142 }
34143
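/* For example, a non-writable, non-debug executable pure-code section
   (SECTION_CODE set) is encoded as 0x20000000 | 0x2 | 0x4 = 0x20000006,
   i.e. SHF_ARM_PURECODE + SHF_ALLOC + SHF_EXECINSTR in ELF terms.  */
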
34144 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34145
34146 If pure-code is passed as an option, make sure all functions are in
34147 sections that have the SHF_ARM_PURECODE attribute. */
34148
34149 static section *
34150 arm_function_section (tree decl, enum node_frequency freq,
34151 bool startup, bool exit)
34152 {
34153 const char * section_name;
34154 section * sec;
34155
34156 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
34157 return default_function_section (decl, freq, startup, exit);
34158
34159 if (!target_pure_code)
34160 return default_function_section (decl, freq, startup, exit);
34161
34162
34163 section_name = DECL_SECTION_NAME (decl);
34164
34165 /* If a function is not in a named section then it falls under the 'default'
34166 text section, also known as '.text'. We can preserve previous behavior as
34167 the default text section already has the SHF_ARM_PURECODE section
34168 attribute. */
34169 if (!section_name)
34170 {
34171 section *default_sec = default_function_section (decl, freq, startup,
34172 exit);
34173
34174 /* If default_sec is not null, then it must be a special section like for
34175 example .text.startup. We set the pure-code attribute and return the
34176 same section to preserve existing behavior. */
34177 if (default_sec)
34178 default_sec->common.flags |= SECTION_ARM_PURECODE;
34179 return default_sec;
34180 }
34181
34182 /* Otherwise look whether a section has already been created with
34183 'section_name'. */
34184 sec = get_named_section (decl, section_name, 0);
34185 if (!sec)
34186 /* If that is not the case passing NULL as the section's name to
34187 'get_named_section' will create a section with the declaration's
34188 section name. */
34189 sec = get_named_section (decl, NULL, 0);
34190
34191 /* Set the SHF_ARM_PURECODE attribute. */
34192 sec->common.flags |= SECTION_ARM_PURECODE;
34193
34194 return sec;
34195 }
34196
34197 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
34198
34199    If DECL is a function declaration and pure-code is passed as an option
34200    then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
34201    section's name and RELOC indicates whether the declaration's initializer may
34202    contain runtime relocations.  */
34203
34204 static unsigned int
34205 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
34206 {
34207 unsigned int flags = default_section_type_flags (decl, name, reloc);
34208
34209 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
34210 flags |= SECTION_ARM_PURECODE;
34211
34212 return flags;
34213 }
34214
34215 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34216
34217 static void
34218 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
34219 rtx op0, rtx op1,
34220 rtx *quot_p, rtx *rem_p)
34221 {
34222 if (mode == SImode)
34223 gcc_assert (!TARGET_IDIV);
34224
34225 scalar_int_mode libval_mode
34226 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
34227
34228 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34229 libval_mode, op0, mode, op1, mode);
34230
34231 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34232 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34233 GET_MODE_SIZE (mode));
34234
34235 gcc_assert (quotient);
34236 gcc_assert (remainder);
34237
34238 *quot_p = quotient;
34239 *rem_p = remainder;
34240 }
34241
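/* For example, for MODE == SImode the library call returns its value in
   DImode (the smallest integer mode holding 2 * 32 bits); the quotient is
   then extracted as the subreg at byte offset 0 and the remainder as the
   subreg at byte offset GET_MODE_SIZE (SImode) == 4.  */
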
34242 /* This function checks for the availability of the coprocessor builtin passed
34243    in BUILTIN for the current target.  Returns true if it is available and
34244    false otherwise.  If a BUILTIN is passed for which this function has not
34245    been implemented it will cause an internal compiler error.  */
34246
34247 bool
34248 arm_coproc_builtin_available (enum unspecv builtin)
34249 {
34250 /* None of these builtins are available in Thumb mode if the target only
34251 supports Thumb-1. */
34252 if (TARGET_THUMB1)
34253 return false;
34254
34255 switch (builtin)
34256 {
34257 case VUNSPEC_CDP:
34258 case VUNSPEC_LDC:
34259 case VUNSPEC_LDCL:
34260 case VUNSPEC_STC:
34261 case VUNSPEC_STCL:
34262 case VUNSPEC_MCR:
34263 case VUNSPEC_MRC:
34264 if (arm_arch4)
34265 return true;
34266 break;
34267 case VUNSPEC_CDP2:
34268 case VUNSPEC_LDC2:
34269 case VUNSPEC_LDC2L:
34270 case VUNSPEC_STC2:
34271 case VUNSPEC_STC2L:
34272 case VUNSPEC_MCR2:
34273 case VUNSPEC_MRC2:
34274 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34275 ARMv8-{A,M}. */
34276 if (arm_arch5t)
34277 return true;
34278 break;
34279 case VUNSPEC_MCRR:
34280 case VUNSPEC_MRRC:
34281 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34282 ARMv8-{A,M}. */
34283 if (arm_arch6 || arm_arch5te)
34284 return true;
34285 break;
34286 case VUNSPEC_MCRR2:
34287 case VUNSPEC_MRRC2:
34288 if (arm_arch6)
34289 return true;
34290 break;
34291 default:
34292 gcc_unreachable ();
34293 }
34294 return false;
34295 }
34296
34297 /* This function returns true if OP is a valid memory operand for the ldc and
34298 stc coprocessor instructions and false otherwise. */
34299
34300 bool
34301 arm_coproc_ldc_stc_legitimate_address (rtx op)
34302 {
34303 HOST_WIDE_INT range;
34304 /* Has to be a memory operand. */
34305 if (!MEM_P (op))
34306 return false;
34307
34308 op = XEXP (op, 0);
34309
34310 /* We accept registers. */
34311 if (REG_P (op))
34312 return true;
34313
34314   switch (GET_CODE (op))
34315 {
34316 case PLUS:
34317 {
34318 /* Or registers with an offset. */
34319 if (!REG_P (XEXP (op, 0)))
34320 return false;
34321
34322 op = XEXP (op, 1);
34323
34324 /* The offset must be an immediate though. */
34325 if (!CONST_INT_P (op))
34326 return false;
34327
34328 range = INTVAL (op);
34329
34330 /* Within the range of [-1020,1020]. */
34331 if (!IN_RANGE (range, -1020, 1020))
34332 return false;
34333
34334 /* And a multiple of 4. */
34335 return (range % 4) == 0;
34336 }
34337 case PRE_INC:
34338 case POST_INC:
34339 case PRE_DEC:
34340 case POST_DEC:
34341 return REG_P (XEXP (op, 0));
34342 default:
34343 gcc_unreachable ();
34344 }
34345 return false;
34346 }
34347
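/* For example, (mem (reg r0)) and (mem (plus (reg r0) (const_int -1020)))
   are accepted, whereas an offset of 1022 is rejected (not a multiple of 4)
   and an offset of 1024 is rejected (outside [-1020, 1020]).  */
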
34348 /* Return the diagnostic message string if conversion from FROMTYPE to
34349 TOTYPE is not allowed, NULL otherwise. */
34350
34351 static const char *
34352 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34353 {
34354 if (element_mode (fromtype) != element_mode (totype))
34355 {
34356       /* Do not allow conversions to/from BFmode scalar types.  */
34357 if (TYPE_MODE (fromtype) == BFmode)
34358 return N_("invalid conversion from type %<bfloat16_t%>");
34359 if (TYPE_MODE (totype) == BFmode)
34360 return N_("invalid conversion to type %<bfloat16_t%>");
34361 }
34362
34363 /* Conversion allowed. */
34364 return NULL;
34365 }
34366
34367 /* Return the diagnostic message string if the unary operation OP is
34368 not permitted on TYPE, NULL otherwise. */
34369
34370 static const char *
34371 arm_invalid_unary_op (int op, const_tree type)
34372 {
34373 /* Reject all single-operand operations on BFmode except for &. */
34374 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34375 return N_("operation not permitted on type %<bfloat16_t%>");
34376
34377 /* Operation allowed. */
34378 return NULL;
34379 }
34380
34381 /* Return the diagnostic message string if the binary operation OP is
34382 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34383
34384 static const char *
34385 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34386 const_tree type2)
34387 {
34388 /* Reject all 2-operand operations on BFmode. */
34389 if (element_mode (type1) == BFmode
34390 || element_mode (type2) == BFmode)
34391 return N_("operation not permitted on type %<bfloat16_t%>");
34392
34393 /* Operation allowed. */
34394 return NULL;
34395 }
34396
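/* As a sketch of the effect of the three hooks above (assuming a target and
   options where the __bf16 scalar type is available):

     __bf16 b;
     float f = (float) b;	/* rejected: invalid conversion from bfloat16_t  */
     __bf16 c = b + b;		/* rejected: operation not permitted on bfloat16_t  */

   whereas taking the address of b (&b) remains valid.  */
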
34397 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34398
34399 In VFPv1, VFP registers could only be accessed in the mode they were
34400 set, so subregs would be invalid there. However, we don't support
34401 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34402
34403 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34404 VFP registers in little-endian order. We can't describe that accurately to
34405 GCC, so avoid taking subregs of such values.
34406
34407 The only exception is going from a 128-bit to a 64-bit type. In that
34408 case the data layout happens to be consistent for big-endian, so we
34409 explicitly allow that case. */
34410
34411 static bool
34412 arm_can_change_mode_class (machine_mode from, machine_mode to,
34413 reg_class_t rclass)
34414 {
34415 if (TARGET_BIG_END
34416 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34417 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34418 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34419 && reg_classes_intersect_p (VFP_REGS, rclass))
34420 return false;
34421 return true;
34422 }
34423
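/* For example, on a big-endian target this forbids taking an SImode subreg
   of a DFmode value held in a VFP register (DFmode is wider than a word and
   is stored in little-endian order), but still allows the 128-bit to 64-bit
   case, e.g. viewing a V2DImode value as DImode.  */
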
34424 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34425 strcpy from constants will be faster. */
34426
34427 static HOST_WIDE_INT
34428 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34429 {
34430 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34431 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34432 return MAX (align, BITS_PER_WORD * factor);
34433 return align;
34434 }
34435
34436 /* Emit a speculation barrier on target architectures that do not have
34437 DSB/ISB directly. Such systems probably don't need a barrier
34438 themselves, but if the code is ever run on a later architecture, it
34439 might become a problem. */
34440 void
34441 arm_emit_speculation_barrier_function ()
34442 {
34443 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34444 }
34445
34446 /* Have we recorded an explicit access to the Q bit of APSR?  */
34447 bool
34448 arm_q_bit_access (void)
34449 {
34450 if (cfun && cfun->decl)
34451 return lookup_attribute ("acle qbit",
34452 DECL_ATTRIBUTES (cfun->decl));
34453 return true;
34454 }
34455
34456 /* Have we recorded an explicit access to the GE bits of PSTATE?  */
34457 bool
34458 arm_ge_bits_access (void)
34459 {
34460 if (cfun && cfun->decl)
34461 return lookup_attribute ("acle gebits",
34462 DECL_ATTRIBUTES (cfun->decl));
34463 return true;
34464 }
34465
34466 /* Return NULL if insn INSN is valid within a low-overhead loop.
34467    Otherwise return a string explaining why doloop cannot be applied.  */
34468
34469 static const char *
34470 arm_invalid_within_doloop (const rtx_insn *insn)
34471 {
34472 if (!TARGET_HAVE_LOB)
34473 return default_invalid_within_doloop (insn);
34474
34475 if (CALL_P (insn))
34476 return "Function call in the loop.";
34477
34478 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34479 return "LR is used inside loop.";
34480
34481 return NULL;
34482 }
34483
34484 bool
34485 arm_target_insn_ok_for_lob (rtx insn)
34486 {
34487 basic_block bb = BLOCK_FOR_INSN (insn);
34488   /* Make sure the basic block of the target insn is a simple latch
34489      whose single predecessor and single successor are the body of the loop
34490      itself.  Only simple loops with a single basic block as body are
34491      supported for 'low-overhead loops'; this makes sure that the LE target
34492      is above LE itself in the generated code.  */
34493
34494 return single_succ_p (bb)
34495 && single_pred_p (bb)
34496 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34497 && contains_no_active_insn_p (bb);
34498 }
34499
34500 #if CHECKING_P
34501 namespace selftest {
34502
34503 /* Scan the static data tables generated by parsecpu.awk looking for
34504    potential issues with the data.  At present we primarily check for
34505    inconsistencies in the option extensions (extensions that duplicate
34506    others but aren't marked as aliases).  Furthermore, for correct
34507    canonicalization, later options must never be a subset of an earlier
34508    option.  Any extension should also only specify feature bits and never
34509    an architecture bit; the architecture is inferred from the declaration
34510    of the extension.  */
34511 static void
34512 arm_test_cpu_arch_data (void)
34513 {
34514 const arch_option *arch;
34515 const cpu_option *cpu;
34516 auto_sbitmap target_isa (isa_num_bits);
34517 auto_sbitmap isa1 (isa_num_bits);
34518 auto_sbitmap isa2 (isa_num_bits);
34519
34520 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34521 {
34522 const cpu_arch_extension *ext1, *ext2;
34523
34524 if (arch->common.extensions == NULL)
34525 continue;
34526
34527 arm_initialize_isa (target_isa, arch->common.isa_bits);
34528
34529 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34530 {
34531 if (ext1->alias)
34532 continue;
34533
34534 arm_initialize_isa (isa1, ext1->isa_bits);
34535 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34536 {
34537 if (ext2->alias || ext1->remove != ext2->remove)
34538 continue;
34539
34540 arm_initialize_isa (isa2, ext2->isa_bits);
34541 /* If the option is a subset of the parent option, it doesn't
34542 add anything and so isn't useful. */
34543 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34544
34545 /* If the extension specifies any architectural bits then
34546 disallow it. Extensions should only specify feature bits. */
34547 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34548 }
34549 }
34550 }
34551
34552 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34553 {
34554 const cpu_arch_extension *ext1, *ext2;
34555
34556 if (cpu->common.extensions == NULL)
34557 continue;
34558
34559       arm_initialize_isa (target_isa, cpu->common.isa_bits);
34560
34561 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34562 {
34563 if (ext1->alias)
34564 continue;
34565
34566 arm_initialize_isa (isa1, ext1->isa_bits);
34567 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34568 {
34569 if (ext2->alias || ext1->remove != ext2->remove)
34570 continue;
34571
34572 arm_initialize_isa (isa2, ext2->isa_bits);
34573 /* If the option is a subset of the parent option, it doesn't
34574 add anything and so isn't useful. */
34575 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34576
34577 /* If the extension specifies any architectural bits then
34578 disallow it. Extensions should only specify feature bits. */
34579 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34580 }
34581 }
34582 }
34583 }
34584
34585 /* Scan the static data tables generated by parsecpu.awk looking for
34586 potential issues with the data. Here we check for consistency between the
34587 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
34588 a feature bit that is not defined by any FPU flag. */
34589 static void
34590 arm_test_fpu_data (void)
34591 {
34592 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34593 auto_sbitmap fpubits (isa_num_bits);
34594 auto_sbitmap tmpset (isa_num_bits);
34595
34596 static const enum isa_feature fpu_bitlist_internal[]
34597 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34598 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34599
34600 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34601 {
34602 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34603 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34604 bitmap_clear (isa_all_fpubits_internal);
34605 bitmap_copy (isa_all_fpubits_internal, tmpset);
34606 }
34607
34608 if (!bitmap_empty_p (isa_all_fpubits_internal))
34609 {
34610       fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34611 " group that are not defined by any FPU.\n"
34612 " Check your arm-cpus.in.\n");
34613 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34614 }
34615 }
34616
34617 static void
34618 arm_run_selftests (void)
34619 {
34620 arm_test_cpu_arch_data ();
34621 arm_test_fpu_data ();
34622 }
34623 } /* Namespace selftest. */
34624
34625 #undef TARGET_RUN_TARGET_SELFTESTS
34626 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34627 #endif /* CHECKING_P */
34628
34629 /* Implement TARGET_STACK_PROTECT_GUARD.  In the case of a
34630    global-variable-based guard, use the default; otherwise
34631    return a null tree.  */
34632 static tree
34633 arm_stack_protect_guard (void)
34634 {
34635 if (arm_stack_protector_guard == SSP_GLOBAL)
34636 return default_stack_protect_guard ();
34637
34638 return NULL_TREE;
34639 }
34640
34641 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34642 Unlike the arm version, we do NOT implement asm flag outputs. */
34643
34644 rtx_insn *
34645 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34646 vec<machine_mode> & /*input_modes*/,
34647 vec<const char *> &constraints,
34648 vec<rtx> &, vec<rtx> & /*clobbers*/,
34649 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34650 {
34651 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34652 if (startswith (constraints[i], "=@cc"))
34653 {
34654 sorry ("%<asm%> flags not supported in thumb1 mode");
34655 break;
34656 }
34657 return NULL;
34658 }
34659
34660 /* Generate code to enable conditional branches in functions over 1 MiB.
34661    Parameters are:
34662    OPERANDS: the operands list of the asm insn (see arm_cond_branch or
34663      arm_cond_branch_reversed).
34664    POS_LABEL: an index into the operands array where operands[pos_label] is
34665      the asm label of the final jump destination.
34666    DEST: a string used to generate the asm label of the intermediate
34667      destination.
34668    BRANCH_FORMAT: a string denoting the intermediate branch format, e.g.
34669      "beq", "bne", etc.  */
34670
34671 const char *
34672 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34673 const char * branch_format)
34674 {
34675 rtx_code_label * tmp_label = gen_label_rtx ();
34676 char label_buf[256];
34677 char buffer[128];
34678   ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34679 			       CODE_LABEL_NUMBER (tmp_label));
34680 const char *label_ptr = arm_strip_name_encoding (label_buf);
34681 rtx dest_label = operands[pos_label];
34682 operands[pos_label] = tmp_label;
34683
34684   snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34685 output_asm_insn (buffer, operands);
34686
34687 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34688 operands[pos_label] = dest_label;
34689 output_asm_insn (buffer, operands);
34690 return "";
34691 }
34692
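/* The output therefore looks roughly like this (label names are
   illustrative; BRANCH_FORMAT here is "beq\t"):

	beq	.Lbcond4
	b	<operands[POS_LABEL]>
   .Lbcond4:

   i.e. a short-range conditional branch around an unconditional branch,
   which is what gives the > 1 MiB reach; the caller supplies the condition
   in BRANCH_FORMAT accordingly.  */
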
34693 /* Return the base register class for a memory access of MODE.  With MVE,
34694    some modes require a low base register (i.e. [Rn] with Rn in LO_REGS).  */
34695 enum reg_class
34696 arm_mode_base_reg_class (machine_mode mode)
34697 {
34698 if (TARGET_HAVE_MVE
34699 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34700 return LO_REGS;
34701
34702 return MODE_BASE_REG_REG_CLASS (mode);
34703 }
34704
34705 struct gcc_target targetm = TARGET_INITIALIZER;
34706
34707 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34708
34709 opt_machine_mode
34710 arm_get_mask_mode (machine_mode mode)
34711 {
34712 if (TARGET_HAVE_MVE)
34713 return arm_mode_to_pred_mode (mode);
34714
34715 return default_get_mask_mode (mode);
34716 }
34717
34718 /* Output assembly to read the thread pointer from the appropriate TPIDR
34719    register into DST.  If PRED_P, also emit the %? that can be used to
34720    output the predication code.  */
34721
34722 const char *
34723 arm_output_load_tpidr (rtx dst, bool pred_p)
34724 {
34725 char buf[64];
34726 int tpidr_coproc_num = -1;
34727 switch (target_thread_pointer)
34728 {
34729 case TP_TPIDRURW:
34730 tpidr_coproc_num = 2;
34731 break;
34732 case TP_TPIDRURO:
34733 tpidr_coproc_num = 3;
34734 break;
34735 case TP_TPIDRPRW:
34736 tpidr_coproc_num = 4;
34737 break;
34738 default:
34739 gcc_unreachable ();
34740 }
34741 snprintf (buf, sizeof (buf),
34742 "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
34743 pred_p ? "%?" : "", tpidr_coproc_num);
34744 output_asm_insn (buf, &dst);
34745 return "";
34746 }
34747
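/* For example, when the thread pointer lives in TPIDRURO (TP_TPIDRURO) the
   instruction printed is "mrc p15, 0, %0, c13, c0, 3 @ load_tp_hard", and
   when PRED_P the "%?" after "mrc" lets any predication code be emitted.  */
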
34748 #include "gt-arm.h"